Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
T
toarstats
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
esde
toar-public
toarstats
Commits
cd1d0537
Commit
cd1d0537
authored
2 years ago
by
Niklas Selke
Browse files
Options
Downloads
Patches
Plain Diff
Fixed the tests.
parent
c0437e65
No related branches found
No related tags found
1 merge request
!5
Modified the test for the 'value_count' statistic. Now all available sampling...
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
tests/test_input_checks.py
+26
-16
26 additions, 16 deletions
tests/test_input_checks.py
tests/test_interface.py
+1
-1
1 addition, 1 deletion
tests/test_interface.py
tests/test_stats.py
+37
-33
37 additions, 33 deletions
tests/test_stats.py
tests/test_toarstats.py
+11
-5
11 additions, 5 deletions
tests/test_toarstats.py
with
75 additions
and
55 deletions
tests/test_input_checks.py
+
26
−
16
View file @
cd1d0537
...
@@ -4,12 +4,25 @@ import numpy as np
...
@@ -4,12 +4,25 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
pytest
import
pytest
from
toarstats.input_checks
import
(
from
toarstats.metrics.input_checks
import
(
check_crops
,
check_data
,
check_data_capture
,
check_index
,
check_crops
,
check_input_parameters
,
check_metadata
,
check_required_parameters
,
check_data
,
check_sampling
,
check_seasons
,
check_station_climatic_zone
,
check_data_capture
,
check_station_latitude
,
check_station_longitude
,
check_statistics
,
check_index
,
check_values
,
from_pandas
,
is_correct_type
,
is_empty
,
is_in_range
,
check_input_parameters
,
check_metadata
,
check_required_parameters
,
check_sampling
,
check_seasons
,
check_station_climatic_zone
,
check_station_latitude
,
check_station_longitude
,
check_statistics
,
check_values
,
from_pandas
,
is_correct_type
,
is_empty
,
is_in_range
,
to_collection
to_collection
)
)
...
@@ -260,16 +273,14 @@ class TestCheckData:
...
@@ -260,16 +273,14 @@ class TestCheckData:
start
=
"
2000
"
,
periods
=
3
,
freq
=
"
H
"
,
tz
=
"
Europe/Berlin
"
start
=
"
2000
"
,
periods
=
3
,
freq
=
"
H
"
,
tz
=
"
Europe/Berlin
"
)
)
values
=
np
.
array
([
5.
,
6.1
,
7
])
values
=
np
.
array
([
5.
,
6.1
,
7
])
expected
=
pd
.
DataFrame
(
expected
=
pd
.
Series
(
values
,
index
=
index
.
tz_localize
(
None
))
{
"
values
"
:
values
},
index
=
index
.
tz_localize
(
None
)
pd
.
testing
.
assert_series_equal
(
)
pd
.
testing
.
assert_frame_equal
(
check_data
(
pd
.
DataFrame
(
values
,
index
=
index
),
None
,
None
),
expected
check_data
(
pd
.
DataFrame
(
values
,
index
=
index
),
None
,
None
),
expected
)
)
pd
.
testing
.
assert_
frame
_equal
(
pd
.
testing
.
assert_
series
_equal
(
check_data
(
pd
.
Series
(
values
,
index
=
index
),
None
,
None
),
expected
check_data
(
pd
.
Series
(
values
,
index
=
index
),
None
,
None
),
expected
)
)
pd
.
testing
.
assert_
frame
_equal
(
pd
.
testing
.
assert_
series
_equal
(
check_data
(
None
,
index
,
values
),
expected
check_data
(
None
,
index
,
values
),
expected
)
)
...
@@ -593,9 +604,8 @@ class TestCheckInputParameters:
...
@@ -593,9 +604,8 @@ class TestCheckInputParameters:
assert
error_msg
==
str
(
excinfo
.
value
)
assert
error_msg
==
str
(
excinfo
.
value
)
def
test_check_input_parameters_correct_input
(
self
):
def
test_check_input_parameters_correct_input
(
self
):
data
=
pd
.
DataFrame
(
data
=
pd
.
Series
(
{
"
values
"
:
range
(
10
)},
range
(
10
),
index
=
pd
.
date_range
(
start
=
"
2000
"
,
periods
=
10
,
freq
=
"
H
"
)
index
=
pd
.
date_range
(
start
=
"
2000
"
,
periods
=
10
,
freq
=
"
H
"
)
)
)
expected_required
=
namedtuple
(
expected_required
=
namedtuple
(
"
Required
"
,
"
Required
"
,
...
@@ -604,7 +614,7 @@ class TestCheckInputParameters:
...
@@ -604,7 +614,7 @@ class TestCheckInputParameters:
result
=
check_input_parameters
(
"
annual
"
,
"
median
"
,
data
,
*
[
None
]
*
9
)
result
=
check_input_parameters
(
"
annual
"
,
"
median
"
,
data
,
*
[
None
]
*
9
)
assert
result
.
sampling
==
"
annual
"
assert
result
.
sampling
==
"
annual
"
assert
result
.
statistics
==
[
"
median
"
]
assert
result
.
statistics
==
[
"
median
"
]
pd
.
testing
.
assert_
frame
_equal
(
result
.
data
,
data
)
pd
.
testing
.
assert_
series
_equal
(
result
.
data
,
data
)
assert
result
.
metadata
is
None
assert
result
.
metadata
is
None
assert
result
.
seasons
is
None
assert
result
.
seasons
is
None
assert
result
.
crops
is
None
assert
result
.
crops
is
None
...
...
This diff is collapsed.
Click to expand it.
tests/test_interface.py
+
1
−
1
View file @
cd1d0537
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
toarstats.interface
import
calculate_statistics
from
toarstats.
metrics.
interface
import
calculate_statistics
class
TestCalculateStatistics
:
class
TestCalculateStatistics
:
...
...
This diff is collapsed.
Click to expand it.
tests/test_stats.py
+
37
−
33
View file @
cd1d0537
...
@@ -2,16 +2,20 @@ import numpy as np
...
@@ -2,16 +2,20 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
pytest
import
pytest
from
toarstats.constants
import
ALLOWED_SAMPLING_VALUES
,
RSTAGS
,
SEASON_DICT
from
toarstats.metrics.constants
import
(
from
toarstats.defaults
import
DEFAULT_DATA_CAPTURE
ALLOWED_SAMPLING_VALUES
,
from
toarstats.interface
import
calculate_statistics
RSTAGS
,
from
toarstats.stats_utils
import
create_reference_data_frame
SEASON_DICT
)
from
toarstats.metrics.defaults
import
DEFAULT_DATA_CAPTURE
from
toarstats.metrics.interface
import
calculate_statistics
from
toarstats.metrics.stats_utils
import
create_reference_series
data
=
pd
.
read_csv
(
data
=
pd
.
read_csv
(
"
tests/time_series.csv
"
,
header
=
None
,
names
=
[
None
,
"
values
"
],
"
tests/time_series.csv
"
,
header
=
None
,
names
=
[
None
,
"
values
"
],
index_col
=
0
,
parse_dates
=
True
,
infer_datetime_format
=
True
index_col
=
0
,
parse_dates
=
True
,
infer_datetime_format
=
True
)
)
ref_data
=
create_reference_
data_frame
(
data
.
index
)
ref_data
=
create_reference_
series
(
data
.
index
)
metadata
=
{
"
station_lat
"
:
50.906389
,
metadata
=
{
"
station_lat
"
:
50.906389
,
"
station_lon
"
:
6.403889
,
"
station_lon
"
:
6.403889
,
"
station_climatic_zone
"
:
"
cool temperate moist
"
}
"
station_climatic_zone
"
:
"
cool temperate moist
"
}
...
@@ -52,12 +56,12 @@ def test_data_capture():
...
@@ -52,12 +56,12 @@ def test_data_capture():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
count
=
cur_data
.
resample
(
offset
).
count
().
squeeze
(
"
columns
"
)
count
=
cur_data
.
resample
(
offset
).
count
().
squeeze
(
"
columns
"
)
expected
[
name
]
=
count
.
divide
(
expected
[
name
]
=
count
.
divide
(
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
cur_ref
.
resample
(
offset
).
count
()
).
reindex
(
count
.
index
)
).
reindex
(
count
.
index
)
else
:
else
:
count
=
data
.
resample
(
offset
).
count
().
squeeze
(
"
columns
"
)
count
=
data
.
resample
(
offset
).
count
().
squeeze
(
"
columns
"
)
expected
[
"
data_capture
"
]
=
count
.
divide
(
expected
[
"
data_capture
"
]
=
count
.
divide
(
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_data
.
resample
(
offset
).
count
()
).
reindex
(
count
.
index
)
).
reindex
(
count
.
index
)
pd
.
testing
.
assert_frame_equal
(
result
,
expected
)
pd
.
testing
.
assert_frame_equal
(
result
,
expected
)
...
@@ -132,7 +136,7 @@ def test_median():
...
@@ -132,7 +136,7 @@ def test_median():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
median
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
median
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -140,7 +144,7 @@ def test_median():
...
@@ -140,7 +144,7 @@ def test_median():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
median
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
median
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -164,7 +168,7 @@ def test_maximum():
...
@@ -164,7 +168,7 @@ def test_maximum():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
max
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
max
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -172,7 +176,7 @@ def test_maximum():
...
@@ -172,7 +176,7 @@ def test_maximum():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
max
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
max
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -196,7 +200,7 @@ def test_minimum():
...
@@ -196,7 +200,7 @@ def test_minimum():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
min
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
min
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -204,7 +208,7 @@ def test_minimum():
...
@@ -204,7 +208,7 @@ def test_minimum():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
min
().
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
min
().
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -228,7 +232,7 @@ def test_perc05():
...
@@ -228,7 +232,7 @@ def test_perc05():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.05
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.05
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -236,7 +240,7 @@ def test_perc05():
...
@@ -236,7 +240,7 @@ def test_perc05():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.05
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.05
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -260,7 +264,7 @@ def test_perc10():
...
@@ -260,7 +264,7 @@ def test_perc10():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.1
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.1
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -268,7 +272,7 @@ def test_perc10():
...
@@ -268,7 +272,7 @@ def test_perc10():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.1
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.1
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -292,7 +296,7 @@ def test_perc25():
...
@@ -292,7 +296,7 @@ def test_perc25():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.25
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.25
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -300,7 +304,7 @@ def test_perc25():
...
@@ -300,7 +304,7 @@ def test_perc25():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.25
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.25
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -324,7 +328,7 @@ def test_perc75():
...
@@ -324,7 +328,7 @@ def test_perc75():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.75
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.75
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -332,7 +336,7 @@ def test_perc75():
...
@@ -332,7 +336,7 @@ def test_perc75():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.75
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.75
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -356,7 +360,7 @@ def test_perc90():
...
@@ -356,7 +360,7 @@ def test_perc90():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.9
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.9
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -364,7 +368,7 @@ def test_perc90():
...
@@ -364,7 +368,7 @@ def test_perc90():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.9
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.9
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -388,7 +392,7 @@ def test_perc95():
...
@@ -388,7 +392,7 @@ def test_perc95():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.95
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.95
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -396,7 +400,7 @@ def test_perc95():
...
@@ -396,7 +400,7 @@ def test_perc95():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.95
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.95
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -420,7 +424,7 @@ def test_perc98():
...
@@ -420,7 +424,7 @@ def test_perc98():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.98
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.98
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -428,7 +432,7 @@ def test_perc98():
...
@@ -428,7 +432,7 @@ def test_perc98():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.98
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.98
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -452,7 +456,7 @@ def test_perc99():
...
@@ -452,7 +456,7 @@ def test_perc99():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.99
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.99
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -460,7 +464,7 @@ def test_perc99():
...
@@ -460,7 +464,7 @@ def test_perc99():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
0.99
).
reindex
(
frac
.
index
)
tmp_res
=
data_rs
.
quantile
(
0.99
).
reindex
(
frac
.
index
)
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
tmp_res
[
frac
<
DEFAULT_DATA_CAPTURE
]
=
np
.
nan
...
@@ -484,7 +488,7 @@ def test_percentiles1():
...
@@ -484,7 +488,7 @@ def test_percentiles1():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
tmp_res
=
data_rs
.
quantile
(
int
(
name
.
split
(
"
-
"
,
1
)[
0
][
1
:])
/
100
int
(
name
.
split
(
"
-
"
,
1
)[
0
][
1
:])
/
100
...
@@ -494,7 +498,7 @@ def test_percentiles1():
...
@@ -494,7 +498,7 @@ def test_percentiles1():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
for
name
in
result
.
columns
.
to_list
():
for
name
in
result
.
columns
.
to_list
():
tmp_res
=
data_rs
.
quantile
(
int
(
name
[
1
:])
/
100
).
reindex
(
tmp_res
=
data_rs
.
quantile
(
int
(
name
[
1
:])
/
100
).
reindex
(
...
@@ -521,7 +525,7 @@ def test_percentiles2():
...
@@ -521,7 +525,7 @@ def test_percentiles2():
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
cur_ref
.
index
+=
pd
.
Timedelta
(
182
,
"
days
"
)
data_rs
=
cur_data
.
resample
(
offset
)
data_rs
=
cur_data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
cur_ref
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
tmp_res
=
data_rs
.
quantile
(
tmp_res
=
data_rs
.
quantile
(
int
(
name
.
split
(
"
-
"
,
1
)[
0
][
1
:])
/
100
int
(
name
.
split
(
"
-
"
,
1
)[
0
][
1
:])
/
100
...
@@ -531,7 +535,7 @@ def test_percentiles2():
...
@@ -531,7 +535,7 @@ def test_percentiles2():
else
:
else
:
data_rs
=
data
.
resample
(
offset
)
data_rs
=
data
.
resample
(
offset
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
data_count
=
data_rs
.
count
().
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
.
squeeze
(
"
columns
"
)
ref_count
=
ref_data
.
resample
(
offset
).
count
()
frac
=
data_count
/
ref_count
frac
=
data_count
/
ref_count
for
name
in
result
.
columns
.
to_list
():
for
name
in
result
.
columns
.
to_list
():
tmp_res
=
data_rs
.
quantile
(
int
(
name
[
1
:])
/
100
).
reindex
(
tmp_res
=
data_rs
.
quantile
(
int
(
name
[
1
:])
/
100
).
reindex
(
...
...
This diff is collapsed.
Click to expand it.
tests/test_toarstats.py
+
11
−
5
View file @
cd1d0537
...
@@ -23,7 +23,8 @@ import numpy as np
...
@@ -23,7 +23,8 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
pytest
import
pytest
from
toarstats.interface
import
calculate_statistics
from
tests.create_sample_data_and_reference_results
import
create_sample_data
from
toarstats.metrics.interface
import
calculate_statistics
def
get_all_statistics
():
def
get_all_statistics
():
...
@@ -33,11 +34,11 @@ def get_all_statistics():
...
@@ -33,11 +34,11 @@ def get_all_statistics():
"""
"""
statistics
=
set
()
statistics
=
set
()
for
file
in
Path
(
Path
(
__file__
).
resolve
().
parents
[
1
],
for
file
in
Path
(
Path
(
__file__
).
resolve
().
parents
[
1
],
"
toarstats
"
).
glob
(
"
*.py
"
):
"
toarstats
/metrics
"
).
glob
(
"
*.py
"
):
for
node
in
ast
.
parse
(
file
.
read_text
(),
file
).
body
:
for
node
in
ast
.
parse
(
file
.
read_text
(),
file
).
body
:
if
(
isinstance
(
node
,
ast
.
FunctionDef
)
if
(
isinstance
(
node
,
ast
.
FunctionDef
)
and
[
el
.
arg
for
el
in
node
.
args
.
args
]
and
[
el
.
arg
for
el
in
node
.
args
.
args
]
==
[
"
df
"
,
"
df
ref
"
,
"
mtype
"
,
"
metadata
"
,
"
seasons
"
,
==
[
"
ser
"
,
"
ref
"
,
"
mtype
"
,
"
metadata
"
,
"
seasons
"
,
"
data_capture
"
]):
"
data_capture
"
]):
statistics
.
add
(
node
.
name
)
statistics
.
add
(
node
.
name
)
return
statistics
return
statistics
...
@@ -50,7 +51,7 @@ def get_all_samplings():
...
@@ -50,7 +51,7 @@ def get_all_samplings():
"""
"""
samplings
=
set
()
samplings
=
set
()
for
file
in
Path
(
Path
(
__file__
).
resolve
().
parents
[
1
],
for
file
in
Path
(
Path
(
__file__
).
resolve
().
parents
[
1
],
"
toarstats
"
).
glob
(
"
*.py
"
):
"
toarstats
/metrics
"
).
glob
(
"
*.py
"
):
for
node
in
ast
.
parse
(
file
.
read_text
(),
file
).
body
:
for
node
in
ast
.
parse
(
file
.
read_text
(),
file
).
body
:
if
(
isinstance
(
node
,
ast
.
Assign
)
if
(
isinstance
(
node
,
ast
.
Assign
)
and
isinstance
(
node
.
value
,
ast
.
Dict
)
and
isinstance
(
node
.
value
,
ast
.
Dict
)
...
@@ -68,8 +69,13 @@ def sample_data():
...
@@ -68,8 +69,13 @@ def sample_data():
:return: A data frame with sample data
:return: A data frame with sample data
"""
"""
sample_data_file
=
Path
(
Path
(
__file__
).
resolve
().
parent
,
"
sample_data/sample_data.csv
"
)
if
not
sample_data_file
.
is_file
():
create_sample_data
(
sample_data_file
.
parent
)
return
pd
.
read_csv
(
return
pd
.
read_csv
(
Path
(
Path
(
__file__
).
resolve
().
parent
,
"
sample_data/sample_data.csv
"
)
,
sample_data_file
,
header
=
None
,
index_col
=
0
,
parse_dates
=
True
header
=
None
,
index_col
=
0
,
parse_dates
=
True
)
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment