Skip to content
Snippets Groups Projects
Commit cd1d0537 authored by Niklas Selke's avatar Niklas Selke
Browse files

Fixed the tests.

parent c0437e65
No related branches found
No related tags found
1 merge request!5Modified the test for the 'value_count' statistic. Now all available sampling...
...@@ -4,12 +4,25 @@ import numpy as np ...@@ -4,12 +4,25 @@ import numpy as np
import pandas as pd import pandas as pd
import pytest import pytest
from toarstats.input_checks import ( from toarstats.metrics.input_checks import (
check_crops, check_data, check_data_capture, check_index, check_crops,
check_input_parameters, check_metadata, check_required_parameters, check_data,
check_sampling, check_seasons, check_station_climatic_zone, check_data_capture,
check_station_latitude, check_station_longitude, check_statistics, check_index,
check_values, from_pandas, is_correct_type, is_empty, is_in_range, check_input_parameters,
check_metadata,
check_required_parameters,
check_sampling,
check_seasons,
check_station_climatic_zone,
check_station_latitude,
check_station_longitude,
check_statistics,
check_values,
from_pandas,
is_correct_type,
is_empty,
is_in_range,
to_collection to_collection
) )
...@@ -260,16 +273,14 @@ class TestCheckData: ...@@ -260,16 +273,14 @@ class TestCheckData:
start="2000", periods=3, freq="H", tz="Europe/Berlin" start="2000", periods=3, freq="H", tz="Europe/Berlin"
) )
values = np.array([5., 6.1, 7]) values = np.array([5., 6.1, 7])
expected = pd.DataFrame( expected = pd.Series(values, index=index.tz_localize(None))
{"values": values}, index=index.tz_localize(None) pd.testing.assert_series_equal(
)
pd.testing.assert_frame_equal(
check_data(pd.DataFrame(values, index=index), None, None), expected check_data(pd.DataFrame(values, index=index), None, None), expected
) )
pd.testing.assert_frame_equal( pd.testing.assert_series_equal(
check_data(pd.Series(values, index=index), None, None), expected check_data(pd.Series(values, index=index), None, None), expected
) )
pd.testing.assert_frame_equal( pd.testing.assert_series_equal(
check_data(None, index, values), expected check_data(None, index, values), expected
) )
...@@ -593,9 +604,8 @@ class TestCheckInputParameters: ...@@ -593,9 +604,8 @@ class TestCheckInputParameters:
assert error_msg == str(excinfo.value) assert error_msg == str(excinfo.value)
def test_check_input_parameters_correct_input(self): def test_check_input_parameters_correct_input(self):
data = pd.DataFrame( data = pd.Series(
{"values": range(10)}, range(10), index=pd.date_range(start="2000", periods=10, freq="H")
index=pd.date_range(start="2000", periods=10, freq="H")
) )
expected_required = namedtuple( expected_required = namedtuple(
"Required", "Required",
...@@ -604,7 +614,7 @@ class TestCheckInputParameters: ...@@ -604,7 +614,7 @@ class TestCheckInputParameters:
result = check_input_parameters("annual", "median", data, *[None]*9) result = check_input_parameters("annual", "median", data, *[None]*9)
assert result.sampling == "annual" assert result.sampling == "annual"
assert result.statistics == ["median"] assert result.statistics == ["median"]
pd.testing.assert_frame_equal(result.data, data) pd.testing.assert_series_equal(result.data, data)
assert result.metadata is None assert result.metadata is None
assert result.seasons is None assert result.seasons is None
assert result.crops is None assert result.crops is None
......
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from toarstats.interface import calculate_statistics from toarstats.metrics.interface import calculate_statistics
class TestCalculateStatistics: class TestCalculateStatistics:
......
...@@ -2,16 +2,20 @@ import numpy as np ...@@ -2,16 +2,20 @@ import numpy as np
import pandas as pd import pandas as pd
import pytest import pytest
from toarstats.constants import ALLOWED_SAMPLING_VALUES, RSTAGS, SEASON_DICT from toarstats.metrics.constants import (
from toarstats.defaults import DEFAULT_DATA_CAPTURE ALLOWED_SAMPLING_VALUES,
from toarstats.interface import calculate_statistics RSTAGS,
from toarstats.stats_utils import create_reference_data_frame SEASON_DICT
)
from toarstats.metrics.defaults import DEFAULT_DATA_CAPTURE
from toarstats.metrics.interface import calculate_statistics
from toarstats.metrics.stats_utils import create_reference_series
data = pd.read_csv( data = pd.read_csv(
"tests/time_series.csv", header=None, names=[None, "values"], "tests/time_series.csv", header=None, names=[None, "values"],
index_col=0, parse_dates=True, infer_datetime_format=True index_col=0, parse_dates=True, infer_datetime_format=True
) )
ref_data = create_reference_data_frame(data.index) ref_data = create_reference_series(data.index)
metadata = {"station_lat": 50.906389, metadata = {"station_lat": 50.906389,
"station_lon": 6.403889, "station_lon": 6.403889,
"station_climatic_zone": "cool temperate moist"} "station_climatic_zone": "cool temperate moist"}
...@@ -52,12 +56,12 @@ def test_data_capture(): ...@@ -52,12 +56,12 @@ def test_data_capture():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
count = cur_data.resample(offset).count().squeeze("columns") count = cur_data.resample(offset).count().squeeze("columns")
expected[name] = count.divide( expected[name] = count.divide(
cur_ref.resample(offset).count().squeeze("columns") cur_ref.resample(offset).count()
).reindex(count.index) ).reindex(count.index)
else: else:
count = data.resample(offset).count().squeeze("columns") count = data.resample(offset).count().squeeze("columns")
expected["data_capture"] = count.divide( expected["data_capture"] = count.divide(
ref_data.resample(offset).count().squeeze("columns") ref_data.resample(offset).count()
).reindex(count.index) ).reindex(count.index)
pd.testing.assert_frame_equal(result, expected) pd.testing.assert_frame_equal(result, expected)
...@@ -132,7 +136,7 @@ def test_median(): ...@@ -132,7 +136,7 @@ def test_median():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.median().reindex(frac.index) tmp_res = data_rs.median().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -140,7 +144,7 @@ def test_median(): ...@@ -140,7 +144,7 @@ def test_median():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.median().reindex(frac.index) tmp_res = data_rs.median().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -164,7 +168,7 @@ def test_maximum(): ...@@ -164,7 +168,7 @@ def test_maximum():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.max().reindex(frac.index) tmp_res = data_rs.max().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -172,7 +176,7 @@ def test_maximum(): ...@@ -172,7 +176,7 @@ def test_maximum():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.max().reindex(frac.index) tmp_res = data_rs.max().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -196,7 +200,7 @@ def test_minimum(): ...@@ -196,7 +200,7 @@ def test_minimum():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.min().reindex(frac.index) tmp_res = data_rs.min().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -204,7 +208,7 @@ def test_minimum(): ...@@ -204,7 +208,7 @@ def test_minimum():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.min().reindex(frac.index) tmp_res = data_rs.min().reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -228,7 +232,7 @@ def test_perc05(): ...@@ -228,7 +232,7 @@ def test_perc05():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.05).reindex(frac.index) tmp_res = data_rs.quantile(0.05).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -236,7 +240,7 @@ def test_perc05(): ...@@ -236,7 +240,7 @@ def test_perc05():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.05).reindex(frac.index) tmp_res = data_rs.quantile(0.05).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -260,7 +264,7 @@ def test_perc10(): ...@@ -260,7 +264,7 @@ def test_perc10():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.1).reindex(frac.index) tmp_res = data_rs.quantile(0.1).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -268,7 +272,7 @@ def test_perc10(): ...@@ -268,7 +272,7 @@ def test_perc10():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.1).reindex(frac.index) tmp_res = data_rs.quantile(0.1).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -292,7 +296,7 @@ def test_perc25(): ...@@ -292,7 +296,7 @@ def test_perc25():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.25).reindex(frac.index) tmp_res = data_rs.quantile(0.25).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -300,7 +304,7 @@ def test_perc25(): ...@@ -300,7 +304,7 @@ def test_perc25():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.25).reindex(frac.index) tmp_res = data_rs.quantile(0.25).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -324,7 +328,7 @@ def test_perc75(): ...@@ -324,7 +328,7 @@ def test_perc75():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.75).reindex(frac.index) tmp_res = data_rs.quantile(0.75).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -332,7 +336,7 @@ def test_perc75(): ...@@ -332,7 +336,7 @@ def test_perc75():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.75).reindex(frac.index) tmp_res = data_rs.quantile(0.75).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -356,7 +360,7 @@ def test_perc90(): ...@@ -356,7 +360,7 @@ def test_perc90():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.9).reindex(frac.index) tmp_res = data_rs.quantile(0.9).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -364,7 +368,7 @@ def test_perc90(): ...@@ -364,7 +368,7 @@ def test_perc90():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.9).reindex(frac.index) tmp_res = data_rs.quantile(0.9).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -388,7 +392,7 @@ def test_perc95(): ...@@ -388,7 +392,7 @@ def test_perc95():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.95).reindex(frac.index) tmp_res = data_rs.quantile(0.95).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -396,7 +400,7 @@ def test_perc95(): ...@@ -396,7 +400,7 @@ def test_perc95():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.95).reindex(frac.index) tmp_res = data_rs.quantile(0.95).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -420,7 +424,7 @@ def test_perc98(): ...@@ -420,7 +424,7 @@ def test_perc98():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.98).reindex(frac.index) tmp_res = data_rs.quantile(0.98).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -428,7 +432,7 @@ def test_perc98(): ...@@ -428,7 +432,7 @@ def test_perc98():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.98).reindex(frac.index) tmp_res = data_rs.quantile(0.98).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -452,7 +456,7 @@ def test_perc99(): ...@@ -452,7 +456,7 @@ def test_perc99():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.99).reindex(frac.index) tmp_res = data_rs.quantile(0.99).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -460,7 +464,7 @@ def test_perc99(): ...@@ -460,7 +464,7 @@ def test_perc99():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile(0.99).reindex(frac.index) tmp_res = data_rs.quantile(0.99).reindex(frac.index)
tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan tmp_res[frac < DEFAULT_DATA_CAPTURE] = np.nan
...@@ -484,7 +488,7 @@ def test_percentiles1(): ...@@ -484,7 +488,7 @@ def test_percentiles1():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile( tmp_res = data_rs.quantile(
int(name.split("-", 1)[0][1:]) / 100 int(name.split("-", 1)[0][1:]) / 100
...@@ -494,7 +498,7 @@ def test_percentiles1(): ...@@ -494,7 +498,7 @@ def test_percentiles1():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
for name in result.columns.to_list(): for name in result.columns.to_list():
tmp_res = data_rs.quantile(int(name[1:]) / 100).reindex( tmp_res = data_rs.quantile(int(name[1:]) / 100).reindex(
...@@ -521,7 +525,7 @@ def test_percentiles2(): ...@@ -521,7 +525,7 @@ def test_percentiles2():
cur_ref.index += pd.Timedelta(182, "days") cur_ref.index += pd.Timedelta(182, "days")
data_rs = cur_data.resample(offset) data_rs = cur_data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = cur_ref.resample(offset).count().squeeze("columns") ref_count = cur_ref.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
tmp_res = data_rs.quantile( tmp_res = data_rs.quantile(
int(name.split("-", 1)[0][1:]) / 100 int(name.split("-", 1)[0][1:]) / 100
...@@ -531,7 +535,7 @@ def test_percentiles2(): ...@@ -531,7 +535,7 @@ def test_percentiles2():
else: else:
data_rs = data.resample(offset) data_rs = data.resample(offset)
data_count = data_rs.count().squeeze("columns") data_count = data_rs.count().squeeze("columns")
ref_count = ref_data.resample(offset).count().squeeze("columns") ref_count = ref_data.resample(offset).count()
frac = data_count / ref_count frac = data_count / ref_count
for name in result.columns.to_list(): for name in result.columns.to_list():
tmp_res = data_rs.quantile(int(name[1:]) / 100).reindex( tmp_res = data_rs.quantile(int(name[1:]) / 100).reindex(
......
...@@ -23,7 +23,8 @@ import numpy as np ...@@ -23,7 +23,8 @@ import numpy as np
import pandas as pd import pandas as pd
import pytest import pytest
from toarstats.interface import calculate_statistics from tests.create_sample_data_and_reference_results import create_sample_data
from toarstats.metrics.interface import calculate_statistics
def get_all_statistics(): def get_all_statistics():
...@@ -33,11 +34,11 @@ def get_all_statistics(): ...@@ -33,11 +34,11 @@ def get_all_statistics():
""" """
statistics = set() statistics = set()
for file in Path(Path(__file__).resolve().parents[1], for file in Path(Path(__file__).resolve().parents[1],
"toarstats").glob("*.py"): "toarstats/metrics").glob("*.py"):
for node in ast.parse(file.read_text(), file).body: for node in ast.parse(file.read_text(), file).body:
if (isinstance(node, ast.FunctionDef) if (isinstance(node, ast.FunctionDef)
and [el.arg for el in node.args.args] and [el.arg for el in node.args.args]
== ["df", "dfref", "mtype", "metadata", "seasons", == ["ser", "ref", "mtype", "metadata", "seasons",
"data_capture"]): "data_capture"]):
statistics.add(node.name) statistics.add(node.name)
return statistics return statistics
...@@ -50,7 +51,7 @@ def get_all_samplings(): ...@@ -50,7 +51,7 @@ def get_all_samplings():
""" """
samplings = set() samplings = set()
for file in Path(Path(__file__).resolve().parents[1], for file in Path(Path(__file__).resolve().parents[1],
"toarstats").glob("*.py"): "toarstats/metrics").glob("*.py"):
for node in ast.parse(file.read_text(), file).body: for node in ast.parse(file.read_text(), file).body:
if (isinstance(node, ast.Assign) if (isinstance(node, ast.Assign)
and isinstance(node.value, ast.Dict) and isinstance(node.value, ast.Dict)
...@@ -68,8 +69,13 @@ def sample_data(): ...@@ -68,8 +69,13 @@ def sample_data():
:return: A data frame with sample data :return: A data frame with sample data
""" """
sample_data_file = Path(
Path(__file__).resolve().parent, "sample_data/sample_data.csv"
)
if not sample_data_file.is_file():
create_sample_data(sample_data_file.parent)
return pd.read_csv( return pd.read_csv(
Path(Path(__file__).resolve().parent, "sample_data/sample_data.csv"), sample_data_file,
header=None, index_col=0, parse_dates=True header=None, index_col=0, parse_dates=True
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment