diff --git a/requirements.txt b/requirements.txt
index 723a98d138b6a0742f9ff9683f18fe8ae0de7e4d..4ebb321a37d598d71a5360f7439bd939e4962a1f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-numpy==1.24.2
-pandas==1.5.3
-statsmodels==0.13.5
+numpy==1.24.3
+pandas==2.0.1
+statsmodels==0.14.0
diff --git a/tests/test_metrics/create_sample_data_and_reference_results.py b/tests/test_metrics/create_sample_data_and_reference_results.py
deleted file mode 100644
index e81ea32d776b3d98ef56a6a182b375c2b6b3f5a6..0000000000000000000000000000000000000000
--- a/tests/test_metrics/create_sample_data_and_reference_results.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python3
-
-"""Create sample data and reference results for the toarstats package.
-
-Without any arguments only sample data will be created.
-
-To run this script use:
-create_sample_data_and_reference_results.py [-h] [-t TOARSTATS]
-[-p PYTHON]
-
-optional arguments:
-  -h, --help
-        show help message and exit
-  -t TOARSTATS, --toarstats TOARSTATS
-        paths to the different toarstats versions
-  -p PYTHON, --python PYTHON
-        paths to the python interpreters which should be used for the
-        different toarstats versions
-"""
-
-from argparse import ArgumentParser
-import json
-from pathlib import Path
-import subprocess
-
-import numpy as np
-import pandas as pd
-
-
-GET_STATISTICS_AND_SAMPLINGS = """
-import ast
-import sys
-
-
-source = sys.argv[1]
-filename = sys.argv[2]
-
-statistics = set()
-samplings = set()
-for node in ast.parse(source, filename).body:
-    if (isinstance(node, ast.FunctionDef)
-            and [el.arg for el in node.args.args] == ["df", "dfref", "mtype",
-                                                      "varname", "varunits",
-                                                      "metadata", "seasons",
-                                                      "data_capture"]):
-        statistics.add(node.name)
-    elif (isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict)
-          and node.targets[0].id == "RSTAGS"):
-        samplings.update([sampling.s for sampling in node.value.keys])
-if "seasonal" in samplings and "vegseason" not in samplings:
-    samplings.add("vegseason")
-print([list(statistics), list(samplings)])
-"""
-
-
-CALCULATE_STATISTICS = """
-from collections import namedtuple
-from configparser import ConfigParser
-import json
-import os.path
-import sys
-
-import pandas as pd
-
-from toarstats_version.stats_main import stats_wrapper
-
-
-class DataSlice:
-    def __init__(self, index, values):
-        self.x = index
-        self.y = values
-        self.yattr = {}
-
-
-data_path = sys.argv[1]
-metadata_path = sys.argv[2]
-results_dir = sys.argv[3]
-statistics = json.loads(sys.argv[4].replace("'", '"'))
-samplings = json.loads(sys.argv[5].replace("'", '"'))
-
-failed_combinations = []
-for statistic in statistics:
-    for sampling in samplings:
-        data = pd.read_csv(data_path, header=None, index_col=0, squeeze=True,
-                           parse_dates=True)
-        parser = ConfigParser()
-        parser.read(metadata_path)
-        Metadata = namedtuple("Metadata", ["station_lat", "station_lon",
-                                           "station_climatic_zone"])
-        metadata = Metadata(parser.getfloat("METADATA", "station_lat"),
-                            parser.getfloat("METADATA", "station_lon"),
-                            parser.getint("METADATA", "station_climatic_zone"))
-
-        try:
-            results = stats_wrapper(sampling, [statistic],
-                                    DataSlice(data.index, data.values),
-                                    metadata)
-        except ValueError:
-            failed_combinations.append([statistic, sampling])
-            continue
-        pd.DataFrame(
-            {i: pd.Series(res.y, res.x) for i, res in enumerate(results)}
-        ).to_csv(os.path.join(results_dir, statistic+"-"+sampling+".csv"),
-                 header=False)
-print(failed_combinations)
-"""
-
-
-def get_command_line_arguments():
-    """Parse command line arguments.
-
-    :return: A zip of the paths to ``toarstats`` versions and their
-             respective python interpreters
-    """
-    parser = ArgumentParser(description="Create sample data and reference"
-                                        " results for the toarstats package.")
-    parser.add_argument("-t", "--toarstats", action="append", default=[],
-                        help="paths to the different toarstats versions")
-    parser.add_argument("-p", "--python", action="append", default=[],
-                        help="paths to the python interpreters which should be"
-                             " used for the different toarstats versions")
-    args = parser.parse_args()
-    return zip([Path(el).resolve() for el in args.toarstats], args.python)
-
-
-def create_sample_data(sample_data_dir):
-    """Create sample data.
-
-    :param sample_data_dir: path to the sample data directory
-    """
-    sample_data_dir.mkdir(exist_ok=True)
-    datetime_index = pd.date_range(start="2011-04-17 09:00", periods=100000,
-                                   freq="H")
-    values = np.random.default_rng().uniform(13.4, 61.7, len(datetime_index))
-    values[np.random.default_rng().choice(values.size,
-                                          size=int(0.085*values.size),
-                                          replace=False)] = np.nan
-    pd.Series(values, datetime_index).dropna().to_csv(
-        Path(sample_data_dir, "sample_data.csv"), header=False
-    )
-    Path(sample_data_dir, "sample_metadata.cfg").write_text(
-        "[METADATA]\n"
-        "station_lat: 50.906389\n"
-        "station_lon: 6.403889\n"
-        "station_climatic_zone: 3\n",
-        encoding="utf-8"
-    )
-
-
-def get_statistics_and_samplings(toarstats_version, python_interpreter):
-    """Collect statistics and samplings from older package version.
-
-    :param toarstats_version: path to the old package version
-    :param python_interpreter: path to the interpreter to use
-
-    :return: A list of all statistics and a list of all samplings
-    """
-    all_statistics = set()
-    all_samplings = set()
-    for file in toarstats_version.glob("*.py"):
-        try:
-            content = file.read_text()
-        except UnicodeDecodeError:
-            try:
-                content = file.read_text(encoding="cp1252")
-            except UnicodeError:
-                print(f"WARNING: ignoring {file}; unknown encoding")
-                continue
-        try:
-            statistics, samplings = json.loads(subprocess.run(
-                [python_interpreter, "-c", GET_STATISTICS_AND_SAMPLINGS,
-                 content, file], capture_output=True, check=True, text=True
-            ).stdout.replace("'", '"'))
-        except subprocess.CalledProcessError:
-            statistics, samplings = json.loads(subprocess.run(
-                [python_interpreter, "-c",
-                 GET_STATISTICS_AND_SAMPLINGS.replace("el.arg", "el.id"),
-                 content, file], capture_output=True, check=True, text=True
-            ).stdout.replace("'", '"'))
-        all_statistics.update(statistics)
-        all_samplings.update(samplings)
-    return list(all_statistics), list(all_samplings)
-
-
-def create_reference_results(reference_versions, sample_data_dir):
-    """Create reference results.
-
-    :param reference_versions: zip of old package versions and
-                               interpreters to use
-    :param sample_data_dir: path to the sample data directory
-    """
-    for toarstats_version, python_interpreter in reference_versions:
-        statistics, samplings = get_statistics_and_samplings(
-            toarstats_version, python_interpreter
-        )
-        results_dir = Path(sample_data_dir.parent, "reference_results",
-                           toarstats_version.name)
-        results_dir.mkdir(parents=True, exist_ok=True)
-        cur_script = CALCULATE_STATISTICS.replace("toarstats_version",
-                                                  toarstats_version.name)
-        try:
-            failed_combinations = json.loads(subprocess.run(
-                [python_interpreter, "-c", cur_script,
-                 Path(sample_data_dir, "sample_data.csv"),
-                 Path(sample_data_dir, "sample_metadata.cfg"), results_dir,
-                 str(statistics), str(samplings)], capture_output=True,
-                cwd=toarstats_version.parent, check=True, text=True
-            ).stdout.replace("'", '"'))
-        except subprocess.CalledProcessError:
-            failed_combinations = json.loads(subprocess.run(
-                [python_interpreter, "-c",
-                 cur_script.replace("configparser", "ConfigParser"),
-                 Path(sample_data_dir, "sample_data.csv"),
-                 Path(sample_data_dir, "sample_metadata.cfg"), results_dir,
-                 str(statistics), str(samplings)], capture_output=True,
-                cwd=toarstats_version.parent, check=True, text=True
-            ).stdout.replace("u'", "'").replace("'", '"'))
-        if failed_combinations:
-            print(toarstats_version.name)
-            for combination in failed_combinations:
-                print(*combination)
-
-
-def main():
-    """Main function for the script."""
-    reference_versions = get_command_line_arguments()
-    sample_data_dir = Path(Path(__file__).resolve().parent, "sample_data")
-    create_sample_data(sample_data_dir)
-    create_reference_results(reference_versions, sample_data_dir)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/test_metrics/test_input_checks.py b/tests/test_metrics/test_input_checks.py
index d1d23501f6d10c306e54e48e3a39f5647b96c51f..ab7da5a9e95cbe2a61c2fbe307f8a33cd39d3bb4 100644
--- a/tests/test_metrics/test_input_checks.py
+++ b/tests/test_metrics/test_input_checks.py
@@ -190,7 +190,7 @@ class TestFromPandas:
         np.testing.assert_array_equal([5., 2.], out_value3)
         pd.testing.assert_index_equal(out_index4, index)
         np.testing.assert_array_equal([], out_value4)
-        pd.testing.assert_index_equal(out_index5, pd.Index([]))
+        pd.testing.assert_index_equal(out_index5, pd.RangeIndex(0))
         np.testing.assert_array_equal([], out_value5)
 
 
diff --git a/tests/test_metrics/test_metrics.py b/tests/test_metrics/test_metrics.py
index d5d07d41415c9a9c0732f8538c77639497e52cf5..d7b68939d0b0b28d4aa6687306dfb9d8e6408612 100644
--- a/tests/test_metrics/test_metrics.py
+++ b/tests/test_metrics/test_metrics.py
@@ -1,9 +1,10 @@
-"""Tests for the package as a whole.
+"""Tests for the metrics subpackage as a whole.
 
 This module contains tests to check if everything from older package
 versions is implemented and if the results are still the same.
 
 This module contains the following functions:
+create_sample_data - create sample data
 get_all_statistics - get all implemented statistics
 get_all_samplings - get all implemented samplings
 sample_data - get sample data frame
@@ -23,13 +24,34 @@
 import numpy as np
 import pandas as pd
 import pytest
 
-from tests.test_metrics.create_sample_data_and_reference_results import (
-    create_sample_data
-)
 from toarstats.metrics.interface import calculate_statistics
 from toarstats.metrics.stats_utils import STATS_LIST
 
 
+def create_sample_data(sample_data_dir):
+    """Create sample data.
+
+    :param sample_data_dir: path to the sample data directory
+    """
+    sample_data_dir.mkdir(exist_ok=True)
+    datetime_index = pd.date_range(start="2011-04-17 09:00", periods=100000,
+                                   freq="H")
+    values = np.random.default_rng().uniform(13.4, 61.7, len(datetime_index))
+    values[np.random.default_rng().choice(values.size,
+                                          size=int(0.085*values.size),
+                                          replace=False)] = np.nan
+    pd.Series(values, datetime_index).dropna().to_csv(
+        Path(sample_data_dir, "sample_data.csv"), header=False
+    )
+    Path(sample_data_dir, "sample_metadata.cfg").write_text(
+        "[METADATA]\n"
+        "station_lat: 50.906389\n"
+        "station_lon: 6.403889\n"
+        "station_climatic_zone: 3\n",
+        encoding="utf-8"
+    )
+
+
 def get_all_statistics():
     """Get all implemented statistics.
diff --git a/tests/test_metrics/test_stats.py b/tests/test_metrics/test_stats.py
index e1e64f54a069cee17d1138d458a766b681ce8776..349cf26993852c8dd4302e75ad2171cf4c4a977d 100644
--- a/tests/test_metrics/test_stats.py
+++ b/tests/test_metrics/test_stats.py
@@ -13,7 +13,7 @@ from toarstats.metrics.stats_utils import create_reference_series
 
 data = pd.read_csv(
     "tests/test_metrics/time_series.csv", header=None, names=[None, "values"],
-    index_col=0, parse_dates=True, infer_datetime_format=True
+    index_col=0, parse_dates=True, date_format="%Y-%m-%d %H:%M:%S"
 )
 ref_data = create_reference_series(data.index)
 metadata = {"station_lat": 50.906389,