Ghost User
--- a/tests/test_metrics/create_sample_data_and_reference_results.py deleted 100644 → 0

+ 0

− 233
+++ b/tests/test_metrics/create_sample_data_and_reference_results.py deleted 100644 → 0

+ 0

− 233
-#!/usr/bin/env python3
-
-"""Create sample data and reference results for the toarstats package.
-
-Without any arguments only sample data will be created.
-
-To run this script use:
-create_sample_data_and_reference_results.py [-h] [-t TOARSTATS]
-[-p PYTHON]
-
-optional arguments:
-  -h, --help
-    show help message and exit
-  -t TOARSTATS, --toarstats TOARSTATS
-    paths to the different toarstats versions
-  -p PYTHON, --python PYTHON
-    paths to the python interpreters which should be used for the
-    different toarstats versions
-"""
-
-from argparse import ArgumentParser
-import json
-from pathlib import Path
-import subprocess
-
-import numpy as np
-import pandas as pd
-
-
-# Helper script that ``get_statistics_and_samplings`` runs with
-# ``<interpreter> -c``.  It receives the text of one module as
-# ``sys.argv[1]`` and that module's file name as ``sys.argv[2]``.
-#
-# The module is parsed with ``ast`` and two sets are collected from its
-# top-level statements:
-#   * statistics: names of functions whose argument names are exactly
-#     ``df, dfref, mtype, varname, varunits, metadata, seasons,
-#     data_capture``
-#   * samplings: keys of a dict literal assigned to ``RSTAGS``;
-#     "vegseason" is added whenever "seasonal" is present
-# The result is printed as ``[statistics, samplings]``.
-#
-# NOTE: the caller retries with the literal text "el.arg" replaced by
-# "el.id", so that exact spelling has to stay in the script.
-GET_STATISTICS_AND_SAMPLINGS = """
-import ast
-import sys
-
-
-source = sys.argv[1]
-filename = sys.argv[2]
-
-statistics = set()
-samplings = set()
-for node in ast.parse(source, filename).body:
-    if (isinstance(node, ast.FunctionDef)
-            and [el.arg for el in node.args.args] == ["df", "dfref", "mtype",
-                                                      "varname", "varunits",
-                                                      "metadata", "seasons",
-                                                      "data_capture"]):
-        statistics.add(node.name)
-    elif (isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict)
-            and node.targets[0].id == "RSTAGS"):
-        samplings.update([sampling.s for sampling in node.value.keys])
-if "seasonal" in samplings and "vegseason" not in samplings:
-    samplings.add("vegseason")
-print([list(statistics), list(samplings)])
-"""
-
-
-# Helper script that ``create_reference_results`` runs with
-# ``<interpreter> -c``.  Its command line arguments are, in order: the
-# data CSV path, the metadata file path, the results directory, the list
-# of statistics and the list of samplings (both lists arrive as text whose
-# single quotes are turned into double quotes before JSON parsing).
-#
-# For every statistic/sampling combination the script reads the data and
-# the metadata, calls ``stats_wrapper`` and writes the outcome to
-# ``<results_dir>/<statistic>-<sampling>.csv``.  Combinations for which
-# ``stats_wrapper`` raises ``ValueError`` are collected and printed as a
-# list once all combinations have been processed.
-#
-# NOTE: the caller patches this text before running it: every
-# "toarstats_version" is replaced by the package directory name and, on a
-# retry, "configparser" is replaced by "ConfigParser".  Those spellings
-# have to stay in the script.
-CALCULATE_STATISTICS = """
-from collections import namedtuple
-from configparser import ConfigParser
-import json
-import os.path
-import sys
-
-import pandas as pd
-
-from toarstats_version.stats_main import stats_wrapper
-
-
-class DataSlice:
-    def __init__(self, index, values):
-        self.x = index
-        self.y = values
-        self.yattr = {}
-
-
-data_path = sys.argv[1]
-metadata_path = sys.argv[2]
-results_dir = sys.argv[3]
-statistics = json.loads(sys.argv[4].replace("'", '"'))
-samplings = json.loads(sys.argv[5].replace("'", '"'))
-
-failed_combinations = []
-for statistic in statistics:
-    for sampling in samplings:
-        data = pd.read_csv(data_path, header=None, index_col=0, squeeze=True,
-                           parse_dates=True)
-        parser = ConfigParser()
-        parser.read(metadata_path)
-        Metadata = namedtuple("Metadata", ["station_lat", "station_lon",
-                                           "station_climatic_zone"])
-        metadata = Metadata(parser.getfloat("METADATA", "station_lat"),
-                            parser.getfloat("METADATA", "station_lon"),
-                            parser.getint("METADATA", "station_climatic_zone"))
-
-        try:
-            results = stats_wrapper(sampling, [statistic],
-                                    DataSlice(data.index, data.values),
-                                    metadata)
-        except ValueError:
-            failed_combinations.append([statistic, sampling])
-            continue
-        pd.DataFrame(
-            {i: pd.Series(res.y, res.x) for i, res in enumerate(results)}
-        ).to_csv(os.path.join(results_dir, statistic+"-"+sampling+".csv"),
-                 header=False)
-print(failed_combinations)
-"""
-
-
-def get_command_line_arguments():
-    """Parse command line arguments.
-
-    Every ``-t``/``--toarstats`` path is paired with the
-    ``-p``/``--python`` interpreter given at the same position.  An
-    unequal number of the two options is reported as a usage error
-    (exit status 2) instead of silently dropping the surplus entries.
-
-    :return: A zip of the paths to ``toarstats`` versions and their
-             respective python interpreters
-    """
-    parser = ArgumentParser(description="Create sample data and reference"
-                                        " results for the toarstats package.")
-    parser.add_argument("-t", "--toarstats", action="append", default=[],
-                        help="paths to the different toarstats versions")
-    parser.add_argument("-p", "--python", action="append", default=[],
-                        help="paths to the python interpreters which should be"
-                             " used for the different toarstats versions")
-    args = parser.parse_args()
-    if len(args.toarstats) != len(args.python):
-        # zip() stops at the shorter list, which would quietly skip
-        # versions the user asked for.
-        parser.error(
-            f"got {len(args.toarstats)} toarstats path(s) but"
-            f" {len(args.python)} python interpreter(s); pass exactly one"
-            " -p/--python for every -t/--toarstats"
-        )
-    return zip([Path(el).resolve() for el in args.toarstats], args.python)
-
-
-def create_sample_data(sample_data_dir):
-    """Create sample data.
-
-    Two files are written into ``sample_data_dir`` (created together
-    with missing parents if necessary):
-
-    * ``sample_data.csv``: an hourly series of 100000 time steps
-      starting at 2011-04-17 09:00 with uniformly distributed random
-      values between 13.4 and 61.7, from which 8.5 % of the entries are
-      removed at random positions
-    * ``sample_metadata.cfg``: a ``METADATA`` section with the station
-      latitude, longitude and climatic zone
-
-    The random generator is not seeded, so every call produces
-    different data.
-
-    :param sample_data_dir: path to the sample data directory
-    """
-    sample_data_dir.mkdir(parents=True, exist_ok=True)
-    # Lower-case "h" is the hourly alias; the upper-case spelling is
-    # deprecated since pandas 2.2.
-    datetime_index = pd.date_range(start="2011-04-17 09:00", periods=100000,
-                                   freq="h")
-    rng = np.random.default_rng()
-    values = rng.uniform(13.4, 61.7, len(datetime_index))
-    # Blank out 8.5 % of the values; dropna() below removes those rows.
-    missing = rng.choice(values.size, size=int(0.085*values.size),
-                         replace=False)
-    values[missing] = np.nan
-    pd.Series(values, datetime_index).dropna().to_csv(
-        Path(sample_data_dir, "sample_data.csv"), header=False
-    )
-    Path(sample_data_dir, "sample_metadata.cfg").write_text(
-        "[METADATA]\n"
-        "station_lat: 50.906389\n"
-        "station_lon: 6.403889\n"
-        "station_climatic_zone: 3\n",
-        encoding="utf-8"
-    )
-
-
-def get_statistics_and_samplings(toarstats_version, python_interpreter):
-    """Collect statistics and samplings from older package version.
-
-    Every ``*.py`` file directly inside ``toarstats_version`` is handed
-    to ``GET_STATISTICS_AND_SAMPLINGS``, which is run with
-    ``python_interpreter``.  Files that can be decoded neither with the
-    default encoding nor as cp1252 are skipped with a warning.
-
-    :param toarstats_version: path to the old package version
-    :param python_interpreter: path to the interpreter to use
-
-    :raises subprocess.CalledProcessError: if both variants of the
-        helper script fail for a file
-
-    :return: A sorted list of all statistics and a sorted list of all
-             samplings
-    """
-    # Local import: the module-level imports of this file do not
-    # provide ``ast``.
-    from ast import literal_eval
-
-    def run_script(script, content, source_file):
-        """Run ``script`` on one module and return what it printed."""
-        return subprocess.run(
-            [python_interpreter, "-c", script, content, str(source_file)],
-            capture_output=True, check=True, text=True
-        ).stdout
-
-    all_statistics = set()
-    all_samplings = set()
-    for source_file in toarstats_version.glob("*.py"):
-        try:
-            content = source_file.read_text()
-        except UnicodeDecodeError:
-            try:
-                content = source_file.read_text(encoding="cp1252")
-            except UnicodeError:
-                print(f"WARNING: ignoring {source_file}; unknown encoding")
-                continue
-        try:
-            output = run_script(GET_STATISTICS_AND_SAMPLINGS, content,
-                                source_file)
-        except subprocess.CalledProcessError:
-            # Retry reading the argument names from ``.id`` instead of
-            # ``.arg`` (presumably needed for Python 2 interpreters).
-            output = run_script(
-                GET_STATISTICS_AND_SAMPLINGS.replace("el.arg", "el.id"),
-                content, source_file
-            )
-        # The helper prints the repr of a list of lists of strings.
-        # literal_eval reads that directly, including ``u'...'``
-        # prefixes and embedded quotes that a quote swap followed by
-        # JSON parsing cannot handle.
-        statistics, samplings = literal_eval(output.strip())
-        all_statistics.update(statistics)
-        all_samplings.update(samplings)
-    # Sorted so that repeated runs process the names in the same order.
-    return sorted(all_statistics), sorted(all_samplings)
-
-
-def create_reference_results(reference_versions, sample_data_dir):
-    """Create reference results.
-
-    For every package version the statistics and samplings are
-    collected and ``CALCULATE_STATISTICS`` is run with the matching
-    interpreter from the parent directory of the package.  The results
-    are written to ``reference_results/<package name>`` next to the
-    sample data directory.  Combinations the helper script reports as
-    failed are printed.
-
-    :param reference_versions: zip of old package versions and
-                               interpreters to use
-    :param sample_data_dir: path to the sample data directory
-
-    :raises subprocess.CalledProcessError: if both variants of the
-        helper script fail for a version
-    """
-    # Local import: the module-level imports of this file do not
-    # provide ``ast``.
-    from ast import literal_eval
-
-    def run_script(interpreter, script, script_args, cwd):
-        """Run ``script`` with ``script_args`` and return its output."""
-        return subprocess.run(
-            [interpreter, "-c", script, *script_args],
-            capture_output=True, cwd=cwd, check=True, text=True
-        ).stdout
-
-    # The helper runs with a different working directory, so every
-    # path handed to it has to be absolute.
-    sample_data_dir = Path(sample_data_dir).resolve()
-    data_path = Path(sample_data_dir, "sample_data.csv")
-    metadata_path = Path(sample_data_dir, "sample_metadata.cfg")
-    for toarstats_version, python_interpreter in reference_versions:
-        statistics, samplings = get_statistics_and_samplings(
-            toarstats_version, python_interpreter
-        )
-        results_dir = Path(sample_data_dir.parent, "reference_results",
-                           toarstats_version.name)
-        results_dir.mkdir(parents=True, exist_ok=True)
-        cur_script = CALCULATE_STATISTICS.replace("toarstats_version",
-                                                  toarstats_version.name)
-        script_args = [str(data_path), str(metadata_path), str(results_dir),
-                       str(statistics), str(samplings)]
-        try:
-            output = run_script(python_interpreter, cur_script, script_args,
-                                toarstats_version.parent)
-        except subprocess.CalledProcessError:
-            # Retry with the module name ``ConfigParser`` (presumably
-            # needed for Python 2 interpreters).
-            output = run_script(
-                python_interpreter,
-                cur_script.replace("configparser", "ConfigParser"),
-                script_args, toarstats_version.parent
-            )
-        # The helper prints the repr of a list of string pairs.
-        # literal_eval reads that directly, including ``u'...'``
-        # prefixes, so no text patching of the output is needed.
-        failed_combinations = literal_eval(output.strip())
-        if failed_combinations:
-            print(toarstats_version.name)
-            for combination in failed_combinations:
-                print(*combination)
-
-
-def main():
-    """Run the script.
-
-    The command line is parsed first; afterwards the sample data is
-    written to ``sample_data`` next to this file and the reference
-    results are created for every version/interpreter pair.
-    """
-    versions_and_interpreters = get_command_line_arguments()
-    script_dir = Path(__file__).resolve().parent
-    sample_data_dir = script_dir / "sample_data"
-    create_sample_data(sample_data_dir)
-    create_reference_results(versions_and_interpreters, sample_data_dir)
-
-
-if __name__ == "__main__":
-    main()