Skip to content
Snippets Groups Projects

Niklas issue005 refac refactor and expand testing

Merged Ghost User requested to merge niklas_issue005_refac_refactor-and-expand-testing into develop
5 files
+ 31
242
Compare changes
  • Side-by-side
  • Inline
Files
5
#!/usr/bin/env python3
"""Create sample data and reference results for the toarstats package.

Without any arguments only sample data will be created.

To run this script use:
create_sample_data_and_reference_results.py [-h] [-t TOARSTATS]
                                            [-p PYTHON]

optional arguments:
  -h, --help
        show help message and exit
  -t TOARSTATS, --toarstats TOARSTATS
        paths to the different toarstats versions
  -p PYTHON, --python PYTHON
        paths to the python interpreters which should be used for the
        different toarstats versions
"""
from argparse import ArgumentParser
import ast
import json
from pathlib import Path
import subprocess

import numpy as np
import pandas as pd
# Source of a helper script that is run with ``<interpreter> -c``.
#
# The script receives the text of one module in ``sys.argv[1]`` and the
# module's file name in ``sys.argv[2]``.  It walks the module's top-level
# statements and collects
#   * the names of functions whose positional parameters are exactly
#     ``df, dfref, mtype, varname, varunits, metadata, seasons,
#     data_capture`` (the statistics), and
#   * the string keys of a top-level dict assigned to ``RSTAGS`` (the
#     samplings); ``vegseason`` is added whenever ``seasonal`` is present.
# The result is printed as one JSON line ``[statistics, samplings]`` with
# both lists sorted.
#
# Only standard-library features are used inside the script and argument
# names are read through ``arg`` or ``id``, whichever the running
# interpreter's ``ast`` provides, so the same text works for both flavours.
GET_STATISTICS_AND_SAMPLINGS = """
import ast
import json
import sys

STATISTIC_ARGUMENTS = ["df", "dfref", "mtype", "varname", "varunits",
                       "metadata", "seasons", "data_capture"]

try:
    text_types = (str, unicode)
except NameError:
    text_types = (str,)

source = sys.argv[1]
filename = sys.argv[2]
statistics = set()
samplings = set()
for node in ast.parse(source, filename).body:
    if isinstance(node, ast.FunctionDef):
        # Parameter nodes expose their name as "arg" or as "id",
        # depending on the interpreter running this script.
        names = [getattr(el, "arg", getattr(el, "id", None))
                 for el in node.args.args]
        if names == STATISTIC_ARGUMENTS:
            statistics.add(node.name)
    elif (isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict)
          and isinstance(node.targets[0], ast.Name)
          and node.targets[0].id == "RSTAGS"):
        for key in node.value.keys:
            # String keys are stored in "value" or in "s"; anything that
            # is not a plain string literal is skipped.
            text = key.value if hasattr(key, "value") else getattr(key, "s",
                                                                   None)
            if isinstance(text, text_types):
                samplings.add(text)
if "seasonal" in samplings:
    samplings.add("vegseason")
print(json.dumps([sorted(statistics), sorted(samplings)]))
"""
CALCULATE_STATISTICS = """
from collections import namedtuple
from configparser import ConfigParser
import json
import os.path
import sys
import pandas as pd
from toarstats_version.stats_main import stats_wrapper
class DataSlice:
def __init__(self, index, values):
self.x = index
self.y = values
self.yattr = {}
data_path = sys.argv[1]
metadata_path = sys.argv[2]
results_dir = sys.argv[3]
statistics = json.loads(sys.argv[4].replace("'", '"'))
samplings = json.loads(sys.argv[5].replace("'", '"'))
failed_combinations = []
for statistic in statistics:
for sampling in samplings:
data = pd.read_csv(data_path, header=None, index_col=0, squeeze=True,
parse_dates=True)
parser = ConfigParser()
parser.read(metadata_path)
Metadata = namedtuple("Metadata", ["station_lat", "station_lon",
"station_climatic_zone"])
metadata = Metadata(parser.getfloat("METADATA", "station_lat"),
parser.getfloat("METADATA", "station_lon"),
parser.getint("METADATA", "station_climatic_zone"))
try:
results = stats_wrapper(sampling, [statistic],
DataSlice(data.index, data.values),
metadata)
except ValueError:
failed_combinations.append([statistic, sampling])
continue
pd.DataFrame(
{i: pd.Series(res.y, res.x) for i, res in enumerate(results)}
).to_csv(os.path.join(results_dir, statistic+"-"+sampling+".csv"),
header=False)
print(failed_combinations)
"""
def get_command_line_arguments():
    """Parse command line arguments.

    Every ``-t``/``--toarstats`` path is paired, in the order given,
    with the ``-p``/``--python`` interpreter at the same position.  The
    two options must therefore be given equally often; otherwise the
    parser reports a usage error and exits instead of silently dropping
    the unmatched values.

    :return: A zip of the resolved paths to ``toarstats`` versions and
             their respective python interpreters
    """
    parser = ArgumentParser(description="Create sample data and reference"
                                        " results for the toarstats package.")
    parser.add_argument("-t", "--toarstats", action="append", default=[],
                        help="paths to the different toarstats versions")
    parser.add_argument("-p", "--python", action="append", default=[],
                        help="paths to the python interpreters which should be"
                             " used for the different toarstats versions")
    args = parser.parse_args()
    if len(args.toarstats) != len(args.python):
        # zip() would stop at the shorter list and hide the mistake.
        parser.error(
            "each toarstats version needs exactly one python interpreter"
            f" (got {len(args.toarstats)} version(s) and"
            f" {len(args.python)} interpreter(s))"
        )
    return zip([Path(el).resolve() for el in args.toarstats], args.python)
def create_sample_data(sample_data_dir, periods=100000, seed=None):
    """Create sample data.

    Writes two files into ``sample_data_dir`` (created if necessary,
    including missing parents):

    * ``sample_data.csv`` -- an hourly time series starting at
      2011-04-17 09:00 with values drawn uniformly from [13.4, 61.7).
      8.5 % of the time steps (rounded down) are picked at random and
      left out of the file to simulate gaps.
    * ``sample_metadata.cfg`` -- a ``METADATA`` section with the station
      latitude, longitude and climatic zone.

    :param sample_data_dir: path to the sample data directory
    :param periods: number of hourly time steps to generate before the
                    gaps are removed
    :param seed: seed for the random number generator; ``None`` gives
                 different data on every call
    """
    sample_data_dir = Path(sample_data_dir)
    sample_data_dir.mkdir(parents=True, exist_ok=True)
    rng = np.random.default_rng(seed)
    # A Timedelta is used for the step instead of the "H" alias, which
    # recent pandas releases flag as deprecated.
    datetime_index = pd.date_range(start="2011-04-17 09:00", periods=periods,
                                   freq=pd.Timedelta(hours=1))
    values = rng.uniform(13.4, 61.7, len(datetime_index))
    missing = rng.choice(values.size, size=int(0.085*values.size),
                         replace=False)
    values[missing] = np.nan
    pd.Series(values, datetime_index).dropna().to_csv(
        Path(sample_data_dir, "sample_data.csv"), header=False
    )
    Path(sample_data_dir, "sample_metadata.cfg").write_text(
        "[METADATA]\n"
        "station_lat: 50.906389\n"
        "station_lon: 6.403889\n"
        "station_climatic_zone: 3\n",
        encoding="utf-8"
    )
def _read_module_source(file):
    """Return the text of ``file``, or ``None`` if it cannot be decoded.

    UTF-8 is tried first and cp1252 second, independent of the locale.
    """
    for encoding in ("utf-8", "cp1252"):
        try:
            return file.read_text(encoding=encoding)
        except UnicodeError:
            continue
    return None


def _scan_module(python_interpreter, script, content, file):
    """Run ``script`` on one module and return the parsed result.

    ``script`` is executed with ``python_interpreter -c`` and gets the
    module text and file name as arguments.  Its output is expected to
    be a two-element list; single quotes are turned into double quotes
    before it is parsed as JSON.

    :raises subprocess.CalledProcessError: if the script exits with a
                                           non-zero status
    """
    completed = subprocess.run(
        [str(python_interpreter), "-c", script, content, str(file)],
        capture_output=True, check=True, text=True
    )
    return json.loads(completed.stdout.replace("'", '"'))


def get_statistics_and_samplings(toarstats_version, python_interpreter):
    """Collect statistics and samplings from older package version.

    Every ``*.py`` file directly inside ``toarstats_version`` is handed
    to ``GET_STATISTICS_AND_SAMPLINGS``, which runs under
    ``python_interpreter``.  If that run fails, it is repeated once with
    ``el.arg`` replaced by ``el.id`` in the script.  Files that can be
    decoded neither as UTF-8 nor as cp1252 are skipped with a warning.

    :param toarstats_version: path to the old package version
    :param python_interpreter: path to the interpreter to use
    :return: A sorted list of all statistics and a sorted list of all
             samplings
    :raises subprocess.CalledProcessError: if both runs fail for a file
    """
    all_statistics = set()
    all_samplings = set()
    # Sorted so that files are processed in a reproducible order.
    for file in sorted(toarstats_version.glob("*.py")):
        content = _read_module_source(file)
        if content is None:
            print(f"WARNING: ignoring {file}; unknown encoding")
            continue
        try:
            statistics, samplings = _scan_module(
                python_interpreter, GET_STATISTICS_AND_SAMPLINGS, content,
                file
            )
        except subprocess.CalledProcessError:
            statistics, samplings = _scan_module(
                python_interpreter,
                GET_STATISTICS_AND_SAMPLINGS.replace("el.arg", "el.id"),
                content, file
            )
        all_statistics.update(statistics)
        all_samplings.update(samplings)
    return sorted(all_statistics), sorted(all_samplings)
def _run_reference_script(python_interpreter, script, arguments, working_dir):
    """Run ``script`` and return the list printed on its last output line.

    The line is parsed with ``ast.literal_eval``.  That accepts JSON
    style lists of strings as well as Python reprs (with or without a
    ``u`` string prefix), so no textual clean-up of the output is
    needed.

    :raises subprocess.CalledProcessError: if the script exits with a
                                           non-zero status
    :raises ValueError: if the script printed nothing
    """
    completed = subprocess.run(
        [str(python_interpreter), "-c", script] + arguments,
        capture_output=True, cwd=str(working_dir), check=True, text=True
    )
    lines = completed.stdout.strip().splitlines()
    if not lines:
        raise ValueError("reference script produced no output")
    return ast.literal_eval(lines[-1])


def create_reference_results(reference_versions, sample_data_dir):
    """Create reference results.

    For every package version the available statistics and samplings
    are collected and ``CALCULATE_STATISTICS`` is run with the matching
    interpreter from the directory containing the package.  Results are
    written to ``reference_results/<package name>`` next to
    ``sample_data_dir``.  If the run fails, it is repeated once with
    ``configparser`` replaced by ``ConfigParser`` in the script.
    Combinations reported as failed by the script are printed, preceded
    by the package name.

    :param reference_versions: zip of old package versions and
                               interpreters to use
    :param sample_data_dir: path to the sample data directory
    :raises subprocess.CalledProcessError: if both runs fail
    """
    for toarstats_version, python_interpreter in reference_versions:
        statistics, samplings = get_statistics_and_samplings(
            toarstats_version, python_interpreter
        )
        results_dir = Path(sample_data_dir.parent, "reference_results",
                           toarstats_version.name)
        results_dir.mkdir(parents=True, exist_ok=True)
        cur_script = CALCULATE_STATISTICS.replace("toarstats_version",
                                                  toarstats_version.name)
        # Everything is passed as plain strings; the two lists go over as
        # JSON, which the script parses.
        arguments = [
            str(Path(sample_data_dir, "sample_data.csv")),
            str(Path(sample_data_dir, "sample_metadata.cfg")),
            str(results_dir),
            json.dumps(list(statistics)),
            json.dumps(list(samplings)),
        ]
        try:
            failed_combinations = _run_reference_script(
                python_interpreter, cur_script, arguments,
                toarstats_version.parent
            )
        except subprocess.CalledProcessError:
            failed_combinations = _run_reference_script(
                python_interpreter,
                cur_script.replace("configparser", "ConfigParser"),
                arguments, toarstats_version.parent
            )
        if failed_combinations:
            print(toarstats_version.name)
            for combination in failed_combinations:
                print(*combination)
def main():
    """Entry point of the script.

    Reads the command line, writes the sample data into a
    ``sample_data`` directory next to this file and then creates the
    reference results for every requested package version.
    """
    versions_and_interpreters = get_command_line_arguments()
    script_dir = Path(__file__).resolve().parent
    data_dir = script_dir / "sample_data"
    create_sample_data(data_dir)
    create_reference_results(versions_and_interpreters, data_dir)


if __name__ == "__main__":
    main()
Loading