esde / toar-public / toarstats · Merge request !7

Niklas issue005 refac refactor and expand testing

Merged: niklas_issue005_refac_refactor-and-expand-testing into develop
Ghost User requested to merge niklas_issue005_refac_refactor-and-expand-testing into develop 1 year ago

Commits: 8 · Pipelines: 0 · Changes: 5

Closes #5 (closed)
Merge request reports

Viewing commit 186d1d80 · 5 files · +31 −242
Commit 186d1d80: Modified the tests for the 'metrics' subpackage to work under pandas>=2.0.
Niklas Selke authored 1 year ago
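The helper script deleted below relies on pandas behaviour that pandas 2.0 removed: it calls pd.read_csv(..., squeeze=True), and the squeeze keyword was dropped in pandas 2.0. The following is a minimal sketch of the kind of adjustment needed under pandas>=2.0, shown for illustration only and not the exact change made in this merge request; "sample_data.csv" is the file the script generates.

import pandas as pd

# Old pattern used by the deleted helper; the "squeeze" keyword was removed
# in pandas 2.0:
#     data = pd.read_csv(path, header=None, index_col=0, squeeze=True,
#                        parse_dates=True)
# Equivalent on pandas>=2.0 (also works on 1.x): read the single-column frame
# and squeeze it into a Series afterwards.
data = pd.read_csv("sample_data.csv", header=None, index_col=0,
                   parse_dates=True).squeeze("columns")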
tests/test_metrics/create_sample_data_and_reference_results.py · deleted · 100644 → 0 · +0 −233
#!/usr/bin/env python3
"""
Create sample data and reference results for the toarstats package.

Without any arguments only sample data will be created.

To run this script use:

create_sample_data_and_reference_results.py [-h] [-t TOARSTATS]
                                            [-p PYTHON]

optional arguments:
  -h, --help            show help message and exit
  -t TOARSTATS, --toarstats TOARSTATS
                        paths to the different toarstats versions
  -p PYTHON, --python PYTHON
                        paths to the python interpreters which should be used
                        for the different toarstats versions
"""
from argparse import ArgumentParser
import json
from pathlib import Path
import subprocess

import numpy as np
import pandas as pd

GET_STATISTICS_AND_SAMPLINGS = """
import ast
import sys

source = sys.argv[1]
filename = sys.argv[2]
statistics = set()
samplings = set()
for node in ast.parse(source, filename).body:
    if (isinstance(node, ast.FunctionDef)
            and [el.arg for el in node.args.args] == [
                "df", "dfref", "mtype", "varname", "varunits", "metadata",
                "seasons", "data_capture"]):
        statistics.add(node.name)
    elif (isinstance(node, ast.Assign) and isinstance(node.value, ast.Dict)
          and node.targets[0].id == "RSTAGS"):
        samplings.update([sampling.s for sampling in node.value.keys])
if "seasonal" in samplings and "vegseason" not in samplings:
    samplings.add("vegseason")
print([list(statistics), list(samplings)])
"""

CALCULATE_STATISTICS = """
from collections import namedtuple
from configparser import ConfigParser
import json
import os.path
import sys

import pandas as pd

from toarstats_version.stats_main import stats_wrapper


class DataSlice:
    def __init__(self, index, values):
        self.x = index
        self.y = values
        self.yattr = {}


data_path = sys.argv[1]
metadata_path = sys.argv[2]
results_dir = sys.argv[3]
statistics = json.loads(sys.argv[4].replace("'", '"'))
samplings = json.loads(sys.argv[5].replace("'", '"'))
failed_combinations = []
for statistic in statistics:
    for sampling in samplings:
        data = pd.read_csv(data_path, header=None, index_col=0, squeeze=True,
                           parse_dates=True)
        parser = ConfigParser()
        parser.read(metadata_path)
        Metadata = namedtuple("Metadata", ["station_lat", "station_lon",
                                           "station_climatic_zone"])
        metadata = Metadata(parser.getfloat("METADATA", "station_lat"),
                            parser.getfloat("METADATA", "station_lon"),
                            parser.getint("METADATA", "station_climatic_zone"))
        try:
            results = stats_wrapper(sampling, [statistic],
                                    DataSlice(data.index, data.values),
                                    metadata)
        except ValueError:
            failed_combinations.append([statistic, sampling])
            continue
        pd.DataFrame(
            {i: pd.Series(res.y, res.x) for i, res in enumerate(results)}
        ).to_csv(os.path.join(results_dir, statistic+"-"+sampling+".csv"),
                 header=False)
print(failed_combinations)
"""


def get_command_line_arguments():
    """
    Parse command line arguments.

    :return: A zip of the paths to ``toarstats`` versions and their
             respective python interpreters
    """
    parser = ArgumentParser(description="Create sample data and reference "
                                        "results for the toarstats package.")
    parser.add_argument("-t", "--toarstats", action="append", default=[],
                        help="paths to the different toarstats versions")
    parser.add_argument("-p", "--python", action="append", default=[],
                        help="paths to the python interpreters which should be "
                             "used for the different toarstats versions")
    args = parser.parse_args()
    return zip([Path(el).resolve() for el in args.toarstats], args.python)


def create_sample_data(sample_data_dir):
    """
    Create sample data.

    :param sample_data_dir: path to the sample data directory
    """
    sample_data_dir.mkdir(exist_ok=True)
    datetime_index = pd.date_range(start="2011-04-17 09:00", periods=100000,
                                   freq="H")
    values = np.random.default_rng().uniform(13.4, 61.7, len(datetime_index))
    values[np.random.default_rng().choice(
        values.size, size=int(0.085*values.size), replace=False)] = np.nan
    pd.Series(values, datetime_index).dropna().to_csv(
        Path(sample_data_dir, "sample_data.csv"), header=False)
    Path(sample_data_dir, "sample_metadata.cfg").write_text(
        "[METADATA]\n"
        "station_lat: 50.906389\n"
        "station_lon: 6.403889\n"
        "station_climatic_zone: 3\n",
        encoding="utf-8")


def get_statistics_and_samplings(toarstats_version, python_interpreter):
    """
    Collect statistics and samplings from older package version.

    :param toarstats_version: path to the old package version
    :param python_interpreter: path to the interpreter to use

    :return: A list of all statistics and a list of all samplings
    """
    all_statistics = set()
    all_samplings = set()
    for file in toarstats_version.glob("*.py"):
        try:
            content = file.read_text()
        except UnicodeDecodeError:
            try:
                content = file.read_text(encoding="cp1252")
            except UnicodeError:
                print(f"WARNING: ignoring {file}; unknown encoding")
                continue
        try:
            statistics, samplings = json.loads(subprocess.run(
                [python_interpreter, "-c", GET_STATISTICS_AND_SAMPLINGS,
                 content, file],
                capture_output=True, check=True, text=True
            ).stdout.replace("'", '"'))
        except subprocess.CalledProcessError:
            statistics, samplings = json.loads(subprocess.run(
                [python_interpreter, "-c",
                 GET_STATISTICS_AND_SAMPLINGS.replace("el.arg", "el.id"),
                 content, file],
                capture_output=True, check=True, text=True
            ).stdout.replace("'", '"'))
        all_statistics.update(statistics)
        all_samplings.update(samplings)
    return list(all_statistics), list(all_samplings)


def create_reference_results(reference_versions, sample_data_dir):
    """
    Create reference results.

    :param reference_versions: zip of old package versions and
                               interpreters to use
    :param sample_data_dir: path to the sample data directory
    """
    for toarstats_version, python_interpreter in reference_versions:
        statistics, samplings = get_statistics_and_samplings(
            toarstats_version, python_interpreter)
        results_dir = Path(sample_data_dir.parent, "reference_results",
                           toarstats_version.name)
        results_dir.mkdir(parents=True, exist_ok=True)
        cur_script = CALCULATE_STATISTICS.replace("toarstats_version",
                                                  toarstats_version.name)
        try:
            failed_combinations = json.loads(subprocess.run(
                [python_interpreter, "-c", cur_script,
                 Path(sample_data_dir, "sample_data.csv"),
                 Path(sample_data_dir, "sample_metadata.cfg"),
                 results_dir, str(statistics), str(samplings)],
                capture_output=True, cwd=toarstats_version.parent, check=True,
                text=True
            ).stdout.replace("'", '"'))
        except subprocess.CalledProcessError:
            failed_combinations = json.loads(subprocess.run(
                [python_interpreter, "-c",
                 cur_script.replace("configparser", "ConfigParser"),
                 Path(sample_data_dir, "sample_data.csv"),
                 Path(sample_data_dir, "sample_metadata.cfg"),
                 results_dir, str(statistics), str(samplings)],
                capture_output=True, cwd=toarstats_version.parent, check=True,
                text=True
            ).stdout.replace("u'", "'").replace("'", '"'))
        if failed_combinations:
            print(toarstats_version.name)
            for combination in failed_combinations:
                print(*combination)


def main():
    """
    Main function for the script.
    """
    reference_versions = get_command_line_arguments()
    sample_data_dir = Path(Path(__file__).resolve().parent, "sample_data")
    create_sample_data(sample_data_dir)
    create_reference_results(reference_versions, sample_data_dir)


if __name__ == "__main__":
    main()
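For context, the reference results created by this script are written to reference_results/<toarstats version>/<statistic>-<sampling>.csv next to the sample_data directory. Below is a minimal sketch of how a regression test could read such a file back and compare it with freshly computed results; the helper name compare_with_reference and the exact read options are assumptions for illustration, not part of this merge request.

from pathlib import Path

import pandas as pd


def compare_with_reference(result_frame, version, statistic, sampling):
    """Compare freshly computed results against a stored reference CSV.

    The reference files are written without a header row, so they are read
    back with header=None and the first column as a datetime index.
    """
    reference_file = Path(__file__).resolve().parent.joinpath(
        "reference_results", version, f"{statistic}-{sampling}.csv")
    reference = pd.read_csv(reference_file, header=None, index_col=0,
                            parse_dates=True)
    # The CSVs carry no column labels, so give the reference the same
    # positional labels as the freshly computed frame before comparing.
    reference.columns = range(reference.shape[1])
    pd.testing.assert_frame_equal(result_frame, reference, check_names=False)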