diff --git a/.gitignore b/.gitignore index f5e425f752a1de0de0c68036a54e0d19450320bb..04bc0d1b9ec314e6398dcc7907d73686f00655f3 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ activate_env.sh /tmp/ /logging/ /HPC_logging/ +/tmp_downloads/ # test related data # ##################### diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py index c96026736074bc29497073e5669d557fdc6d647d..52b1121e7b8f165476d3c27d9e24b077a731f8e5 100644 --- a/mlair/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -701,12 +701,23 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): self._model_setup = model_setup self._labels = None self._data = self._prepare_data(data) + default_plot_name = self.plot_name + # draw full detail plot + self.plot_name = default_plot_name + "_full_detail" self._plot() self._save() - # draw also a vertical version - self.plot_name += "_vertical" + # draw also a vertical full detail version + self.plot_name = default_plot_name + "_full_detail_vertical" self._plot_vertical() self._save() + # draw default plot with only model comparison + self.plot_name = default_plot_name + self._plot(single_model_comparison=True) + self._save() + # draw also a vertical version of the default plot + self.plot_name = default_plot_name + "_vertical" + self._plot_vertical(single_model_comparison=True) + self._save() def _prepare_data(self, data: pd.DataFrame) -> pd.DataFrame: """ @@ -724,12 +735,13 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): self._labels = [str(i) + "d" for i in data.index.levels[1].values] return data.stack(level=0).reset_index(level=2, drop=True).reset_index(name="data") - def _plot(self): + def _plot(self, single_model_comparison=False): """Plot skill scores of the comparisons.""" size = max([len(np.unique(self._data.comparison)), 6]) fig, ax = plt.subplots(figsize=(size, size * 0.8)) - order = self._create_pseudo_order() - sns.boxplot(x="comparison", y="data", 
hue="ahead", data=self._data, whis=1., ax=ax, palette="Blues_d", + data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data + order = self._create_pseudo_order(data) + sns.boxplot(x="comparison", y="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d", showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, order=order) ax.axhline(y=0, color="grey", linewidth=.5) @@ -740,11 +752,12 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): ax.legend(handles, self._labels) plt.tight_layout() - def _plot_vertical(self): + def _plot_vertical(self, single_model_comparison=False): """Plot skill scores of the comparisons, but vertically aligned.""" fig, ax = plt.subplots() - order = self._create_pseudo_order() - sns.boxplot(y="comparison", x="data", hue="ahead", data=self._data, whis=1., ax=ax, palette="Blues_d", + data = self._filter_comparisons(self._data) if single_model_comparison is True else self._data + order = self._create_pseudo_order(data) + sns.boxplot(y="comparison", x="data", hue="ahead", data=data, whis=1., ax=ax, palette="Blues_d", showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, order=order) # ax.axhline(x=0, color="grey", linewidth=.5) @@ -754,12 +767,17 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): ax.legend(handles, self._labels) plt.tight_layout() - def _create_pseudo_order(self): + def _create_pseudo_order(self, data): """Provide first predefined elements and append all remaining.""" first_elements = [f"{self._model_setup}-persi", "ols-persi", f"{self._model_setup}-ols"] - uniq, index = np.unique(first_elements + self._data.comparison.unique().tolist(), return_index=True) + first_elements = list(filter(lambda x: x in data.comparison.tolist(), first_elements)) + uniq, index = np.unique(first_elements + data.comparison.unique().tolist(), return_index=True) return uniq[index.argsort()] + def 
_filter_comparisons(self, data): + filtered_headers = list(filter(lambda x: "nn-" in x, data.comparison.unique())) + return data[data.comparison.isin(filtered_headers)] + def _lim(self) -> Tuple[float, float]: """ Calculate axis limits from data (Can be used to set axis extend). diff --git a/mlair/reference_data_handler/__init__.py b/mlair/reference_models/__init__.py similarity index 100% rename from mlair/reference_data_handler/__init__.py rename to mlair/reference_models/__init__.py diff --git a/mlair/reference_data_handler/abstract_reference_data_handler.py b/mlair/reference_models/abstract_reference_model.py similarity index 93% rename from mlair/reference_data_handler/abstract_reference_data_handler.py rename to mlair/reference_models/abstract_reference_model.py index 19c102946f4958992c82c7cf31d1281baa7418b9..e187e7ef62e3fe84f7ba2149a490f63ac718308f 100644 --- a/mlair/reference_data_handler/abstract_reference_data_handler.py +++ b/mlair/reference_models/abstract_reference_model.py @@ -36,12 +36,13 @@ class AbstractReferenceModel(ABC): return res -class AbstractReferenceb2share(AbstractReferenceModel): +class AbstractReferenceB2share(AbstractReferenceModel): """ Abstract class for reference models located on b2share (eudat or fz-juelich) See also https://github.com/EUDAT-Training/B2SHARE-Training/blob/master/api/01_Retrieve_existing_record.md """ + def __init__(self, b2share_hosturl: str, b2share_bucket: str, b2share_key: str): super().__init__() self.b2share_hosturl = b2share_hosturl @@ -63,3 +64,6 @@ class AbstractReferenceb2share(AbstractReferenceModel): out=f"{tmp_download_path}{self.b2share_key}", bar=self.bar_custom ) + + def make_reference_available_locally(self): + raise NotImplementedError diff --git a/mlair/reference_data_handler/intellio3_v1_reference.py b/mlair/reference_models/reference_model_intellio3_v1.py similarity index 84% rename from mlair/reference_data_handler/intellio3_v1_reference.py rename to 
mlair/reference_models/reference_model_intellio3_v1.py index aec05a3bc66a7451088824893abae72af00621fa..5f0bae48a32f54a0e58d0e6d6139370af8106d84 100644 --- a/mlair/reference_data_handler/intellio3_v1_reference.py +++ b/mlair/reference_models/reference_model_intellio3_v1.py @@ -11,10 +11,10 @@ import xarray as xr import shutil from mlair.configuration.path_config import check_path_and_create -from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share +from mlair.reference_models.abstract_reference_model import AbstractReferenceB2share -class IntelliO3Reference(AbstractReferenceb2share): +class IntelliO3_ts_v1(AbstractReferenceB2share): """ Reference handler that extracts IntelliO3-ts v1.0 forecasts (Kleinert, 2021). @@ -57,25 +57,24 @@ class IntelliO3Reference(AbstractReferenceb2share): :return: base dir of tmp path and list of forecast files :rtype: tuple(str, list(str)) """ - for root, dirs, file_names in os.walk(self.tmp_extract_path+self.orig_forecast_path): + for base_dir, dirs, file_names in os.walk(self.tmp_extract_path + self.orig_forecast_path): pass - return root, file_names + return base_dir, file_names def read_and_drop(self, sel_coords: dict = None): """ Reads original forecast files, renames coord type and store forecasts as NetCdf4 files :param sel_coords: - :type sel_coords: """ if sel_coords is None: - sel_coords = {'type': 'CNN'} + sel_coords = {'type': ['CNN']} in_path, files = self.file_list() check_path_and_create(self.ref_store_path) for infile in files: - data = xr.open_dataarray(f"{in_path}{infile}") + data = xr.open_dataarray(os.path.join(in_path, infile)) data = data.sel(**sel_coords) - data.coords['type'] = (self.ref_name) - data.to_netcdf(f"{self.ref_store_path}{infile}") + data.coords['type'] = ['nn'] + data.to_netcdf(os.path.join(self.ref_store_path, infile)) def make_reference_available_locally(self, remove_tmp_dir: bool = True): """ @@ -93,6 +92,6 @@ class 
IntelliO3Reference(AbstractReferenceb2share): if __name__ == '__main__': - io3 = IntelliO3Reference('IntelliO3-ts') + io3 = IntelliO3_ts_v1('IntelliO3-ts') io3.make_reference_available_locally() diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 21aebd62bab490363797c0ef0624daa1d488097b..bf54b0619f94d21524edc95a52c2ad49dab788c5 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd from mlair.data_handler import DataCollection, AbstractDataHandler -from mlair.helpers import TimeTracking +from mlair.helpers import TimeTracking, to_list from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment @@ -65,6 +65,7 @@ class PreProcessing(RunEnvironment): self.data_store.set("stations", valid_stations) self.split_train_val_test() self.report_pre_processing() + self.prepare_competitors() def report_pre_processing(self): """Log some metrics on data and create latex report.""" @@ -325,6 +326,25 @@ class PreProcessing(RunEnvironment): if transformation_dict is not None: self.data_store.set("transformation", transformation_dict) + def prepare_competitors(self): + """ + Prepare competitor models already in the preprocessing stage. This is performed here, because some models might + need to have internet access, which, depending on the operating system, is not possible during postprocessing. + This method currently only checks whether the IntelliO3-ts-v1 model is requested as competitor and downloads the + data if required. 
+ """ + logging.info("Searching for competitors to be prepared for use.") + competitors = to_list(self.data_store.get_default("competitors", default=[])) + if len(competitors) > 0: + for competitor_name in competitors: + if competitor_name.lower() == "IntelliO3-ts-v1".lower(): + logging.info("Prepare IntelliO3-ts-v1 model") + from mlair.reference_models.reference_model_intellio3_v1 import IntelliO3_ts_v1 + path = os.path.join(self.data_store.get("competitor_path"), competitor_name) + IntelliO3_ts_v1("IntelliO3-ts-v1", path).make_reference_available_locally(remove_tmp_dir=False) + else: + logging.info("No preparation required because no competitor was provided to the workflow.") + def f_proc(data_handler, station, name_affix, store, **kwargs): """ diff --git a/requirements.txt b/requirements.txt index 77f300df6a8b97395fd5c6771310f7b7b9537a59..47995404d7c8c202071197bb2aab9dba81e18689 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,6 +24,7 @@ Markdown==3.2.1 matplotlib==3.2.0 mock==4.0.1 more-itertools==8.2.0 +netcdf4==1.5.5.1 numpy==1.18.1 packaging==20.3 pandas==1.0.1 diff --git a/run.py b/run.py index 9f5d0f083081d097e24b86f5dfc2b3b380e28e9b..f2bb336e8a886a3c0c4d60736c77b5ebc27cad67 100644 --- a/run.py +++ b/run.py @@ -5,6 +5,7 @@ import argparse from mlair.workflows import DefaultWorkflow from mlair.helpers import remove_items from mlair.configuration.defaults import DEFAULT_PLOT_LIST +import os def load_stations(): @@ -20,9 +21,14 @@ def load_stations(): def main(parser_args): plots = remove_items(DEFAULT_PLOT_LIST, "PlotConditionalQuantiles") - workflow = DefaultWorkflow(stations=load_stations(), - train_model=False, create_new_model=False, network="UBA", - evaluate_bootstraps=False, plot_list=["PlotStationMap"], **parser_args.__dict__) + workflow = DefaultWorkflow( # stations=load_stations(), + # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"], + stations=["DEBW013", "DEBW087", "DEBW107", "DEBW076"], + train_model=False, create_new_model=True, 
network="UBA", + evaluate_bootstraps=False, # plot_list=["PlotCompetitiveSkillScore"], + competitors=["test_model", "test_model2"], + competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), + **parser_args.__dict__) workflow.run() diff --git a/run_mixed_sampling.py b/run_mixed_sampling.py index 04683a17ede641a5370aaeef741d2f4546f966b7..6ffb659953157060c39afb5960821e729df555dd 100644 --- a/run_mixed_sampling.py +++ b/run_mixed_sampling.py @@ -25,9 +25,9 @@ def main(parser_args): interpolation_limit=(3, 1), overwrite_local_data=False, sampling=("hourly", "daily"), statistics_per_var=stats, - create_new_model=False, train_model=False, epochs=1, - window_history_size=48, - window_history_offset=17, + create_new_model=True, train_model=False, epochs=1, + window_history_size=6 * 24 + 16, + window_history_offset=16, kz_filter_length=[100 * 24, 15 * 24], kz_filter_iter=[4, 5], start="2006-01-01", diff --git a/test/test_reference_data_handler/test_abstract_reference_data_handler.py b/test/test_reference_models/test_abstract_reference_model.py similarity index 80% rename from test/test_reference_data_handler/test_abstract_reference_data_handler.py rename to test/test_reference_models/test_abstract_reference_model.py index 98267fc913df299647c0fe8a1d95074e5d22bf04..4c455219be88a97e4f7f13fec663bd95bc8cf7d8 100644 --- a/test/test_reference_data_handler/test_abstract_reference_data_handler.py +++ b/test/test_reference_models/test_abstract_reference_model.py @@ -1,8 +1,8 @@ import pytest import mock -from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceModel -from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share +from mlair.reference_models.abstract_reference_model import AbstractReferenceModel +from mlair.reference_models.abstract_reference_model import AbstractReferenceB2share class TestAbstractReferenceDataHandler: @@ -32,13 +32,13 @@ class TestAbstractReferenceb2share: b2share_hosturl = 
"b2share.url" b2share_bucket = "1111-2222-3333" b2share_key = "b2share_key.tar.gz" - return AbstractReferenceb2share(b2share_hosturl, b2share_bucket, b2share_key) + return AbstractReferenceB2share(b2share_hosturl, b2share_bucket, b2share_key) def test_inheritance(self): - assert issubclass(AbstractReferenceb2share, AbstractReferenceModel) + assert issubclass(AbstractReferenceB2share, AbstractReferenceModel) def test_init(self, ar): - assert isinstance(ar, AbstractReferenceb2share) + assert isinstance(ar, AbstractReferenceB2share) assert ar.b2share_hosturl == "b2share.url" assert ar.b2share_bucket == "1111-2222-3333" assert ar.b2share_key == "b2share_key.tar.gz" @@ -51,4 +51,4 @@ class TestAbstractReferenceb2share: pass def test_download_from_b2share(self): - pass \ No newline at end of file + pass diff --git a/test/test_reference_data_handler/test_intellio3_v1_reference.py b/test/test_reference_models/test_reference_model_intellio3_v1.py similarity index 61% rename from test/test_reference_data_handler/test_intellio3_v1_reference.py rename to test/test_reference_models/test_reference_model_intellio3_v1.py index 6582b99ec197f282a561bcdecd4d331cfc964327..bfd4cae3f04121ab0dd6a25e5d88ff88d73b54d3 100644 --- a/test/test_reference_data_handler/test_intellio3_v1_reference.py +++ b/test/test_reference_models/test_reference_model_intellio3_v1.py @@ -1,19 +1,18 @@ import pytest - -from mlair.reference_data_handler.intellio3_v1_reference import IntelliO3Reference -from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share +from mlair.reference_models.reference_model_intellio3_v1 import IntelliO3_ts_v1 +from mlair.reference_models.abstract_reference_model import AbstractReferenceB2share class TestIntelliO3Reference: @pytest.fixture def io3(self): - return IntelliO3Reference("IntelliO3-test") + return IntelliO3_ts_v1("IntelliO3-test") def test_init_none_path(self): - io3 = IntelliO3Reference("IntelliO3-test") - assert isinstance(io3, 
IntelliO3Reference) + io3 = IntelliO3_ts_v1("IntelliO3-test") + assert isinstance(io3, IntelliO3_ts_v1) assert io3.ref_name == "IntelliO3-test" assert io3.ref_store_path == "IntelliO3-test/" assert io3.tmp_extract_path == "tmp_downloads/" @@ -24,12 +23,12 @@ class TestIntelliO3Reference: assert io3.b2share_hosturl == "https://b2share.eudat.eu" def test_init_extra_path(self): - io3 = IntelliO3Reference("IntelliO3-test", "DummyExtraPath/") - assert isinstance(io3, IntelliO3Reference) + io3 = IntelliO3_ts_v1("IntelliO3-test", "DummyExtraPath/") + assert isinstance(io3, IntelliO3_ts_v1) assert io3.ref_store_path == "DummyExtraPath/" def test_inheritance(self): - assert issubclass(IntelliO3Reference, AbstractReferenceb2share) + assert issubclass(IntelliO3_ts_v1, AbstractReferenceB2share) def test_untar_forecasts(self, io3): pass @@ -41,5 +40,4 @@ class TestIntelliO3Reference: pass def test_make_reference_available_locally(self): - io3 = IntelliO3Reference("IntelliO3-test", "DummyExtraPath/") - + pass diff --git a/test/test_reference_data_handler/tmp_downloads/IntelliO3-ts.tar.gz b/test/test_reference_models/tmp_downloads/IntelliO3-ts.tar.gz similarity index 100% rename from test/test_reference_data_handler/tmp_downloads/IntelliO3-ts.tar.gz rename to test/test_reference_models/tmp_downloads/IntelliO3-ts.tar.gz diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py index 11c46e99fb38489f5cbb26a8a87032049c96c7ca..b5a1914e6b2aacd238f244d304184d9754326db7 100644 --- a/test/test_run_modules/test_pre_processing.py +++ b/test/test_run_modules/test_pre_processing.py @@ -46,8 +46,12 @@ class TestPreProcessing: with PreProcessing(): assert caplog.record_tuples[0] == ('root', 20, 'PreProcessing started') assert caplog.record_tuples[1] == ('root', 20, 'check valid stations started (preprocessing)') - assert caplog.record_tuples[-1] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 5 ' + assert 
caplog.record_tuples[-3] == ('root', 20, PyTestRegex(r'run for \d+:\d+:\d+ \(hh:mm:ss\) to check 5 ' r'station\(s\). Found 5/5 valid stations.')) + assert caplog.record_tuples[-2] == ('root', 20, "Searching for competitors to be prepared for use.") + assert caplog.record_tuples[-1] == ( + 'root', 20, "No preparation required because no competitor was provided " + "to the workflow.") RunEnvironment().__del__() def test_run(self, obj_with_exp_setup):