Compare revisions

Changes are shown as if the source revision was being merged into the target revision.


Target: esde/machine-learning/mlair
Commits on Source (12)
absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
astropy==4.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
......
docs/_source/_plots/periodogram.png (62.3 KiB)
@@ -56,7 +56,7 @@ class DataHandlerMixedSamplingSingleStation(DataHandlerSingleStation):
         kwargs.update({parameter_name: parameter})

     def make_input_target(self):
-        self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
+        self._data = tuple(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()

     def load_and_interpolate(self, ind) -> [xr.DataArray, pd.DataFrame]:
@@ -110,7 +110,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
         A KZ filter is applied on the input data that has hourly resolution. Labels Y are provided as aggregated values
         with daily resolution.
         """
-        self._data = list(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
+        self._data = tuple(map(self.load_and_interpolate, [0, 1]))  # load input (0) and target (1) data
         self.set_inputs_and_targets()
         self.apply_kz_filter()
@@ -158,7 +158,7 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
     def _extract_lazy(self, lazy_data):
         _data, self.meta, _input_data, _target_data, self.cutoff_period, self.cutoff_period_days = lazy_data
         start_inp, end_inp = self.update_start_end(0)
-        self._data = list(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
+        self._data = tuple(map(lambda x: self._slice_prep(_data[x], *self.update_start_end(x)), [0, 1]))
         self.input_data = self._slice_prep(_input_data, start_inp, end_inp)
         self.target_data = self._slice_prep(_target_data, self.start, self.end)
......
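The three hunks above make the same one-word change: the loaded (input, target) pair becomes a tuple instead of a list. A minimal sketch of the pattern, with a hypothetical stand-in loader rather than MLAir's real `load_and_interpolate`, shows what the change buys: the pair has a fixed length and should not be mutated after loading.

```python
def load_and_interpolate(ind):
    """Hypothetical stand-in for the real loader: 0 -> input data, 1 -> target data."""
    return {"role": ["input", "target"][ind]}

# load input (0) and target (1) data, mirroring make_input_target()
data = tuple(map(load_and_interpolate, [0, 1]))
inputs, targets = data

# a tuple is fixed-size and immutable: an accidental .append() or item
# assignment now fails loudly instead of silently corrupting the pair
```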
@@ -299,6 +299,7 @@ class DefaultDataHandler(AbstractDataHandler):
                 for p in output:
                     dh, s = p.get()
                     _inner()
+                pool.close()
             else:  # serial solution
                 logging.info("use serial transformation approach")
                 for station in set_stations:
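The added `pool.close()` fixes a resource leak: without it, worker processes linger after all `apply_async` results have been collected. A sketch of the pattern follows, using the thread-based pool from the standard library so it runs anywhere (MLAir itself uses a process-based `multiprocessing.Pool`; `transform` is a hypothetical stand-in for the per-station work):

```python
from multiprocessing.pool import ThreadPool  # same API as multiprocessing.Pool

def transform(station):
    # hypothetical stand-in for the per-station transformation work
    return station.upper()

pool = ThreadPool(2)
output = [pool.apply_async(transform, (s,)) for s in ["debw107", "debw013"]]
results = [p.get() for p in output]  # collect results, as in the loop above
pool.close()  # the fix: signal that no further tasks will be submitted
pool.join()   # wait for workers to shut down cleanly
```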
"""Abstract plot class that should be used for preprocessing and postprocessing plots."""
__author__ = "Lukas Leufen"
__date__ = '2021-04-13'
import logging
import os
from matplotlib import pyplot as plt
class AbstractPlotClass:
"""
Abstract class for all plotting routines to unify plot workflow.
Each inheritance requires a _plot method. Create a plot class like:
.. code-block:: python
class MyCustomPlot(AbstractPlotClass):
def __init__(self, plot_folder, *args, **kwargs):
super().__init__(plot_folder, "custom_plot_name")
self._data = self._prepare_data(*args, **kwargs)
self._plot(*args, **kwargs)
self._save()
def _prepare_data(*args, **kwargs):
<your custom data preparation>
return data
def _plot(*args, **kwargs):
<your custom plotting without saving>
The save method is already implemented in the AbstractPlotClass. If special saving is required (e.g. if you are
using pdfpages), you need to overwrite it. Plots are saved as .pdf with a resolution of 500dpi per default (can be
set in super class initialisation).
Methods like the shown _prepare_data() are optional. The only method required to implement is _plot.
If you want to add a time tracking module, just add the TimeTrackingWrapper as decorator around your custom plot
class. It will log the spent time if you call your plotting without saving the returned object.
.. code-block:: python
@TimeTrackingWrapper
class MyCustomPlot(AbstractPlotClass):
pass
Let's assume it takes a while to create this very special plot.
>>> MyCustomPlot()
INFO: MyCustomPlot finished after 00:00:11 (hh:mm:ss)
"""
def __init__(self, plot_folder, plot_name, resolution=500, rc_params=None):
"""Set up plot folder and name, and plot resolution (default 500dpi)."""
plot_folder = os.path.abspath(plot_folder)
if not os.path.exists(plot_folder):
os.makedirs(plot_folder)
self.plot_folder = plot_folder
self.plot_name = plot_name
self.resolution = resolution
if rc_params is None:
rc_params = {'axes.labelsize': 'large',
'xtick.labelsize': 'large',
'ytick.labelsize': 'large',
'legend.fontsize': 'large',
'axes.titlesize': 'large',
}
self.rc_params = rc_params
self._update_rc_params()
def _plot(self, *args):
"""Abstract plot class needs to be implemented in inheritance."""
raise NotImplementedError
def _save(self, **kwargs):
"""Store plot locally. Name of and path to plot need to be set on initialisation."""
plot_name = os.path.join(self.plot_folder, f"{self.plot_name}.pdf")
logging.debug(f"... save plot to {plot_name}")
plt.savefig(plot_name, dpi=self.resolution, **kwargs)
plt.close('all')
def _update_rc_params(self):
plt.rcParams.update(self.rc_params)
@staticmethod
def _get_sampling(sampling):
if sampling == "daily":
return "D"
elif sampling == "hourly":
return "h"
@staticmethod
def get_dataset_colors():
"""
Standard colors used for train-, val-, and test-sets during postprocessing
"""
colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9", "train_val": "#000000"} # hex code
return colors
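The contract above can be exercised without matplotlib. The sketch below is a hypothetical minimal mirror of the AbstractPlotClass workflow (subclass implements `_plot`, the base class supplies `_save`), with a save stub standing in for `plt.savefig`, so the control flow is visible in isolation:

```python
import os

class PlotBase:
    """Hypothetical minimal mirror of AbstractPlotClass: _plot is abstract, _save is inherited."""

    def __init__(self, plot_folder, plot_name, resolution=500):
        self.plot_folder = os.path.abspath(plot_folder)
        self.plot_name = plot_name
        self.resolution = resolution
        self.saved_as = None

    def _plot(self, *args):
        raise NotImplementedError  # every subclass must provide this

    def _save(self):
        # stand-in for plt.savefig(...): records the target path instead of writing
        self.saved_as = os.path.join(self.plot_folder, f"{self.plot_name}.pdf")

class MyCustomPlot(PlotBase):
    def __init__(self, plot_folder):
        super().__init__(plot_folder, "custom_plot_name")
        self._plot()
        self._save()

    def _plot(self, *args):
        pass  # custom plotting without saving

plot = MyCustomPlot("plots")
```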
@@ -19,9 +19,10 @@ from mlair.helpers.datastore import NameNotFoundInDataStore
 from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
-from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
-    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotAvailabilityHistogram, \
-    PlotConditionalQuantiles, PlotSeparationOfScales
+from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \
+    PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales
+from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \
+    PlotPeriodogram
 from mlair.run_modules.run_environment import RunEnvironment
@@ -296,6 +297,7 @@ class PostProcessing(RunEnvironment):
         """
         logging.info("Run plotting routines...")
         path = self.data_store.get("forecast_path")
+        use_multiprocessing = self.data_store.get("use_multiprocessing")
         plot_list = self.data_store.get("plot_list", "postprocessing")
         time_dim = self.data_store.get("time_dim")
@@ -325,23 +327,6 @@
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error: {e}")

         try:
-            if "PlotStationMap" in plot_list:
-                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
-                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
-                    logging.warning(
-                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
-                else:
-                    gens = [(self.train_data, {"marker": 5, "ms": 9}),
-                            (self.val_data, {"marker": 6, "ms": 9}),
-                            (self.test_data, {"marker": 4, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=self.plot_path)
-                    gens = [(self.train_val_data, {"marker": 8, "ms": 9}),
-                            (self.test_data, {"marker": 9, "ms": 9})]
-                    PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var")
-        except Exception as e:
-            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
-        try:
             if "PlotMonthlySummary" in plot_list:
                 PlotMonthlySummary(self.test_data.keys(), path, r"forecasts_%s_test.nc", self.target_var,
@@ -372,6 +357,23 @@
         except Exception as e:
             logging.error(f"Could not create plot PlotTimeSeries due to the following error: {e}")

         try:
+            if "PlotStationMap" in plot_list:
+                if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get(
+                        "hostname")[:6] in self.data_store.get("hpc_hosts"):
+                    logging.warning(
+                        f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}")
+                else:
+                    gens = [(self.train_data, {"marker": 5, "ms": 9}),
+                            (self.val_data, {"marker": 6, "ms": 9}),
+                            (self.test_data, {"marker": 4, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=self.plot_path)
+                    gens = [(self.train_val_data, {"marker": 8, "ms": 9}),
+                            (self.test_data, {"marker": 9, "ms": 9})]
+                    PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var")
+        except Exception as e:
+            logging.error(f"Could not create plot PlotStationMap due to the following error: {e}")
+        try:
             if "PlotAvailability" in plot_list:
                 avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data}
@@ -388,6 +390,14 @@
         except Exception as e:
             logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}")

+        try:
+            if "PlotPeriodogram" in plot_list:
+                PlotPeriodogram(self.train_data, plot_folder=self.plot_path, time_dim=time_dim,
+                                variables_dim=target_dim, sampling=self._sampling,
+                                use_multiprocessing=use_multiprocessing)
+        except Exception as e:
+            logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}")

     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
......
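Every routine in PostProcessing.plot() follows the same guard seen in the hunks above: check membership in plot_list, run the plot, and log rather than raise on failure, so one broken plot cannot abort the rest. A generic sketch of that pattern follows (the function name and signature are illustrative, not part of the MLAir API):

```python
import logging

def create_plot_safely(name, plot_list, factory):
    """Run one plotting routine; failures are logged, never propagated."""
    try:
        if name in plot_list:
            factory()  # the actual plot constructor, e.g. a PlotPeriodogram call
            return True
    except Exception as e:
        logging.error(f"Could not create plot {name} due to the following error: {e}")
    return False

def boom():
    raise RuntimeError("hypothetical plotting failure")

ok = create_plot_safely("PlotPeriodogram", ["PlotPeriodogram"], lambda: None)
bad = create_plot_safely("PlotStationMap", ["PlotStationMap"], boom)
skipped = create_plot_safely("PlotTimeSeries", [], lambda: None)
```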
@@ -10,7 +10,6 @@ import multiprocessing
 import requests
 import psutil

 import numpy as np
-import pandas as pd

 from mlair.data_handler import DataCollection, AbstractDataHandler

@@ -257,6 +256,7 @@ class PreProcessing(RunEnvironment):
                     if dh is not None:
                         collection.add(dh)
                         valid_stations.append(s)
+                pool.close()
             else:  # serial solution
                 logging.info("use serial validate station approach")
                 for station in set_stations:
......
absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
astropy==4.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
......