diff --git a/mlair/plotting/preprocessing_plotting.py b/mlair/plotting/data_insight_plotting.py similarity index 100% rename from mlair/plotting/preprocessing_plotting.py rename to mlair/plotting/data_insight_plotting.py diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index a633dec1d65244aacb531d7dc1f94c8e2931d29c..23d26fc1e5c866657d28b11f275d76df5a8cc300 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -21,6 +21,8 @@ from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel from mlair.model_modules import AbstractModelClass from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotClimatologicalSkillScore, \ PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotConditionalQuantiles, PlotSeparationOfScales +from mlair.plotting.data_insight_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \ + PlotPeriodogram from mlair.run_modules.run_environment import RunEnvironment @@ -295,6 +297,7 @@ class PostProcessing(RunEnvironment): """ logging.info("Run plotting routines...") path = self.data_store.get("forecast_path") + use_multiprocessing = self.data_store.get("use_multiprocessing") plot_list = self.data_store.get("plot_list", "postprocessing") time_dim = self.data_store.get("time_dim") @@ -354,6 +357,47 @@ class PostProcessing(RunEnvironment): except Exception as e: logging.error(f"Could not create plot PlotTimeSeries due to the following error: {e}") + try: + if "PlotStationMap" in plot_list: + if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get( + "hostname")[:6] in self.data_store.get("hpc_hosts"): + logging.warning( + f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}") + else: + gens = [(self.train_data, {"marker": 5, "ms": 9}), + (self.val_data, {"marker": 6, "ms": 9}), + (self.test_data, {"marker": 4, "ms": 9})] + PlotStationMap(generators=gens, plot_folder=self.plot_path) + gens = [(self.train_val_data, {"marker": 8, "ms": 9}), + (self.test_data, {"marker": 9, "ms": 9})] + PlotStationMap(generators=gens, plot_folder=self.plot_path, plot_name="station_map_var") + except Exception as e: + logging.error(f"Could not create plot PlotStationMap due to the following error: {e}") + + try: + if "PlotAvailability" in plot_list: + avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data} + PlotAvailability(avail_data, plot_folder=self.plot_path, time_dimension=time_dim, + window_dimension=window_dim) + except Exception as e: + logging.error(f"Could not create plot PlotAvailability due to the following error: {e}") + + try: + if "PlotAvailabilityHistogram" in plot_list: + avail_data = {"train": self.train_data, "val": self.val_data, "test": self.test_data} + PlotAvailabilityHistogram(avail_data, plot_folder=self.plot_path, station_dim=iter_dim, + history_dim=window_dim) + except Exception as e: + logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}") + + try: + if "PlotPeriodogram" in plot_list: + PlotPeriodogram(self.train_data, plot_folder=self.plot_path, time_dim=time_dim, + variables_dim=target_dim, sampling=self._sampling, + use_multiprocessing=use_multiprocessing) + except Exception as e: + logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}") + def calculate_test_score(self): """Evaluate test score of model and save locally.""" diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index 148c34a04ffb823d829b307c6c500c9e1077dea1..4edf8e965c7140be067428b4ee1c596b8a85b312 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -10,7 +10,6 @@ import multiprocessing import requests import psutil -import numpy as np import pandas as pd from mlair.data_handler import DataCollection, AbstractDataHandler @@ -18,8 +17,6 @@ from mlair.helpers import TimeTracking, to_list, tables from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment -from mlair.plotting.preprocessing_plotting import PlotStationMap, PlotAvailability, PlotAvailabilityHistogram, \ - PlotPeriodogram class PreProcessing(RunEnvironment): @@ -69,7 +66,6 @@ class PreProcessing(RunEnvironment): self.split_train_val_test() self.report_pre_processing() self.prepare_competitors() - self.plot() def report_pre_processing(self): """Log some metrics on data and create latex report.""" @@ -330,65 +326,6 @@ class PreProcessing(RunEnvironment): else: logging.info("No preparation required because no competitor was provided to the workflow.") - def plot(self): - logging.info("Run plotting routines...") - - plot_list = self.data_store.get("plot_list", "postprocessing") - time_dim = self.data_store.get("time_dim") - window_dim = self.data_store.get("window_dim") - target_dim = self.data_store.get("target_dim") - iter_dim = self.data_store.get("iter_dim") - - train_data = self.data_store.get("data_collection", "train") - val_data = self.data_store.get("data_collection", "val") - test_data = self.data_store.get("data_collection", "test") - train_val_data = self.data_store.get("data_collection", "train_val") - plot_path: str = self.data_store.get("plot_path") - - sampling = self.data_store.get("sampling") - use_multiprocessing = self.data_store.get("use_multiprocessing") - - try: - if "PlotStationMap" in plot_list: - if self.data_store.get("hostname")[:2] in self.data_store.get("hpc_hosts") or self.data_store.get( - "hostname")[:6] in self.data_store.get("hpc_hosts"): - logging.warning( - f"Skip 'PlotStationMap` because running on a hpc node: {self.data_store.get('hostname')}") - else: - gens = [(train_data, {"marker": 5, "ms": 9}), - (val_data, {"marker": 6, "ms": 9}), - (test_data, {"marker": 4, "ms": 9})] - PlotStationMap(generators=gens, plot_folder=plot_path) - gens = [(train_val_data, {"marker": 8, "ms": 9}), - (test_data, {"marker": 9, "ms": 9})] - PlotStationMap(generators=gens, plot_folder=plot_path, plot_name="station_map_var") - except Exception as e: - logging.error(f"Could not create plot PlotStationMap due to the following error: {e}") - - try: - if "PlotAvailability" in plot_list: - avail_data = {"train": train_data, "val": val_data, "test": test_data} - PlotAvailability(avail_data, plot_folder=plot_path, time_dimension=time_dim, - window_dimension=window_dim) - except Exception as e: - logging.error(f"Could not create plot PlotAvailability due to the following error: {e}") - - try: - if "PlotAvailabilityHistogram" in plot_list: - avail_data = {"train": train_data, "val": val_data, "test": test_data} - PlotAvailabilityHistogram(avail_data, plot_folder=plot_path, station_dim=iter_dim, - history_dim=window_dim) - except Exception as e: - logging.error(f"Could not create plot PlotAvailabilityHistogram due to the following error: {e}") - - try: - if "PlotPeriodogram" in plot_list: - PlotPeriodogram(train_data, plot_folder=plot_path, time_dim=time_dim, variables_dim=target_dim, - sampling=sampling, use_multiprocessing=use_multiprocessing) - - except Exception as e: - logging.error(f"Could not create plot PlotPeriodogram due to the following error: {e}") - def f_proc(data_handler, station, name_affix, store, **kwargs): """