diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py index 8efd54bb23035eed4cf51e94235bf8de7ff2a481..3902bacc2d6f62c3dcdf821f13d93c7048a54b1a 100644 --- a/src/plotting/postprocessing_plotting.py +++ b/src/plotting/postprocessing_plotting.py @@ -70,8 +70,9 @@ class AbstractPlotClass: INFO: MyCustomPlot finished after 00:00:11 (hh:mm:ss) """ + def __init__(self, plot_folder, plot_name, resolution=500): - """Set up plot folder and name, and plot resolution (default 500dpi)""" + """Set up plot folder and name, and plot resolution (default 500dpi).""" self.plot_folder = plot_folder self.plot_name = plot_name self.resolution = resolution @@ -81,7 +82,7 @@ class AbstractPlotClass: raise NotImplementedError def _save(self, **kwargs): - """Standard save method to store plot locally. Name of and path to plot need to be set on initialisation.""" + """Store plot locally. Name of and path to plot need to be set on initialisation.""" plot_name = os.path.join(os.path.abspath(self.plot_folder), f"{self.plot_name}.pdf") logging.debug(f"... save plot to {plot_name}") plt.savefig(plot_name, dpi=self.resolution, **kwargs) @@ -91,27 +92,26 @@ class AbstractPlotClass: @TimeTrackingWrapper class PlotMonthlySummary(AbstractPlotClass): """ - Show a monthly summary over all stations for each lead time ("ahead") as box and whiskers plot. The plot is saved - in data_path with name monthly_summary_box_plot.pdf and 500dpi resolution. + Show a monthly summary over all stations for each lead time ("ahead") as box and whiskers plot. + + The plot is saved in data_path with name monthly_summary_box_plot.pdf and 500dpi resolution. .. 
image:: ../../../../../_source/_plots/monthly_summary_box_plot.png :width: 400 + :param stations: all stations to plot + :param data_path: path, where the data is located + :param name: full name of the local files with a % as placeholder for the station name + :param target_var: display name of the target variable on plot's axis + :param window_lead_time: lead time to plot, if window_lead_time is higher than the available lead time or not given + the maximum lead time from data is used. (default None -> use maximum lead time from data). + :param plot_folder: path to save the plot (default: current directory) + """ def __init__(self, stations: List, data_path: str, name: str, target_var: str, window_lead_time: int = None, plot_folder: str = "."): - """ - Sets attributes and create plot - - :param stations: all stations to plot - :param data_path: path, where the data is located - :param name: full name of the local files with a % as placeholder for the station name - :param target_var: display name of the target variable on plot's axis - :param window_lead_time: lead time to plot, if window_lead_time is higher than the available lead time or not given - the maximum lead time from data is used. (default None -> use maximum lead time from data). - :param plot_folder: path to save the plot (default: current directory) - """ + """Set attributes and create plot.""" super().__init__(plot_folder, "monthly_summary_box_plot") self._data_path = data_path self._data_name = name @@ -122,8 +122,10 @@ class PlotMonthlySummary(AbstractPlotClass): def _prepare_data(self, stations: List) -> xr.DataArray: """ - Pre-process data required to plot. For each station, load locally saved predictions, extract the CNN prediction - and the observation and group them into monthly bins (no aggregation, only sorting them). + Pre-process data required to plot. 
+ + For each station, load locally saved predictions, extract the CNN prediction and the observation and group them + into monthly bins (no aggregation, only sorting them). :param stations: all stations to plot :return: The entire data set, flagged with the corresponding month. @@ -152,9 +154,11 @@ class PlotMonthlySummary(AbstractPlotClass): def _get_window_lead_time(self, window_lead_time: int): """ - Extract the lead time from data and arguments. If window_lead_time is not given, extract this information from - data itself by the number of ahead dimensions. If given, check if data supports the give length. If the number - of ahead dimensions in data is lower than the given lead time, data's lead time is used. + Extract the lead time from data and arguments. + + If window_lead_time is not given, extract this information from data itself by the number of ahead dimensions. + If given, check if data supports the given length. If the number of ahead dimensions in data is lower than the + given lead time, data's lead time is used. :param window_lead_time: lead time from arguments to validate :return: validated lead time, comes either from given argument or from data itself @@ -166,8 +170,7 @@ class PlotMonthlySummary(AbstractPlotClass): def _plot(self, target_var: str): """ - Main plot function that creates a monthly grouped box plot over all stations but with separate boxes for each - lead time step. + Create a monthly grouped box plot over all stations but with separate boxes for each lead time step. :param target_var: display name of the target variable on plot's axis """ @@ -185,10 +188,11 @@ class PlotMonthlySummary(AbstractPlotClass): @TimeTrackingWrapper class PlotStationMap(AbstractPlotClass): """ - Plot geographical overview of all used stations as squares. Different data sets can be colorised by its key in the - input dictionary generators. The key represents the color to plot on the map. 
Currently, there is only a white - background, but this can be adjusted by loading locally stored topography data (not implemented yet). The plot is - saved under plot_path with the name station_map.pdf + Plot geographical overview of all used stations as squares. + + Different data sets can be colorised by its key in the input dictionary generators. The key represents the color to + plot on the map. Currently, there is only a white background, but this can be adjusted by loading locally stored + topography data (not implemented yet). The plot is saved under plot_path with the name station_map.pdf .. image:: ../../../../../_source/_plots/station_map.png :width: 400 @@ -196,7 +200,7 @@ class PlotStationMap(AbstractPlotClass): def __init__(self, generators: Dict, plot_folder: str = "."): """ - Sets attributes and create plot + Set attributes and create plot. :param generators: dictionary with the plot color of each data set as key and the generator containing all stations as value. @@ -208,9 +212,7 @@ class PlotStationMap(AbstractPlotClass): self._save() def _draw_background(self): - """ - Draw coastline, lakes, ocean, rivers and country borders as background on the map. - """ + """Draw coastline, lakes, ocean, rivers and country borders as background on the map.""" self._ax.add_feature(cfeature.COASTLINE.with_scale("50m"), edgecolor='black') self._ax.add_feature(cfeature.LAKES.with_scale("50m")) self._ax.add_feature(cfeature.OCEAN.with_scale("50m")) @@ -219,8 +221,9 @@ class PlotStationMap(AbstractPlotClass): def _plot_stations(self, generators): """ - The actual plot function. Loops over all keys in generators dict and its containing stations and plots a square - and the stations's position on the map regarding the given color. + Loop over all keys in generators dict and its containing stations and plot the stations's position. + + Position is highlighted by a square on the map regarding the given color. 
:param generators: dictionary with the plot color of each data set as key and the generator containing all stations as value. @@ -236,7 +239,9 @@ class PlotStationMap(AbstractPlotClass): def _plot(self, generators: Dict): """ - Main plot function to create the station map plot. Sets figure and calls all required sub-methods. + Create the station map plot. + + Set figure and call all required sub-methods. :param generators: dictionary with the plot color of each data set as key and the generator containing all stations as value. @@ -387,11 +392,12 @@ def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_w @TimeTrackingWrapper class PlotClimatologicalSkillScore(AbstractPlotClass): """ - Create plot of climatological skill score after Murphy (1988) as box plot over all stations. A forecast time step - (called "ahead") is separately shown to highlight the differences for each prediction time step. Either each single - term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed (score_only=True, - default). Y-axis is adjusted following the data and not hard coded. The plot is saved under plot_folder path with - name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi. + Create plot of climatological skill score after Murphy (1988) as box plot over all stations. + + A forecast time step (called "ahead") is separately shown to highlight the differences for each prediction time + step. Either each single term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed + (score_only=True, default). Y-axis is adjusted following the data and not hard coded. The plot is saved under + plot_folder path with name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi. .. image:: ../../../../../_source/_plots/skill_score_clim_all_terms_CNN.png :width: 400 @@ -399,19 +405,17 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): .. 
image:: ../../../../../_source/_plots/skill_score_clim_CNN.png :width: 400 + :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms. + :param plot_folder: path to save the plot (default: current directory) + :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True) + :param extra_name_tag: additional tag that can be included in the plot name (default "") + :param model_setup: architecture type to specify plot name (default "CNN") + """ def __init__(self, data: Dict, plot_folder: str = ".", score_only: bool = True, extra_name_tag: str = "", model_setup: str = ""): - """ - Sets attributes and create plot - - :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms. - :param plot_folder: path to save the plot (default: current directory) - :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True) - :param extra_name_tag: additional tag that can be included in the plot name (default "") - :param model_setup: architecture type to specify plot name (default "CNN") - """ + """Initialise.""" super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_setup}") self._labels = None self._data = self._prepare_data(data, score_only) @@ -420,8 +424,10 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): def _prepare_data(self, data: Dict, score_only: bool) -> pd.DataFrame: """ - Shrink given data, if only scores are relevant. In any case, transform data to a plot friendly format. Also set - plot labels depending on the lead time dimensions. + Shrink given data, if only scores are relevant. + + In any case, transform data to a plot friendly format. Also set plot labels depending on the lead time + dimensions. 
:param data: dictionary with station names as keys and 2D xarrays as values :param score_only: if true only scores of CASE I to IV are relevant @@ -435,7 +441,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): def _label_add(self, score_only: bool): """ - Adds the phrase "terms and " if score_only is disabled or empty string (if score_only=True). + Add the phrase "terms and " if score_only is disabled or empty string (if score_only=True). :param score_only: if false all terms are relevant, otherwise only CASE I to IV :return: additional label @@ -444,7 +450,7 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): def _plot(self, score_only): """ - Main plot function to plot climatological skill score. + Plot climatological skill score. :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms """ @@ -463,22 +469,24 @@ class PlotClimatologicalSkillScore(AbstractPlotClass): @TimeTrackingWrapper class PlotCompetitiveSkillScore(AbstractPlotClass): """ - Create competitive skill score for the given model setup and the reference models ordinary least squared ("ols") and - the persistence forecast ("persi") for all lead times ("ahead"). The plot is saved under plot_folder with the name + Create competitive skill score plot. + + Create this plot for the given model setup and the reference models ordinary least squared ("ols") and the + persistence forecast ("persi") for all lead times ("ahead"). The plot is saved under plot_folder with the name skill_score_competitive_{model_setup}.pdf and resolution of 500dpi. .. image:: ../../../../../_source/_plots/skill_score_competitive.png :width: 400 + :param data: data frame with index=['cnn-persi', 'ols-persi', 'cnn-ols'] and columns "ahead" containing the pre- + calculated comparisons for cnn, persistence and ols. 
+ :param plot_folder: path to save the plot (default: current directory) + :param model_setup: architecture type (default "CNN") + """ def __init__(self, data: pd.DataFrame, plot_folder=".", model_setup="CNN"): - """ - :param data: data frame with index=['cnn-persi', 'ols-persi', 'cnn-ols'] and columns "ahead" containing the pre- - calculated comparisons for cnn, persistence and ols. - :param plot_folder: path to save the plot (default: current directory) - :param model_setup: architecture type (default "CNN") - """ + """Initialise.""" super().__init__(plot_folder, f"skill_score_competitive_{model_setup}") self._labels = None self._data = self._prepare_data(data) @@ -487,7 +495,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): def _prepare_data(self, data: pd.DataFrame) -> pd.DataFrame: """ - Reformat given data and create plot labels. Introduces the dimensions stations and comparison + Reformat given data and create plot labels and introduce the dimensions stations and comparison. :param data: data frame with index=['cnn-persi', 'ols-persi', 'cnn-ols'] and columns "ahead" containing the pre- calculated comparisons for cnn, persistence and ols. @@ -502,9 +510,7 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): return data.stack(level=0).reset_index(level=2, drop=True).reset_index(name="data") def _plot(self): - """ - Main plot function to plot skill scores of the comparisons cnn-persi, ols-persi and cnn-ols. - """ + """Plot skill scores of the comparisons cnn-persi, ols-persi and cnn-ols.""" fig, ax = plt.subplots() sns.boxplot(x="comparison", y="data", hue="ahead", data=self._data, whis=1., ax=ax, palette="Blues_d", showmeans=True, meanprops={"markersize": 3, "markeredgecolor": "k"}, flierprops={"marker": "."}, @@ -518,8 +524,10 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): def _ylim(self) -> Tuple[float, float]: """ - Calculate y-axis limits from data. 
Lower is the minimum of either 0 or data's minimum (reduced by small - subtrahend) and upper limit is data's maximum (increased by a small addend). + Calculate y-axis limits from data. + + Lower limit is the minimum of 0 and data's minimum (reduced by small subtrahend) and upper limit is data's + maximum (increased by a small addend). :return: """ @@ -531,11 +539,12 @@ class PlotCompetitiveSkillScore(AbstractPlotClass): @TimeTrackingWrapper class PlotBootstrapSkillScore(AbstractPlotClass): """ - Create plot of climatological skill score after Murphy (1988) as box plot over all stations. A forecast time step - (called "ahead") is separately shown to highlight the differences for each prediction time step. Either each single - term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed (score_only=True, - default). Y-axis is adjusted following the data and not hard coded. The plot is saved under plot_folder path with - name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi. + Create plot of climatological skill score after Murphy (1988) as box plot over all stations. + + A forecast time step (called "ahead") is separately shown to highlight the differences for each prediction time + step. Either each single term is plotted (score_only=False) or only the resulting scores CASE I to IV are displayed + (score_only=True, default). Y-axis is adjusted following the data and not hard coded. The plot is saved under + plot_folder path with name skill_score_clim_{extra_name_tag}{model_setup}.pdf and resolution of 500dpi. .. image:: ../../../../../_source/_plots/skill_score_bootstrap.png :width: 400 @@ -544,7 +553,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = ""): """ - Sets attributes and create plot + Set attributes and create plot. 
:param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms. :param plot_folder: path to save the plot (default: current directory) @@ -559,8 +568,10 @@ class PlotBootstrapSkillScore(AbstractPlotClass): def _prepare_data(self, data: Dict) -> pd.DataFrame: """ - Shrink given data, if only scores are relevant. In any case, transform data to a plot friendly format. Also set - plot labels depending on the lead time dimensions. + Shrink given data, if only scores are relevant. + + In any case, transform data to a plot friendly format. Also set plot labels depending on the lead time + dimensions. :param data: dictionary with station names as keys and 2D xarrays as values :return: pre-processed data set @@ -571,7 +582,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): def _label_add(self, score_only: bool): """ - Adds the phrase "terms and " if score_only is disabled or empty string (if score_only=True). + Add the phrase "terms and " if score_only is disabled or empty string (if score_only=True). :param score_only: if false all terms are relevant, otherwise only CASE I to IV :return: additional label @@ -579,9 +590,7 @@ class PlotBootstrapSkillScore(AbstractPlotClass): return "" if score_only else "terms and " def _plot(self): - """ - Main plot function to plot climatological skill score. - """ + """Plot climatological skill score.""" fig, ax = plt.subplots() sns.boxplot(x=self._x_name, y="data", hue="ahead", data=self._data, ax=ax, whis=1., palette="Blues_d", showmeans=True, meanprops={"markersize": 1, "markeredgecolor": "k"}, flierprops={"marker": "."}) @@ -594,9 +603,15 @@ class PlotBootstrapSkillScore(AbstractPlotClass): @TimeTrackingWrapper class PlotTimeSeries: + """ + Create time series plot. + + Currently, plots are under development and not well designed for any use in public. 
+ """ def __init__(self, stations: List, data_path: str, name: str, window_lead_time: int = None, plot_folder: str = ".", sampling="daily"): + """Initialise.""" self._data_path = data_path self._data_name = name self._stations = stations @@ -613,9 +628,11 @@ class PlotTimeSeries: def _get_window_lead_time(self, window_lead_time: int): """ - Extract the lead time from data and arguments. If window_lead_time is not given, extract this information from - data itself by the number of ahead dimensions. If given, check if data supports the give length. If the number - of ahead dimensions in data is lower than the given lead time, data's lead time is used. + Extract the lead time from data and arguments. + + If window_lead_time is not given, extract this information from data itself by the number of ahead dimensions. + If given, check if data supports the given length. If the number of ahead dimensions in data is lower than the + given lead time, data's lead time is used. :param window_lead_time: lead time from arguments to validate :return: validated lead time, comes either from given argument or from data itself @@ -702,9 +719,9 @@ class PlotTimeSeries: return f(data, min), f(data, max) @staticmethod - def _create_pdf_pages(plot_folder): + def _create_pdf_pages(plot_folder: str): """ - Standard save method to store plot locally. The name of this plot is static. + Store plot locally. :param plot_folder: path to save the plot """ @@ -716,6 +733,24 @@ class PlotTimeSeries: @TimeTrackingWrapper class PlotAvailability(AbstractPlotClass): """ + Create data availability plot similar to Gantt plot. + + Each entry of given generator will result in a new line in the plot. Data is summarised for given temporal + resolution and checked whether data is available or not for each time step. This is afterwards highlighted as a + colored bar or a blank space. 
+ + You can set different colors to highlight subsets for example by providing different generators for the same index + using different keys in the input dictionary. + + Note: each bar is surrounded by a small white box to highlight gaps in between. This can result in too long gaps + in display, if a gap is only very short. Also this appears on a (fluent) transition from one to another subset. + + Calling this class will create three versions of the availability plot. + + 1) Data availability for each element + 2) Data availability as summary over all elements (is there at least a single element for each time step) + 3) Combination of single and overall availability + .. image:: ../../../../../_source/_plots/data_availability.png :width: 400 @@ -729,6 +764,7 @@ class PlotAvailability(AbstractPlotClass): def __init__(self, generators: Dict[str, DataGenerator], plot_folder: str = ".", sampling="daily", summary_name="data availability"): + """Initialise.""" # create standard Gantt plot for all stations (currently in single pdf file with single page) super().__init__(plot_folder, "data_availability") self.sampling = self._get_sampling(sampling)