Ghost User · 0eeaaf5c · aef32cc8 · 0bd174b6 · 06a474d8 · 0eeaaf5c
--- a/src/plotting/postprocessing_plotting.py

+ 223

− 106
+++ b/src/plotting/postprocessing_plotting.py

+ 223

− 106
 @@ -189,26 +189,12 @@ class PlotStationMap(AbstractPlotClass):


 @TimeTrackingWrapper
-def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_window: int = 3, ref_name: str = 'obs',
-                               pred_name: str = 'CNN', season: str = "", forecast_path: str = None,
-                               plot_name_affix: str = "", units: str = "ppb"):
+class PlotConditionalQuantiles(AbstractPlotClass):
    """
-    This plot was originally taken from Murphy, Brown and Chen (1989):
-    https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%281989%29004%3C0485%3ADVOTF%3E2.0.CO%3B2
-
-    :param stations: stations to include in the plot (forecast data needs to be available already)
-    :param plot_folder: path to save the plot (default: current directory)
-    :param rolling_window: the rolling window mean will smooth the plot appearance (no smoothing in bin calculation,
-        this is only a cosmetic step, default: 3)
-    :param ref_name: name of the reference data series
-    :param pred_name: name of the investigated data series
-    :param season: season name to highlight if not empty
-    :param forecast_path: path to save the plot file
-    :param plot_name_affix: name to specify this plot (e.g. 'cali-ref', default: '')
-    :param units: units of the forecasted values (default: ppb)
+    This class creates cond.quantile plots as originally proposed by Murphy, Brown and Chen (1989) [But in log scale]
+
+    Link to paper: https://journals.ametsoc.org/doi/pdf/10.1175/1520-0434%281989%29004%3C0485%3ADVOTF%3E2.0.CO%3B2
    """
-    # time = TimeTracking()
-    logging.debug(f"started plot_conditional_quantiles()")
    # ignore warnings if nans appear in quantile grouping
    warnings.filterwarnings("ignore", message="All-NaN slice encountered")
    # ignore warnings if mean is calculated on nans
 @@ -216,112 +202,235 @@ def plot_conditional_quantiles(stations: list, plot_folder: str = ".", rolling_w
    # ignore warnings for y tick = 0 on log scale (instead of 0.00001 or similar)
    warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")

-    def load_data():
+    def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True,
+                 rolling_window: int = 3, model_mame: str = "CNN", obs_name: str = "obs", **kwargs):
+        """
+
+        :param stations: all stations to plot
+        :param data_pred_path: path to dir which contains the forecasts as .nc files
+        :param plot_folder: path where the plots are stored
+        :param plot_per_seasons: if `True' create cond. quantile plots for seasons (DJF, MAM, JJA, SON) individually
+        :param rolling_window: smoothing of quantiles (3 is used by Murphy et al.)
+        :param model_mame: name of the model prediction as stored in netCDF file (for example "CNN")
+        :param obs_name: name of observation as stored in netCDF file (for example "obs")
+        :param kwargs: Some further arguments which are listed in self._opts
+        """
+        super().__init__(plot_folder, "conditional_quantiles")
+
+        self._data_pred_path = data_pred_path
+        self._stations = stations
+        self._rolling_window = rolling_window
+        self._model_name = model_mame
+        self._obs_name = obs_name
+
+        self._opts = {"q": kwargs.get("q", [.1, .25, .5, .75, .9]),
+                      "linetype": kwargs.get("linetype", [':', '-.', '--', '-.', ':']),
+                      "legend": kwargs.get("legend",
+                                           ['.10th and .90th quantile', '.25th and .75th quantile', '.50th quantile',
+                                            'reference 1:1']),
+                      "data_unit": kwargs.get("data_unit", "ppb"),
+                      }
+        if plot_per_seasons is True:
+            self.seasons = ['DJF', 'MAM', 'JJA', 'SON']
+        else:
+            self.seasons = ""
+        self._data = self._load_data()
+        self._bins = self._get_bins_from_rage_of_data()
+
+        self._plot()
+
+    def _load_data(self):
+        """
+        This method loads forcast data
+
+        :return:
+        """
        logging.debug("... load data")
        data_collector = []
-        for station in stations:
-            file = os.path.join(forecast_path, f"forecasts_{station}_test.nc")
+        for station in self._stations:
+            file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc")
            data_tmp = xr.open_dataarray(file)
-            data_collector.append(data_tmp.loc[:, :, ['CNN', 'obs', 'OLS']].assign_coords(station=station))
-        return xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
+            data_collector.append(data_tmp.loc[:, :, [self._model_name, self._obs_name]].assign_coords(station=station))
+        res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
+        return res

-    def segment_data(data):
+    def _segment_data(self, data, x_model):
+        """
+        This method creates segmented data which is used for cond. quantile plots
+
+        :param data:
+        :param x_model:
+        :return:
+        """
        logging.debug("... segment data")
        # combine index and station to multi index
        data = data.stack(z=['index', 'station'])
        # replace multi index by simple position index (order is not relevant anymore)
        data.coords['z'] = range(len(data.coords['z']))
-        # segment data of pred_name into bins
-        data.loc[pred_name, ...] = data.loc[pred_name, ...].to_pandas().T.apply(pd.cut, bins=bins,
-                                                                                labels=bins[1:]).T.values
+        # segment data of x_model into bins
+        data.loc[x_model, ...] = data.loc[x_model, ...].to_pandas().T.apply(pd.cut, bins=self._bins,
+                                                                                labels=self._bins[1:]).T.values
        return data

-    def create_quantile_panel(data, q):
+    @staticmethod
+    def _labels(plot_type, data_unit="ppb"):
+        """
+        Helper method to correctly assign (x,y) labels to plots, depending on like-base or cali-ref factorization
+
+        :param plot_type:
+        :param data_unit:
+        :return:
+        """
+        names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})")
+        if plot_type == "obs":
+            return names
+        else:
+            return names[::-1]
+
+    def _get_bins_from_rage_of_data(self):
+        """
+        Get array of bins to use for quantiles
+
+        :return:
+        """
+        return np.arange(0, math.ceil(self._data.max().max()) + 1, 1).astype(int)
+
+    def _create_quantile_panel(self, data, x_model, y_model):
+        """
+        Clculate quantiles
+
+        :param data:
+        :param x_model:
+        :param y_model:
+        :return:
+        """
        logging.debug("... create quantile panel")
        # create empty xarray with dims: time steps ahead, quantiles, bin index (numbers create in previous step)
-        quantile_panel = xr.DataArray(np.full([data.ahead.shape[0], len(q), bins[1:].shape[0]], np.nan),
-                                      coords=[data.ahead, q, bins[1:]], dims=['ahead', 'quantiles', 'categories'])
+        quantile_panel = xr.DataArray(
+            np.full([data.ahead.shape[0], len(self._opts["q"]), self._bins[1:].shape[0]], np.nan),
+            coords=[data.ahead, self._opts["q"], self._bins[1:]], dims=['ahead', 'quantiles', 'categories'])
        # ensure that the coordinates are in the right order
        quantile_panel = quantile_panel.transpose('ahead', 'quantiles', 'categories')
        # calculate for each bin of the pred_name data the quantiles of the ref_name data
-        for bin in bins[1:]:
-            mask = (data.loc[pred_name, ...] == bin)
-            quantile_panel.loc[..., bin] = data.loc[ref_name, ...].where(mask).quantile(q, dim=['z']).T
-
+        for bin in self._bins[1:]:
+            mask = (data.loc[x_model, ...] == bin)
+            quantile_panel.loc[..., bin] = data.loc[y_model, ...].where(mask).quantile(self._opts["q"],
+                                                                                                    dim=['z']).T
        return quantile_panel

-    def labels(plot_type, data_unit="ppb"):
-        names = (f"forecast concentration (in {data_unit})", f"observed concentration (in {data_unit})")
-        if plot_type == "obs":
-            return names
-        else:
-            return names[::-1]
+    @staticmethod
+    def add_affix(x):
+        """
+        Helper method to add additional information on plot name

-    xlabel, ylabel = labels(ref_name, units)
-
-    opts = {"q": [.1, .25, .5, .75, .9], "linetype": [':', '-.', '--', '-.', ':'],
-            "legend": ['.10th and .90th quantile', '.25th and .75th quantile', '.50th quantile', 'reference 1:1'],
-            "xlabel": xlabel, "ylabel": ylabel}
-
-    # set name and path of the plot
-    base_name = "conditional_quantiles"
-    def add_affix(x): return f"_{x}" if len(x) > 0 else ""
-    plot_name = f"{base_name}{add_affix(season)}{add_affix(plot_name_affix)}_plot.pdf"
-    plot_path = os.path.join(os.path.abspath(plot_folder), plot_name)
-
-    # check forecast path
-    if forecast_path is None:
-        raise ValueError("Forecast path is not given but required.")
-
-    # load data and set data bins
-    orig_data = load_data()
-    bins = np.arange(0, math.ceil(orig_data.max().max()) + 1, 1).astype(int)
-    segmented_data = segment_data(orig_data)
-    quantile_panel = create_quantile_panel(segmented_data, q=opts["q"])
-
-    # init pdf output
-    pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
-    logging.debug(f"... plot path is {plot_path}")
-
-    # create plot for each time step ahead
-    y2_max = 0
-    for iteration, d in enumerate(segmented_data.ahead):
-        logging.debug(f"... plotting {d.values} time step(s) ahead")
-        # plot smoothed lines with rolling mean
-        smooth_data = quantile_panel.loc[d, ...].rolling(categories=rolling_window, center=True).mean().to_pandas().T
-        ax = smooth_data.plot(style=opts["linetype"], color='black', legend=False)
-        ax2 = ax.twinx()
-        # add reference line
-        ax.plot([0, bins.max()], [0, bins.max()], color='k', label='reference 1:1', linewidth=.8)
-        # add histogram of the segmented data (pred_name)
-        handles, labels = ax.get_legend_handles_labels()
-        segmented_data.loc[pred_name, d, :].to_pandas().hist(bins=bins, ax=ax2, color='k', alpha=.3, grid=False,
-                                                             rwidth=1)
-        # add legend
-        plt.legend(handles[:3] + [handles[-1]], opts["legend"], loc='upper left', fontsize='large')
-        # adjust limits and set labels
-        ax.set(xlim=(0, bins.max()), ylim=(0, bins.max()))
-        ax.set_xlabel(opts["xlabel"], fontsize='x-large')
-        ax.tick_params(axis='x', which='major', labelsize=15)
-        ax.set_ylabel(opts["ylabel"], fontsize='x-large')
-        ax.tick_params(axis='y', which='major', labelsize=15)
-        ax2.yaxis.label.set_color('gray')
-        ax2.tick_params(axis='y', colors='gray')
-        ax2.yaxis.labelpad = -15
-        ax2.set_yscale('log')
-        if iteration == 0:
-            y2_max = ax2.get_ylim()[1] + 100
-        ax2.set(ylim=(0, y2_max * 10 ** 8), yticks=np.logspace(0, 4, 5))
-        ax2.set_ylabel('              sample size', fontsize='x-large')
-        ax2.tick_params(axis='y', which='major', labelsize=15)
-        # set title and save current figure
-        title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}"
-        plt.title(title)
-        pdf_pages.savefig()
-    # close all open figures / plots
-    pdf_pages.close()
-    plt.close('all')
-    #logging.info(f"plot_conditional_quantiles() finished after {time}")
+        :param x:
+        :return:
+        """
+        return f"_{x}" if len(x) > 0 else ""
+
+    def _prepare_plots(self, data, x_model, y_model):
+        """
+        Get segmented_data and quantile_panel
+
+        :param data:
+        :param x_model:
+        :param y_model:
+        :return:
+        """
+        segmented_data = self._segment_data(data, x_model)
+        quantile_panel = self._create_quantile_panel(segmented_data, x_model, y_model)
+        return segmented_data, quantile_panel
+
+    def _plot(self):
+        """
+        Main plot call
+
+        :return:
+        """
+        if len(self.seasons) > 0:
+            self._plot_seasons()
+        self._plot_all()
+
+    def _plot_seasons(self):
+        """
+        Seasonal plot call
+
+        :return:
+        """
+        for season in self.seasons:
+            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._model_name,
+                            y_model=self._obs_name, plot_name_affix="cali-ref", season=season)
+            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name,
+                            y_model=self._model_name, plot_name_affix="like-base", season=season)
+
+    def _plot_all(self):
+        """
+        Full plot call
+
+        :return:
+        """
+        self._plot_base(data=self._data, x_model=self._model_name, y_model=self._obs_name, plot_name_affix="cali-ref")
+        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._model_name, plot_name_affix="like-base")
+
+    @TimeTrackingWrapper
+    def _plot_base(self, data, x_model, y_model, plot_name_affix, season=""):
+        """
+        Base method to create cond. quantile plots. Is called from _plot_all and _plot_seasonal
+
+        :param data: data which is used to create cond. quantile plot
+        :param x_model: name of model on x axis (can also be obs)
+        :param y_model: name of model on y axis (can also be obs)
+        :param plot_name_affix: should be `cali-ref' or `like-base'
+        :param season: List of seasons to use
+        :return:
+        """
+        segmented_data, quantile_panel = self._prepare_plots(data, x_model, y_model)
+        ylabel, xlabel = self._labels(x_model, self._opts["data_unit"])
+        plot_name = f"{self.plot_name}{self.add_affix(season)}{self.add_affix(plot_name_affix)}_plot.pdf"
+        #f"{base_name}{add_affix(season)}{add_affix(plot_name_affix)}_plot.pdf"
+        plot_path = os.path.join(os.path.abspath(self.plot_folder), plot_name)
+        pdf_pages = matplotlib.backends.backend_pdf.PdfPages(plot_path)
+        logging.debug(f"... plot path is {plot_path}")
+
+        # create plot for each time step ahead
+        y2_max = 0
+        for iteration, d in enumerate(segmented_data.ahead):
+            logging.debug(f"... plotting {d.values} time step(s) ahead")
+            # plot smoothed lines with rolling mean
+            smooth_data = quantile_panel.loc[d, ...].rolling(categories=self._rolling_window,
+                                                             center=True).mean().to_pandas().T
+            ax = smooth_data.plot(style=self._opts["linetype"], color='black', legend=False)
+            ax2 = ax.twinx()
+            # add reference line
+            ax.plot([0, self._bins.max()], [0, self._bins.max()], color='k', label='reference 1:1', linewidth=.8)
+            # add histogram of the segmented data (pred_name)
+            handles, labels = ax.get_legend_handles_labels()
+            segmented_data.loc[x_model, d, :].to_pandas().hist(bins=self._bins, ax=ax2, color='k', alpha=.3, grid=False,
+                                                                 rwidth=1)
+            # add legend
+            plt.legend(handles[:3] + [handles[-1]], self._opts["legend"], loc='upper left', fontsize='large')
+            # adjust limits and set labels
+            ax.set(xlim=(0, self._bins.max()), ylim=(0, self._bins.max()))
+            ax.set_xlabel(xlabel, fontsize='x-large')
+            ax.tick_params(axis='x', which='major', labelsize=15)
+            ax.set_ylabel(ylabel, fontsize='x-large')
+            ax.tick_params(axis='y', which='major', labelsize=15)
+            ax2.yaxis.label.set_color('gray')
+            ax2.tick_params(axis='y', colors='gray')
+            ax2.yaxis.labelpad = -15
+            ax2.set_yscale('log')
+            if iteration == 0:
+                y2_max = ax2.get_ylim()[1] + 100
+            ax2.set(ylim=(0, y2_max * 10 ** 8), yticks=np.logspace(0, 4, 5))
+            ax2.set_ylabel('              sample size', fontsize='x-large')
+            ax2.tick_params(axis='y', which='major', labelsize=15)
+            # set title and save current figure
+            title = f"{d.values} time step(s) ahead{f' ({season})' if len(season) > 0 else ''}"
+            plt.title(title)
+            pdf_pages.savefig()
+        # close all open figures / plots
+        pdf_pages.close()
+        plt.close('all')


 @TimeTrackingWrapper
 @@ -697,7 +806,6 @@ class PlotAvailability(AbstractPlotClass):
                plt_dict[summary_name].update({subset: t2})
        return plt_dict

-
    def _plot(self, plt_dict):
        # colors = {"train": "orange", "val": "blueishgreen", "test": "skyblue"}  # color names
        colors = {"train": "#e69f00", "val": "#009e73", "test": "#56b4e9"}  # hex code
 @@ -722,3 +830,12 @@ class PlotAvailability(AbstractPlotClass):
        handles = [mpatches.Patch(color=c, label=k) for k, c in colors.items()]
        lgd = plt.legend(handles=handles, bbox_to_anchor=(0, 1, 1, 0.2), loc="lower center", ncol=len(handles))
        return lgd
+
+
+if __name__ == "__main__":
+    stations = ['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087']
+    path = "../../testrun_network/forecasts"
+    plt_path = "../../"
+
+    con_quan_cls = PlotConditionalQuantiles(stations, path, plt_path)
+