diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 5af011db9133a22d3b7dbe30197ec16f60c7e5db..7633a2a9c1842219d7af7b9c7b2b4f23a034cbdf 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -11,6 +11,8 @@ import pandas as pd
 from typing import Union, Tuple, Dict, List
 import itertools
 from collections import OrderedDict
+from mlair.helpers import to_list
+
 
 Data = Union[xr.DataArray, pd.DataFrame]
 
@@ -569,7 +571,6 @@ def create_single_bootstrap_realization(data: xr.DataArray, dim_name_time: str)
     :param dim_name_time: name of time dimension
     :return: bootstrapped realization of data
     """
-
     num_of_blocks = data.coords[dim_name_time].shape[0]
     boot_idx = np.random.choice(num_of_blocks, size=num_of_blocks, replace=True)
     return data.isel({dim_name_time: boot_idx})
@@ -585,7 +586,7 @@ def calculate_average(data: xr.DataArray, **kwargs) -> xr.DataArray:
 
 
 def create_n_bootstrap_realizations(data: xr.DataArray, dim_name_time: str, dim_name_model: str, n_boots: int = 1000,
-                                    dim_name_boots: str = 'boots') -> xr.DataArray:
+                                    dim_name_boots: str = 'boots', seasons: List[str] = None) -> Dict[str, xr.DataArray]:
     """
     Create n bootstrap realizations and calculate averages across realizations
 
@@ -594,26 +595,23 @@ def create_n_bootstrap_realizations(data: xr.DataArray, dim_name_time: str, dim_
     :param dim_name_model: name of model dimension
-    :param n_boots: number of bootstap realizations
-    :param dim_name_boots: name of bootstap dimension
+    :param n_boots: number of bootstrap realizations
+    :param dim_name_boots: name of bootstrap dimension
+    :param seasons: additionally calculate and return errors for given seasons (default None)
-    :return:
+    :return: dict of bootstrapped averages holding the full period under key "" plus one entry per requested season
     """
+    seasons = [] if seasons is None else to_list(seasons)  # ensure seasons is an empty list if None
+    assert all(s in ["DJF", "MAM", "JJA", "SON"] for s in seasons), f"unknown season in {seasons}"
     res_dims = [dim_name_boots]
     dims = list(data.dims)
     other_dims = [v for v in dims if v in set(dims).difference([dim_name_time])]
     coords = {dim_name_boots: range(n_boots), **{dim_name: data.coords[dim_name] for dim_name in other_dims}}
     if len(dims) > 1:
         res_dims = res_dims + other_dims
-    res = xr.DataArray(np.nan, dims=res_dims, coords=coords)
+    realizations = {k: xr.DataArray(np.nan, dims=res_dims, coords=coords) for k in seasons + [""]}  # "" = full period
     for boot in range(n_boots):
-        res[boot] = (calculate_average(
-            create_single_bootstrap_realization(data, dim_name_time=dim_name_time),
-            dim=dim_name_time, skipna=True))
-    return res
-
-
-
-
-
-
-
-
-
+        shuffled = create_single_bootstrap_realization(data, dim_name_time=dim_name_time)
+        realizations[""][boot] = calculate_average(shuffled, dim=dim_name_time, skipna=True)
+        for season in seasons:
+            sel = shuffled[dim_name_time].dt.season == season
+            realizations[season][boot] = calculate_average(shuffled.sel({dim_name_time: sel}),
+                                                           dim=dim_name_time, skipna=True)
+    return realizations
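
Below is a minimal usage sketch of the new `seasons` argument (illustrative only; the toy
data, sizes, and dimension names are assumptions, not part of the patch). The returned dict
keys the full-period estimate under "" and each requested season under its own name:

import numpy as np
import pandas as pd
import xarray as xr
from mlair.helpers.statistics import create_n_bootstrap_realizations

# toy error data: one value per day and per model type
time = pd.date_range("2020-01-01", periods=365, freq="D")
data = xr.DataArray(np.random.rand(365, 2), dims=("index", "type"),
                    coords={"index": time, "type": ["nn", "ols"]})
res = create_n_bootstrap_realizations(data, dim_name_time="index", dim_name_model="type",
                                      n_boots=100, seasons=["DJF", "JJA"])
print(sorted(res.keys()))  # ['', 'DJF', 'JJA'] -- '' is the all-season estimate
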
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index bd2012c3fe9f53e7e07bfb4bfc4cde096c2dc891..299febfe7db6f3237b3390ec6c0563506d61a98c 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -1095,7 +1095,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
     def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                  error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots',
                  block_length: str = None, model_name: str = "NN", model_indicator: str = "nn",
-                 ahead_dim: str = "ahead", sampling: Union[str, Tuple[str]] = ""):
+                 ahead_dim: str = "ahead", sampling: Union[str, Tuple[str]] = "", season_annotation: str = None):
         super().__init__(plot_folder, "sample_uncertainty_from_bootstrap")
         self.default_plot_name = self.plot_name
         self.model_type_dim = model_type_dim
@@ -1105,6 +1105,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self.error_unit = error_unit
         self.block_length = block_length
         self.model_name = model_name
+        _season = season_annotation or ""  # "" = no season annotation (full period)
         self.sampling = {"daily": "d", "hourly": "H"}.get(sampling[1] if isinstance(sampling, tuple) else sampling, "")
         data = self.rename_model_indicator(data, model_name, model_indicator)
         self.prepare_data(data)
@@ -1114,12 +1115,12 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
 
         # plot raw metric (mse)
         for orientation, utest, agg_type in variants:
-            self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type)
+            self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, season=_season)
 
         # plot root of metric (rmse)
         self._apply_root()
         for orientation, utest, agg_type in variants:
-            self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, tag="_sqrt")
+            self._plot(orientation=orientation, apply_u_test=utest, agg_type=agg_type, tag="_sqrt", season=_season)
 
         self._data_table = None
         self._n_boots = None
@@ -1148,9 +1149,10 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self.error_measure = f"root {self.error_measure}"
         self.error_unit = self.error_unit.replace("$^2$", "")
 
-    def _plot(self, orientation: str = "v", apply_u_test: bool = False, agg_type="single", tag=""):
+    def _plot(self, orientation: str = "v", apply_u_test: bool = False, agg_type="single", tag="", season=""):
         self.plot_name = self.default_plot_name + {"v": "_vertical", "h": "_horizontal"}[orientation] + \
-                         {True: "_u_test", False: ""}[apply_u_test] + "_" + agg_type + tag
+                         {True: "_u_test", False: ""}[apply_u_test] + "_" + agg_type + tag + \
+                         (f"_{season}" if season else "")
         if apply_u_test is True and agg_type == "multi":
             return  # not implemented
         data_table = self._data_table
@@ -1198,10 +1200,13 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
             ax.set_xlabel(f"{self.error_measure} (in {self.error_unit})")
             xlims = list(ax.get_xlim())
             ax.set_xlim([xlims[0], xlims[1] * 1.015])
-
         else:
             raise ValueError(f"orientation must be `v' or `h' but is: {orientation}")
-        text = f"n={n_boots}" if self.block_length is None else f"{self.block_length}, n={n_boots}"
+        text = f"n={n_boots}"
+        if self.block_length is not None:
+            text = f"{self.block_length}, {text}"
+        if len(season) > 0:
+            text = f"{season}, {text}"
         loc = "lower left"
         text_box = AnchoredText(text, frameon=True, loc=loc, pad=0.5, bbox_to_anchor=(0., 1.0),
                                 bbox_transform=ax.transAxes)
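
For reference, a self-contained sketch (assumed, mirroring the two snippets above) of how the
season suffix on the plot name and the annotation text compose:

def plot_name_suffix(season: str) -> str:
    # no suffix for the all-season plot, "_<season>" otherwise
    return f"_{season}" if season else ""

def annotation_text(n_boots: int, block_length: str = None, season: str = "") -> str:
    # e.g. "DJF, 1m, n=1000" when both block length and season are set
    text = f"n={n_boots}"
    if block_length is not None:
        text = f"{block_length}, {text}"
    if len(season) > 0:
        text = f"{season}, {text}"
    return text

assert plot_name_suffix("DJF") == "_DJF" and plot_name_suffix("") == ""
assert annotation_text(1000, "1m", "DJF") == "DJF, 1m, n=1000"
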
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 7a31f83fdf09ceed649aa02d4eecb74a9165eba0..eb0d72becc4570c7081f02e14bf0549de1623c47 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -87,6 +87,7 @@ class PostProcessing(RunEnvironment):
         self.skill_scores = None
         self.feature_importance_skill_scores = None
         self.uncertainty_estimate = None
+        self.uncertainty_estimate_seasons = {}
         self.block_mse_per_station = None
         self.competitor_path = self.data_store.get("competitor_path")
         self.competitors = to_list(self.data_store.get_default("competitors", default=[]))
@@ -154,14 +155,16 @@ class PostProcessing(RunEnvironment):
                                                                     separate_ahead=separate_ahead,
                                                                     block_length=block_length)
         self.block_mse_per_station = block_mse_per_station
-        self.uncertainty_estimate = statistics.create_n_bootstrap_realizations(
+        estimate = statistics.create_n_bootstrap_realizations(
             block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim,
-            dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots)
+            dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots, seasons=["DJF", "MAM", "JJA", "SON"])
+        self.uncertainty_estimate = estimate.pop("")  # full-period estimate is stored under key ""
+        self.uncertainty_estimate_seasons = estimate
         self.report_sample_uncertainty()
 
     def report_sample_uncertainty(self, percentiles: list = None):
         """
-        Store raw results of uncertainty estimate and calculate aggregate statistcs and store as raw data but also as
+        Store raw results of the uncertainty estimate and calculate aggregate statistics; store these as raw data and also as
         markdown and latex.
         """
         report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
@@ -170,31 +173,37 @@ class PostProcessing(RunEnvironment):
         # store raw results as nc
         file_name = os.path.join(report_path, "uncertainty_estimate_raw_results.nc")
         self.uncertainty_estimate.to_netcdf(path=file_name)
+        for season, estimate in self.uncertainty_estimate_seasons.items():
+            file_name = os.path.join(report_path, f"uncertainty_estimate_raw_results_{season}.nc")
+            estimate.to_netcdf(path=file_name)
 
         # store statistics
         if percentiles is None:
             percentiles = [.05, .1, .25, .5, .75, .9, .95]
 
-        for ahead_steps in ["single", "multi"]:
-            if ahead_steps == "single":
-                try:
-                    df_descr = self.uncertainty_estimate.to_pandas().describe(percentiles=percentiles).astype("float32")
-                except ValueError:
-                    df_descr = self.uncertainty_estimate.mean(self.ahead_dim).to_pandas().describe(percentiles=percentiles).astype("float32")
-            else:
-                if self.ahead_dim not in self.uncertainty_estimate.dims:
-                    continue
-                df_descr = self.uncertainty_estimate.to_dataframe(self.model_type_dim).unstack().groupby(level=self.ahead_dim).describe(
-                    percentiles=percentiles).astype("float32")
-                df_descr = df_descr.stack(-1)
-                df_descr = df_descr.reorder_levels(df_descr.index.names[::-1])
-                df_sorter = ["count", "mean", "std", "min", *[f"{round(p * 100)}%" for p in percentiles], "max"]
-                df_descr = df_descr.loc[df_sorter]
-            column_format = tables.create_column_format_for_tex(df_descr)
-            file_name = os.path.join(report_path, f"uncertainty_estimate_statistics_{ahead_steps}.%s")
-            tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df_descr)
-            tables.save_to_md(report_path, file_name % "md", df=df_descr)
-            df_descr.to_csv(file_name % "csv", sep=";")
+        for season in [None] + list(self.uncertainty_estimate_seasons.keys()):
+            estimate = self.uncertainty_estimate if season is None else self.uncertainty_estimate_seasons[season]
+            affix = "" if season is None else f"_{season}"
+            for ahead_steps in ["single", "multi"]:
+                if ahead_steps == "single":
+                    try:
+                        df_descr = estimate.to_pandas().describe(percentiles=percentiles).astype("float32")
+                    except ValueError:
+                        df_descr = estimate.mean(self.ahead_dim).to_pandas().describe(percentiles=percentiles).astype("float32")
+                else:
+                    if self.ahead_dim not in estimate.dims:
+                        continue
+                    df_descr = estimate.to_dataframe(self.model_type_dim).unstack().groupby(level=self.ahead_dim).describe(
+                        percentiles=percentiles).astype("float32")
+                    df_descr = df_descr.stack(-1)
+                    df_descr = df_descr.reorder_levels(df_descr.index.names[::-1])
+                    df_sorter = ["count", "mean", "std", "min", *[f"{round(p * 100)}%" for p in percentiles], "max"]
+                    df_descr = df_descr.loc[df_sorter]
+                column_format = tables.create_column_format_for_tex(df_descr)
+                file_name = os.path.join(report_path, f"uncertainty_estimate_statistics_{ahead_steps}{affix}.%s")
+                tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df_descr)
+                tables.save_to_md(report_path, file_name % "md", df=df_descr)
+                df_descr.to_csv(file_name % "csv", sep=";")
 
     def calculate_block_mse(self, evaluate_competitors=True, separate_ahead=False, block_length="1m"):
         """
@@ -589,11 +598,13 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotSampleUncertaintyFromBootstrap" in plot_list and self.uncertainty_estimate is not None:
                 block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate")
-                PlotSampleUncertaintyFromBootstrap(
-                    data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
-                    dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error",
-                    error_unit=r"ppb$^2$", block_length=block_length, model_name=self.model_display_name,
-                    model_indicator=self.forecast_indicator, sampling=self._sampling)
+                for season in [None] + list(self.uncertainty_estimate_seasons.keys()):
+                    estimate = self.uncertainty_estimate if season is None else self.uncertainty_estimate_seasons[season]
+                    PlotSampleUncertaintyFromBootstrap(
+                        data=estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
+                        dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error",
+                        error_unit=r"ppb$^2$", block_length=block_length, model_name=self.model_display_name,
+                        model_indicator=self.forecast_indicator, sampling=self._sampling, season_annotation=season)
         except Exception as e:
             logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")