diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index af7975f3a042163a885f590c6624076fe91f03aa..19a4893d49c7702cd092858c5c885453e974cbc1 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -284,7 +284,7 @@ class SkillScores:
     def get_model_name_combinations(self):
         """Return all combinations of two models as tuple and string."""
         combinations = list(itertools.combinations(self.models, 2))
-        combination_strings = [f"{first}-{second}" for (first, second) in combinations]
+        combination_strings = [f"{first} - {second}" for (first, second) in combinations]
         return combinations, combination_strings
 
     def skill_scores(self) -> [pd.DataFrame, pd.DataFrame]:
diff --git a/mlair/plotting/abstract_plot_class.py b/mlair/plotting/abstract_plot_class.py
index dab45156ac1bbe033ba073e01245ffc8b65ca6b3..c91dbec78c4bc990cc9c40c3afb6c506b62928d8 100644
--- a/mlair/plotting/abstract_plot_class.py
+++ b/mlair/plotting/abstract_plot_class.py
@@ -59,7 +59,7 @@ class AbstractPlotClass:
         if not os.path.exists(plot_folder):
             os.makedirs(plot_folder)
         self.plot_folder = plot_folder
-        self.plot_name = plot_name
+        self.plot_name = plot_name.replace("/", "_")
         self.resolution = resolution
         if rc_params is None:
             rc_params = {'axes.labelsize': 'large',
diff --git a/mlair/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py
index 43f1864f7354c1f711bb886f4f97eda56439ab89..2a41aab81d7ed62b1b58af515d703a2281236645 100644
--- a/mlair/plotting/postprocessing_plotting.py
+++ b/mlair/plotting/postprocessing_plotting.py
@@ -171,14 +171,14 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
     warnings.filterwarnings("ignore", message="Attempted to set non-positive bottom ylim on a log-scaled axis.")
 
     def __init__(self, stations: List, data_pred_path: str, plot_folder: str = ".", plot_per_seasons=True,
-                 rolling_window: int = 3, model_name: str = "nn", obs_name: str = "obs", **kwargs):
+                 rolling_window: int = 3, forecast_indicator: str = "nn", obs_indicator: str = "obs", **kwargs):
         """Initialise."""
         super().__init__(plot_folder, "conditional_quantiles")
         self._data_pred_path = data_pred_path
         self._stations = stations
         self._rolling_window = rolling_window
-        self._model_name = model_name
-        self._obs_name = obs_name
+        self._forecast_indicator = forecast_indicator
+        self._obs_name = obs_indicator
         self._opts = self._get_opts(kwargs)
         self._seasons = ['DJF', 'MAM', 'JJA', 'SON'] if plot_per_seasons is True else ""
         self._data = self._load_data()
@@ -205,7 +205,8 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
         for station in self._stations:
             file = os.path.join(self._data_pred_path, f"forecasts_{station}_test.nc")
             data_tmp = xr.open_dataarray(file)
-            data_collector.append(data_tmp.loc[:, :, [self._model_name, self._obs_name]].assign_coords(station=station))
+            data_collector.append(data_tmp.loc[:, :, [self._forecast_indicator,
+                                                      self._obs_name]].assign_coords(station=station))
         res = xr.concat(data_collector, dim='station').transpose('index', 'type', 'ahead', 'station')
         return res
 
@@ -312,15 +313,15 @@ class PlotConditionalQuantiles(AbstractPlotClass):  # pragma: no cover
     def _plot_seasons(self):
         """Create seasonal plots."""
         for season in self._seasons:
-            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._model_name,
+            self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._forecast_indicator,
                             y_model=self._obs_name, plot_name_affix="cali-ref", season=season)
             self._plot_base(data=self._data.where(self._data['index.season'] == season), x_model=self._obs_name,
-                            y_model=self._model_name, plot_name_affix="like-base", season=season)
+                            y_model=self._forecast_indicator, plot_name_affix="like-base", season=season)
 
     def _plot_all(self):
         """Plot overall conditional quantiles on full data."""
-        self._plot_base(data=self._data, x_model=self._model_name, y_model=self._obs_name, plot_name_affix="cali-ref")
-        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._model_name, plot_name_affix="like-base")
+        self._plot_base(data=self._data, x_model=self._forecast_indicator, y_model=self._obs_name, plot_name_affix="cali-ref")
+        self._plot_base(data=self._data, x_model=self._obs_name, y_model=self._forecast_indicator, plot_name_affix="like-base")
 
     @TimeTrackingWrapper
     def _plot_base(self, data: xr.DataArray, x_model: str, y_model: str, plot_name_affix: str, season: str = ""):
@@ -401,14 +402,14 @@ class PlotClimatologicalSkillScore(AbstractPlotClass):  # pragma: no cover
     :param plot_folder: path to save the plot (default: current directory)
     :param score_only: if true plot only scores of CASE I to IV, otherwise plot all single terms (default True)
     :param extra_name_tag: additional tag that can be included in the plot name (default "")
-    :param model_setup: architecture type to specify plot name (default "")
+    :param model_name: architecture type to specify plot name (default "")
 
     """
 
     def __init__(self, data: Dict, plot_folder: str = ".", score_only: bool = True, extra_name_tag: str = "",
-                 model_setup: str = ""):
+                 model_name: str = ""):
         """Initialise."""
-        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_setup}")
+        super().__init__(plot_folder, f"skill_score_clim_{extra_name_tag}{model_name}")
         self._labels = None
         self._data = self._prepare_data(data, score_only)
         self._plot(score_only)
@@ -565,13 +566,13 @@ class PlotCompetitiveSkillScore(AbstractPlotClass):  # pragma: no cover
 
     def _create_pseudo_order(self, data):
         """Provide first predefined elements and append all remaining."""
-        first_elements = [f"{self._model_setup}-persi", "ols-persi", f"{self._model_setup}-ols"]
+        first_elements = [f"{self._model_setup} - persi", "ols - persi", f"{self._model_setup} - ols"]
         first_elements = list(filter(lambda x: x in data.comparison.tolist(), first_elements))
         uniq, index = np.unique(first_elements + data.comparison.unique().tolist(), return_index=True)
         return uniq[index.argsort()]
 
     def _filter_comparisons(self, data):
-        filtered_headers = list(filter(lambda x: "nn-" in x, data.comparison.unique()))
+        filtered_headers = list(filter(lambda x: f"{self._model_setup} - " in x, data.comparison.unique()))
         return data[data.comparison.isin(filtered_headers)]
 
     @staticmethod
@@ -606,23 +607,22 @@ class PlotFeatureImportanceSkillScore(AbstractPlotClass):  # pragma: no cover
 
     """
 
-    def __init__(self, data: Dict, plot_folder: str = ".", model_setup: str = "", separate_vars: List = None,
-                 sampling: str = "daily", ahead_dim: str = "ahead", bootstrap_type: str = None,
-                 bootstrap_method: str = None, boot_dim: str = "boots", model_name: str = "NN",
-                 branch_names: list = None, ylim: tuple = None):
+    def __init__(self, data: Dict, plot_folder: str = ".", separate_vars: List = None, sampling: str = "daily",
+                 ahead_dim: str = "ahead", bootstrap_type: str = None, bootstrap_method: str = None,
+                 boot_dim: str = "boots", model_name: str = "NN", branch_names: list = None, ylim: tuple = None):
         """
         Set attributes and create plot.
 
         :param data: dictionary with station names as keys and 2D xarrays as values, consist on axis ahead and terms.
         :param plot_folder: path to save the plot (default: current directory)
-        :param model_setup: architecture type to specify plot name (default "CNN")
         :param separate_vars: variables to plot separated (default: ['o3'])
         :param sampling: type of sampling rate, should be either hourly or daily (default: "daily")
         :param ahead_dim: name of the ahead dimensions (default: "ahead")
         :param bootstrap_annotation: additional information to use in the file name (default: None)
+        :param model_name: architecture type to specify plot name (default "NN")
         """
         annotation = ["_".join([s for s in ["", bootstrap_type, bootstrap_method] if s is not None])][0]
-        super().__init__(plot_folder, f"feature_importance_{model_setup}{annotation}")
+        super().__init__(plot_folder, f"feature_importance_{model_name}{annotation}")
         if separate_vars is None:
             separate_vars = ['o3']
         self._labels = None
@@ -1053,7 +1053,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
 
     def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                  error_measure: str = "mse", error_unit: str = None, dim_name_boots: str = 'boots',
-                 block_length: str = None):
+                 block_length: str = None, model_name: str = "NN", model_indicator: str = "nn"):
         super().__init__(plot_folder, "sample_uncertainty_from_bootstrap")
         default_name = self.plot_name
         self.model_type_dim = model_type_dim
@@ -1061,6 +1061,7 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self.dim_name_boots = dim_name_boots
         self.error_unit = error_unit
         self.block_length = block_length
+        data = self.rename_model_indicator(data, model_name, model_indicator)
         self.prepare_data(data)
         self._plot(orientation="v")
 
@@ -1078,6 +1079,12 @@ class PlotSampleUncertaintyFromBootstrap(AbstractPlotClass):  # pragma: no cover
         self._data_table = None
         self._n_boots = None
 
+    def rename_model_indicator(self, data, model_name, model_indicator):
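+        """Rename the internal model indicator (e.g. "nn") to the display name along the model type dimension."""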
+        data.coords[self.model_type_dim] = [{model_indicator: model_name}.get(n, n)
+                                            for n in data.coords[self.model_type_dim].values]
+        return data
+
     def prepare_data(self, data: xr.DataArray):
         self._data_table = data.to_pandas()
         if "persi" in self._data_table.columns:
diff --git a/mlair/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py
index 63be6eb4c6e8b5f8d3149df023e07d23805f077f..70b23c3730d9091d3780746cbb3913eefe4dcf95 100644
--- a/mlair/run_modules/experiment_setup.py
+++ b/mlair/run_modules/experiment_setup.py
@@ -224,7 +224,7 @@ class ExperimentSetup(RunEnvironment):
                  max_number_multiprocessing: int = None, start_script: Union[Callable, str] = None,
                  overwrite_lazy_data: bool = None, uncertainty_estimate_block_length: str = None,
                  uncertainty_estimate_evaluate_competitors: bool = None, uncertainty_estimate_n_boots: int = None,
-                 do_uncertainty_estimate: bool = None, **kwargs):
+                 do_uncertainty_estimate: bool = None, model_display_name: str = None, **kwargs):
 
         # create run framework
         super().__init__()
@@ -377,6 +377,9 @@ class ExperimentSetup(RunEnvironment):
                         default=DEFAULT_FEATURE_IMPORTANCE_BOOTSTRAP_TYPE, scope="feature_importance")
 
         self._set_param("plot_list", plot_list, default=DEFAULT_PLOT_LIST, scope="general.postprocessing")
+        if model_display_name is not None:
+            self._set_param("model_display_name", model_display_name)
         self._set_param("neighbors", ["DEBW030"])  # TODO: just for testing
 
         # set competitors
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 3f20d7b5cd8fa8d57c43f204b537ef02c08a8c95..71c49433f34949b538423312c10152d03312165f 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -95,6 +95,7 @@ class PostProcessing(RunEnvironment):
         self.uncertainty_estimate_boot_dim = "boots"
         self.model_type_dim = "type"
         self.index_dim = "index"
+        self.model_display_name = self.data_store.get_default("model_display_name", default=self.model.model_name)
         self._run()
 
     def _run(self):
@@ -480,7 +481,7 @@ class PostProcessing(RunEnvironment):
                     for boot_method, boot_skill_score in boot_data.items():
                         try:
                             PlotFeatureImportanceSkillScore(
-                                boot_skill_score, plot_folder=self.plot_path, model_setup=self.forecast_indicator,
+                                boot_skill_score, plot_folder=self.plot_path, model_name=self.model_display_name,
                                 sampling=self._sampling, ahead_dim=self.ahead_dim,
                                 separate_vars=to_list(self.target_var), bootstrap_type=boot_type,
                                 bootstrap_method=boot_method)
@@ -493,7 +494,9 @@ class PostProcessing(RunEnvironment):
 
         try:
             if "PlotConditionalQuantiles" in plot_list:
-                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path)
+                PlotConditionalQuantiles(self.test_data.keys(), data_pred_path=path, plot_folder=self.plot_path,
+                                         forecast_indicator=self.forecast_indicator,
+                                         obs_indicator=self.observation_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotConditionalQuantiles due to the following error:"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -509,9 +512,9 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotClimatologicalSkillScore" in plot_list:
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path,
-                                             model_setup=self.forecast_indicator)
+                                             model_name=self.model_display_name)
                 PlotClimatologicalSkillScore(self.skill_scores[1], plot_folder=self.plot_path, score_only=False,
-                                             extra_name_tag="all_terms_", model_setup=self.forecast_indicator)
+                                             extra_name_tag="all_terms_", model_name=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotClimatologicalSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -519,7 +522,7 @@ class PostProcessing(RunEnvironment):
         try:
             if "PlotCompetitiveSkillScore" in plot_list:
                 PlotCompetitiveSkillScore(self.skill_scores[0], plot_folder=self.plot_path,
-                                          model_setup=self.forecast_indicator)
+                                          model_setup=self.model_display_name)
         except Exception as e:
             logging.error(f"Could not create plot PlotCompetitiveSkillScore due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -593,7 +596,8 @@ class PostProcessing(RunEnvironment):
                 PlotSampleUncertaintyFromBootstrap(
                     data=self.uncertainty_estimate, plot_folder=self.plot_path, model_type_dim=self.model_type_dim,
                     dim_name_boots=self.uncertainty_estimate_boot_dim, error_measure="mean squared error",
-                    error_unit=r"ppb$^2$", block_length=block_length)
+                    error_unit=r"ppb$^2$", block_length=block_length, model_name=self.model_display_name,
+                    model_indicator=self.forecast_indicator)
         except Exception as e:
             logging.error(f"Could not create plot PlotSampleUncertaintyFromBootstrap due to the following error: {e}"
                           f"\n{sys.exc_info()[0]}\n{sys.exc_info()[1]}\n{sys.exc_info()[2]}")
@@ -903,7 +907,8 @@ class PostProcessing(RunEnvironment):
         errors = {}
         for station in all_stations:
             external_data = self._get_external_data(station, path)  # test data
-
             # test errors
             if external_data is not None:
+                external_data.coords[self.model_type_dim] = [{self.forecast_indicator: self.model_display_name}.get(n, n)
+                                                              for n in external_data.coords[self.model_type_dim].values]
                 model_type_list = external_data.coords[self.model_type_dim].values.tolist()
@@ -1022,8 +1027,8 @@ class PostProcessing(RunEnvironment):
                     df.reindex(df.index.drop(["total"]).to_list() + ["total"], )
                 column_format = tables.create_column_format_for_tex(df)
                 if model_type == "skill_score":
-                    file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_')
+                    file_name = f"error_report_{model_type}_{metric}.%s".replace(' ', '_').replace('/', '_')
                 else:
-                    file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_')
+                    file_name = f"error_report_{metric}_{model_type}.%s".replace(' ', '_').replace('/', '_')
                 tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df)
                 tables.save_to_md(report_path, file_name % "md", df=df)
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index c076253d92a0e24f419046805687d2a80143176c..8d82afb4c002c660e6fb966945b2e383007d5b70 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -70,7 +70,7 @@ class Training(RunEnvironment):
         self.model: keras.Model = self.data_store.get("model", "model")
         self.train_set: Union[KerasIterator, None] = None
         self.val_set: Union[KerasIterator, None] = None
-        self.test_set: Union[KerasIterator, None] = None
+        # self.test_set: Union[KerasIterator, None] = None
         self.batch_size = self.data_store.get("batch_size")
         self.epochs = self.data_store.get("epochs")
         self.callbacks: CallbackHandler = self.data_store.get("callbacks", "model")
@@ -81,9 +81,9 @@ class Training(RunEnvironment):
 
     def _run(self) -> None:
         """Run training. Details in class description."""
-        self.set_generators()
         self.make_predict_function()
         if self._train_model:
+            self.set_generators()
             self.train()
             self.save_model()
             self.report_training()
@@ -118,7 +118,9 @@ class Training(RunEnvironment):
         The called sub-method will automatically distribute the data according to the batch size. The subsets can be
-        accessed as class variables train_set, val_set, and test_set.
+        accessed as class variables train_set and val_set.
         """
-        for mode in ["train", "val", "test"]:
+        logging.info("set generators for training and validation")
+        # for mode in ["train", "val", "test"]:
+        for mode in ["train", "val"]:
             self._set_gen(mode)
 
     def train(self) -> None:
diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py
index b16c0c2586f87af8368ac0059edc8a3997780f69..1b83b3823519d63d5dcbc10f0e31fc3433f98f34 100644
--- a/test/test_run_modules/test_training.py
+++ b/test/test_run_modules/test_training.py
@@ -234,7 +234,7 @@ class TestTraining:
                             statistics_per_var, window_history_size, window_lead_time) -> Training:
 
         channels = len(list(statistics_per_var.keys()))
-        model =  FCN([(window_history_size + 1, 1, channels)], [window_lead_time])
+        model = FCN([(window_history_size + 1, 1, channels)], [window_lead_time])
 
         obj = object.__new__(Training)
         super(Training, obj).__init__()
@@ -306,7 +306,7 @@ class TestTraining:
         assert init_without_run.train_set._collection.return_value == "mock_train_gen"
 
     def test_set_generators(self, init_without_run):
-        sets = ["train", "val", "test"]
+        sets = ["train", "val"]
         assert all([getattr(init_without_run, f"{obj}_set") is None for obj in sets])
         init_without_run.set_generators()
         assert not all([getattr(init_without_run, f"{obj}_set") is None for obj in sets])
@@ -366,10 +366,10 @@ class TestTraining:
     def test_resume_training1(self, path: str, model_path, batch_path, data_collection, statistics_per_var,
                               window_history_size, window_lead_time):
 
-        obj_1st = self.create_training_obj(2, path, data_collection, batch_path, model_path, statistics_per_var,
+        obj_1st = self.create_training_obj(4, path, data_collection, batch_path, model_path, statistics_per_var,
                                            window_history_size, window_lead_time)
         keras.utils.get_custom_objects().update(obj_1st.model.custom_objects)
         assert obj_1st._run() is None
-        obj_2nd = self.create_training_obj(4, path, data_collection, batch_path, model_path, statistics_per_var,
+        obj_2nd = self.create_training_obj(8, path, data_collection, batch_path, model_path, statistics_per_var,
                                            window_history_size, window_lead_time)
         assert obj_2nd._run() is None