diff --git a/src/helpers/statistics.py b/src/helpers/statistics.py
index a38f8915dc06b1e8760f758dfb50c449a1cbd71d..74bac269fb904db81365450e92536e8c494bf027 100644
--- a/src/helpers/statistics.py
+++ b/src/helpers/statistics.py
@@ -117,41 +117,39 @@ class SkillScores:
     r"""
     Calculate different kinds of skill scores.
 
-    **Skill score on MSE**:
+    Skill score on MSE:
+        Calculate the skill score based on MSE for a given forecast, reference, and observations.
 
-    Calculate skill score based on MSE for given forecast, reference and observations.
+        .. math::
 
-    .. math::
+            \text{SkillScore} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, ref)}}
 
-        \text{SkillScore} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, ref)}}
+        To run:
 
-    To run:
+        .. code-block:: python
 
-    .. code-block:: python
+            skill_scores = SkillScores(None).general_skill_score(data, observation_name, forecast_name, reference_name)
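+
+        For intuition, a minimal numeric sketch of the formula in plain numpy (the values below are made up for
+        illustration and are not part of this module):
+
+        .. code-block:: python
+
+            import numpy as np
+
+            def mse(x, y):
+                return np.mean((x - y) ** 2)
+
+            obs = np.array([1.0, 2.0, 3.0])
+            forecast = np.array([1.1, 1.9, 3.2])
+            reference = np.array([2.0, 2.0, 2.0])
+
+            # > 0 means the forecast improves on the reference; 1 would be a perfect forecast
+            skill_score = 1 - mse(obs, forecast) / mse(obs, reference)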
 
-        skill_scores = SkillScores(None).general_skill_score(data, observation_name, forecast_name, reference_name)
+    Competitive skill score:
+        Calculate skill scores to highlight differences between forecasts. This skill score is also based on the MSE.
+        Currently, the required forecasts are CNN, OLS and persi, as well as the observation obs.
 
-    **Competitive skill score**:
+        .. code-block:: python
 
-    Calculate skill scores to highlight differences between forecasts. This skill score is also based on the MSE.
-    Currently required forecasts are CNN, OLS and persi, as well as the observation obs.
+            skill_scores_class = SkillScores(internal_data)  # must contain columns CNN, OLS, persi and obs.
+            skill_scores = skill_scores_class.skill_scores(window_lead_time=3)
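+
+        As in the general case above, a pairwise comparison of two competing forecasts can be read as (a sketch of
+        this interpretation, not a formula taken from this module):
+
+        .. math::
+
+            \text{SkillScore} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, competitor)}}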
 
-    .. code-block:: python
+    Skill score according to Murphy:
+        Follow the climatological skill score definition of Murphy (1988). External data is data from a different
+        time period than the internal data set on initialisation. In other terms, this should be the train and
+        validation data, whereas the internal data is the test data. This might sound counter-intuitive, but when
+        the skill of one model is compared to another, this must be performed on the test data set. Therefore, in
+        this case the foreign data is the train and validation data.
 
-        skill_scores_class = SkillScores(internal_data)  # must contain columns CNN, OLS, persi and obs.
-        skill_scores = skill_scores_class.skill_scores(window_lead_time=3)
+        .. code-block:: python
 
-    **Skill score according to Murphy**:
-
-    Follow climatological skill score definition of Murphy (1988). External data is data from another time period than
-    the internal data set on initialisation. In other terms, this should be the train and validation data whereas the
-    internal data is the test data. This sounds perhaps counter-intuitive, but if a skill score is evaluated to a model
-    to another, this must be performend test data set. Therefore, for this case the foreign data is train and val data.
-
-    .. code-block:: python
-
-        skill_scores_class = SkillScores(internal_data)  # must contain columns obs and CNN.
-        skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
+            skill_scores_class = SkillScores(internal_data)  # must contain columns obs and CNN.
+            skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
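+
+        For orientation, the climatological case can be read as the general formula above with a climatological
+        reference (a sketch of this interpretation of Murphy (1988), not a definition taken from this module):
+
+        .. math::
+
+            \text{SkillScore}_{\text{clim}} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, clim)}}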
 
     """
 
diff --git a/src/model_modules/__init__.py b/src/model_modules/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..35f4060886036d3f51c24b4480738566ff80a445 100644
--- a/src/model_modules/__init__.py
+++ b/src/model_modules/__init__.py
@@ -0,0 +1 @@
+"""Collection of all modules that are related to a model."""
diff --git a/src/model_modules/keras_extensions.py b/src/model_modules/keras_extensions.py
index 0b374bc4cfb55c945aeceb54112579716e1c6c17..c41d722197c2529f04f6643cc72b51f0d3fe0087 100644
--- a/src/model_modules/keras_extensions.py
+++ b/src/model_modules/keras_extensions.py
@@ -296,7 +296,7 @@ class CallbackHandler:
         else:
             return [clb["callback"] for clb in self._get_callbacks()]
 
-    def get_callback_by_name(self, obj_name: str) -> Callback:
+    def get_callback_by_name(self, obj_name: str) -> Union[Callback, History]:
         """
         Get single callback by its name.
 
diff --git a/src/run_modules/training.py b/src/run_modules/training.py
index 93eb5762ec765b34191537e72abb2277cddeea7a..389e0eb4d64c44b4c94f99f41f0d3878d2a856a7 100644
--- a/src/run_modules/training.py
+++ b/src/run_modules/training.py
@@ -1,3 +1,5 @@
+"""Training module."""
+
 __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2019-12-05'
 
@@ -7,16 +9,54 @@ import os
 from typing import Union
 
 import keras
+from keras.callbacks import Callback, History
 
 from src.data_handling.data_distributor import Distributor
-from src.model_modules.keras_extensions import LearningRateDecay, CallbackHandler
+from src.model_modules.keras_extensions import CallbackHandler
 from src.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from src.run_modules.run_environment import RunEnvironment
 
 
 class Training(RunEnvironment):
+    """
+    Perform training.
+
+        #. set_generators(): set generators for training, validation and testing and distribute according to batch size
+        #. make_predict_function(): create predict function before distribution on multiple nodes (detailed information
+           in method description)
+        #. train(): start or resume training of model and save callbacks
+        #. save_model(): save best model from training as final model
+
+    Required objects [scope] from data store:
+        * `model` [model]
+        * `batch_size` [model]
+        * `epochs` [model]
+        * `callbacks` [model]
+        * `model_name` [model]
+        * `experiment_name` [.]
+        * `experiment_path` [.]
+        * `trainable` [.]
+        * `create_new_model` [.]
+        * `generator` [train, val, test]
+        * `plot_path` [.]
+
+    Optional objects:
+        * `permute_data` [train, val, test]
+        * `upsampling` [train, val, test]
+
+    Sets:
+        * `best_model` [.]
+
+    Creates:
+        * `<exp_name>_model-best.h5`
+        * `<exp_name>_model-best-callbacks-<name>.h5` (all callbacks from CallbackHandler)
+        * `history.json`
+        * `history_lr.json` (optional)
+        * `<exp_name>_history_<name>.pdf` (different monitoring plots depending on loss metrics and callbacks)
+
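+    A minimal usage sketch (assuming the data store has been populated by the preceding run modules):
+
+    .. code-block:: python
+
+        Training()  # training is triggered on instantiation via _run()
+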
+    """
 
     def __init__(self):
+        """Set up training."""
         super().__init__()
         self.model: keras.Model = self.data_store.get("model", "model")
         self.train_set: Union[Distributor, None] = None
@@ -31,17 +71,7 @@ class Training(RunEnvironment):
         self._run()
 
     def _run(self) -> None:
-        """
-        Perform training
-        1) set_generators():
-            set generators for training, validation and testing and distribute according to batch size
-        2) make_predict_function():
-            create predict function before distribution on multiple nodes (detailed information in method description)
-        3) train():
-            start or resume training of model and save callbacks
-        4) save_model():
-            save best model from training as final model
-        """
+        """Run training. Details in class description."""
         self.set_generators()
         self.make_predict_function()
         if self._trainable:
@@ -52,40 +82,44 @@ class Training(RunEnvironment):
 
     def make_predict_function(self) -> None:
         """
-        Creates the predict function. Must be called before distributing. This is necessary, because tf will compile
-        the predict function just in the moment it is used the first time. This can cause problems, if the model is
-        distributed on different workers. To prevent this, the function is pre-compiled. See discussion @
+        Create predict function.
+
+        Must be called before distributing. This is necessary because tf compiles the predict function only at the
+        moment it is used for the first time. This can cause problems if the model is distributed across different
+        workers. To prevent this, the function is pre-compiled. See discussion @
         https://stackoverflow.com/questions/40850089/is-keras-thread-safe/43393252#43393252
         """
         self.model._make_predict_function()
 
     def _set_gen(self, mode: str) -> None:
         """
-        Set and distribute the generators for given mode regarding batch size
+        Set and distribute the generators for the given mode regarding batch size.
 
         :param mode: name of set, should be from ["train", "val", "test"]
         """
         gen = self.data_store.get("generator", mode)
-        # permute_data = self.data_store.get_default("permute_data", mode, default=False)
         kwargs = self.data_store.create_args_dict(["permute_data", "upsampling"], scope=mode)
         setattr(self, f"{mode}_set", Distributor(gen, self.model, self.batch_size, **kwargs))
 
     def set_generators(self) -> None:
         """
-        Set all generators for training, validation, and testing subsets. The called sub-method will automatically
-        distribute the data according to the batch size. The subsets can be accessed as class variables train_set,
-        val_set, and test_set .
+        Set all generators for training, validation, and testing subsets.
+
+        The called sub-method will automatically distribute the data according to the batch size. The subsets can be
+        accessed as class variables train_set, val_set, and test_set.
         """
         for mode in ["train", "val", "test"]:
             self._set_gen(mode)
 
     def train(self) -> None:
         """
-        Perform training using keras fit_generator(). Callbacks are stored locally in the experiment directory. Best
-        model from training is saved for class variable model. If the file path of checkpoint is not empty, this method
-        assumes, that this is not a new training starting from the very beginning, but a resumption from a previous
-        started but interrupted training (or a stopped and now continued training). Train will automatically load the
-        locally stored information and the corresponding model and proceed with the already started training.
+        Perform training using keras fit_generator().
+
+        Callbacks are stored locally in the experiment directory. The best model from training is saved to the
+        class variable model. If the checkpoint file path is not empty, this method assumes that this is not a new
+        training starting from the very beginning, but the resumption of a previously started but interrupted
+        training (or a stopped and now continued training). Train will automatically load the locally stored
+        information and the corresponding model and proceed with the already started training.
         """
         logging.info(f"Train with {len(self.train_set)} mini batches.")
         logging.info(f"Train with option upsampling={self.train_set.upsampling}.")
@@ -105,7 +139,7 @@ class Training(RunEnvironment):
             self.callbacks.load_callbacks()
             self.callbacks.update_checkpoint()
             self.model = keras.models.load_model(checkpoint.filepath)
-            hist = self.callbacks.get_callback_by_name("hist")
+            hist: History = self.callbacks.get_callback_by_name("hist")
             initial_epoch = max(hist.epoch) + 1
             _ = self.model.fit_generator(generator=self.train_set.distribute_on_batches(),
                                          steps_per_epoch=len(self.train_set),
@@ -125,9 +159,7 @@ class Training(RunEnvironment):
         self.create_monitoring_plots(history, lr)
 
     def save_model(self) -> None:
-        """
-        save model in local experiment directory. Model is named as <experiment_name>_<custom_model_name>.h5 .
-        """
+        """Save model in local experiment directory. Model is named as `<experiment_name>_<custom_model_name>.h5`."""
         model_name = self.data_store.get("model_name", "model")
         logging.debug(f"save best model to {model_name}")
         self.model.save(model_name)
@@ -146,13 +178,15 @@ class Training(RunEnvironment):
         except OSError:
             logging.info('no weights to reload...')
 
-    def save_callbacks_as_json(self, history: keras.callbacks.History, lr_sc: keras.callbacks) -> None:
+    def save_callbacks_as_json(self, history: Callback, lr_sc: Callback) -> None:
         """
         Save callbacks (history, learning rate) of training.
+
         * history.history -> history.json
         * lr_sc.lr -> history_lr.json
 
         :param history: history object of training
+        :param lr_sc: learning rate object
         """
         logging.debug("saving callbacks")
         path = self.data_store.get("experiment_path")
@@ -162,13 +196,14 @@ class Training(RunEnvironment):
             with open(os.path.join(path, "history_lr.json"), "w") as f:
                 json.dump(lr_sc.lr, f)
 
-    def create_monitoring_plots(self, history: keras.callbacks.History, lr_sc: LearningRateDecay) -> None:
+    def create_monitoring_plots(self, history: Callback, lr_sc: Callback) -> None:
         """
-        Creates the history and learning rate plot in dependence of the number of epochs. The plots are saved in the
-        experiment's plot_path. History plot is named '<exp_name>_history_loss_val_loss.pdf', the learning rate with
-        '<exp_name>_history_learning_rate.pdf'.
+        Create plots of the history and the learning rate as a function of the number of epochs.
+
+        The plots are saved in the experiment's plot_path. The history plot is named
+        `<exp_name>_history_loss_val_loss.pdf`, the learning rate plot `<exp_name>_history_learning_rate.pdf`.
 
-        :param history: keras history object with losses to plot (must include 'loss' and 'val_loss')
+        :param history: keras history object with losses to plot (must at least include `loss` and `val_loss`)
-        :param lr_sc:  learning rate decay object with 'lr' attribute
+        :param lr_sc: learning rate decay object with `lr` attribute
         """
         path = self.data_store.get("plot_path")