diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 65144acfa184578b938840afad862f77b728eadb..460d1c100dadbc2aea5d43932e902cc080177b27 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -476,66 +476,10 @@ class DataHandlerSingleStation(AbstractDataHandler):
         """
         Set up transformation by extracting all relevant information.
 
-        Extract all information from the transformation dictionary. Possible keys are method, mean, std, min, max.
-        * If a transformation should be applied on the basis of existing values, these need to be provided in the
-          respective keys "mean" and "std" (again only if required for the given method).
-
-        :param transformation: the transformation dictionary as described above.
-
-        :return: updated transformation dictionary
-
-        ## Transformation
-
-        There are two different approaches (called scopes) to transform the data:
-        1) `station`: transform data for each station independently (somewhat like batch normalisation)
-        2) `data`: transform all data of each station with shared metrics
-
-        Transformation must be set by the `transformation` attribute. If `transformation = None` is given to `ExperimentSetup`,
-        data is not transformed at all. For all other setups, use the following dictionary structure to specify the
-        transformation.
-        ```
-        transformation = {"scope": <...>,
-                        "method": <...>,
-                        "mean": <...>,
-                        "std": <...>}
-        ExperimentSetup(..., transformation=transformation, ...)
-        ```
-
-        ### scopes
-
-        **station**: mean and std are not used
-
-        **data**: either provide already calculated values for mean and std (if required by transformation method), or choose
-        from different calculation schemes, explained in the mean and std section.
-
-        ### supported transformation methods
-        Currently supported methods are:
-        * standardise (default, if method is not given)
-        * centre
-
-        ### mean and std
-        `"mean"="accurate"`: calculate the accurate values of mean and std (depending on method) by using all data. Although,
-        this method is accurate, it may take some time for the calculation. Furthermore, this could potentially lead to memory
-        issue (not explored yet, but could appear for a very big amount of data)
-
-        `"mean"="estimate"`: estimate mean and std (depending on method). For each station, mean and std are calculated and
-        afterwards aggregated using the mean value over all station-wise metrics. This method is less accurate, especially
-        regarding the std calculation but therefore much faster.
-
-        We recommend using the latter method *estimate* for the following reasons:
-        * much faster calculation
-        * the real accuracy of mean and std is less important, because it is "just" a transformation / scaling
-        * the accuracy of the mean is almost as high as in the *accurate* case, because of
-        $\bar{x_{ij}} = \bar{\left(\bar{x_i}\right)_j}$. The only difference is that in the *estimate* case each
-        station's mean is weighted equally, independently of the station's actual data count.
-        * the accuracy of the std is lower for *estimate* because $\mathrm{Var}(x_{ij}) \ne \bar{\left(\mathrm{Var}(x_i)\right)_j}$,
-        but still the mean of all station-wise stds is a decent estimate of the true std.
-
-        `"mean"=<value, e.g. xr.DataArray>`: If mean and std are already calculated or shall be set manually, just add the
-        scaling values instead of the calculation method. For method *centre*, std can still be None, but is required for the
-        *standardise* method. **Important**: Format of given values **must** match internal data format of DataPreparation
-        class: `xr.DataArray` with `dims=["variables"]` and one value for each variable.
-
+        * Either return new empty DataClass instances if the given transformation argument is None,
+        * or return the given object twice if transformation is a DataClass instance,
+        * or return the inputs and targets attributes if transformation is a TransformationClass instance (default
+          design behaviour), as sketched below.
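+
+        A minimal sketch of the three cases (return values shown as comments; `dh` stands for a hypothetical
+        DataHandlerSingleStation instance, `data_class` and `trafo_class` for instances of the respective classes):
+        ```
+        dh.setup_transformation(None)          # -> (statistics.DataClass(), statistics.DataClass())
+        dh.setup_transformation(data_class)    # -> (data_class, data_class)
+        dh.setup_transformation(trafo_class)   # -> (trafo_class.inputs, trafo_class.targets)
+        ```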
         """
         if transformation is None:
             return statistics.DataClass(), statistics.DataClass()
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index 4dedceb0a942ffbbf0abba530798898b40281ed4..e6dde10bf6bd13013fa454eadd1a7976c00dd3e2 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -217,6 +217,30 @@ class DefaultDataHandler(AbstractDataHandler):
 
     @classmethod
     def transformation(cls, set_stations, **kwargs):
+        """
+        ### supported transformation methods
+
+        Currently supported methods are:
+
+        * standardise (default, if method is not given)
+        * centre
+
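+        A minimal sketch of what the two methods compute (simplified scalar form; the handler works on xarray
+        objects with one mean/std value per variable):
+        ```
+        standardised = (data - mean) / std  # zero mean, unit variance
+        centred = data - mean               # zero mean, variance unchanged
+        ```
+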
+        ### mean and std estimation
+
+        Mean and std (depending on the method) are estimated: for each station, mean and std are calculated and
+        afterwards aggregated using the mean over all station-wise metrics. This method is not exactly accurate,
+        especially regarding the std calculation, but much faster. The aggregation is a weighted mean, weighted by
+        the number of data points per station, so a longer time series has more influence on the transformation
+        settings than a short one. The estimation of the std is less accurate, because a mean of station-wise stds
+        is not equal to the true std, but it is still a decent estimate. Finally, the real accuracy of mean and std
+        is less important, because it is "just" a transformation / scaling.
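+
+        A rough sketch of the aggregation, assuming per-station 1-d arrays in `station_data` (hypothetical names,
+        not the handler's internal API):
+        ```
+        import numpy as np
+
+        means = [d.mean() for d in station_data]    # station-wise means
+        stds = [d.std() for d in station_data]      # station-wise stds
+        weights = [d.size for d in station_data]    # time series lengths
+        mean = np.average(means, weights=weights)   # exact overall mean
+        std = np.average(stds, weights=weights)     # only an estimate of the overall std
+        ```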
+
+        ### mean and std given
+
+        If mean and std are not None, the default data handler expects these parameters to match the data and applies
+        these values to the data. Make sure that all dimensions and/or coordinates are in agreement.
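+
+        A minimal sketch of handing over precomputed values, assuming the `xr.DataArray` format with
+        `dims=["variables"]` used elsewhere in this codebase (the variable names and numbers are hypothetical):
+        ```
+        import xarray as xr
+
+        coords = {"variables": ["o3", "temp"]}
+        mean = xr.DataArray([20., 283.], dims=["variables"], coords=coords)
+        std = xr.DataArray([10., 15.], dims=["variables"], coords=coords)
+        transformation = {"method": "standardise", "mean": mean, "std": std}
+        ```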
+        """
+
         sp_keys = {k: copy.deepcopy(kwargs[k]) for k in cls._requirements if k in kwargs}
         transformation_class = sp_keys.get("transformation", None)
         if transformation_class is None: