diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index e95be2d5301edcfb4f7b7f5eb386f58a736f1495..bc0ad1bf2fcb800259c86d0106921f6888030f86 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -113,9 +113,47 @@ class PreProcessing(RunEnvironment):
         precision = 4
         path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(path)
-        set_names = ["train", "val", "test"]
-        df = pd.DataFrame(columns=meta_data + set_names)
-        for set_name in set_names:
+        names_of_set = ["train", "val", "test"]
+        df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
+        column_format = self.create_column_format_for_tex(df)
+        self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
+        self.save_to_md(path=path, filename="station_sample_size.md", df=df)
+        df_nometa = df.drop(meta_data, axis=1)
+        self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
+        self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
+        # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+        #                    column_format=column_format)
+        df_descr = self.create_describe_df(df_nometa)
+        column_format = self.create_column_format_for_tex(df_descr)
+        self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
+        self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
+        # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
+
+    @staticmethod
+    def create_describe_df(df, percentiles=None, ignore_last_lines: int = 2):
+        """Return describe() statistics (cast to int32) and the '# Samples' row of df, one row per column of df.
+
+        The trailing `ignore_last_lines` rows are left out of the statistics; 0 keeps all (`[:-0]` would drop all)."""
+        if percentiles is None:
+            percentiles = [.05, .1, .25, .5, .75, .9, .95]
+        stats = df.iloc[:max(len(df) - ignore_last_lines, 0)].astype('float32').describe(percentiles=percentiles)
+        df_descr = pd.concat([df.loc[['# Samples']], stats.astype('int32')]).T
+        df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True)
+        cols = list(df_descr.columns)  # "no. samples" is first after the concat; report "no. stations" first
+        return df_descr[[cols[1], cols[0]] + cols[2:]]
+
+    @staticmethod
+    def save_to_tex(path, filename, column_format, df, na_rep='---'):  # write df as LaTeX to <path>/<filename>
+        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+    @staticmethod
+    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):  # df -> markdown file
+        with open(os.path.join(path, filename), mode=mode, encoding=encoding) as md_file:  # always close the handle
+            df.to_markdown(md_file, tablefmt=tablefmt)
+
+    def create_info_df(self, meta_data, meta_round, names_of_set, precision):
+        df = pd.DataFrame(columns=meta_data + names_of_set)
+        for set_name in names_of_set:
             data = self.data_store.get("data_collection", set_name)
             for station in data:
                 station_name = str(station.id_class)
@@ -128,22 +166,7 @@ class PreProcessing(RunEnvironment):
         df.sort_index(inplace=True)
         df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
         df.index.name = 'stat. ID'
-        column_format = self.create_column_format_for_tex(df)
-        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
-        df_nometa = df.drop(meta_data, axis=1)
-        df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
-                           column_format=column_format)
-        df_descr = df_nometa.iloc[:-2].astype('float32').describe(
-            percentiles=[.05, .1, .25, .5, .75, .9, .95]).astype('int32')
-        df_descr = pd.concat([df_nometa.loc[['# Samples']], df_descr]).T
-        df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True)
-        df_descr_colnames = list(df_descr.columns)
-        df_descr_colnames = [df_descr_colnames[1]] + [df_descr_colnames[0]] + df_descr_colnames[2:]
-        df_descr = df_descr[df_descr_colnames]
-        column_format = self.create_column_format_for_tex(df_descr)
-        df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
-
+        return df
 
     @staticmethod
     def create_column_format_for_tex(df: pd.DataFrame) -> str:
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index bdb8fdabff67ad894275c805522b9df4cf167011..5e89beff72ed659d2ba20045af25612e68ce8d22 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -8,6 +8,8 @@ from mlair.helpers import PyTestRegex
 from mlair.run_modules.experiment_setup import ExperimentSetup
 from mlair.run_modules.pre_processing import PreProcessing
 from mlair.run_modules.run_environment import RunEnvironment
+import pandas as pd
+import numpy as np
 
 
 class TestPreProcessing:
@@ -115,3 +117,38 @@ class TestPreProcessing:
         assert pre.transformation(data_preparation, stations) is None
         class data_preparation_no_trans: pass
         assert pre.transformation(data_preparation_no_trans, stations) is None
+
+    @pytest.fixture  # station summary table: four meta data columns plus sample counts for train/val/test
+    def dummy_df(self):  # NOTE(review): '# Stations' is 6 in each subset, yet only 5 station rows exist - confirm
+        data_dict = {'station_name': {'DEBW013': 'Stuttgart Bad Cannstatt', 'DEBW076': 'Baden-Baden',
+                                      'DEBW087': 'Schwäbische_Alb', 'DEBW107': 'Tübingen',
+                                      'DEBY081': 'Garmisch-Partenkirchen/Kreuzeckbahnstraße', '# Stations': np.nan,
+                                      '# Samples': np.nan},
+                     'station_lon': {'DEBW013': 9.2297, 'DEBW076': 8.2202, 'DEBW087': 9.2076, 'DEBW107': 9.0512,
+                                     'DEBY081': 11.0631, '# Stations': np.nan, '# Samples': np.nan},
+                     'station_lat': {'DEBW013': 48.8088, 'DEBW076': 48.7731, 'DEBW087': 48.3458, 'DEBW107': 48.5077,
+                                     'DEBY081': 47.4764, '# Stations': np.nan, '# Samples': np.nan},
+                     'station_alt': {'DEBW013': 235.0, 'DEBW076': 148.0, 'DEBW087': 798.0, 'DEBW107': 325.0,
+                                     'DEBY081': 735.0, '# Stations': np.nan, '# Samples': np.nan},
+                     'train': {'DEBW013': 1413, 'DEBW076': 3002, 'DEBW087': 3016, 'DEBW107': 1782, 'DEBY081': 2837,
+                               '# Stations': 6, '# Samples': 12050},
+                     'val': {'DEBW013': 698, 'DEBW076': 715, 'DEBW087': 700, 'DEBW107': 701, 'DEBY081': 456,
+                             '# Stations': 6, '# Samples': 3270},
+                     'test': {'DEBW013': 1066, 'DEBW076': 696, 'DEBW087': 1080, 'DEBW107': 1080, 'DEBY081': 700,
+                              '# Stations': 6, '# Samples': 4622}}
+        df = pd.DataFrame.from_dict(data_dict)  # outer keys become columns, inner keys the index (default orient)
+        return df  # each '# Samples' entry equals the sum of the five station counts of its subset
+
+    def test_create_column_format_for_tex(self):
+        """
+        Check the format string returned for data frames with one, two and three columns.
+
+        Each expected string has one character more than the frame has columns.
+        """
+        # keys: number of columns of the frame, values: expected column format
+        expected_by_width = {1: 'lr', 2: 'lcr', 3: 'lccr'}
+        for n_columns, expected in expected_by_width.items():
+            frame = pd.DataFrame(np.ones((2, n_columns)))
+            column_format = PreProcessing.create_column_format_for_tex(frame)
+            assert column_format == expected
+            assert len(column_format) == n_columns + 1