diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py
index 147f480cda6c9f6466057bdc5cb152076e0f7132..551ea599a3114b7b97f5bcb146cf6e131e324eb5 100644
--- a/src/run_modules/pre_processing.py
+++ b/src/run_modules/pre_processing.py
@@ -62,37 +62,42 @@ class PreProcessing(RunEnvironment):
 
     def create_latex_report(self):
         """
-        This function creates a latex table containing the Station IDs as index, and number of valid data points per
-        station per subset as well as used_meta_data:
-        could look like this
-        \begin{tabular}{llrrrlll}
-        \toprule
-        {} &                 station\_name &  station\_lon &  station\_lat &  station\_alt & train &  val &  test \\
-        \midrule
-        DENW094 &            Aachen-Burtscheid &       6.0939 &      50.7547 &        205.0 &  1875 &  584 &  1032 \\
-        DEBW029 &                        Aalen &      10.0963 &      48.8479 &        424.0 &  2958 &  715 &  1080 \\
-        DENI052 &                     Allertal &       9.6230 &      52.8294 &         38.0 &  2790 &  497 &  1080 \\
+        This function creates tables with information on the station meta data and a summary of subset sample sizes.
+
+        * station_sample_size.md: see table below
+        * station_sample_size.tex: same as table below, but as latex table
+        * station_sample_size_short.tex: reduced size table without any meta data besides station ID, as latex table
+
+        All tables are stored in the folder latex_report inside experiment_path. The table format (e.g. which meta
+        data is highlighted) is currently hardcoded to have a stable table style. If further styles are needed, it is
+        better to add an additional style than to modify the existing table styles.
+
+        | stat. ID   | station_name                              |   station_lon |   station_lat |   station_alt |   train |   val |   test |
+        |------------|-------------------------------------------|---------------|---------------|---------------|---------|-------|--------|
+        | DEBW013    | Stuttgart Bad Cannstatt                   |        9.2297 |       48.8088 |           235 |    1434 |   712 |   1080 |
+        | DEBW076    | Baden-Baden                               |        8.2202 |       48.7731 |           148 |    3037 |   722 |    710 |
+        | DEBW087    | Schwäbische_Alb                           |        9.2076 |       48.3458 |           798 |    3044 |   714 |   1087 |
+        | DEBW107    | Tübingen                                  |        9.0512 |       48.5077 |           325 |    1803 |   715 |   1087 |
+        | DEBY081    | Garmisch-Partenkirchen/Kreuzeckbahnstraße |       11.0631 |       47.4764 |           735 |    2935 |   525 |    714 |
+        | # Stations | nan                                       |      nan      |      nan      |           nan |       5 |     5 |      5 |
+        | # Samples  | nan                                       |      nan      |      nan      |           nan |   12253 |  3388 |   4678 |
+
         """
         meta_data = ['station_name', 'station_lon', 'station_lat', 'station_alt']
         meta_round = ["station_lon", "station_lat", "station_alt"]
         precision = 4
-        path = self.data_store.get("experiment_path")
-        path = os.path.join(path, "latex_report")
+        path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         check_path_and_create(path)
-        table_name = "test.tex"
-        data_train: DataGenerator = self.data_store.get('generator', 'train')
-        data_val: DataGenerator = self.data_store.get('generator', 'val')
-        data_test: DataGenerator = self.data_store.get('generator', 'test')
-
-        df = pd.DataFrame(columns=meta_data+["train", "val", "test"])
-        for k, data in zip(["train", "val", "test"], [data_train, data_val, data_test]):
-            stations = data.stations
-            for station in stations:
-                df.loc[station, k] = data.get_data_generator(station).get_transposed_label().shape[0]
+        set_names = ["train", "val", "test"]
+        df = pd.DataFrame(columns=meta_data+set_names)
+        for set_name in set_names:
+            data: DataGenerator = self.data_store.get("generator", set_name)
+            for station in data.stations:
+                df.loc[station, set_name] = data.get_data_generator(station).get_transposed_label().shape[0]
                 if df.loc[station, meta_data].isnull().any():
                     df.loc[station, meta_data] = data.get_data_generator(station).meta.loc[meta_data].values.flatten()
-            df.loc["# Samples", k] = df.loc[:, k].sum()
-            df.loc["# Stations", k] = df.loc[:, k].count()
+            df.loc["# Samples", set_name] = df.loc[:, set_name].sum()
+            df.loc["# Stations", set_name] = df.loc[:, set_name].drop("# Samples").count()  # skip summary row
         df[meta_round] = df[meta_round].astype(float).round(precision)
         df.sort_index(inplace=True)
         df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
@@ -101,9 +106,11 @@ class PreProcessing(RunEnvironment):
         column_format[0] = 'l'
         column_format[-1] = 'r'
         column_format = ''.join(column_format.tolist())
-        df.to_latex(os.path.join(path, "test.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "test.md"), mode="w", encoding='utf-8'), tablefmt="github")
-        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "test_short.tex"), na_rep='---', column_format=column_format)
+        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        with open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8') as md_file:
+            df.to_markdown(md_file, tablefmt="github")  # handle is flushed and closed on leaving the block
+        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+                                            column_format=column_format)
 
     def split_train_val_test(self) -> None:
         """