From e54ef5410152cf9b377f655bd4dd3cbbceea9421 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:00:54 +0100
Subject: [PATCH] moved table creation to new module

---
 mlair/helpers/tables.py             | 24 ++++++++++++++++
 mlair/run_modules/pre_processing.py | 43 ++++++++---------------------
 2 files changed, 35 insertions(+), 32 deletions(-)
 create mode 100644 mlair/helpers/tables.py

diff --git a/mlair/helpers/tables.py b/mlair/helpers/tables.py
new file mode 100644
index 00000000..e7628ba4
--- /dev/null
+++ b/mlair/helpers/tables.py
@@ -0,0 +1,49 @@
+import pandas as pd
+import numpy as np
+import os
+
+
+def create_column_format_for_tex(df: pd.DataFrame) -> str:
+    """
+    Create the column format for a latex table based on the shape of a given DataFrame.
+
+    The format has one position more than the DataFrame has columns (presumably for the index column that is
+    written by to_latex). All positions are 'c', except the first, which is set to 'l', and the last, set to 'r'.
+
+    :param df: DataFrame whose number of columns defines the length of the format
+    :return: column format as single string, e.g. 'lcr' for a DataFrame with two columns
+    """
+    column_format = np.repeat('c', df.shape[1] + 1)
+    column_format[0] = 'l'
+    column_format[-1] = 'r'
+    column_format = ''.join(column_format.tolist())
+    return column_format
+
+
+def save_to_tex(path, filename, column_format, df, na_rep='---'):
+    """
+    Store a DataFrame as latex table in file path/filename.
+
+    :param path: directory to store the file in
+    :param filename: name of the file to write
+    :param column_format: column format handed over to DataFrame.to_latex
+    :param df: DataFrame to store
+    :param na_rep: representation of missing values handed over to DataFrame.to_latex
+    """
+    df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+
+def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
+    """
+    Store a DataFrame as markdown table in file path/filename.
+
+    :param path: directory to store the file in
+    :param filename: name of the file to write
+    :param df: DataFrame to store
+    :param mode: mode used to open the file
+    :param encoding: encoding used to open the file
+    :param tablefmt: table format handed over to DataFrame.to_markdown
+    """
+    # open the file in a context manager so the handle is flushed and closed even if to_markdown raises
+    with open(os.path.join(path, filename), mode=mode, encoding=encoding) as f:
+        df.to_markdown(f, tablefmt=tablefmt)
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index cdf195e7..813873b8 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -14,7 +14,7 @@ import numpy as np
 import pandas as pd
 
 from mlair.data_handler import DataCollection, AbstractDataHandler
-from mlair.helpers import TimeTracking, to_list
+from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
@@ -119,19 +119,20 @@ class PreProcessing(RunEnvironment):
         path_config.check_path_and_create(path)
         names_of_set = ["train", "val", "test"]
         df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
-        self.save_to_md(path=path, filename="station_sample_size.md", df=df)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
+        tables.save_to_md(path=path, filename="station_sample_size.md", df=df)
         df_nometa = df.drop(meta_data, axis=1)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
-        self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format,
+                           df=df_nometa)
+        tables.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
         # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
         #                    column_format=column_format)
         df_descr = self.create_describe_df(df_nometa)
-        column_format = self.create_column_format_for_tex(df_descr)
-        self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
-        self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
+        column_format = tables.create_column_format_for_tex(df_descr)
+        tables.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
+        tables.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
         # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
 
     @staticmethod
@@ -147,15 +148,6 @@ class PreProcessing(RunEnvironment):
         df_descr = df_descr[df_descr_colnames]
         return df_descr
 
-    @staticmethod
-    def save_to_tex(path, filename, column_format, df, na_rep='---'):
-        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
-
-    @staticmethod
-    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
-        df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding),
-                       tablefmt=tablefmt)
-
     def create_info_df(self, meta_data, meta_round, names_of_set, precision):
         df = pd.DataFrame(columns=meta_data + names_of_set)
         for set_name in names_of_set:
@@ -174,19 +166,6 @@ class PreProcessing(RunEnvironment):
         df.index.name = 'stat. ID'
         return df
 
-    @staticmethod
-    def create_column_format_for_tex(df: pd.DataFrame) -> str:
-        """
-        Creates column format for latex table based on the shape of a given DataFrame.
-
-        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
-        """
-        column_format = np.repeat('c', df.shape[1] + 1)
-        column_format[0] = 'l'
-        column_format[-1] = 'r'
-        column_format = ''.join(column_format.tolist())
-        return column_format
-
     def split_train_val_test(self) -> None:
         """
         Split data into subsets.
-- 
GitLab