From 8c05b32752bcc1ae355c8b5c35d3fec563cb65cb Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Thu, 29 Oct 2020 07:41:33 +0100
Subject: [PATCH] update tex report

---
 src/run_modules/pre_processing.py | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py
index 551ea599..5731b7a6 100644
--- a/src/run_modules/pre_processing.py
+++ b/src/run_modules/pre_processing.py
@@ -102,14 +102,32 @@ class PreProcessing(RunEnvironment):
         df.sort_index(inplace=True)
         df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
         df.index.name = 'stat. ID'
-        column_format = np.repeat('c', df.shape[1]+1)
+        column_format = self.create_column_format_for_tex(df)
+        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
+        df_nometa = df.drop(meta_data, axis=1)
+        df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+                           column_format=column_format)
+        df_descr = df_nometa.iloc[:-2].astype('float32').describe(
+            percentiles=[.05, .1, .25, .5, .75, .9, .95]).astype('int32')
+        df_descr = pd.concat([df_nometa.loc[['# Samples']], df_descr]).T
+        df_descr.rename(columns={"# Samples": "sum"}, inplace=True)
+        column_format = self.create_column_format_for_tex(df_descr)
+        df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---',
+                          column_format=column_format)
+
+    @staticmethod
+    def create_column_format_for_tex(df: pd.DataFrame) -> str:
+        """
+        Create the column format string for a LaTeX table based on the shape of a given DataFrame.
+
+        Uses 'c' for each of the df.shape[1] + 1 entries, then sets the first entry to 'l' and the last to 'r'.
+        """
+        column_format = np.repeat('c', df.shape[1] + 1)
         column_format[0] = 'l'
         column_format[-1] = 'r'
         column_format = ''.join(column_format.tolist())
-        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
-        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
-                                            column_format=column_format)
+        return column_format
 
     def split_train_val_test(self) -> None:
         """
-- 
GitLab