From 70670eb36ae225fba38e1951b0626f6dbba5eb37 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Mon, 23 Nov 2020 16:33:08 +0100
Subject: [PATCH] include .tex table:'station_describe_short'

---
 mlair/run_modules/pre_processing.py | 31 ++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 4cee4a97..e95be2d5 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -128,15 +128,36 @@ class PreProcessing(RunEnvironment):
         df.sort_index(inplace=True)
         df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
         df.index.name = 'stat. ID'
+        column_format = self.create_column_format_for_tex(df)
+        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
+        df_nometa = df.drop(meta_data, axis=1)
+        df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+                           column_format=column_format)
+        df_descr = df_nometa.iloc[:-2].astype('float32').describe(
+            percentiles=[.05, .1, .25, .5, .75, .9, .95]).astype('int32')
+        df_descr = pd.concat([df_nometa.loc[['# Samples']], df_descr]).T
+        df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True)
+        df_descr_colnames = list(df_descr.columns)
+        df_descr_colnames = [df_descr_colnames[1]] + [df_descr_colnames[0]] + df_descr_colnames[2:]
+        df_descr = df_descr[df_descr_colnames]
+        column_format = self.create_column_format_for_tex(df_descr)
+        df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
+
+
+    @staticmethod
+    def create_column_format_for_tex(df: pd.DataFrame) -> str:
+        """
+        Create the column format string for a LaTeX table based on the shape of a given DataFrame.
+
+        The format has df.shape[1] + 1 entries, all 'c', except that the first is set to 'l' and the last to 'r'.
+        """
         column_format = np.repeat('c', df.shape[1] + 1)
         column_format[0] = 'l'
         column_format[-1] = 'r'
         column_format = ''.join(column_format.tolist())
-        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
-        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
-                                            column_format=column_format)
+        return column_format
+
 
     def split_train_val_test(self) -> None:
         """
-- 
GitLab