include .tex table:'station_describe_short'

70670eb3 · Felix Kleinert · 1aec0e47 · 70670eb3
Commit 70670eb3 authored 4 years ago by Felix Kleinert
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -128,15 +128,36 @@ class PreProcessing(RunEnvironment):
        df.sort_index(inplace=True)
        df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
        df.index.name = 'stat. ID'
+        column_format = self.create_column_format_for_tex(df)
+        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
+        df_nometa = df.drop(meta_data, axis=1)
+        df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+                           column_format=column_format)
+        df_descr = df_nometa.iloc[:-2].astype('float32').describe(
+            percentiles=[.05, .1, .25, .5, .75, .9, .95]).astype('int32')
+        df_descr = pd.concat([df_nometa.loc[['# Samples']], df_descr]).T
+        df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True)
+        df_descr_colnames = list(df_descr.columns)
+        df_descr_colnames = [df_descr_colnames[1]] + [df_descr_colnames[0]] + df_descr_colnames[2:]
+        df_descr = df_descr[df_descr_colnames]
+        column_format = self.create_column_format_for_tex(df_descr)
+        df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
+    @staticmethod
+    def create_column_format_for_tex(df: pd.DataFrame) -> str:
+        """
+        Creates column format for latex table based on the shape of a given DataFrame.
+        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
+        """
        column_format = np.repeat('c', df.shape[1] + 1)
        column_format[0] = 'l'
        column_format[-1] = 'r'
        column_format = ''.join(column_format.tolist())
-        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        return column_format
-        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
-        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
-                                            column_format=column_format)
    def split_train_val_test(self) -> None:
        """