From 70670eb36ae225fba38e1951b0626f6dbba5eb37 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Mon, 23 Nov 2020 16:33:08 +0100
Subject: [PATCH] include .tex table:'station_describe_short'

---
Review notes (free text in this region sits outside the commit message and
the diff body; nothing below changes what the patch applies):

What the patch does
  * Extracts the LaTeX column-spec construction into a new static helper,
    create_column_format_for_tex(df). The helper builds a string of
    df.shape[1] + 1 characters: 'l' first, 'r' last, 'c' in between.
    The "+ 1" presumably accounts for the index column that to_latex
    emits -- TODO confirm.
  * Keeps writing station_sample_size.tex, station_sample_size.md and
    station_sample_size_short.tex; the statements are moved in front of
    the (now helper-owned) column-spec lines.
  * Adds a new output, station_describe_short.tex. It is built from
    df_nometa (df without the meta_data columns): all rows except the
    last two are cast to float32 and summarised with describe() at the
    5/10/25/50/75/90/95 percentiles, then cast to int32. The
    '# Samples' row is prepended, the frame is transposed, the columns
    '# Samples' / 'count' are renamed to 'no. samples' / 'no. stations',
    and the first two columns are swapped so 'no. stations' comes first.

NOTE(review): points to confirm before merging
  * df.to_markdown(open(...)) -- the file object returned by open() is
    not closed explicitly in the shown code; a with-block would make the
    lifetime explicit. (The removed lines had the same pattern.)
  * station_sample_size_short.tex reuses the column_format computed from
    df, i.e. before the meta_data columns are dropped, so the spec has
    more entries than df_nometa has columns. Unchanged from the removed
    lines -- verify that this is intended.
  * .astype('int32') on the describe() result discards the fractional
    part of mean/std; presumably it also fails if describe() yields NaN
    (e.g. std for a single station) -- TODO confirm.
  * Two blank lines are added before @staticmethod, and two blank lines
    end up before split_train_val_test; the visible context suggests
    these are methods of one class, where one blank line is the usual
    separation.

 mlair/run_modules/pre_processing.py | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index 4cee4a97..e95be2d5 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -128,15 +128,36 @@ class PreProcessing(RunEnvironment):
         df.sort_index(inplace=True)
         df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
         df.index.name = 'stat. ID'
+        column_format = self.create_column_format_for_tex(df)
+        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
+        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'), tablefmt="github")
+        df_nometa = df.drop(meta_data, axis=1)
+        df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
+                           column_format=column_format)
+        df_descr = df_nometa.iloc[:-2].astype('float32').describe(
+            percentiles=[.05, .1, .25, .5, .75, .9, .95]).astype('int32')
+        df_descr = pd.concat([df_nometa.loc[['# Samples']], df_descr]).T
+        df_descr.rename(columns={"# Samples": "no. samples", "count": "no. stations"}, inplace=True)
+        df_descr_colnames = list(df_descr.columns)
+        df_descr_colnames = [df_descr_colnames[1]] + [df_descr_colnames[0]] + df_descr_colnames[2:]
+        df_descr = df_descr[df_descr_colnames]
+        column_format = self.create_column_format_for_tex(df_descr)
+        df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
+
+
+    @staticmethod
+    def create_column_format_for_tex(df: pd.DataFrame) -> str:
+        """
+        Creates column format for latex table based on the shape of a given DataFrame.
+
+        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
+        """
         column_format = np.repeat('c', df.shape[1] + 1)
         column_format[0] = 'l'
         column_format[-1] = 'r'
         column_format = ''.join(column_format.tolist())
-        df.to_latex(os.path.join(path, "station_sample_size.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "station_sample_size.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
-        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
-                                            column_format=column_format)
+        return column_format
+
 
     def split_train_val_test(self) -> None:
         """
-- 
GitLab