From e54ef5410152cf9b377f655bd4dd3cbbceea9421 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Wed, 3 Mar 2021 18:00:54 +0100
Subject: [PATCH] moved table creation to new module

---
Review notes (this section sits between the "---" separator and the diffstat
and is not part of the commit message):

 * Line breaks and indentation were restored; this copy of the patch had been
   collapsed onto three physical lines and could not be applied.
 * save_to_md() in the new module now writes through a "with open(...)" block,
   so the file handle it opens is closed again.
 * The column format of station_sample_size_short.tex is now derived from
   df_nometa, the frame that is actually written, instead of df.
 * One-line docstrings were added to save_to_tex() and save_to_md().
 * The hunk header of tables.py and the diffstat were adjusted to the new line
   counts; the blob ids on the "index" lines still refer to the original commit.
 * NOTE(review): whitespace inside context and removed lines was reconstructed
   by convention. If a hunk is rejected, retry with
   "git apply --ignore-whitespace" and confirm against the base file.

 mlair/helpers/tables.py             | 28 +++++++++++++++++++
 mlair/run_modules/pre_processing.py | 43 ++++++++---------------------
 2 files changed, 39 insertions(+), 32 deletions(-)
 create mode 100644 mlair/helpers/tables.py

diff --git a/mlair/helpers/tables.py b/mlair/helpers/tables.py
new file mode 100644
index 00000000..e7628ba4
--- /dev/null
+++ b/mlair/helpers/tables.py
@@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+import os
+
+
+def create_column_format_for_tex(df: pd.DataFrame) -> str:
+    """
+    Creates column format for latex table based on the shape of a given DataFrame.
+
+    Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
+    """
+    column_format = np.repeat('c', df.shape[1] + 1)
+    column_format[0] = 'l'
+    column_format[-1] = 'r'
+    column_format = ''.join(column_format.tolist())
+    return column_format
+
+
+def save_to_tex(path, filename, column_format, df, na_rep='---'):
+    """Write df as a LaTeX table to the file filename inside the directory path."""
+    df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+
+def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
+    """Write df as a markdown table to the file filename inside the directory path."""
+    # The context manager closes the handle; passing a bare open(...) would leave it open.
+    with open(os.path.join(path, filename), mode=mode, encoding=encoding) as f:
+        df.to_markdown(f, tablefmt=tablefmt)
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index cdf195e7..813873b8 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -14,7 +14,7 @@ import numpy as np
 import pandas as pd
 
 from mlair.data_handler import DataCollection, AbstractDataHandler
-from mlair.helpers import TimeTracking, to_list
+from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
@@ -119,19 +119,20 @@ class PreProcessing(RunEnvironment):
         path_config.check_path_and_create(path)
         names_of_set = ["train", "val", "test"]
         df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
-        self.save_to_md(path=path, filename="station_sample_size.md", df=df)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
+        tables.save_to_md(path=path, filename="station_sample_size.md", df=df)
         df_nometa = df.drop(meta_data, axis=1)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
-        self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
+        column_format = tables.create_column_format_for_tex(df_nometa)
+        tables.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format,
+                           df=df_nometa)
+        tables.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
         # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
         #                    column_format=column_format)
         df_descr = self.create_describe_df(df_nometa)
-        column_format = self.create_column_format_for_tex(df_descr)
-        self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
-        self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
+        column_format = tables.create_column_format_for_tex(df_descr)
+        tables.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
+        tables.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
         # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
 
     @staticmethod
@@ -147,15 +148,6 @@ class PreProcessing(RunEnvironment):
         df_descr = df_descr[df_descr_colnames]
         return df_descr
 
-    @staticmethod
-    def save_to_tex(path, filename, column_format, df, na_rep='---'):
-        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
-
-    @staticmethod
-    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
-        df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding),
-                       tablefmt=tablefmt)
-
     def create_info_df(self, meta_data, meta_round, names_of_set, precision):
         df = pd.DataFrame(columns=meta_data + names_of_set)
         for set_name in names_of_set:
@@ -174,19 +166,6 @@ class PreProcessing(RunEnvironment):
         df.index.name = 'stat. ID'
         return df
 
-    @staticmethod
-    def create_column_format_for_tex(df: pd.DataFrame) -> str:
-        """
-        Creates column format for latex table based on the shape of a given DataFrame.
-
-        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
-        """
-        column_format = np.repeat('c', df.shape[1] + 1)
-        column_format[0] = 'l'
-        column_format[-1] = 'r'
-        column_format = ''.join(column_format.tolist())
-        return column_format
-
     def split_train_val_test(self) -> None:
         """
         Split data into subsets.
-- 
GitLab