diff --git a/mlair/helpers/tables.py b/mlair/helpers/tables.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7628ba4f88f56a80eb321a3210d4699148fc485
--- /dev/null
+++ b/mlair/helpers/tables.py
@@ -0,0 +1,31 @@
+import pandas as pd
+import numpy as np
+import os
+
+
+def create_column_format_for_tex(df: pd.DataFrame) -> str:
+    """
+    Creates column format for latex table based on the shape of a given DataFrame.
+
+    Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
+    """
+    column_format = np.repeat('c', df.shape[1] + 1)
+    column_format[0] = 'l'
+    column_format[-1] = 'r'
+    column_format = ''.join(column_format.tolist())
+    return column_format
+
+
+def save_to_tex(path, filename, column_format, df, na_rep='---'):
+    """Write df as a LaTeX table to <path>/<filename>, rendering missing values as na_rep."""
+    df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+
+def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
+    """
+    Write df as a markdown table to <path>/<filename>.
+
+    The target file is opened in a context manager so the handle is flushed and closed once the table is written.
+    """
+    with open(os.path.join(path, filename), mode=mode, encoding=encoding) as f:
+        df.to_markdown(f, tablefmt=tablefmt)
diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py
index cdf195e705238252b117955ab1959c4177cbd17a..813873b8181fcb78917c5ef4e697da63b2941845 100644
--- a/mlair/run_modules/pre_processing.py
+++ b/mlair/run_modules/pre_processing.py
@@ -14,7 +14,7 @@ import numpy as np
 import pandas as pd
 
 from mlair.data_handler import DataCollection, AbstractDataHandler
-from mlair.helpers import TimeTracking, to_list
+from mlair.helpers import TimeTracking, to_list, tables
 from mlair.configuration import path_config
 from mlair.helpers.join import EmptyQueryResult
 from mlair.run_modules.run_environment import RunEnvironment
@@ -119,19 +119,20 @@ class PreProcessing(RunEnvironment):
         path_config.check_path_and_create(path)
         names_of_set = ["train", "val", "test"]
         df = self.create_info_df(meta_data, meta_round, names_of_set, precision)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
-        self.save_to_md(path=path, filename="station_sample_size.md", df=df)
+        column_format = tables.create_column_format_for_tex(df)
+        tables.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df)
+        tables.save_to_md(path=path, filename="station_sample_size.md", df=df)
         df_nometa = df.drop(meta_data, axis=1)
-        column_format = self.create_column_format_for_tex(df)
-        self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa)
-        self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
+        column_format = tables.create_column_format_for_tex(df_nometa)
+        tables.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format,
+                           df=df_nometa)
+        tables.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa)
         # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---',
         #                    column_format=column_format)
         df_descr = self.create_describe_df(df_nometa)
-        column_format = self.create_column_format_for_tex(df_descr)
-        self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
-        self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
+        column_format = tables.create_column_format_for_tex(df_descr)
+        tables.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr)
+        tables.save_to_md(path=path, filename="station_describe_short.md", df=df_descr)
         # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format)
 
     @staticmethod
@@ -147,15 +148,6 @@ class PreProcessing(RunEnvironment):
         df_descr = df_descr[df_descr_colnames]
         return df_descr
 
-    @staticmethod
-    def save_to_tex(path, filename, column_format, df, na_rep='---'):
-        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
-
-    @staticmethod
-    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
-        df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding),
-                       tablefmt=tablefmt)
-
     def create_info_df(self, meta_data, meta_round, names_of_set, precision):
         df = pd.DataFrame(columns=meta_data + names_of_set)
         for set_name in names_of_set:
@@ -174,19 +166,6 @@ class PreProcessing(RunEnvironment):
         df.index.name = 'stat. ID'
         return df
 
-    @staticmethod
-    def create_column_format_for_tex(df: pd.DataFrame) -> str:
-        """
-        Creates column format for latex table based on the shape of a given DataFrame.
-
-        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
-        """
-        column_format = np.repeat('c', df.shape[1] + 1)
-        column_format[0] = 'l'
-        column_format[-1] = 'r'
-        column_format = ''.join(column_format.tolist())
-        return column_format
-
     def split_train_val_test(self) -> None:
         """
         Split data into subsets.