From dda93fe3f1f5e7a5008eebce30ce03bdea345536 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Tue, 7 Apr 2020 15:46:11 +0200
Subject: [PATCH] added table reporting to latex and markdown

---
 requirements.txt                  |  1 +
 requirements_gpu.txt              |  1 +
 src/run_modules/pre_processing.py | 52 ++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index e7c2f439..b46f4441 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -53,6 +53,7 @@ seaborn==0.10.0
 --no-binary shapely Shapely==1.7.0
 six==1.11.0
 statsmodels==0.11.1
+tabulate
 tensorboard==1.13.1
 tensorflow-estimator==1.13.0
 tensorflow==1.13.1
diff --git a/requirements_gpu.txt b/requirements_gpu.txt
index 9d1c2d62..6ce4df8f 100644
--- a/requirements_gpu.txt
+++ b/requirements_gpu.txt
@@ -53,6 +53,7 @@ seaborn==0.10.0
 --no-binary shapely Shapely==1.7.0
 six==1.11.0
 statsmodels==0.11.1
+tabulate
 tensorboard==1.13.1
 tensorflow-estimator==1.13.0
 tensorflow-gpu==1.13.1
diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py
index b5de28b3..54035402 100644
--- a/src/run_modules/pre_processing.py
+++ b/src/run_modules/pre_processing.py
@@ -3,10 +3,14 @@ __date__ = '2019-11-25'
 
 import logging
+import os
 from typing import Tuple, Dict, List
 
+import numpy as np
+import pandas as pd
+
 from src.data_handling.data_generator import DataGenerator
-from src.helpers import TimeTracking
+from src.helpers import TimeTracking, check_path_and_create
 from src.join import EmptyQueryResult
 from src.run_modules.run_environment import RunEnvironment
 
 
@@ -54,6 +58,52 @@ class PreProcessing(RunEnvironment):
         logging.debug(f"Number of test stations: {n_test}")
         logging.debug(f"TEST SHAPE OF GENERATOR CALL: {self.data_store.get('generator', 'test')[0][0].shape}"
                       f"{self.data_store.get('generator', 'test')[0][1].shape}")
+        self.create_latex_report()
+
+    def create_latex_report(self):
+        r"""
+        This function creates a latex table containing the Station IDs as index, and number of valid data points per
+        station per subset as well as used_meta_data:
+        could look like this
+        \begin{tabular}{llrrrlll}
+        \toprule
+        {} & station\_name & station\_lon & station\_lat & station\_alt & train & val & test \\
+        \midrule
+        DENW094 & Aachen-Burtscheid & 6.0939 & 50.7547 & 205.0 & 1875 & 584 & 1032 \\
+        DEBW029 & Aalen & 10.0963 & 48.8479 & 424.0 & 2958 & 715 & 1080 \\
+        DENI052 & Allertal & 9.6230 & 52.8294 & 38.0 & 2790 & 497 & 1080 \\
+        """
+        meta_data = ['station_name', 'station_lon', 'station_lat', 'station_alt']
+        meta_round = ["station_lon", "station_lat", "station_alt"]  # numeric meta columns, rounded below
+        precision = 4
+        path = self.data_store.get("experiment_path")
+        path = os.path.join(path, "latex_report")
+        check_path_and_create(path)
+        table_name = "test.tex"
+        data_train: DataGenerator = self.data_store.get('generator', 'train')
+        data_val: DataGenerator = self.data_store.get('generator', 'val')
+        data_test: DataGenerator = self.data_store.get('generator', 'test')
+
+        df = pd.DataFrame(columns=meta_data+["train", "val", "test"])
+        for k, data in zip(["train", "val", "test"], [data_train, data_val, data_test]):
+            stations = data.stations
+            for station in stations:
+                df.loc[station, k] = data.get_data_generator(station).get_transposed_label().shape[0]
+                if df.loc[station, meta_data].isnull().any():  # fill meta columns only once per station
+                    df.loc[station, meta_data] = data.get_data_generator(station).meta.loc[meta_data].values.flatten()
+            df.loc["# Samples", k] = df.loc[:, k].sum()
+            df.loc["# Stations", k] = df.loc[:, k].count()
+        df[meta_round] = df[meta_round].astype(float).round(precision)
+        df.sort_index(inplace=True)
+        df = df.reindex(df.index.drop(["# Stations", "# Samples"]).to_list() + ["# Stations", "# Samples"], )
+        df.index.name = 'stat. ID'
+        column_format = np.repeat('c', df.shape[1]+1)
+        column_format[0] = 'l'
+        column_format[-1] = 'r'
+        column_format = ''.join(column_format.tolist())
+        df.to_latex(os.path.join(path, table_name), na_rep='---', column_format=column_format)
+        with open(os.path.join(path, "test.md"), "w") as md_file: df.to_markdown(md_file, tablefmt="github")
+        df.drop(meta_data, axis=1).to_latex(os.path.join(path, "test_short.tex"), na_rep='---', column_format=column_format)
 
     def split_train_val_test(self) -> None:
         """
-- 
GitLab