Skip to content
Snippets Groups Projects
Commit dda93fe3 authored by lukas leufen's avatar lukas leufen
Browse files

added table reporting to latex and markdown

parent 26d51f3d
No related branches found
No related tags found
3 merge requests!90WIP: new release update,!89Resolve "release branch / CI on gpu",!86Resolve "Extract number of examples per data-set per station"
Pipeline #33908 failed
......@@ -53,6 +53,7 @@ seaborn==0.10.0
--no-binary shapely Shapely==1.7.0
six==1.11.0
statsmodels==0.11.1
tabulate
tensorboard==1.13.1
tensorflow-estimator==1.13.0
tensorflow==1.13.1
......
......@@ -53,6 +53,7 @@ seaborn==0.10.0
--no-binary shapely Shapely==1.7.0
six==1.11.0
statsmodels==0.11.1
tabulate
tensorboard==1.13.1
tensorflow-estimator==1.13.0
tensorflow-gpu==1.13.1
......
......@@ -3,10 +3,14 @@ __date__ = '2019-11-25'
import logging
import os
from typing import Tuple, Dict, List
import numpy as np
import pandas as pd
from src.data_handling.data_generator import DataGenerator
from src.helpers import TimeTracking
from src.helpers import TimeTracking, check_path_and_create
from src.join import EmptyQueryResult
from src.run_modules.run_environment import RunEnvironment
......@@ -54,6 +58,52 @@ class PreProcessing(RunEnvironment):
logging.debug(f"Number of test stations: {n_test}")
logging.debug(f"TEST SHAPE OF GENERATOR CALL: {self.data_store.get('generator', 'test')[0][0].shape}"
f"{self.data_store.get('generator', 'test')[0][1].shape}")
self.create_latex_report()
def create_latex_report(self):
    r"""
    Write an overview table of all stations as LaTeX and Markdown files.

    The table uses the station IDs as index and lists the station meta data (name, longitude, latitude,
    altitude) followed by the number of samples per station for each subset (train, val, test). Two summary
    rows are appended at the bottom: "# Stations" (number of stations contributing to a subset) and
    "# Samples" (total number of samples in a subset). Stations missing in a subset are rendered as '---'
    in the LaTeX output.

    Three files are written to <experiment_path>/latex_report: "test.tex" (full table), "test.md" (full table,
    github flavoured markdown) and "test_short.tex" (table without the meta data columns).

    The LaTeX output could look like this:

    \begin{tabular}{llrrrlll}
    \toprule
    {} & station\_name & station\_lon & station\_lat & station\_alt & train & val & test \\
    \midrule
    DENW094 & Aachen-Burtscheid & 6.0939 & 50.7547 & 205.0 & 1875 & 584 & 1032 \\
    DEBW029 & Aalen & 10.0963 & 48.8479 & 424.0 & 2958 & 715 & 1080 \\
    DENI052 & Allertal & 9.6230 & 52.8294 & 38.0 & 2790 & 497 & 1080 \\
    """
    meta_data = ['station_name', 'station_lon', 'station_lat', 'station_alt']
    meta_round = ["station_lon", "station_lat", "station_alt"]
    subsets = ["train", "val", "test"]
    summary_rows = ["# Stations", "# Samples"]
    precision = 4

    def tex_column_format(frame):
        # One spec for the index ('l'), one per data column ('c'), the last data column right-aligned ('r').
        return "l" + "c" * (frame.shape[1] - 1) + "r"

    path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
    check_path_and_create(path)

    df = pd.DataFrame(columns=meta_data + subsets)
    for subset in subsets:
        data: DataGenerator = self.data_store.get('generator', subset)
        for station in data.stations:
            station_data = data.get_data_generator(station)
            df.loc[station, subset] = station_data.get_transposed_label().shape[0]
            # Meta data is identical for all subsets, only fill it the first time a station shows up.
            if df.loc[station, meta_data].isnull().any():
                df.loc[station, meta_data] = station_data.meta.loc[meta_data].values.flatten()
        # Evaluate both summary values BEFORE writing either of them into the column. Otherwise the freshly
        # written "# Samples" entry is a non-null value in this column and inflates the station count by one.
        # Summary rows left over from earlier subsets are NaN in this column and are skipped by sum/count.
        column = df.loc[:, subset]
        n_samples, n_stations = column.sum(), column.count()
        df.loc["# Samples", subset] = n_samples
        df.loc["# Stations", subset] = n_stations

    df[meta_round] = df[meta_round].astype(float).round(precision)
    df.sort_index(inplace=True)
    # Sorting mixes the summary rows in between the station IDs; move them to the bottom of the table.
    df = df.reindex(df.index.drop(summary_rows).to_list() + summary_rows)
    df.index.name = 'stat. ID'

    df.to_latex(os.path.join(path, "test.tex"), na_rep='---', column_format=tex_column_format(df))
    # Use a context manager so the markdown file is flushed and closed deterministically; utf-8 matches the
    # default encoding of DataFrame.to_latex and keeps non-ASCII station names intact.
    with open(os.path.join(path, "test.md"), "w", encoding="utf-8") as md_file:
        df.to_markdown(md_file, tablefmt="github")
    # The short table has fewer columns, so it needs its own column format instead of the full table's.
    df_short = df.drop(meta_data, axis=1)
    df_short.to_latex(os.path.join(path, "test_short.tex"), na_rep='---',
                      column_format=tex_column_format(df_short))
def split_train_val_test(self) -> None:
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment