diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index ad6a368fdf7980639802412201e964def80669b2..3631597aedb90b3411163a42490e9c023bad706a 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -196,9 +196,23 @@ def log_apply(data: Data, mean: Data, std: Data) -> Data:
     return standardise_apply(np.log1p(data), mean, std)
 
 
-def mean_squared_error(a, b):
+def mean_squared_error(a, b, dim=None):
     """Calculate mean squared error."""
-    return np.square(a - b).mean()
+    return np.square(a - b).mean(dim)
+
+
+def mean_absolute_error(a, b, dim=None):
+    """Calculate mean absolute error."""
+    return np.abs(a - b).mean(dim)
+
+
+def calculate_error_metrics(a, b, dim):
+    """Calculate MSE, RMSE, and MAE. Additionally return the number of values used in the calculation."""
+    mse = mean_squared_error(a, b, dim)
+    rmse = np.sqrt(mse)
+    mae = mean_absolute_error(a, b, dim)
+    n = (a - b).notnull().sum(dim)
+    return {"mse": mse, "rmse": rmse, "mae": mae, "n": n}
 
 
 class SkillScores:
diff --git a/mlair/helpers/tables.py b/mlair/helpers/tables.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7628ba4f88f56a80eb321a3210d4699148fc485
--- /dev/null
+++ b/mlair/helpers/tables.py
@@ -0,0 +1,24 @@
+import pandas as pd
+import numpy as np
+import os
+
+
+def create_column_format_for_tex(df: pd.DataFrame) -> str:
+    """
+    Create the column format for a latex table based on the shape of a given DataFrame.
+
+    Calculates the number of columns and uses 'c' as column position. The first element is set to 'l', the last to 'r'.
+    """
+    column_format = np.repeat('c', df.shape[1] + 1)
+    column_format[0] = 'l'
+    column_format[-1] = 'r'
+    column_format = ''.join(column_format.tolist())
+    return column_format
+
+
+def save_to_tex(path, filename, column_format, df, na_rep='---'):
+    df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)
+
+
+def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
+    df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding), tablefmt=tablefmt)
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 6f78a03d67a0698274eb4795bc8941c590386063..5216157f9c1bcbd586deec46fec65538144d0e28 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -13,9 +13,10 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
+from mlair.configuration import path_config
 from mlair.data_handler import BootStraps, KerasIterator
 from mlair.helpers.datastore import NameNotFoundInDataStore
-from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list
+from mlair.helpers import TimeTracking, statistics, extract_value, remove_items, to_list, tables
 from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel
 from mlair.model_modules import AbstractModelClass
 from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \
@@ -102,9 +103,11 @@ class PostProcessing(RunEnvironment):
                 create_new_bootstraps = self.data_store.get("create_new_bootstraps", "postprocessing")
                 self.bootstrap_postprocessing(create_new_bootstraps)
 
-        # skill scores
+        # skill scores and error metrics
         with TimeTracking(name="calculate skill scores"):
-            self.skill_scores = self.calculate_skill_scores()
+            skill_score_competitive, skill_score_climatological, errors = self.calculate_error_metrics()
+            self.skill_scores = (skill_score_competitive, skill_score_climatological)
+        self.report_error_metrics(errors)
 
         # plotting
         self.plot()
@@ -386,8 +389,10 @@ class PostProcessing(RunEnvironment):
 
     def calculate_test_score(self):
         """Evaluate test score of model and save locally."""
+
+        # test scores on transformed data
         test_score = self.model.evaluate_generator(generator=self.test_data_distributed,
-                                                   use_multiprocessing=True, verbose=0, steps=1)
+                                                   use_multiprocessing=True, verbose=0)
         path = self.data_store.get("model_path")
         with open(os.path.join(path, "test_scores.txt"), "a") as f:
             for index, item in enumerate(to_list(test_score)):
@@ -656,22 +661,29 @@ class PostProcessing(RunEnvironment):
         except (TypeError, AttributeError):
             return forecast if competitor is None else competitor
 
-    def calculate_skill_scores(self) -> Tuple[Dict, Dict]:
+    def calculate_error_metrics(self) -> Tuple[Dict, Dict, Dict]:
         """
-        Calculate skill scores of NN forecast.
+        Calculate error metrics and skill scores of NN forecast.
 
         The competitive skill score compares the NN prediction with persistence and ordinary least squares forecasts.
         Whereas, the climatological skill scores evaluates the NN prediction in terms of meaningfulness in comparison
         to different climatological references.
 
-        :return: competitive and climatological skill scores
+        :return: competitive and climatological skill scores, error metrics
         """
         path = self.data_store.get("forecast_path")
         all_stations = self.data_store.get("stations")
         skill_score_competitive = {}
         skill_score_climatological = {}
+        errors = {}
         for station in all_stations:
-            external_data = self._get_external_data(station, path)
+            external_data = self._get_external_data(station, path)  # test data
+
+            # test errors
+            errors[station] = statistics.calculate_error_metrics(*map(lambda x: external_data.sel(type=x),
+                                                                      [self.forecast_indicator, "obs"]),
+                                                                 dim="index")
+            # skill score
             competitor = self.load_competitors(station)
             combined = self._combine_forecasts(external_data, competitor, dim="type")
             model_list = remove_items(list(combined.type.values), "obs") if combined is not None else None
@@ -683,4 +695,37 @@ class PostProcessing(RunEnvironment):
             if internal_data is not None:
                 skill_score_climatological[station] = skill_score.climatological_skill_scores(
                     internal_data, self.window_lead_time, forecast_name=self.forecast_indicator)
-        return skill_score_competitive, skill_score_climatological
+
+        errors.update({"total": self.calculate_average_errors(errors)})
+        return skill_score_competitive, skill_score_climatological, errors
+
+    @staticmethod
+    def calculate_average_errors(errors):
+        avg_error = {}
+        n_total = sum([x.get("n", 0) for _, x in errors.items()])
+        for station, station_errors in errors.items():
+            n_station = station_errors.get("n")
+            for error_metric, val in station_errors.items():
+                new_val = avg_error.get(error_metric, 0) + val * n_station / n_total
+                avg_error[error_metric] = new_val
+        return avg_error
+
+    def report_error_metrics(self, errors):
+        report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
+        path_config.check_path_and_create(report_path)
+        metric_collection = {}
+        for station, station_errors in errors.items():
+            for metric, vals in station_errors.items():
+                if metric == "n":
+                    continue
+                pd_vals = pd.DataFrame.from_dict({station: vals}).T
+                pd_vals.columns = [f"{metric}(t+{x})" for x in vals.coords["ahead"].values]
+                mc = metric_collection.get(metric, pd.DataFrame())
+                mc = mc.append(pd_vals)
+                metric_collection[metric] = mc
+        for metric, error_df in metric_collection.items():
+            df = error_df.sort_index()
df.reindex(df.index.drop(["total"]).to_list() + ["total"], ) + column_format = tables.create_column_format_for_tex(df) + tables.save_to_tex(report_path, f"error_report_{metric}.tex", column_format=column_format, df=df) + tables.save_to_md(report_path, f"error_report_{metric}.md", df=df) diff --git a/mlair/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py index cdf195e705238252b117955ab1959c4177cbd17a..813873b8181fcb78917c5ef4e697da63b2941845 100644 --- a/mlair/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -14,7 +14,7 @@ import numpy as np import pandas as pd from mlair.data_handler import DataCollection, AbstractDataHandler -from mlair.helpers import TimeTracking, to_list +from mlair.helpers import TimeTracking, to_list, tables from mlair.configuration import path_config from mlair.helpers.join import EmptyQueryResult from mlair.run_modules.run_environment import RunEnvironment @@ -119,19 +119,20 @@ class PreProcessing(RunEnvironment): path_config.check_path_and_create(path) names_of_set = ["train", "val", "test"] df = self.create_info_df(meta_data, meta_round, names_of_set, precision) - column_format = self.create_column_format_for_tex(df) - self.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df) - self.save_to_md(path=path, filename="station_sample_size.md", df=df) + column_format = tables.create_column_format_for_tex(df) + tables.save_to_tex(path=path, filename="station_sample_size.tex", column_format=column_format, df=df) + tables.save_to_md(path=path, filename="station_sample_size.md", df=df) df_nometa = df.drop(meta_data, axis=1) - column_format = self.create_column_format_for_tex(df) - self.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, df=df_nometa) - self.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa) + column_format = tables.create_column_format_for_tex(df) + tables.save_to_tex(path=path, filename="station_sample_size_short.tex", column_format=column_format, + df=df_nometa) + tables.save_to_md(path=path, filename="station_sample_size_short.md", df=df_nometa) # df_nometa.to_latex(os.path.join(path, "station_sample_size_short.tex"), na_rep='---', # column_format=column_format) df_descr = self.create_describe_df(df_nometa) - column_format = self.create_column_format_for_tex(df_descr) - self.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr) - self.save_to_md(path=path, filename="station_describe_short.md", df=df_descr) + column_format = tables.create_column_format_for_tex(df_descr) + tables.save_to_tex(path=path, filename="station_describe_short.tex", column_format=column_format, df=df_descr) + tables.save_to_md(path=path, filename="station_describe_short.md", df=df_descr) # df_descr.to_latex(os.path.join(path, "station_describe_short.tex"), na_rep='---', column_format=column_format) @staticmethod @@ -147,15 +148,6 @@ class PreProcessing(RunEnvironment): df_descr = df_descr[df_descr_colnames] return df_descr - @staticmethod - def save_to_tex(path, filename, column_format, df, na_rep='---'): - df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format) - - @staticmethod - def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"): - df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding), - tablefmt=tablefmt) - def create_info_df(self, meta_data, meta_round, names_of_set, precision): df = 
         for set_name in names_of_set:
@@ -174,19 +166,6 @@ class PreProcessing(RunEnvironment):
         df.index.name = 'stat. ID'
         return df
 
-    @staticmethod
-    def create_column_format_for_tex(df: pd.DataFrame) -> str:
-        """
-        Creates column format for latex table based on the shape of a given DataFrame.
-
-        Calculates number of columns and uses 'c' as column position. First element is set to 'l', last to 'r'
-        """
-        column_format = np.repeat('c', df.shape[1] + 1)
-        column_format[0] = 'l'
-        column_format[-1] = 'r'
-        column_format = ''.join(column_format.tolist())
-        return column_format
-
     def split_train_val_test(self) -> None:
         """
         Split data into subsets.
diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index d4badfe25c94133a53a93ca69f1b2f63a955803c..5f895b77d53d45bedc255bc7ff051f9d6a8d20a3 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -10,13 +10,15 @@ from typing import Union
 
 import keras
 from keras.callbacks import Callback, History
+import psutil
+import pandas as pd
 
 from mlair.data_handler import KerasIterator
 from mlair.model_modules.keras_extensions import CallbackHandler
 from mlair.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.configuration import path_config
-from mlair.helpers import to_list
+from mlair.helpers import to_list, tables
 
 
 class Training(RunEnvironment):
@@ -141,7 +143,8 @@ class Training(RunEnvironment):
                                                verbose=2,
                                                validation_data=self.val_set,
                                                validation_steps=len(self.val_set),
-                                               callbacks=self.callbacks.get_callbacks(as_dict=False))
+                                               callbacks=self.callbacks.get_callbacks(as_dict=False),
+                                               workers=psutil.cpu_count(logical=False))
         else:
             logging.info("Found locally stored model and checkpoints. Training is resumed from the last checkpoint.")
             self.callbacks.load_callbacks()
@@ -156,7 +159,8 @@ class Training(RunEnvironment):
                                                validation_data=self.val_set,
                                                validation_steps=len(self.val_set),
                                                callbacks=self.callbacks.get_callbacks(as_dict=False),
-                                               initial_epoch=initial_epoch)
+                                               initial_epoch=initial_epoch,
+                                               workers=psutil.cpu_count(logical=False))
             history = hist
         try:
             lr = self.callbacks.get_callback_by_name("lr")
@@ -233,22 +237,26 @@ class Training(RunEnvironment):
             PlotModelLearningRate(filename=os.path.join(path, f"{name}_history_learning_rate.pdf"), lr_sc=lr_sc)
 
     def report_training(self):
+        # create training summary
         data = {"mini batches": len(self.train_set), "upsampling extremes": self.train_set.upsampling,
                 "shuffling": self.train_set.shuffle, "created new model": self._create_new_model, "epochs": self.epochs,
                 "batch size": self.batch_size}
-        import pandas as pd
         df = pd.DataFrame.from_dict(data, orient="index", columns=["training setting"])
         df.sort_index(inplace=True)
-        column_format = "ll"
         path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(path)
-        df.to_latex(os.path.join(path, "training_settings.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "training_settings.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
-        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
-        for index, item in enumerate(to_list(val_score)):
-            logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+        # store as .tex and .md
+        tables.save_to_tex(path, "training_settings.tex", column_format="ll", df=df)
+        tables.save_to_md(path, "training_settings.md", df=df)
+
+        # calculate val scores
+        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0)
+        path = self.data_store.get("model_path")
+        with open(os.path.join(path, "val_scores.txt"), "a") as f:
+            for index, item in enumerate(to_list(val_score)):
+                logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+                f.write(f"{self.model.metrics_names[index]}, {item}\n")
diff --git a/test/test_helpers/test_tables.py b/test/test_helpers/test_tables.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b19b19bac65a9c44b7a46d8ec0b4bff33598a55
--- /dev/null
+++ b/test/test_helpers/test_tables.py
@@ -0,0 +1,21 @@
+import pandas as pd
+import numpy as np
+
+from mlair.helpers import tables
+
+
+class TestTables:
+
+    def test_create_column_format_for_tex(self):
+        df = pd.DataFrame(np.ones((2, 1)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 1+1
+        assert df_col == 'lr'
+        assert len(df_col) == 2
+        df = pd.DataFrame(np.ones((2, 2)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 2+1
+        assert df_col == 'lcr'
+        assert len(df_col) == 3
+        df = pd.DataFrame(np.ones((2, 3)))
+        df_col = tables.create_column_format_for_tex(df)  # len: 3+1
+        assert df_col == 'lccr'
+        assert len(df_col) == 4
diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py
index b5a1914e6b2aacd238f244d304184d9754326db7..5ae64bf3d535e72d9361394741ed8b8094091b1d 100644
--- a/test/test_run_modules/test_pre_processing.py
+++ b/test/test_run_modules/test_pre_processing.py
@@ -140,40 +140,28 @@ class TestPreProcessing:
         data_preparation = AbstractDataHandler
         stations = ['DEBW107', 'DEBY081']
         assert pre.transformation(data_preparation, stations) is None
+        class data_preparation_no_trans: pass
+        assert pre.transformation(data_preparation_no_trans, stations) is None
 
-    @pytest.fixture
-    def dummy_df(self):
-        data_dict = {'station_name': {'DEBW013': 'Stuttgart Bad Cannstatt', 'DEBW076': 'Baden-Baden',
-                                      'DEBW087': 'Schwäbische_Alb', 'DEBW107': 'Tübingen',
-                                      'DEBY081': 'Garmisch-Partenkirchen/Kreuzeckbahnstraße', '# Stations': np.nan,
-                                      '# Samples': np.nan},
-                     'station_lon': {'DEBW013': 9.2297, 'DEBW076': 8.2202, 'DEBW087': 9.2076, 'DEBW107': 9.0512,
-                                     'DEBY081': 11.0631, '# Stations': np.nan, '# Samples': np.nan},
-                     'station_lat': {'DEBW013': 48.8088, 'DEBW076': 48.7731, 'DEBW087': 48.3458, 'DEBW107': 48.5077,
-                                     'DEBY081': 47.4764, '# Stations': np.nan, '# Samples': np.nan},
-                     'station_alt': {'DEBW013': 235.0, 'DEBW076': 148.0, 'DEBW087': 798.0, 'DEBW107': 325.0,
-                                     'DEBY081': 735.0, '# Stations': np.nan, '# Samples': np.nan},
-                     'train': {'DEBW013': 1413, 'DEBW076': 3002, 'DEBW087': 3016, 'DEBW107': 1782, 'DEBY081': 2837,
-                               '# Stations': 6, '# Samples': 12050},
-                     'val': {'DEBW013': 698, 'DEBW076': 715, 'DEBW087': 700, 'DEBW107': 701, 'DEBY081': 456,
-                             '# Stations': 6, '# Samples': 3270},
-                     'test': {'DEBW013': 1066, 'DEBW076': 696, 'DEBW087': 1080, 'DEBW107': 1080, 'DEBY081': 700,
-                              '# Stations': 6, '# Samples': 4622}}
-        df = pd.DataFrame.from_dict(data_dict)
-        return df
-
-    def test_create_column_format_for_tex(self):
-        df = pd.DataFrame(np.ones((2, 1)))
-        df_col = PreProcessing.create_column_format_for_tex(df)  # len: 1+1
-        assert df_col == 'lr'
-        assert len(df_col) == 2
-        df = pd.DataFrame(np.ones((2, 2)))
-        df_col = PreProcessing.create_column_format_for_tex(df)  # len: 2+1
-        assert df_col == 'lcr'
-        assert len(df_col) == 3
-        df = pd.DataFrame(np.ones((2, 3)))
-        df_col = PreProcessing.create_column_format_for_tex(df)  # len: 3+1
-        assert df_col == 'lccr'
-        assert len(df_col) == 4
+    # @pytest.fixture
+    # def dummy_df(self):
+    #     data_dict = {'station_name': {'DEBW013': 'Stuttgart Bad Cannstatt', 'DEBW076': 'Baden-Baden',
+    #                                   'DEBW087': 'Schwäbische_Alb', 'DEBW107': 'Tübingen',
+    #                                   'DEBY081': 'Garmisch-Partenkirchen/Kreuzeckbahnstraße', '# Stations': np.nan,
+    #                                   '# Samples': np.nan},
+    #                  'station_lon': {'DEBW013': 9.2297, 'DEBW076': 8.2202, 'DEBW087': 9.2076, 'DEBW107': 9.0512,
+    #                                  'DEBY081': 11.0631, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'station_lat': {'DEBW013': 48.8088, 'DEBW076': 48.7731, 'DEBW087': 48.3458, 'DEBW107': 48.5077,
+    #                                  'DEBY081': 47.4764, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'station_alt': {'DEBW013': 235.0, 'DEBW076': 148.0, 'DEBW087': 798.0, 'DEBW107': 325.0,
+    #                                  'DEBY081': 735.0, '# Stations': np.nan, '# Samples': np.nan},
+    #                  'train': {'DEBW013': 1413, 'DEBW076': 3002, 'DEBW087': 3016, 'DEBW107': 1782, 'DEBY081': 2837,
+    #                            '# Stations': 6, '# Samples': 12050},
+    #                  'val': {'DEBW013': 698, 'DEBW076': 715, 'DEBW087': 700, 'DEBW107': 701, 'DEBY081': 456,
+    #                          '# Stations': 6, '# Samples': 3270},
+    #                  'test': {'DEBW013': 1066, 'DEBW076': 696, 'DEBW087': 1080, 'DEBW107': 1080, 'DEBY081': 700,
+    #                           '# Stations': 6, '# Samples': 4622}}
+    #     df = pd.DataFrame.from_dict(data_dict)
+    #     return df
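
Usage note (appended for review, not part of the patch): a minimal sketch of how the new statistics and tables helpers compose. The toy DataArrays, the station label "demo_station", and the output path "/tmp" are illustrative assumptions; the dimension names ("index", "ahead") follow the post_processing code above, and save_to_md additionally requires the tabulate package for pandas' to_markdown.

    import numpy as np
    import pandas as pd
    import xarray as xr

    from mlair.helpers import statistics, tables

    # toy forecast/observation arrays with the dimensions used in post_processing.py
    # ("index" = samples, "ahead" = forecast lead time); the values are random
    index = pd.date_range("2020-01-01", periods=10)
    obs = xr.DataArray(np.random.rand(10, 3), dims=("index", "ahead"),
                       coords={"index": index, "ahead": [1, 2, 3]})
    nn = obs + 0.1 * np.random.randn(10, 3)  # perturbed copy standing in for a forecast

    # per-lead-time error metrics, as computed in PostProcessing.calculate_error_metrics
    errors = statistics.calculate_error_metrics(nn, obs, dim="index")
    print(errors["rmse"].values, errors["n"].values)

    # table export, mirroring PostProcessing.report_error_metrics
    df = pd.DataFrame.from_dict({"demo_station": errors["rmse"]}).T
    df.columns = [f"rmse(t+{x})" for x in errors["rmse"].coords["ahead"].values]
    column_format = tables.create_column_format_for_tex(df)  # 'lccr' for 3 + 1 columns
    tables.save_to_tex("/tmp", "error_report_rmse.tex", column_format=column_format, df=df)
    tables.save_to_md("/tmp", "error_report_rmse.md", df=df)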