diff --git a/mlair/run_modules/training.py b/mlair/run_modules/training.py
index d4badfe25c94133a53a93ca69f1b2f63a955803c..5f895b77d53d45bedc255bc7ff051f9d6a8d20a3 100644
--- a/mlair/run_modules/training.py
+++ b/mlair/run_modules/training.py
@@ -10,13 +10,15 @@ from typing import Union
 
 import keras
 from keras.callbacks import Callback, History
+import psutil
+import pandas as pd
 
 from mlair.data_handler import KerasIterator
 from mlair.model_modules.keras_extensions import CallbackHandler
 from mlair.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from mlair.run_modules.run_environment import RunEnvironment
 from mlair.configuration import path_config
-from mlair.helpers import to_list
+from mlair.helpers import to_list, tables
 
 
 class Training(RunEnvironment):
@@ -141,7 +143,8 @@ class Training(RunEnvironment):
                                            verbose=2,
                                            validation_data=self.val_set,
                                            validation_steps=len(self.val_set),
-                                           callbacks=self.callbacks.get_callbacks(as_dict=False))
+                                           callbacks=self.callbacks.get_callbacks(as_dict=False),
+                                           workers=psutil.cpu_count(logical=False))
         else:
             logging.info("Found locally stored model and checkpoints. Training is resumed from the last checkpoint.")
             self.callbacks.load_callbacks()
@@ -156,7 +159,8 @@ class Training(RunEnvironment):
                                            validation_data=self.val_set,
                                            validation_steps=len(self.val_set),
                                            callbacks=self.callbacks.get_callbacks(as_dict=False),
-                                           initial_epoch=initial_epoch)
+                                           initial_epoch=initial_epoch,
+                                           workers=psutil.cpu_count(logical=False))
             history = hist
         try:
             lr = self.callbacks.get_callback_by_name("lr")
@@ -233,22 +237,26 @@ class Training(RunEnvironment):
             PlotModelLearningRate(filename=os.path.join(path, f"{name}_history_learning_rate.pdf"), lr_sc=lr_sc)
 
     def report_training(self):
+        # create training summary
         data = {"mini batches": len(self.train_set),
                 "upsampling extremes": self.train_set.upsampling,
                 "shuffling": self.train_set.shuffle,
                 "created new model": self._create_new_model,
                 "epochs": self.epochs,
                 "batch size": self.batch_size}
-        import pandas as pd
         df = pd.DataFrame.from_dict(data, orient="index", columns=["training setting"])
         df.sort_index(inplace=True)
-        column_format = "ll"
         path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
         path_config.check_path_and_create(path)
-        df.to_latex(os.path.join(path, "training_settings.tex"), na_rep='---', column_format=column_format)
-        df.to_markdown(open(os.path.join(path, "training_settings.md"), mode="w", encoding='utf-8'),
-                       tablefmt="github")
-        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0, steps=1)
-        for index, item in enumerate(to_list(val_score)):
-            logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+
+        # store as .tex and .md
+        tables.save_to_tex(path, "training_settings.tex", column_format="ll", df=df)
+        tables.save_to_md(path, "training_settings.md", df=df)
+
+        # calculate val scores
+        val_score = self.model.evaluate_generator(generator=self.val_set, use_multiprocessing=True, verbose=0)
+        path = self.data_store.get("model_path")
+        with open(os.path.join(path, "val_scores.txt"), "a") as f:
+            for index, item in enumerate(to_list(val_score)):
+                logging.info(f"{self.model.metrics_names[index]} (val), {item}")
+                f.write(f"{self.model.metrics_names[index]}, {item}\n")
 
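
Note: the patch references `mlair.helpers.tables`, whose implementation is not part of this diff. Judging from the removed `df.to_latex`/`df.to_markdown` calls, the two helpers presumably wrap exactly those pandas calls. A minimal sketch under that assumption (the signatures and default values below are inferred from the call sites above, not confirmed by this diff):

    import os

    def save_to_tex(path, filename, column_format, df, na_rep='---'):
        # assumed wrapper around the df.to_latex call removed in report_training
        df.to_latex(os.path.join(path, filename), na_rep=na_rep, column_format=column_format)

    def save_to_md(path, filename, df, mode="w", encoding='utf-8', tablefmt="github"):
        # assumed wrapper around the df.to_markdown call removed in report_training
        df.to_markdown(open(os.path.join(path, filename), mode=mode, encoding=encoding), tablefmt=tablefmt)

On the `workers=` change: `psutil.cpu_count(logical=False)` returns the number of physical cores and is documented to return `None` on platforms where this cannot be determined, so a fallback such as `psutil.cpu_count(logical=False) or 1` may be worth considering on such systems.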