diff --git a/src/helpers/statistics.py b/src/helpers/statistics.py
index a38f8915dc06b1e8760f758dfb50c449a1cbd71d..74bac269fb904db81365450e92536e8c494bf027 100644
--- a/src/helpers/statistics.py
+++ b/src/helpers/statistics.py
@@ -117,41 +117,39 @@ class SkillScores:
     r"""
     Calculate different kinds of skill scores.
 
-    **Skill score on MSE**:
+    Skill score on MSE:
+        Calculate skill score based on MSE for given forecast, reference and observations.
 
-    Calculate skill score based on MSE for given forecast, reference and observations.
+        .. math::
 
-    .. math::
+            \text{SkillScore} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, ref)}}
 
-        \text{SkillScore} = 1 - \frac{\text{MSE(obs, for)}}{\text{MSE(obs, ref)}}
+        To run:
 
-    To run:
+        .. code-block:: python
 
-    .. code-block:: python
+            skill_scores = SkillScores(None).general_skill_score(data, observation_name, forecast_name, reference_name)
 
-        skill_scores = SkillScores(None).general_skill_score(data, observation_name, forecast_name, reference_name)
+    Competitive skill score:
+        Calculate skill scores to highlight differences between forecasts. This skill score is also based on the MSE.
+        Currently required forecasts are CNN, OLS and persi, as well as the observation obs.
 
-    **Competitive skill score**:
+        .. code-block:: python
 
-    Calculate skill scores to highlight differences between forecasts. This skill score is also based on the MSE.
-    Currently required forecasts are CNN, OLS and persi, as well as the observation obs.
+            skill_scores_class = SkillScores(internal_data)  # must contain columns CNN, OLS, persi and obs.
+            skill_scores = skill_scores_class.skill_scores(window_lead_time=3)
 
-    .. code-block:: python
+    Skill score according to Murphy:
+        Follow the climatological skill score definition of Murphy (1988). External data is data from another time
+        period than the internal data, which is set at initialisation. In other words, the external data should be the
+        train and validation data, whereas the internal data is the test data. This may sound counter-intuitive, but if
+        the skill score of one model is compared to that of another, the comparison must be performed on the test data
+        set. Therefore, in this case the external data is the train and val data.
 
-        skill_scores_class = SkillScores(internal_data)  # must contain columns CNN, OLS, persi and obs.
-        skill_scores = skill_scores_class.skill_scores(window_lead_time=3)
+        .. code-block:: python
 
-    **Skill score according to Murphy**:
-
-    Follow climatological skill score definition of Murphy (1988). External data is data from another time period than
-    the internal data set on initialisation. In other terms, this should be the train and validation data whereas the
-    internal data is the test data. This sounds perhaps counter-intuitive, but if a skill score is evaluated to a model
-    to another, this must be performend test data set. Therefore, for this case the foreign data is train and val data.
-
-    .. code-block:: python
-
-        skill_scores_class = SkillScores(internal_data)  # must contain columns obs and CNN.
-        skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
+            skill_scores_class = SkillScores(internal_data)  # must contain columns obs and CNN.
+            skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
 
     """
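The docstring above reduces the skill score to a ratio of two mean squared errors, 1 - MSE(obs, for) / MSE(obs, ref). A minimal standalone sketch of that formula (illustrative only, not part of this patch; `mse`, `skill_score` and the toy arrays are invented names):

.. code-block:: python

    import numpy as np

    def mse(a: np.ndarray, b: np.ndarray) -> float:
        """Mean squared error between two series."""
        return float(np.mean((a - b) ** 2))

    def skill_score(obs: np.ndarray, forecast: np.ndarray, reference: np.ndarray) -> float:
        """1 - MSE(obs, forecast) / MSE(obs, reference); positive values mean the forecast beats the reference."""
        return 1 - mse(obs, forecast) / mse(obs, reference)

    obs = np.array([1.0, 2.0, 3.0, 4.0])
    reference = np.full_like(obs, obs.mean())     # climatological reference: always predict the mean
    forecast = np.array([1.1, 2.1, 2.9, 4.2])     # toy model forecast
    print(skill_score(obs, forecast, reference))  # ~0.986, the forecast clearly beats the reference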
diff --git a/src/model_modules/__init__.py b/src/model_modules/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..35f4060886036d3f51c24b4480738566ff80a445 100644
--- a/src/model_modules/__init__.py
+++ b/src/model_modules/__init__.py
@@ -0,0 +1 @@
+"""Collection of all modules that are related to a model."""
diff --git a/src/model_modules/keras_extensions.py b/src/model_modules/keras_extensions.py
index 0b374bc4cfb55c945aeceb54112579716e1c6c17..c41d722197c2529f04f6643cc72b51f0d3fe0087 100644
--- a/src/model_modules/keras_extensions.py
+++ b/src/model_modules/keras_extensions.py
@@ -296,7 +296,7 @@ class CallbackHandler:
         else:
             return [clb["callback"] for clb in self._get_callbacks()]
 
-    def get_callback_by_name(self, obj_name: str) -> Callback:
+    def get_callback_by_name(self, obj_name: str) -> Union[Callback, History]:
         """
         Get single callback by its name.
 
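`get_callback_by_name` can hand back the keras `History` object that `CallbackHandler` tracks under the name `hist` (the training module below relies on its `epoch` attribute), which is why the return annotation is widened to `Union[Callback, History]`. A reduced sketch of such a lookup, assuming an internal list of `{"name": ..., "callback": ...}` entries as suggested by `_get_callbacks()` above (a stand-in, not the project's implementation):

.. code-block:: python

    from typing import Union

    from keras.callbacks import Callback, History

    class MiniCallbackHandler:
        """Stand-in for CallbackHandler, reduced to the name lookup."""

        def __init__(self):
            self._callbacks = []  # list of {"name": str, "callback": Callback}

        def add_callback(self, callback: Callback, name: str) -> None:
            self._callbacks.append({"name": name, "callback": callback})

        def get_callback_by_name(self, obj_name: str) -> Union[Callback, History]:
            for clb in self._callbacks:
                if clb["name"] == obj_name:
                    return clb["callback"]
            raise KeyError(f"no callback registered under name '{obj_name}'")

    handler = MiniCallbackHandler()
    handler.add_callback(History(), "hist")
    hist = handler.get_callback_by_name("hist")  # may be a History, hence the Union return type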
diff --git a/src/run_modules/training.py b/src/run_modules/training.py
index 93eb5762ec765b34191537e72abb2277cddeea7a..389e0eb4d64c44b4c94f99f41f0d3878d2a856a7 100644
--- a/src/run_modules/training.py
+++ b/src/run_modules/training.py
@@ -1,3 +1,5 @@
+"""Training module."""
+
 __author__ = "Lukas Leufen, Felix Kleinert"
 __date__ = '2019-12-05'
 
@@ -7,16 +9,54 @@ import os
 from typing import Union
 
 import keras
+from keras.callbacks import Callback, History
 
 from src.data_handling.data_distributor import Distributor
-from src.model_modules.keras_extensions import LearningRateDecay, CallbackHandler
+from src.model_modules.keras_extensions import CallbackHandler
 from src.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate
 from src.run_modules.run_environment import RunEnvironment
 
 
 class Training(RunEnvironment):
+    """
+    Perform training.
+
+    #. set_generators(): set generators for training, validation and testing and distribute according to batch size
+    #. make_predict_function(): create predict function before distribution on multiple nodes (detailed information
+       in method description)
+    #. train(): start or resume training of model and save callbacks
+    #. save_model(): save best model from training as final model
+
+    Required objects [scope] from data store:
+        * `model` [model]
+        * `batch_size` [model]
+        * `epochs` [model]
+        * `callbacks` [model]
+        * `model_name` [model]
+        * `experiment_name` [.]
+        * `experiment_path` [.]
+        * `trainable` [.]
+        * `create_new_model` [.]
+        * `generator` [train, val, test]
+        * `plot_path` [.]
+
+    Optional objects
+        * `permute_data` [train, val, test]
+        * `upsampling` [train, val, test]
+
+    Sets
+        * `best_model` [.]
+
+    Creates
+        * `<exp_name>_model-best.h5`
+        * `<exp_name>_model-best-callbacks-<name>.h5` (all callbacks from CallbackHandler)
+        * `history.json`
+        * `history_lr.json` (optional)
+        * `<exp_name>_history_<name>.pdf` (different monitoring plots depending on loss metrics and callbacks)
+
+    """
 
     def __init__(self):
+        """Set up training."""
         super().__init__()
         self.model: keras.Model = self.data_store.get("model", "model")
         self.train_set: Union[Distributor, None] = None
@@ -31,17 +71,7 @@ class Training(RunEnvironment):
         self._run()
 
     def _run(self) -> None:
-        """
-        Perform training
-        1) set_generators():
-            set generators for training, validation and testing and distribute according to batch size
-        2) make_predict_function():
-            create predict function before distribution on multiple nodes (detailed information in method description)
-        3) train():
-            start or resume training of model and save callbacks
-        4) save_model():
-            save best model from training as final model
-        """
+        """Run training. Details in class description."""
        self.set_generators()
        self.make_predict_function()
        if self._trainable:
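The class docstring above pairs every required object with the data store scope it is read from, e.g. `model` [model] matches `self.data_store.get("model", "model")` in `__init__`. A toy illustration of that name plus scope lookup (hypothetical `ToyDataStore`, only to make the bracket notation concrete; the project's real data store is more elaborate):

.. code-block:: python

    class ToyDataStore:
        """Hypothetical stand-in: values are keyed by (name, scope) pairs."""

        def __init__(self):
            self._store = {}

        def set(self, name, value, scope="."):
            self._store[(name, scope)] = value

        def get(self, name, scope="."):
            return self._store[(name, scope)]

    store = ToyDataStore()
    store.set("batch_size", 512, scope="model")
    store.set("generator", "train_generator", scope="train")
    store.get("batch_size", "model")  # -> 512, i.e. `batch_size` [model] from the docstring

The diff of src/run_modules/training.py continues below with the method bodies.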
@@ -52,40 +82,44 @@
 
     def make_predict_function(self) -> None:
         """
-        Creates the predict function. Must be called before distributing. This is necessary, because tf will compile
-        the predict function just in the moment it is used the first time. This can cause problems, if the model is
-        distributed on different workers. To prevent this, the function is pre-compiled. See discussion @
+        Create predict function.
+
+        Must be called before distributing. This is necessary because tf compiles the predict function only at the
+        moment it is used for the first time. This can cause problems if the model is distributed on different
+        workers. To prevent this, the function is pre-compiled. See discussion @
         https://stackoverflow.com/questions/40850089/is-keras-thread-safe/43393252#43393252
         """
         self.model._make_predict_function()
 
     def _set_gen(self, mode: str) -> None:
         """
-        Set and distribute the generators for given mode regarding batch size
+        Set and distribute the generators for given mode regarding batch size.
 
         :param mode: name of set, should be from ["train", "val", "test"]
         """
         gen = self.data_store.get("generator", mode)
-        # permute_data = self.data_store.get_default("permute_data", mode, default=False)
         kwargs = self.data_store.create_args_dict(["permute_data", "upsampling"], scope=mode)
         setattr(self, f"{mode}_set", Distributor(gen, self.model, self.batch_size, **kwargs))
 
     def set_generators(self) -> None:
         """
-        Set all generators for training, validation, and testing subsets. The called sub-method will automatically
-        distribute the data according to the batch size. The subsets can be accessed as class variables train_set,
-        val_set, and test_set .
+        Set all generators for training, validation, and testing subsets.
+
+        The called sub-method will automatically distribute the data according to the batch size. The subsets can be
+        accessed as class variables train_set, val_set, and test_set.
         """
         for mode in ["train", "val", "test"]:
             self._set_gen(mode)
 
     def train(self) -> None:
         """
-        Perform training using keras fit_generator(). Callbacks are stored locally in the experiment directory. Best
-        model from training is saved for class variable model. If the file path of checkpoint is not empty, this method
-        assumes, that this is not a new training starting from the very beginning, but a resumption from a previous
-        started but interrupted training (or a stopped and now continued training). Train will automatically load the
-        locally stored information and the corresponding model and proceed with the already started training.
+        Perform training using keras fit_generator().
+
+        Callbacks are stored locally in the experiment directory. The best model from training is saved to the class
+        variable model. If the file path of the checkpoint is not empty, this method assumes that this is not a new
+        training starting from the very beginning, but the resumption of a previously started but interrupted training
+        (or a stopped and now continued training). Train will automatically load the locally stored information and
+        the corresponding model and proceed with the already started training.
         """
         logging.info(f"Train with {len(self.train_set)} mini batches.")
         logging.info(f"Train with option upsampling={self.train_set.upsampling}.")
@@ -105,7 +139,7 @@
         self.callbacks.load_callbacks()
         self.callbacks.update_checkpoint()
         self.model = keras.models.load_model(checkpoint.filepath)
-        hist = self.callbacks.get_callback_by_name("hist")
+        hist: History = self.callbacks.get_callback_by_name("hist")
         initial_epoch = max(hist.epoch) + 1
         _ = self.model.fit_generator(generator=self.train_set.distribute_on_batches(),
                                      steps_per_epoch=len(self.train_set),
@@ -125,9 +159,7 @@
         self.create_monitoring_plots(history, lr)
 
     def save_model(self) -> None:
-        """
-        save model in local experiment directory. Model is named as <experiment_name>_<custom_model_name>.h5 .
-        """
+        """Save model in local experiment directory. Model is named as `<experiment_name>_<custom_model_name>.h5`."""
         model_name = self.data_store.get("model_name", "model")
         logging.debug(f"save best model to {model_name}")
         self.model.save(model_name)
@@ -146,13 +178,15 @@
         except OSError:
             logging.info('no weights to reload...')
 
-    def save_callbacks_as_json(self, history: keras.callbacks.History, lr_sc: keras.callbacks) -> None:
+    def save_callbacks_as_json(self, history: Callback, lr_sc: Callback) -> None:
         """
         Save callbacks (history, learning rate) of training.
+
         * history.history -> history.json
         * lr_sc.lr -> history_lr.json
 
         :param history: history object of training
+        :param lr_sc: learning rate object
         """
         logging.debug("saving callbacks")
         path = self.data_store.get("experiment_path")
@@ -162,13 +196,14 @@
         with open(os.path.join(path, "history_lr.json"), "w") as f:
             json.dump(lr_sc.lr, f)
 
-    def create_monitoring_plots(self, history: keras.callbacks.History, lr_sc: LearningRateDecay) -> None:
+    def create_monitoring_plots(self, history: Callback, lr_sc: Callback) -> None:
         """
-        Creates the history and learning rate plot in dependence of the number of epochs. The plots are saved in the
-        experiment's plot_path. History plot is named '<exp_name>_history_loss_val_loss.pdf', the learning rate with
-        '<exp_name>_history_learning_rate.pdf'.
+        Create plots of the history and the learning rate depending on the number of epochs.
+
+        The plots are saved in the experiment's plot_path. The history plot is named
+        `<exp_name>_history_loss_val_loss.pdf`, the learning rate plot `<exp_name>_history_learning_rate.pdf`.
 
-        :param history: keras history object with losses to plot (must include 'loss' and 'val_loss')
+        :param history: keras history object with losses to plot (must at least include `loss` and `val_loss`)
         :param lr_sc: learning rate decay object with 'lr' attribute
         """
         path = self.data_store.get("plot_path")
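The resumption path in `train()` derives the epoch to continue from out of the reloaded `History` callback via `initial_epoch = max(hist.epoch) + 1`. A minimal sketch of that arithmetic (toy values; in the patch itself, `hist` comes from `CallbackHandler` and the call is `fit_generator`):

.. code-block:: python

    from keras.callbacks import History

    # Pretend a previous run completed epochs 0..9 before it was interrupted.
    hist = History()
    hist.epoch = list(range(10))

    initial_epoch = max(hist.epoch) + 1  # -> 10, i.e. resume with the 11th epoch
    # model.fit_generator(..., initial_epoch=initial_epoch, epochs=epochs)
    # keras then trains epochs [initial_epoch, epochs) and appends them to the history.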