From 8e25915ce48e37875d8286eb3be481dd861a9aa0 Mon Sep 17 00:00:00 2001
From: lukas leufen <l.leufen@fz-juelich.de>
Date: Tue, 3 Dec 2019 14:43:38 +0100
Subject: [PATCH] first model setup without any testing

---
 src/flatten.py             |  32 ++++++++
 src/modules/model_setup.py | 172 +++++++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 src/flatten.py
 create mode 100644 src/modules/model_setup.py

diff --git a/src/flatten.py b/src/flatten.py
new file mode 100644
index 00000000..1166cf32
--- /dev/null
+++ b/src/flatten.py
@@ -0,0 +1,32 @@
+__author__ = "Lukas Leufen"
+__date__ = '2019-12-02'
+
+import keras
+from typing import Callable
+
+
+def flatten_tail(input_X: keras.layers, name: str, bound_weight: bool = False, dropout_rate: float = 0.0,
+                 window_lead_time: int = 4, activation: Callable = keras.activations.relu,
+                 reduction_filter: int = 64, first_dense: int = 64):
+
+    X_in = keras.layers.Conv2D(reduction_filter, (1, 1), padding='same', name='{}_Conv_1x1'.format(name))(input_X)
+
+    X_in = activation(name='{}_conv_act'.format(name))(X_in)
+
+    X_in = keras.layers.Flatten(name='{}'.format(name))(X_in)
+
+    X_in = keras.layers.Dropout(dropout_rate, name='{}_Dropout_1'.format(name))(X_in)
+    X_in = keras.layers.Dense(first_dense, kernel_regularizer=keras.regularizers.l2(0.01),
+                              name='{}_Dense_1'.format(name))(X_in)
+    if bound_weight:
+        X_in = keras.layers.Activation('tanh')(X_in)
+    else:
+        try:  # advanced activation layers (e.g. PReLU) accept a name argument
+            X_in = activation(name='{}_act'.format(name))(X_in)
+        except TypeError:  # plain activation functions do not, so wrap them in an Activation layer
+            X_in = keras.layers.Activation(activation, name='{}_act'.format(name))(X_in)
+
+    X_in = keras.layers.Dropout(dropout_rate, name='{}_Dropout_2'.format(name))(X_in)
+    out = keras.layers.Dense(window_lead_time, activation='linear', kernel_regularizer=keras.regularizers.l2(0.01),
+                             name='{}_Dense_2'.format(name))(X_in)
+    return out
diff --git a/src/modules/model_setup.py b/src/modules/model_setup.py
new file mode 100644
index 00000000..6cd0d687
--- /dev/null
+++ b/src/modules/model_setup.py
@@ -0,0 +1,172 @@
+__author__ = "Lukas Leufen"
+__date__ = '2019-12-02'
+
+
+import keras
+from keras import losses, layers
+from keras.callbacks import ModelCheckpoint
+from keras.regularizers import l2
+from keras.optimizers import Adam, SGD
+import tensorflow as tf
+import logging
+
+from src.modules.run_environment import RunEnvironment
+from src.helpers import l_p_loss, LearningRateDecay
+from src.inception_model import InceptionModelBase
+from src.flatten import flatten_tail
+
+
+class ModelSetup(RunEnvironment):
+
+    def __init__(self):
+
+        # create run framework
+        super().__init__()
+        self.model = None
+        self.model_name = self.data_store.get("experiment_name", "general") + "model-best.h5"
+        self.scope = "general.model"
+
+    def _run(self):
+
+        # create checkpoint
+        self._set_checkpoint()
+
+        # set all model settings
+        self.my_model_settings()
+
+        # build model graph using settings from my_model_settings()
+        self.build_model()
+
+        # plot model structure
+        self.plot_model()
+
+        # load weights if no training shall be performed
+        if self.data_store.get("trainable", self.scope) is False:
+            self.load_weights()
+
+        # compile model
+        self.compile_model()
+
+    def compile_model(self):
+        optimizer = self.data_store.get("optimizer", self.scope)
+        loss = self.data_store.get("loss", self.scope)
+        self.model.compile(optimizer=optimizer, loss=loss, metrics=["mse", "mae"])
+
+    def _set_checkpoint(self):
+        # keep a reference to the callback so the training stage can pass it to fit()
+        self.checkpoint = ModelCheckpoint(self.model_name, verbose=1, monitor='val_loss', save_best_only=True,
+                                          mode='auto')
+
+    def load_weights(self):
+        try:
+            logging.debug("reload weights...")
+            self.model.load_weights(self.model_name)
+        except OSError:
+            logging.debug("no weights to reload...")
+
+    def build_model(self):
+        args_list = ["activation", "window_size", "channels", "regularizer", "dropout_rate", "window_lead_time"]
+        args = self.data_store.create_args_dict(args_list, self.scope)
+        self.model = my_model(**args)
+
+    def plot_model(self):
+        with tf.device("/cpu:0"):
+            file_name = self.data_store.get("experiment_name", "general") + "model.pdf"
+            keras.utils.plot_model(self.model, to_file=file_name, show_shapes=True, show_layer_names=True)
+
+    def my_model_settings(self):
+
+        scope = "general.model"
+
+        # channels
+        X, y = self.data_store.get("generator", "general.train")[0]
+        channels = X.shape[-1]  # input variables
+        self.data_store.put("channels", channels, scope)
+
+        # dropout
+        self.data_store.put("dropout_rate", 0.1, scope)
+
+        # regularizer
+        self.data_store.put("regularizer", l2(0.1), scope)
+
+        # learning rate
+        initial_lr = 1e-2
+        self.data_store.put("initial_lr", initial_lr, scope)
+        optimizer = SGD(lr=initial_lr, momentum=0.9)
+        # optimizer = Adam(lr=initial_lr, amsgrad=True)
+        self.data_store.put("optimizer", optimizer, scope)
+        self.data_store.put("lr_decay", LearningRateDecay(base_lr=initial_lr, drop=.94, epochs_drop=10), scope)
+
+        # learning settings
+        self.data_store.put("epochs", 2, scope)
+        self.data_store.put("batch_size", 256, scope)
+
+        # activation
+        activation = layers.PReLU  # alternatives: layers.ELU, layers.LeakyReLU, keras.activations.tanh
+        self.data_store.put("activation", activation, scope)
+
+        # set loss
+        loss_all = my_loss()
+        self.data_store.put("loss", loss_all, scope)
+
+
+def my_loss():
+    loss = l_p_loss(4)
+    keras_loss = losses.mean_squared_error
+    loss_all = [loss] + [keras_loss]
+    return loss_all
+
+
+def my_model(activation, window_size, channels, regularizer, dropout_rate, window_lead_time):
+
+    conv_settings_dict1 = {
+        'tower_1': {'reduction_filter': 8, 'tower_filter': 8 * 2, 'tower_kernel': (3, 1), 'activation': activation},
+        'tower_2': {'reduction_filter': 8, 'tower_filter': 8 * 2, 'tower_kernel': (5, 1), 'activation': activation},
+        'tower_3': {'reduction_filter': 8, 'tower_filter': 8 * 2, 'tower_kernel': (1, 1), 'activation': activation},
+    }
+
+    pool_settings_dict1 = {'pool_kernel': (3, 1), 'tower_filter': 8 * 2, 'activation': activation}
+
+    conv_settings_dict2 = {
+        'tower_1': {'reduction_filter': 8 * 2, 'tower_filter': 16 * 2 * 2, 'tower_kernel': (3, 1),
+                    'activation': activation},
+        'tower_2': {'reduction_filter': 8 * 2, 'tower_filter': 16 * 2 * 2, 'tower_kernel': (5, 1),
+                    'activation': activation},
+        'tower_3': {'reduction_filter': 8 * 2, 'tower_filter': 16 * 2 * 2, 'tower_kernel': (1, 1),
+                    'activation': activation},
+    }
+
+    pool_settings_dict2 = {'pool_kernel': (3, 1), 'tower_filter': 16, 'activation': activation}
+
+    conv_settings_dict3 = {
+        'tower_1': {'reduction_filter': 16 * 4, 'tower_filter': 32 * 2, 'tower_kernel': (3, 1),
+                    'activation': activation},
+        'tower_2': {'reduction_filter': 16 * 4, 'tower_filter': 32 * 2, 'tower_kernel': (5, 1),
+                    'activation': activation},
+        'tower_3': {'reduction_filter': 16 * 4, 'tower_filter': 32 * 2, 'tower_kernel': (1, 1),
+                    'activation': activation},
+    }
+
+    pool_settings_dict3 = {'pool_kernel': (3, 1), 'tower_filter': 32, 'activation': activation}
+
+    ##########################################
+    inception_model = InceptionModelBase()
+
+    X_input = layers.Input(shape=(window_size + 1, 1, channels))  # add 1 to window_size to include current time step t0
+
+    X_in = inception_model.inception_block(X_input, conv_settings_dict1, pool_settings_dict1,
+                                           regularizer=regularizer, batch_normalisation=True)
+
+    out_minor = flatten_tail(X_in, 'Minor_1', bound_weight=True, activation=activation, dropout_rate=dropout_rate,
+                             reduction_filter=4, first_dense=32, window_lead_time=window_lead_time)
+
+    X_in = layers.Dropout(dropout_rate)(X_in)
+
+    X_in = inception_model.inception_block(X_in, conv_settings_dict2, pool_settings_dict2, regularizer=regularizer,
+                                           batch_normalisation=True)
+
+    X_in = layers.Dropout(dropout_rate)(X_in)
+
+    X_in = inception_model.inception_block(X_in, conv_settings_dict3, pool_settings_dict3, regularizer=regularizer,
+                                           batch_normalisation=True)
+    #############################################
+
+    out_main = flatten_tail(X_in, 'Main', activation=activation, bound_weight=True, dropout_rate=dropout_rate,
+                            reduction_filter=64, first_dense=64, window_lead_time=window_lead_time)
+
+    return keras.Model(inputs=X_input, outputs=[out_minor, out_main])
-- 
GitLab
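
Note: a minimal smoke test for the new my_model() builder, run outside the ModelSetup run framework. This is a sketch only; it assumes src.inception_model.InceptionModelBase and src.helpers are importable from the repository root, and the window_size and channels values below are illustrative stand-ins for what ModelSetup normally reads from its data store, not values taken from this patch.

    # sketch: build the two-headed inception model standalone
    from keras import layers
    from keras.regularizers import l2

    from src.modules.model_setup import my_model

    window_size = 13   # assumed history length; the model input covers t0-window_size ... t0
    channels = 9       # assumed number of input variables (X.shape[-1] in my_model_settings)

    model = my_model(activation=layers.PReLU, window_size=window_size, channels=channels,
                     regularizer=l2(0.1), dropout_rate=0.1, window_lead_time=4)
    model.summary()  # expect two output heads: Minor_1 (after block 1) and Main (after block 3)

Because the model returns [out_minor, out_main], compile_model() supplies a list of two losses (my_loss() pairs l_p_loss(4) with mean_squared_error), one per head.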