diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py
index 783148086a413aaaf8bfeb33b13d8da5c72f1df5..8f464b16cbc74a241f70ba1c5bb9f8fe611dba95 100644
--- a/mlair/model_modules/model_class.py
+++ b/mlair/model_modules/model_class.py
@@ -502,7 +502,70 @@ class MyLSTMModel(AbstractModelClass):
         #                                                                  epochs_drop=10)
         self.loss = keras.losses.mean_squared_error
         self.compile_options = {"metrics": ["mse", "mae"]}
-
+
+
+class MyLuongAttentionLSTMModel(AbstractModelClass):
+    """
+    Luong Attention LSTM from https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb
+    """
+
+    def __init__(self, input_shape: list, output_shape: list):
+
+        super().__init__(input_shape[0], output_shape[0])
+
+        # settings
+        self.dropout_rate = 0.1  # note: the LSTM layers below hard-code dropout=0.2
+        self.initial_lr = 0.01
+        self.clipnorm = 1
+
+        self.n_hidden = 100
+
+        # apply to model
+        self.set_model()
+        self.set_compile_options()
+        # self.set_custom_objects(loss=self.compile_options['loss'])
+
+    def set_model(self):
+        input_train = keras.layers.Input(shape=self._input_shape)
+        # output_train = keras.layers.Input(shape=self._output_shape)
+
+        encoder_stack_h, encoder_last_h, encoder_last_c = keras.layers.LSTM(
+            self.n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
+            return_state=True, return_sequences=True)(input_train)
+
+        encoder_last_h = keras.layers.BatchNormalization(momentum=0.6)(encoder_last_h)
+        encoder_last_c = keras.layers.BatchNormalization(momentum=0.6)(encoder_last_c)
+
+        # decoder_input = keras.layers.RepeatVector(output_train.shape[1])(encoder_last_h)
+        decoder_input = keras.layers.RepeatVector(self._output_shape)(encoder_last_h)
+
+        decoder_stack_h = keras.layers.LSTM(self.n_hidden, activation='elu', dropout=0.2, recurrent_dropout=0.2,
+                                            return_state=False, return_sequences=True)(
+            decoder_input, initial_state=[encoder_last_h, encoder_last_c])
+
+        # Luong 'dot' alignment: score each decoder step against all encoder steps
+        attention = keras.layers.dot([decoder_stack_h, encoder_stack_h], axes=[2, 2])
+        attention = keras.layers.Activation('softmax')(attention)
+
+        # context vector: attention-weighted sum of the encoder hidden states
+        context = keras.layers.dot([attention, encoder_stack_h], axes=[2, 1])
+        context = keras.layers.BatchNormalization(momentum=0.6)(context)
+
+        decoder_combined_context = keras.layers.concatenate([context, decoder_stack_h])
+
+        # out = keras.layers.TimeDistributed(keras.layers.Dense(output_train.shape[2]))(decoder_combined_context)
+        out = keras.layers.TimeDistributed(keras.layers.Dense(1))(decoder_combined_context)
+
+        # FK add
+        out = keras.layers.Flatten()(out)
+
+        self.model = keras.Model(inputs=input_train, outputs=[out])
+
+    def set_compile_options(self):
+
+        self.optimizer = keras.optimizers.Adam(lr=self.initial_lr, clipnorm=self.clipnorm)
+        self.loss = keras.losses.mean_squared_error
+        self.compile_options = {"metrics": ["mse", "mae"]}
 
 
 class MyCNNModel(AbstractModelClass):
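
Usage sketch (not part of the patch above): a minimal, hypothetical example of wiring up the new class by hand, assuming MLAir's convention that `input_shape` and `output_shape` are passed as lists of shape tuples and that the framework compiles via the merged `compile_options` property. The shape values are illustrative, not taken from the diff.

    # Hypothetical usage sketch -- shape values are illustrative assumptions.
    # input_shape[0] feeds keras.layers.Input and then an LSTM directly, so it
    # must be a 2-D (timesteps, features) sample shape; output_shape[0] must
    # reduce to an int for RepeatVector (here: 4 forecast steps).
    model = MyLuongAttentionLSTMModel(input_shape=[(65, 2)], output_shape=[(4,)])
    model.model.compile(**model.compile_options)  # assumed MLAir-style compile call
    model.model.summary()  # final output: (None, 4) after the Flatten layer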