diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index cbe5d1454c9f1e092bc0947f98cf1143d5fa9822..95c48bc8659354c7c669bb03a7591dafbbe9f262 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -31,7 +31,7 @@ class RNN(AbstractModelClass):
     _rnn = {"lstm": keras.layers.LSTM, "gru": keras.layers.GRU}
 
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
-                 activation_rnn="tanh",
+                 activation_rnn="tanh", dropout_rnn=0,
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
                  batch_normalization=False, rnn_type="lstm", add_dense_layer=False, **kwargs):
         """
@@ -42,8 +42,10 @@ class RNN(AbstractModelClass):
 
         Customize this RNN model via the following parameters:
 
-        :param activation: set your desired activation function. Chose from relu, tanh, sigmoid, linear, selu, prelu,
-            leakyrelu. (Default relu)
+        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True).
+            Choose from relu, tanh, sigmoid, linear, selu, prelu, leakyrelu. (Default relu)
+        :param activation_rnn: set your desired activation function for the rnn output. Choose from relu, tanh,
+            sigmoid, linear, selu, prelu, leakyrelu. (Default tanh)
         :param activation_output: same as activation parameter but exclusively applied on output layer only. (Default
            linear)
         :param optimizer: set optimizer method. Can be either adam or sgd. (Default adam)
@@ -55,6 +57,8 @@ class RNN(AbstractModelClass):
             hidden layer. The number of hidden layers is equal to the total length of this list.
         :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
             network at all. (Default None)
+        :param dropout_rnn: use recurrent dropout with given rate. This is applied inside the recursion and not after
+            an rnn layer. (Default 0)
         :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
             between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
             is added if set to false. (Default false)
@@ -82,6 +86,8 @@ class RNN(AbstractModelClass):
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
+        assert 0 <= dropout_rnn <= 1
+        self.dropout_rnn = dropout_rnn
 
         # apply to model
         self.set_model()
@@ -105,7 +111,7 @@ class RNN(AbstractModelClass):
 
         for layer, n_hidden in enumerate(conf):
             return_sequences = (layer < len(conf) - 1)
-            x_in = self.RNN(n_hidden, return_sequences=return_sequences)(x_in)
+            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn)(x_in)
             if self.bn is True:
                 x_in = keras.layers.BatchNormalization()(x_in)
             x_in = self.activation_rnn(name=f"{self.activation_rnn_name}_{layer + 1}")(x_in)
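
For context on the distinction the new docstring draws, the sketch below is not part of the patch: it is plain tf.keras with made-up shapes and rates, contrasting recurrent_dropout (what dropout_rnn now forwards to each LSTM/GRU layer) with the ordinary Dropout layer that the existing dropout argument controls.

# Minimal standalone sketch (not from the repository). Assumes tensorflow.keras;
# the input shape, units, and rates are illustrative only.
from tensorflow import keras

inputs = keras.layers.Input(shape=(24, 5))  # e.g. 24 time steps, 5 variables

# dropout_rnn -> recurrent_dropout: masks the recurrent state at every time step,
# i.e. inside the recursion (active only during training).
x = keras.layers.LSTM(10, return_sequences=True, recurrent_dropout=0.2)(inputs)

# dropout -> ordinary Dropout: applied once to the layer output, outside the recursion.
x = keras.layers.LSTM(10, return_sequences=False)(x)
x = keras.layers.Dropout(0.2)(x)

outputs = keras.layers.Dense(4, activation="linear")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="mse")

With the patch applied, passing e.g. dropout_rnn=0.2 to the RNN model class forwards that rate as recurrent_dropout to every LSTM/GRU layer built in set_model(), analogous to the first LSTM call above.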