diff --git a/mlair/model_modules/branched_input_networks.py b/mlair/model_modules/branched_input_networks.py
index 8da3b17edc47fe281738bac54d26a28836716fac..2c62c3cafc1537979e4a21bdb3bb6aa798e6e193 100644
--- a/mlair/model_modules/branched_input_networks.py
+++ b/mlair/model_modules/branched_input_networks.py
@@ -44,7 +44,8 @@ class BranchedInputRNN(RNN):  # pragma: no cover
             for layer, n_hidden in enumerate(conf):
                 return_sequences = (layer < len(conf) - 1)
                 x_in_b = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn,
-                                  name=f"{self.RNN.__name__}_branch{branch + 1}_{layer + 1}")(x_in_b)
+                                  name=f"{self.RNN.__name__}_branch{branch + 1}_{layer + 1}",
+                                  kernel_regularizer=self.kernel_regularizer)(x_in_b)
                 if self.bn is True:
                     x_in_b = keras.layers.BatchNormalization()(x_in_b)
                 x_in_b = self.activation_rnn(name=f"{self.activation_rnn_name}_branch{branch + 1}_{layer + 1}")(x_in_b)
diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index e909ae7696bdf90d4e9a95e020b75a97e15dfd50..e65255a630a0af75f7a8760a676d83cd343ddded 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -33,7 +33,8 @@ class RNN(AbstractModelClass):  # pragma: no cover
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  activation_rnn="tanh", dropout_rnn=0, optimizer="adam", n_layer=1, n_hidden=10, regularizer=None,
                  dropout=None, layer_configuration=None,
-                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, dense_layer_configuration=None, **kwargs):
+                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, dense_layer_configuration=None,
+                 kernel_regularizer=None, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -42,10 +43,12 @@
 
         Customize this RNN model via the following parameters:
 
-        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True=. Choose
+        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True). Choose
             from relu, tanh, sigmoid, linear, selu, prelu, leakyrelu. (Default relu)
         :param activation_rnn: set your desired activation function of the rnn output. Choose from relu, tanh, sigmoid,
-            linear, selu, prelu, leakyrelu. (Default tanh)
+            linear, selu, prelu, leakyrelu. To use the fast cuDNN implementation, tensorflow requires tanh as
+            activation. Note that this is not the recurrent activation (which is not mutable in this class) but the
+            activation of the cell. (Default tanh)
         :param activation_output: same as activation parameter but exclusively applied on output layer only. (Default
             linear)
         :param optimizer: set optimizer method. Can be either adam or sgd. (Default adam)
@@ -58,7 +61,8 @@ class RNN(AbstractModelClass):  # pragma: no cover
         :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
             network at all. (Default None)
         :param dropout_rnn: use recurrent dropout with given rate. This is applied along the recursion and not after
-            a rnn layer. (Default 0)
+            a rnn layer. Be aware that tensorflow is only able to use the fast cuDNN implementation with no recurrent
+            dropout. (Default 0)
         :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
             between the linear part of a layer (the nn part) and the non-linear part (activation function).
             No BN layer is added if set to false. (Default false)
@@ -94,7 +98,7 @@ class RNN(AbstractModelClass):  # pragma: no cover
         self.RNN = self._rnn.get(rnn_type.lower())
         self._update_model_name(rnn_type)
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
-        # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.kernel_regularizer = self._set_regularizer(kernel_regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
         assert 0 <= dropout_rnn <= 1
         self.dropout_rnn = dropout_rnn
@@ -121,7 +125,8 @@
 
         for layer, n_hidden in enumerate(conf):
             return_sequences = (layer < len(conf) - 1)
-            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn)(x_in)
+            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn,
+                            kernel_regularizer=self.kernel_regularizer)(x_in)
             if self.bn is True:
                 x_in = keras.layers.BatchNormalization()(x_in)
             x_in = self.activation_rnn(name=f"{self.activation_rnn_name}_{layer + 1}")(x_in)
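
Usage note (not part of the patch): a minimal sketch of how the new kernel_regularizer argument could be passed when constructing the model. The string value "l2" and the shape values below are assumptions for illustration only; which regularizer specifications are accepted depends on RNN._set_regularizer, which this diff enables but does not change.

    # Hypothetical sketch: apply kernel regularization to all RNN layers.
    # Assumes _set_regularizer resolves the string "l2" (analogous to other MLAir model
    # classes); input_shape/output_shape are placeholder values for a single-branch setup.
    from mlair.model_modules.recurrent_networks import RNN

    model = RNN(input_shape=[(65, 1, 9)], output_shape=[(4,)],
                rnn_type="lstm",
                activation_rnn="tanh",     # tanh keeps the fast cuDNN path available
                dropout_rnn=0,             # recurrent dropout > 0 disables the cuDNN kernels
                kernel_regularizer="l2")   # new argument, passed to every RNN layer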