diff --git a/mlair/model_modules/branched_input_networks.py b/mlair/model_modules/branched_input_networks.py
index 8da3b17edc47fe281738bac54d26a28836716fac..2c62c3cafc1537979e4a21bdb3bb6aa798e6e193 100644
--- a/mlair/model_modules/branched_input_networks.py
+++ b/mlair/model_modules/branched_input_networks.py
@@ -44,7 +44,8 @@ class BranchedInputRNN(RNN):  # pragma: no cover
             for layer, n_hidden in enumerate(conf):
                 return_sequences = (layer < len(conf) - 1)
                 x_in_b = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn,
-                                  name=f"{self.RNN.__name__}_branch{branch + 1}_{layer + 1}")(x_in_b)
+                                  name=f"{self.RNN.__name__}_branch{branch + 1}_{layer + 1}",
+                                  kernel_regularizer=self.kernel_regularizer)(x_in_b)
                 if self.bn is True:
                     x_in_b = keras.layers.BatchNormalization()(x_in_b)
                 x_in_b = self.activation_rnn(name=f"{self.activation_rnn_name}_branch{branch + 1}_{layer + 1}")(x_in_b)
diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index e909ae7696bdf90d4e9a95e020b75a97e15dfd50..e65255a630a0af75f7a8760a676d83cd343ddded 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -33,7 +33,8 @@ class RNN(AbstractModelClass):  # pragma: no cover
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  activation_rnn="tanh", dropout_rnn=0,
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
-                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, dense_layer_configuration=None, **kwargs):
+                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, dense_layer_configuration=None,
+                 kernel_regularizer=None, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -42,10 +43,12 @@ class RNN(AbstractModelClass):  # pragma: no cover
 
         Customize this RNN model via the following parameters:
 
-        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True=. Choose
+        :param activation: set your desired activation function for appended dense layers (add_dense_layer=True). Choose
             from relu, tanh, sigmoid, linear, selu, prelu, leakyrelu. (Default relu)
         :param activation_rnn: set your desired activation function of the rnn output. Choose from relu, tanh, sigmoid,
-            linear, selu, prelu, leakyrelu. (Default tanh)
+            linear, selu, prelu, leakyrelu. To use the fast cuDNN implementation, tensorflow requires tanh as the
+            activation. Note that this is not the recurrent activation (which cannot be changed in this class) but
+            the activation of the cell. (Default tanh)
         :param activation_output: same as activation parameter but exclusively applied on output layer only. (Default
             linear)
         :param optimizer: set optimizer method. Can be either adam or sgd. (Default adam)
@@ -58,7 +61,8 @@ class RNN(AbstractModelClass):  # pragma: no cover
         :param dropout: use dropout with given rate. If no value is provided, dropout layers are not added to the
             network at all. (Default None)
         :param dropout_rnn: use recurrent dropout with given rate. This is applied along the recursion and not after
-            a rnn layer. (Default 0)
+            an rnn layer. Be aware that tensorflow can only use the fast cuDNN implementation if no recurrent
+            dropout is applied. (Default 0)
         :param batch_normalization: use batch normalization layer in the network if enabled. These layers are inserted
             between the linear part of a layer (the nn part) and the non-linear part (activation function). No BN layer
             is added if set to false. (Default false)
@@ -94,7 +98,7 @@ class RNN(AbstractModelClass):  # pragma: no cover
         self.RNN = self._rnn.get(rnn_type.lower())
         self._update_model_name(rnn_type)
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
-        # self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
+        self.kernel_regularizer = self._set_regularizer(kernel_regularizer, **kwargs)
         self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)
         assert 0 <= dropout_rnn <= 1
         self.dropout_rnn = dropout_rnn
@@ -121,7 +125,8 @@ class RNN(AbstractModelClass):  # pragma: no cover
 
         for layer, n_hidden in enumerate(conf):
             return_sequences = (layer < len(conf) - 1)
-            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn)(x_in)
+            x_in = self.RNN(n_hidden, return_sequences=return_sequences, recurrent_dropout=self.dropout_rnn,
+                            kernel_regularizer=self.kernel_regularizer)(x_in)
             if self.bn is True:
                 x_in = keras.layers.BatchNormalization()(x_in)
             x_in = self.activation_rnn(name=f"{self.activation_rnn_name}_{layer + 1}")(x_in)
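
For reference, here is a minimal standalone sketch (plain tensorflow.keras rather than MLAir's RNN wrapper) of what threading `kernel_regularizer` into the recurrent layers does at the layer level; the input shape, layer size, and l2 factor below are arbitrary illustrative values, not taken from this patch.

```python
from tensorflow import keras

# Example: an L2 penalty on the input kernel weights of an LSTM layer.
# The factor 1e-4 is an arbitrary illustrative value.
reg = keras.regularizers.l2(1e-4)

inputs = keras.layers.Input(shape=(24, 5))   # (time steps, variables) - example shape
x = keras.layers.LSTM(10,
                      return_sequences=False,
                      recurrent_dropout=0,   # 0 keeps the fast cuDNN implementation available
                      kernel_regularizer=reg)(inputs)
outputs = keras.layers.Dense(4, activation="linear")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer="adam", loss="mse")
```

Because Keras regularizers are stateless callables, the same object can safely be reused for every stacked layer, which is what passing `self.kernel_regularizer` inside the loop of `_rnn_block` achieves.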