diff --git a/mlair/model_modules/recurrent_networks.py b/mlair/model_modules/recurrent_networks.py
index 6ec920c1cde08c0d2fc6064528eea800fbdde2a7..e909ae7696bdf90d4e9a95e020b75a97e15dfd50 100644
--- a/mlair/model_modules/recurrent_networks.py
+++ b/mlair/model_modules/recurrent_networks.py
@@ -33,7 +33,7 @@ class RNN(AbstractModelClass):  # pragma: no cover
     def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
                  activation_rnn="tanh", dropout_rnn=0,
                  optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
-                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, **kwargs):
+                 batch_normalization=False, rnn_type="lstm", add_dense_layer=False, dense_layer_configuration=None, **kwargs):
         """
         Sets model and loss depending on the given arguments.
 
@@ -64,6 +64,15 @@ class RNN(AbstractModelClass):  # pragma: no cover
             is added if set to false. (Default false)
         :param rnn_type: define which kind of recurrent network should be applied. Chose from either lstm or gru. All
             units will be of this kind. (Default lstm)
+        :param add_dense_layer: set True to use additional dense layers between the last recurrent layer and the
+            output layer. If no further specification is made via dense_layer_configuration, a single layer is added
+            with n neurons where n is equal to min(n_previous_layer, n_output**2). If set to False, the output layer
+            directly follows the last recurrent layer.
+        :param dense_layer_configuration: specify the dense layers to add as a list, where each element corresponds
+            to the number of neurons of one layer and the length of the list determines the number of layers to add.
+            The last dense layer is followed by the output layer. As soon as an element is smaller than the number of
+            output neurons, the addition of dense layers is stopped immediately and the remaining elements are
+            ignored.
""" assert len(input_shape) == 1 @@ -80,6 +89,7 @@ class RNN(AbstractModelClass): # pragma: no cover self.optimizer = self._set_optimizer(optimizer.lower(), **kwargs) self.bn = batch_normalization self.add_dense_layer = add_dense_layer + self.dense_layer_configuration = dense_layer_configuration or [] self.layer_configuration = (n_layer, n_hidden) if layer_configuration is None else layer_configuration self.RNN = self._rnn.get(rnn_type.lower()) self._update_model_name(rnn_type) @@ -119,9 +129,22 @@ class RNN(AbstractModelClass): # pragma: no cover x_in = self.dropout(self.dropout_rate)(x_in) if self.add_dense_layer is True: - x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), name=f"Dense_{len(conf) + 1}", - kernel_initializer=self.kernel_initializer, )(x_in) - x_in = self.activation(name=f"{self.activation_name}_{len(conf) + 1}")(x_in) + if len(self.dense_layer_configuration) == 0: + x_in = keras.layers.Dense(min(self._output_shape ** 2, conf[-1]), name=f"Dense_{len(conf) + 1}", + kernel_initializer=self.kernel_initializer, )(x_in) + x_in = self.activation(name=f"{self.activation_name}_{len(conf) + 1}")(x_in) + if self.dropout is not None: + x_in = self.dropout(self.dropout_rate)(x_in) + else: + for layer, n_hidden in enumerate(self.dense_layer_configuration): + if n_hidden < self._output_shape: + break + x_in = keras.layers.Dense(n_hidden, name=f"Dense_{len(conf) + layer + 1}", + kernel_initializer=self.kernel_initializer, )(x_in) + x_in = self.activation(name=f"{self.activation_name}_{len(conf) + layer + 1}")(x_in) + if self.dropout is not None: + x_in = self.dropout(self.dropout_rate)(x_in) + x_in = keras.layers.Dense(self._output_shape)(x_in) out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in) self.model = keras.Model(inputs=x_input, outputs=[out])