Commit 12b3630e authored by lukas leufen

Merge branch 'lukas_issue284_feat_create-fcn-model-class' into 'develop'

use he init when using relu activations

See merge request toar/mlair!273
Parents: a2d9d124, 1f13155f
8 related merge requests: !319 (add all changes of dev into release v1.4.0 branch), !318 (Resolve "release v1.4.0"), !283 (Merge latest develop into falcos issue), !279 (include Develop), !278 (Felix issue295 transformation parameters in data handler), !275 (include lazy preprocessing), !273 (use he init when using relu activations), !259 (Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler")
Pipeline #63078 failed
This commit is part of merge request !318.
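Background on the commit title: ReLU zeroes out roughly half of its inputs, so He initialization draws weights with variance 2/fan_in to compensate and keep activation variance roughly constant across layers, while Glorot initialization assumes a symmetric activation such as tanh. A minimal sketch of the pairing this change encodes (plain Keras; layer names are illustrative, not from the diff):

    import keras

    # He init pairs with ReLU-family activations: the factor 2 in 2/fan_in
    # compensates for the half of the units that ReLU silences.
    relu_dense = keras.layers.Dense(64, kernel_initializer=keras.initializers.he_normal(), name="relu_dense")
    # Glorot (the Keras default) suits symmetric activations like tanh or sigmoid.
    tanh_dense = keras.layers.Dense(64, kernel_initializer="glorot_uniform", name="tanh_dense")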
@@ -82,7 +82,7 @@ class AbstractModelClass(ABC):
         self.__custom_objects = value

     @property
-    def compile_options(self) -> Callable:
+    def compile_options(self) -> Dict:
         """
         The compile options property allows the user to use all keras.compile() arguments. They can either be passed as
         dictionary (1), as attribute, without setting compile_options (2) or as mixture (partly defined as instance
@@ -116,7 +116,7 @@ class AbstractModelClass(ABC):
             def set_compile_options(self):
                 self.optimizer = keras.optimizers.SGD()
                 self.loss = keras.losses.mean_squared_error
-                self.compile_options = {"optimizer" = keras.optimizers.Adam(), "metrics": ["mse", "mae"]}
+                self.compile_options = {"optimizer": keras.optimizers.Adam(), "metrics": ["mse", "mae"]}

         Note:
             * As long as the attribute and the dict value have exactly the same values, the setter method will not raise
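A hedged sketch of the mixture style this docstring describes, assuming a subclass of AbstractModelClass (not part of the diff):

    def set_compile_options(self):
        # (2) some options as plain attributes ...
        self.loss = keras.losses.mean_squared_error
        # (1) ... others via the compile_options dict; a key that is also set as an
        # attribute must carry exactly the same value, otherwise the setter raises.
        self.compile_options = {"optimizer": keras.optimizers.Adam(), "metrics": ["mse", "mae"]}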
...
@@ -10,53 +10,6 @@ from mlair.model_modules.loss import var_loss, custom_loss
 import keras


-class FCN_64_32_16(AbstractModelClass):
-    """
-    A customised model with 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
-    on the window_lead_time parameter.
-    """
-
-    def __init__(self, input_shape: list, output_shape: list):
-        """
-        Sets model and loss depending on the given arguments.
-
-        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
-        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
-        """
-        assert len(input_shape) == 1
-        assert len(output_shape) == 1
-        super().__init__(input_shape[0], output_shape[0])
-
-        # settings
-        self.activation = keras.layers.PReLU
-
-        # apply to model
-        self.set_model()
-        self.set_compile_options()
-        self.set_custom_objects(loss=self.compile_options['loss'])
-
-    def set_model(self):
-        """
-        Build the model.
-        """
-        x_input = keras.layers.Input(shape=self._input_shape)
-        x_in = keras.layers.Flatten()(x_input)
-        x_in = keras.layers.Dense(64, name="Dense_64")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(32, name="Dense_32")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(16, name="Dense_16")(x_in)
-        x_in = self.activation()(x_in)
-        x_in = keras.layers.Dense(self._output_shape, name="Dense_output")(x_in)
-        out_main = self.activation()(x_in)
-        self.model = keras.Model(inputs=x_input, outputs=[out_main])
-
-    def set_compile_options(self):
-        self.optimizer = keras.optimizers.adam(lr=1e-2)
-        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}

 class FCN(AbstractModelClass):
     """
     A customisable fully connected network (64, 32, 16, window_lead_time), where the last layer is the output layer depending
@@ -66,11 +19,15 @@ class FCN(AbstractModelClass):
_activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"), _activation = {"relu": keras.layers.ReLU, "tanh": partial(keras.layers.Activation, "tanh"),
"sigmoid": partial(keras.layers.Activation, "sigmoid"), "sigmoid": partial(keras.layers.Activation, "sigmoid"),
"linear": partial(keras.layers.Activation, "linear"), "linear": partial(keras.layers.Activation, "linear"),
"selu": partial(keras.layers.Activation, "selu")} "selu": partial(keras.layers.Activation, "selu"),
_initializer = {"selu": keras.initializers.lecun_normal()} "prelu": partial(keras.layers.PReLU, alpha_initializer=keras.initializers.constant(value=0.25))}
_initializer = {"tanh": "glorot_uniform", "sigmoid": "glorot_uniform", "linear": "glorot_uniform",
"relu": keras.initializers.he_normal(), "selu": keras.initializers.lecun_normal(),
"prelu": keras.initializers.he_normal()}
_optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD} _optimizer = {"adam": keras.optimizers.adam, "sgd": keras.optimizers.SGD}
_regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2} _regularizer = {"l1": keras.regularizers.l1, "l2": keras.regularizers.l2, "l1_l2": keras.regularizers.l1_l2}
_requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"] _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad", "momentum", "nesterov", "l1", "l2"]
_dropout = {"selu": keras.layers.AlphaDropout}
def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear", def __init__(self, input_shape: list, output_shape: list, activation="relu", activation_output="linear",
optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None, optimizer="adam", n_layer=1, n_hidden=10, regularizer=None, dropout=None, layer_configuration=None,
@@ -96,12 +53,12 @@ class FCN(AbstractModelClass):
         self._update_model_name()
         self.kernel_initializer = self._initializer.get(activation, "glorot_uniform")
         self.kernel_regularizer = self._set_regularizer(regularizer, **kwargs)
-        self.dropout = self._set_dropout(dropout)
+        self.dropout, self.dropout_rate = self._set_dropout(activation, dropout)

         # apply to model
         self.set_model()
         self.set_compile_options()
-        self.set_custom_objects(loss=custom_loss([keras.losses.mean_squared_error, var_loss]), var_loss=var_loss)
+        self.set_custom_objects(loss=self.compile_options["loss"][0], var_loss=var_loss)

     def _set_activation(self, activation):
         try:
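The set_custom_objects change keeps the registered objects in sync with whatever set_compile_options defines, which matters at load time: keras.models.load_model can only resolve non-builtin symbols through a custom_objects mapping. A hedged sketch (file name and dict keys are illustrative):

    model = keras.models.load_model("fcn.h5",
                                    custom_objects={"loss": custom_loss([keras.losses.mean_squared_error, var_loss]),
                                                    "var_loss": var_loss})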
@@ -139,12 +96,11 @@ class FCN(AbstractModelClass):
         except KeyError:
             raise AttributeError(f"Given regularizer {regularizer} is not supported in this model class.")

-    @staticmethod
-    def _set_dropout(dropout):
-        if dropout is None:
-            return dropout
-        assert 0 <= dropout < 1
-        return dropout
+    def _set_dropout(self, activation, dropout_rate):
+        if dropout_rate is None:
+            return None, None
+        assert 0 <= dropout_rate < 1
+        return self._dropout.get(activation, keras.layers.Dropout), dropout_rate

     def _update_model_name(self):
         n_input = str(reduce(lambda x, y: x * y, self._input_shape))
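The reworked _set_dropout returns a (layer class, rate) pair instead of a bare rate, so SELU networks get keras.layers.AlphaDropout, which preserves the self-normalising mean and variance that SELU relies on, while every other activation keeps standard Dropout. A hedged sketch of the new contract (self stands for an FCN instance):

    layer_cls, rate = self._set_dropout("selu", 0.2)   # -> (keras.layers.AlphaDropout, 0.2)
    layer_cls, rate = self._set_dropout("relu", 0.2)   # -> (keras.layers.Dropout, 0.2)
    layer_cls, rate = self._set_dropout("relu", None)  # -> (None, None): dropout disabled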
@@ -168,7 +124,7 @@ class FCN(AbstractModelClass):
                                        kernel_regularizer=self.kernel_regularizer)(x_in)
                 x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
-                    x_in = keras.layers.Dropout(self.dropout)(x_in)
+                    x_in = self.dropout(self.dropout_rate)(x_in)
         else:
             assert isinstance(self.layer_configuration, list) is True
             for layer, n_hidden in enumerate(self.layer_configuration):
@@ -176,7 +132,7 @@ class FCN(AbstractModelClass):
                                        kernel_regularizer=self.kernel_regularizer)(x_in)
                 x_in = self.activation(name=f"{self.activation_name}_{layer + 1}")(x_in)
                 if self.dropout is not None:
-                    x_in = keras.layers.Dropout(self.dropout)(x_in)
+                    x_in = self.dropout(self.dropout_rate)(x_in)
         x_in = keras.layers.Dense(self._output_shape)(x_in)
         out = self.activation_output(name=f"{self.activation_output_name}_output")(x_in)
         self.model = keras.Model(inputs=x_input, outputs=[out])
@@ -184,3 +140,30 @@ class FCN(AbstractModelClass):
     def set_compile_options(self):
         self.compile_options = {"loss": [custom_loss([keras.losses.mean_squared_error, var_loss])],
                                 "metrics": ["mse", "mae", var_loss]}
+
+
+class FCN_64_32_16(FCN):
+    """
+    A customised model with 4 Dense layers (64, 32, 16, window_lead_time), where the last layer is the output layer depending
+    on the window_lead_time parameter.
+    """
+
+    _requirements = ["lr", "beta_1", "beta_2", "epsilon", "decay", "amsgrad"]
+
+    def __init__(self, input_shape: list, output_shape: list, **kwargs):
+        """
+        Sets model and loss depending on the given arguments.
+
+        :param input_shape: list of input shapes (expect len=1 with shape=(window_hist, station, variables))
+        :param output_shape: list of output shapes (expect len=1 with shape=(window_forecast))
+        """
+        lr = kwargs.pop("lr", 1e-2)
+        super().__init__(input_shape, output_shape, activation="prelu", activation_output="linear",
+                         layer_configuration=[64, 32, 16], optimizer="adam", lr=lr, **kwargs)
+
+    def set_compile_options(self):
+        self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]}
+
+    def _update_model_name(self):
+        self.model_name = "FCN"
+        super()._update_model_name()
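After this refactoring, FCN_64_32_16 is only a thin configuration of the generic FCN class. A hedged usage sketch (shape values are illustrative):

    input_shape = [(13, 1, 5)]   # illustrative (window_hist, station, variables)
    output_shape = [(4,)]        # illustrative (window_forecast,)
    model = FCN_64_32_16(input_shape, output_shape)
    # ...which is equivalent to configuring the generic class directly:
    same = FCN(input_shape, output_shape, activation="prelu", activation_output="linear",
               layer_configuration=[64, 32, 16], optimizer="adam", lr=1e-2)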