diff --git a/mlair/model_modules/model_class.py b/mlair/model_modules/model_class.py index 9b2058ace77ee7ff4812ff7f4459e85225615553..ce74a89a6b7ce980a66557d9558914ab7e70eec0 100644 --- a/mlair/model_modules/model_class.py +++ b/mlair/model_modules/model_class.py @@ -486,7 +486,9 @@ class MyLSTMModel(AbstractModelClass): super().__init__(input_shape[0], output_shape[0]) # settings - self.dropout_rate = 0.25 + self.dropout_rate = 0.4 + self.stateful = False + self.initial_lr = 1e-5 # apply to model self.set_model() @@ -495,16 +497,181 @@ class MyLSTMModel(AbstractModelClass): def set_model(self): x_input = keras.layers.Input(shape=self._input_shape) - x_in = keras.layers.LSTM(16, return_sequences=True, name="First_LSTM", dropout=self.dropout_rate)(x_input) - x_in = keras.layers.LSTM(32, name="Second_LSTM", dropout=self.dropout_rate)(x_in) + x_in = keras.layers.LSTM(16, return_sequences=True, name="First_LSTM", dropout=self.dropout_rate, stateful=self.stateful)(x_input) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + x_in = keras.layers.LSTM(16, name="Second_LSTM", dropout=self.dropout_rate, stateful=self.stateful)(x_in) out_main = keras.layers.Dense(self._output_shape, name='Output_Dense')(x_in) self.model = keras.Model(inputs=x_input, outputs=[out_main]) def set_compile_options(self): - self.initial_lr = 1e-4 +# self.initial_lr = 1e-4 + #self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9) + self.optimizer = keras.optimizers.Adam(lr=self.initial_lr) +# self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, +# drop=.94, +# epochs_drop=10) + self.loss = keras.losses.mean_squared_error + self.compile_options = {"metrics": ["mse", "mae"]} + + + +class MyCNNModel(AbstractModelClass): + + def __init__(self, input_shape: list, output_shape: list): + + super().__init__(input_shape[0], output_shape[0]) + + # settings + self.dropout_rate = 0.4 + self.batchnormalization = keras.layers.normalization.BatchNormalization +# self.activation = keras.layers.LeakyReLU + self.activation = keras.layers.ELU +# self.regularizer = keras.regularizers.l2(0.001) + + self.initial_lr = 0.01 + + + # apply to model + self.set_model() + self.set_compile_options() + self.set_custom_objects(SymmetricPadding2D=SymmetricPadding2D) + + def set_model(self): + + first_kernel = (3, 1) + first_filters = 16 + pad_size1 = PadUtils.get_padding_for_same(first_kernel) + + pool_kernel = (3,1) + pad_size_pool = PadUtils.get_padding_for_same(pool_kernel) + + second_kernel = (3, 1) + second_filters = 32 + pad_size2 = PadUtils.get_padding_for_same(second_kernel) + + x_input = keras.layers.Input(shape=self._input_shape) + + x_in = Padding2D("SymPad2D")(padding=pad_size1, name="SymPad1")(x_input) + x_in = keras.layers.Conv2D(filters=first_filters, + kernel_size=first_kernel, +# kernel_regularizer=self.regularizer, + name="First_conv_{}x{}".format(first_kernel[0], first_kernel[1]))(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + +# x_in = Padding2D("SymPad2D")(padding=pad_size_pool, name="SymPad_pool")(x_in) +# x_in = keras.layers.MaxPool2D(pool_kernel)(x_in) + + x_in = Padding2D("SymPad2D")(padding=pad_size2, name="SymPad2")(x_in) + x_in = keras.layers.Conv2D(filters=second_filters, + kernel_size=second_kernel, +# kernel_regularizer=self.regularizer, + name="Second_conv_{}x{}".format(second_kernel[0], second_kernel[1]))(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + + + x_in = keras.layers.Flatten()(x_in) + + x_in = keras.layers.Dense(32)(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + + out_main = keras.layers.Dense(self._output_shape, name='Output_Dense')(x_in) + + self.model = keras.Model(inputs=x_input, outputs=[out_main]) + + def set_compile_options(self): + +# self.optimizer = keras.optimizers.Adam() self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9) - self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, - drop=.94, - epochs_drop=10) +# self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, +# drop=.94, +# epochs_drop=10) + self.loss = keras.losses.mean_squared_error + self.compile_options = {"metrics": ["mse", "mae"]} + +class MyCNNModelSect(AbstractModelClass): + + def __init__(self, input_shape: list, output_shape: list): + + super().__init__(input_shape[0], output_shape[0]) + + # settings + self.dropout_rate = 0.35 + self.batchnormalization = keras.layers.normalization.BatchNormalization +# self.activation = keras.layers.LeakyReLU + self.activation = keras.layers.ELU +# self.regularizer = keras.regularizers.l2(0.001) + + self.initial_lr = 0.01 + + + # apply to model + self.set_model() + self.set_compile_options() + self.set_custom_objects(SymmetricPadding2D=SymmetricPadding2D) + + def set_model(self): + + first_kernel = (3, 1) + first_filters = 16 + pad_size1 = PadUtils.get_padding_for_same(first_kernel) + + pool_kernel = (3,1) + pad_size_pool = PadUtils.get_padding_for_same(pool_kernel) + + second_kernel = (3, 1) + second_filters = 32 + pad_size2 = PadUtils.get_padding_for_same(second_kernel) + + x_input = keras.layers.Input(shape=self._input_shape) + + x_in = Padding2D("SymPad2D")(padding=pad_size1, name="SymPad1")(x_input) + x_in = keras.layers.Conv2D(filters=first_filters, + kernel_size=first_kernel, +# kernel_regularizer=self.regularizer, + name="First_conv_{}x{}".format(first_kernel[0], first_kernel[1]))(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + +# x_in = Padding2D("SymPad2D")(padding=pad_size_pool, name="SymPad_pool")(x_in) +# x_in = keras.layers.MaxPool2D(pool_kernel)(x_in) + + x_in = Padding2D("SymPad2D")(padding=pad_size2, name="SymPad2")(x_in) + x_in = keras.layers.Conv2D(filters=second_filters, + kernel_size=second_kernel, +# kernel_regularizer=self.regularizer, + name="Second_conv_{}x{}".format(second_kernel[0], second_kernel[1]))(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + + + x_in = keras.layers.Flatten()(x_in) + + x_in = keras.layers.Dense(32)(x_in) + x_in = self.batchnormalization()(x_in) + x_in = self.activation()(x_in) + + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + + out_main = keras.layers.Dense(self._output_shape, name='Output_Dense')(x_in) + + self.model = keras.Model(inputs=x_input, outputs=[out_main]) + + def set_compile_options(self): + +# self.optimizer = keras.optimizers.Adam() + self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9) +# self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, +# drop=.94, +# epochs_drop=10) self.loss = keras.losses.mean_squared_error self.compile_options = {"metrics": ["mse", "mae"]} + diff --git a/run_wrf_dh.py b/run_wrf_dh.py index 717ce02b1732daf92c1059c18d1f3236f1b0872a..dff06cf88a0250a9f19faf72b985d0ac887ebe11 100644 --- a/run_wrf_dh.py +++ b/run_wrf_dh.py @@ -30,8 +30,10 @@ def main(parser_args): train_model=False, create_new_model=True, network="UBA", evaluate_bootstraps=False, # plot_list=["PlotCompetitiveSkillScore"], - competitors=["test_model", "test_model2"], - competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), +# competitors=["test_model", "test_model2"], +# competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), + competitors=["baseline", "sector_baseline"], + competitor_path="/p/scratch/deepacf/kleinert1/IASS_proc_monthyl/competitors/o3", train_min_length=1, val_min_length=1, test_min_length=1, # data_handler=DataHandlerSingleStation, # data_handler=DataHandlerSingleGridColumn, @@ -59,11 +61,16 @@ def main(parser_args): # data_path='/home/felix/Data/WRF-Chem/test_cut_nc/', # data_path='/home/felix/Data/WRF-Chem/test_cut_nc_joint', data_path="/home/felix/Data/WRF-Chem/test_cut_nc_joint/short_test", + # data_path = "/p/scratch/deepacf/kleinert1/IASS_proc_monthyl", # data_path="/media/felix/INTENSO/WRF_CHEM/JFM_2009", - date_format_of_nc_file="%Y-%m", - time_dim='XTIME', + # external data coords external_coords_file='/home/felix/Data/WRF-Chem/test_cut_nc/coords.nc', + # external_coords_file = "/p/scratch/deepacf/kleinert1/IASS_proc/coords.nc", + + date_format_of_nc_file = "%Y-%m", + common_file_starter = "wrfout_d01", + time_dim = 'XTIME', transformation={ "T2": {"method": "standardise"}, @@ -94,22 +101,42 @@ def main(parser_args): radius=100, # km start='2009-01-01', - end='2009-01-04', - + #end='2009-01-04', + #end='2009-01-31', + end='2009-03-31', + + #train_start='2009-01-01', + #train_end='2009-01-02', train_start='2009-01-01', - train_end='2009-01-02', - - val_start='2009-01-02', - val_end='2009-01-03', - - test_start='2009-01-03', - test_end='2009-01-04', - + #train_end='2009-01-15', + train_end='2009-02-28', + + #val_start='2009-01-02', + #val_end='2009-01-03', + ################################### + #val_start='2009-01-15', + #val_end='2009-01-22', + ################################### + val_start='2009-03-01', + val_end='2009-03-14', + + #test_start='2009-01-03', + #test_end='2009-01-04', + ################################### + #test_start='2009-01-22', + #test_end='2009-01-31', + ################################### + test_start='2009-03-15', + test_end='2009-03-31', + sampling='hourly', interpolation_limit=0, # as_image_like_data_format=False, # model=MyLSTMModel, + use_multiprocessing=True, + # as_image_like_data_format=True, + # model=MyLSTMModel, model=MyCNNModel, **parser_args.__dict__) diff --git a/run_wrf_dh_sector.py b/run_wrf_dh_sector.py new file mode 100644 index 0000000000000000000000000000000000000000..d77bfd74653b0183402dc3d9180badf6548129d7 --- /dev/null +++ b/run_wrf_dh_sector.py @@ -0,0 +1,138 @@ +__author__ = "Lukas Leufen" +__date__ = '2020-06-29' + +import argparse +# from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation +from mlair.data_handler.data_handler_wrf_chem import DataHandlerWRF, DataHandlerMainSectWRF +from mlair.workflows import DefaultWorkflow +from mlair.helpers import remove_items +from mlair.configuration.defaults import DEFAULT_PLOT_LIST +from mlair.model_modules.model_class import MyPaperModel, MyLSTMModel, MyCNNModel, MyCNNModelSect + +import os + + +def load_stations(): + import json + try: + filename = 'supplement/station_list_north_german_plain_rural.json' + with open(filename, 'r') as jfile: + stations = json.load(jfile) + except FileNotFoundError: + stations = None + return stations + + +def main(parser_args): + plots = remove_items(DEFAULT_PLOT_LIST, "PlotConditionalQuantiles") + workflow = DefaultWorkflow( # stations=load_stations(), + # stations=["DEBW087","DEBW013", "DEBW107", "DEBW076"], + + train_model=False, create_new_model=True, network="UBA", + evaluate_bootstraps=False, # plot_list=["PlotCompetitiveSkillScore"], +# competitors=["test_model", "test_model2"], +# competitor_path=os.path.join(os.getcwd(), "data", "comp_test"), + competitors=["baseline", "sector_baseline"], + competitor_path="/p/scratch/deepacf/kleinert1/IASS_proc_monthyl/competitors/o3", + train_min_length=1, val_min_length=1, test_min_length=1, + # data_handler=DataHandlerSingleStation, + # data_handler=DataHandlerSingleGridColumn, + epochs=1000, + window_lead_time=2, + window_history_size=6, + stations=["coords__48_8479__10_0963", "coords__51_8376__14_1417", + "coords__50_7536__7_0827", "coords__51_4070__6_9656", + "coords__49_8421__7_8662", "coords__49_7410__7_1935", + "coords__51_1566__11_8182", "coords__51_4065__6_9660", + # "coords__50_7333__7_1000", "coords__50_0000__8_0000", + # "coords__48_7444__7_6000", "coords__51_0000__11_0000", + # "coords__52_7555__8_1000", "coords__50_0000__2_0000", + # "coords__51_7666__8_6000", "coords__50_0000__3_0000", + # "coords__45_7777__9_1000", "coords__50_0000__4_0000", + ], +# data_handler=DataHandlerWRF, + data_handler=DataHandlerMainSectWRF, #, + data_path="/p/scratch/deepacf/kleinert1/IASS_proc_monthyl", + #data_path="/p/scratch/deepacf/kleinert1/IASS_proc", + #data_path="/p/project/deepacf/intelliaq/kleinert1/DATA/WRF_CHEM_soft_ln_small_test", + common_file_starter="wrfout_d01", + date_format_of_nc_file="%Y-%m", + time_dim='XTIME', + #external_coords_file='/p/project/deepacf/inbound_data/IASS_upload/coords.nc', + external_coords_file="/p/scratch/deepacf/kleinert1/IASS_proc/coords.nc", + transformation={ + "T2": {"method": "standardise"}, + "Q2": {"method": "standardise"}, + "PBLH": {"method": "standardise"}, + "Ull": {"method": "standardise"}, + "Vll": {"method": "standardise"}, + "wdir10ll": {"method": "min_max", "min": 0., "max": 360.}, + "wspd10ll": {"method": "standardise"}, + 'no': {"method": "standardise"}, + 'no2': {"method": "standardise"}, + 'co': {"method": "standardise"}, + 'PSFC': {"method": "standardise"}, + 'CLDFRA': {"method": "min_max"}, + }, + variables=['T2', 'o3', 'wdir10ll', 'wspd10ll', 'no', 'no2', 'co', 'PSFC', 'PBLH', 'CLDFRA'], + target_var='o3', + statistics_per_var={'T2': None, 'o3': None, 'wdir10ll': None, 'wspd10ll': None, + 'no': None, 'no2': None, 'co': None, 'PSFC': None, 'PBLH': None, 'CLDFRA': None, }, + # variables=['T2', 'Q2', 'PBLH', 'U10ll', 'V10ll', 'wdir10ll', 'wspd10ll'], + # target_var=["T2"], + # statistics_per_var={'T2': None, 'Q2': None, 'PBLH': None, + # 'U10ll': None, 'V10ll': None, 'wdir10ll': None, 'wspd10ll': None}, + wind_sectors=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW'], + var_logical_z_coord_selector=0, + targetvar_logical_z_coord_selector=0, + aggregation_dim='bottom_top', + + radius=100, # km + + start='2009-01-01', + #end='2009-01-04', + #end='2009-01-31', + end='2009-03-31', + + #train_start='2009-01-01', + #train_end='2009-01-02', + train_start='2009-01-01', + #train_end='2009-01-15', + train_end='2009-02-28', + + #val_start='2009-01-02', + #val_end='2009-01-03', + ################################### + #val_start='2009-01-15', + #val_end='2009-01-22', + ################################### + val_start='2009-03-01', + val_end='2009-03-14', + + #test_start='2009-01-03', + #test_end='2009-01-04', + ################################### + #test_start='2009-01-22', + #test_end='2009-01-31', + ################################### + test_start='2009-03-15', + test_end='2009-03-31', + + sampling='hourly', + use_multiprocessing=True, + + interpolation_limit=0, +# as_image_like_data_format=False, +# model=MyLSTMModel, + model=MyCNNModelSect, + + **parser_args.__dict__) + workflow.run() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default="testrun", + help="set experiment date as string") + args = parser.parse_args() + main(args)