diff --git a/.gitignore b/.gitignore index 366a3e3907a4b0bed1bd400cc2e377b7cdbe92bc..305a5d1b9420eb62da24772fc1f4b263c1f3efe1 100644 --- a/.gitignore +++ b/.gitignore @@ -73,7 +73,7 @@ report.html # secret variables # #################### -/src/configuration/join_settings.py +/mlair/configuration/join_settings.py # ignore locally build documentation # ###################################### diff --git a/CHANGELOG.md b/CHANGELOG.md index a08bab0068246f8d57b3789e10f6f0f4105817ad..823c37005922ca5b8a621b55f7bdb5528f7f9b76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,37 @@ # Changelog All notable changes to this project will be documented in this file. +## v0.10.0 - 2020-07-15 - MLAir is official name, Workflows, easy Model plug-in + +### general +- Official project name is released: MLAir (Machine Learning on Air data) +- a model class can now easily be plugged in into MLAir. #121 +- introduced new concept of workflows, #134 + +### new features +- workflows are used to execute a sequence of run modules, #134 +- default workflows for standard and the Juelich HPC systems are available, custom workflows can be defined, #134 +- seasonal decomposition is available for conditional quantile plot, #112 +- map plot is created with coordinates, #108 +- `flatten_tails` are now more general and easier to customise, #114 +- model classes have custom compile options (replaces `set_loss`), #110 +- model can be set in ExperimentSetup from outside, #121 +- default experiment settings can be queried using `get_defaults()`, #123 +- training and model settings are reported as MarkDown and Tex tables, #145 + +### technical +- Juelich HPC systems are supported and installation scripts are available, #106 +- data store is tracked, I/O is saved and illustrated in a plot, #116 +- batch size, epoch parameter have to be defined in ExperimentSetup, #127, #122 +- automatic documentation with sphinx, #109 +- default experiment settings are updated, #123 +- refactoring of experiment path 
and its default naming, #124 +- refactoring of some parameter names, #146 +- preparation for package distribution with pip, #119 +- all run scripts are updated to run with workflows, #134 +- the experiment folder is restructured, #130 + + ## v0.9.0 - 2020-04-15 - faster bootstraps, extreme value upsamling ### general - improved and faster bootstrap workflow diff --git a/LICENSE b/LICENSE index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a79ea789a5b55f7328d1fd987293376838112048 100644 --- a/LICENSE +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Lukas Leufen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md index baae0af91036da10ba70f154ac875c18908858c3..5c55b4094232908a56cdcf61ba437976f8714e8b 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,217 @@ -# MachineLearningTools +# MLAir - Machine Learning on Air Data -This is a collection of all relevant functions used for ML stuff in the ESDE group +MLAir (Machine Learning on Air data) is an environment that simplifies and accelerates the creation of new machine +learning (ML) models for the analysis and forecasting of meteorological and air quality time series. -## Inception Model +# Installation -See a description [here](https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202) -or take a look on the papers [Going Deeper with Convolutions (Szegedy et al., 2014)](https://arxiv.org/abs/1409.4842) -and [Network In Network (Lin et al., 2014)](https://arxiv.org/abs/1312.4400). +MLAir is based on several python frameworks. To work properly, you have to install all packages from the +`requirements.txt` file. Additionally to support the geographical plotting part it is required to install geo +packages built for your operating system. Name names of these package may differ for different systems, we refer +here to the opensuse / leap OS. The geo plot can be removed from the `plot_list`, in this case there is no need to +install the geo packages. +* (geo) Install **proj** on your machine using the console. E.g. for opensuse / leap `zypper install proj` +* (geo) A c++ compiler is required for the installation of the program **cartopy** +* Install all requirements from [`requirements.txt`](https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt) + preferably in a virtual environment +* (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't + find any compatibility errors. 
Please note, that tf-1.13 and 1.15 have two distinct branches each, the default branch + for CPU support, and the "-gpu" branch for GPU support. If the GPU version is installed, MLAir will make use of the GPU + device. +* Installation of **MLAir**: + * Either clone MLAir from the [gitlab repository](https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git) + and use it without installation (beside the requirements) + * or download the distribution file (?? .whl) and install it via `pip install <??>`. In this case, you can simply + import MLAir in any python script inside your virtual environment using `import mlair`. -# Installation +# How to start with MLAir -* Install __proj__ on your machine using the console. E.g. for opensuse / leap `zypper install proj` -* c++ compiler required for cartopy installation +In this section, we show three examples how to work with MLAir. -## HPC - JUWELS and HDFML setup -The following instruction guide you throug the installation on JUWELS and HDFML. -* Clone the repo to HPC system (we recommend to place it in `/p/projects/<project name>`. -* Setup venv by executing `source setupHPC.sh`. This script loads all pre-installed modules and creates a venv for -all other packages. Furthermore, it creates slurm/batch scripts to execute code on compute nodes. <br> -You have to enter the HPC project's budget name (--account flag). -* The default external data path on JUWELS and HDFML is set to `/p/project/deepacf/intelliaq/<user>/DATA/toar_<sampling>`. -<br>To choose a different location open `run.py` and add the following keyword argument to `ExperimentSetup`: -`data_path=<your>/<custom>/<path>`. -* Execute `python run.py` on a login node to download example data. The program will throw an OSerror after downloading. -* Execute either `sbatch run_juwels_develgpus.bash` or `sbatch run_hdfml_batch.bash` to verify that the setup went well. 
-* Currently cartopy is not working on our HPC system, therefore PlotStations does not create any output. +## Example 1 -### HPC JUWELS and HDFML remarks -Please note, that the HPC setup is customised for JUWELS and HDFML. When using another HPC system, you can use the HPC setup files as a skeleton and customise it to your needs. +We start MLAir in a dry run without any modification. Just import mlair and run it. +```python +import mlair -Note: The method `PartitionCheck` currently only checks if the hostname starts with `ju` or `hdfmll`. -Therefore, it might be necessary to adopt the `if` statement in `PartitionCheck._run`. +# just give it a dry run without any modification +mlair.run() +``` +The logging output will show you many informations. Additional information (including debug messages) are collected +inside the experiment path in the logging folder. +```log +INFO: mlair started +INFO: ExperimentSetup started +INFO: Experiment path is: /home/<usr>/mlair/testrun_network +... +INFO: load data for DEBW001 from JOIN +... +INFO: Training started +... +INFO: mlair finished after 00:00:12 (hh:mm:ss) +``` +## Example 2 -# Security +Now we update the stations and customise the window history size parameter. -* To use hourly data from ToarDB via JOIN interface, a private token is required. Request your personal access token and -add it to `src/join_settings.py` in the hourly data section. Replace the `TOAR_SERVICE_URL` and the `Authorization` -value. 
To make sure, that this **sensitive** data is not uploaded to the remote server, use the following command to -prevent git from tracking this file: `git update-index --assume-unchanged src/join_settings.py -` +```python +import mlair + +# our new stations to use +stations = ['DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW107'] + +# expanded temporal context to 14 (days, because of default sampling="daily") +window_history_size = 14 + +# restart the experiment with little customisation +mlair.run(stations=stations, + window_history_size=window_history_size) +``` +The output looks similar, but we can see, that the new stations are loaded. +```log +INFO: mlair started +INFO: ExperimentSetup started +... +INFO: load data for DEBW030 from JOIN +INFO: load data for DEBW037 from JOIN +... +INFO: Training started +... +INFO: mlair finished after 00:00:24 (hh:mm:ss) +``` + +## Example 3 + +Let's just apply our trained model to new data. Therefore we keep the window history size parameter but change the stations. +In the run method, we need to disable the trainable and create new model parameters. MLAir will use the model we have +trained before. Note, this only works if the experiment path has not changed or a suitable trained model is placed +inside the experiment path. +```python +import mlair + +# our new stations to use +stations = ['DEBY002', 'DEBY079'] + +# same setting for window_history_size +window_history_size = 14 + +# run experiment without training +mlair.run(stations=stations, + window_history_size=window_history_size, + create_new_model=False, + trainable=False) +``` +We can see from the terminal that no training was performed. Analysis is now made on the new stations. +```log +INFO: mlair started +... +INFO: No training has started, because trainable parameter was false. +... +INFO: mlair finished after 00:00:06 (hh:mm:ss) +``` + +# Customised workflows and models + +# Custom Workflow + +MLAir provides a default workflow. 
If additional steps are to be performed, you have to append custom run modules to +the workflow. + +```python +import mlair +import logging + +class CustomStage(mlair.RunEnvironment): + """A custom MLAir stage for demonstration.""" + + def __init__(self, test_string): + super().__init__() # always call super init method + self._run(test_string) # call a class method + + def _run(self, test_string): + logging.info("Just running a custom stage.") + logging.info("test_string = " + test_string) + epochs = self.data_store.get("epochs") + logging.info("epochs = " + str(epochs)) -# Customise your experiment + +# create your custom MLAir workflow +CustomWorkflow = mlair.Workflow() +# provide stages without initialisation +CustomWorkflow.add(mlair.ExperimentSetup, epochs=128) +# add also keyword arguments for a specific stage +CustomWorkflow.add(CustomStage, test_string="Hello World") +# finally execute custom workflow in order of adding +CustomWorkflow.run() +``` +```log +INFO: mlair started +... +INFO: ExperimentSetup finished after 00:00:12 (hh:mm:ss) +INFO: CustomStage started +INFO: Just running a custom stage. +INFO: test_string = Hello World +INFO: epochs = 128 +INFO: CustomStage finished after 00:00:01 (hh:mm:ss) +INFO: mlair finished after 00:00:13 (hh:mm:ss) +``` + +## Custom Model + +Each model has to inherit from the abstract model class to ensure a smooth training and evaluation behaviour. It is +required to implement the set model and set compile options methods. The later has to set the loss at least. + +```python + +import keras +from keras.losses import mean_squared_error as mse +from keras.optimizers import SGD + +from mlair.model_modules import AbstractModelClass + +class MyLittleModel(AbstractModelClass): + """ + A customised model with a 1x1 Conv, and 3 Dense layers (32, 16 + window_lead_time). Dropout is used after Conv layer. 
+ """ + def __init__(self, window_history_size, window_lead_time, channels): + super().__init__() + # settings + self.window_history_size = window_history_size + self.window_lead_time = window_lead_time + self.channels = channels + self.dropout_rate = 0.1 + self.activation = keras.layers.PReLU + self.lr = 1e-2 + # apply to model + self.set_model() + self.set_compile_options() + self.set_custom_objects(loss=self.compile_options['loss']) + + def set_model(self): + # add 1 to window_size to include current time step t0 + shape = (self.window_history_size + 1, 1, self.channels) + x_input = keras.layers.Input(shape=shape) + x_in = keras.layers.Conv2D(32, (1, 1), padding='same')(x_input) + x_in = self.activation()(x_in) + x_in = keras.layers.Flatten()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + x_in = keras.layers.Dense(32)(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dense(16)(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dense(self.window_lead_time)(x_in) + out = self.activation()(x_in) + self.model = keras.Model(inputs=x_input, outputs=[out]) + + def set_compile_options(self): + self.compile_options = {"optimizer": SGD(lr=self.lr), + "loss": mse, + "metrics": ["mse"]} +``` -This section summarises which parameters can be customised for a training. ## Transformation @@ -97,3 +264,36 @@ station-wise std is a decent estimate of the true std. scaling values instead of the calculation method. For method *centre*, std can still be None, but is required for the *standardise* method. **Important**: Format of given values **must** match internal data format of DataPreparation class: `xr.DataArray` with `dims=["variables"]` and one value for each variable. + + + + + +# Special Remarks + +## Special instructions for installation on Jülich HPC systems + +_Please note, that the HPC setup is customised for JUWELS and HDFML. 
When using another HPC system, you can use the HPC +setup files as a skeleton and customise it to your needs._ + +The following instruction guide you through the installation on JUWELS and HDFML. +* Clone the repo to HPC system (we recommend to place it in `/p/projects/<project name>`). +* Setup venv by executing `source setupHPC.sh`. This script loads all pre-installed modules and creates a venv for +all other packages. Furthermore, it creates slurm/batch scripts to execute code on compute nodes. <br> +You have to enter the HPC project's budget name (--account flag). +* The default external data path on JUWELS and HDFML is set to `/p/project/deepacf/intelliaq/<user>/DATA/toar_<sampling>`. +<br>To choose a different location open `run.py` and add the following keyword argument to `ExperimentSetup`: +`data_path=<your>/<custom>/<path>`. +* Execute `python run.py` on a login node to download example data. The program will throw an OSerror after downloading. +* Execute either `sbatch run_juwels_develgpus.bash` or `sbatch run_hdfml_batch.bash` to verify that the setup went well. +* Currently cartopy is not working on our HPC system, therefore PlotStations does not create any output. + +Note: The method `PartitionCheck` currently only checks if the hostname starts with `ju` or `hdfmll`. +Therefore, it might be necessary to adopt the `if` statement in `PartitionCheck._run`. + +## Security using JOIN + +* To use hourly data from ToarDB via JOIN interface, a private token is required. Request your personal access token and +add it to `src/join_settings.py` in the hourly data section. Replace the `TOAR_SERVICE_URL` and the `Authorization` +value. 
To make sure, that this **sensitive** data is not uploaded to the remote server, use the following command to +prevent git from tracking this file: `git update-index --assume-unchanged src/join_settings.py` diff --git a/docs/_source/_plots/padding_example1.png b/docs/_source/_plots/padding_example1.png new file mode 100755 index 0000000000000000000000000000000000000000..e609cbb9fe22f406c97ceb8637751e484d139409 Binary files /dev/null and b/docs/_source/_plots/padding_example1.png differ diff --git a/docs/_source/_plots/padding_example2.png b/docs/_source/_plots/padding_example2.png new file mode 100755 index 0000000000000000000000000000000000000000..cfc84c6961eb6d24aef135d9e8fc5bae74a78f8a Binary files /dev/null and b/docs/_source/_plots/padding_example2.png differ diff --git a/docs/_source/conf.py b/docs/_source/conf.py index 6363f57eb45e686f6f2ef8ab07806e4feba0fe2d..573918ee35e9757b8c0b32b2697fc0cc2bc0b38f 100644 --- a/docs/_source/conf.py +++ b/docs/_source/conf.py @@ -17,7 +17,7 @@ sys.path.insert(0, os.path.abspath('../..')) # -- Project information ----------------------------------------------------- -project = 'machinelearningtools' +project = 'MLAir' copyright = '2020, Lukas H Leufen, Felix Kleinert' author = 'Lukas H Leufen, Felix Kleinert' @@ -55,7 +55,7 @@ extensions = [ autosummary_generate = True autoapi_type = 'python' -autoapi_dirs = ['../../src/.'] +autoapi_dirs = ['../../mlair/.'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -118,7 +118,7 @@ latex_elements = { # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). 
latex_documents = [ - (master_doc, 'machinelearningtools.tex', 'MachineLearningTools Documentation', + (master_doc, 'mlair.tex', 'MLAir Documentation', author, 'manual'), ] diff --git a/docs/_source/get-started.rst b/docs/_source/get-started.rst index e5a82fdcf1d16ca2188a04e3dce76dc7ba9d477a..98a96d43675a0263be5bfc2d452b8af1c2626b60 100644 --- a/docs/_source/get-started.rst +++ b/docs/_source/get-started.rst @@ -1,16 +1,232 @@ -Get started with MachineLearningTools -===================================== +Get started with MLAir +====================== -<what is machinelearningtools?> +Install MLAir +------------- -MLT Module and Funtion Documentation ------------------------------------- +MLAir is based on several python frameworks. To work properly, you have to install all packages from the +`requirements.txt` file. Additionally to support the geographical plotting part it is required to install geo +packages built for your operating system. Name names of these package may differ for different systems, we refer +here to the opensuse / leap OS. The geo plot can be removed from the `plot_list`, in this case there is no need to +install the geo packages. -Install MachineLearningTools ----------------------------- +* (geo) Install **proj** on your machine using the console. E.g. for opensuse / leap `zypper install proj` +* (geo) A c++ compiler is required for the installation of the program **cartopy** +* Install all requirements from [`requirements.txt`](https://gitlab.version.fz-juelich.de/toar/machinelearningtools/-/blob/master/requirements.txt) + preferably in a virtual environment +* (tf) Currently, TensorFlow-1.13 is mentioned in the requirements. We already tested the TensorFlow-1.15 version and couldn't + find any compatibility errors. Please note, that tf-1.13 and 1.15 have two distinct branches each, the default branch + for CPU support, and the "-gpu" branch for GPU support. If the GPU version is installed, MLAir will make use of the GPU + device. 
+* Installation of **MLAir**: + * Either clone MLAir from the [gitlab repository](https://gitlab.version.fz-juelich.de/toar/machinelearningtools.git) + and use it without installation (beside the requirements) + * or download the distribution file (?? .whl) and install it via `pip install <??>`. In this case, you can simply + import MLAir in any python script inside your virtual environment using `import mlair`. -Dependencies + +How to start with MLAir +----------------------- + +In this section, we show three examples how to work with MLAir. + +Example 1 +~~~~~~~~~ + +We start MLAir in a dry run without any modification. Just import mlair and run it. + +.. code-block:: python + + import mlair + + # just give it a dry run without any modification + mlair.run() + + +The logging output will show you many informations. Additional information (including debug messages) are collected +inside the experiment path in the logging folder. + +.. code-block:: + + INFO: mlair started + INFO: ExperimentSetup started + INFO: Experiment path is: /home/<usr>/mlair/testrun_network + ... + INFO: load data for DEBW001 from JOIN + ... + INFO: Training started + ... + INFO: mlair finished after 00:00:12 (hh:mm:ss) + + +Example 2 +~~~~~~~~~ + +Now we update the stations and customise the window history size parameter. + +.. code-block:: python + + import mlair + + # our new stations to use + stations = ['DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW107'] + + # expanded temporal context to 14 (days, because of default sampling="daily") + window_history_size = 14 + + # restart the experiment with little customisation + mlair.run(stations=stations, + window_history_size=window_history_size) + +The output looks similar, but we can see, that the new stations are loaded. + +.. code-block:: + + INFO: mlair started + INFO: ExperimentSetup started + ... + INFO: load data for DEBW030 from JOIN + INFO: load data for DEBW037 from JOIN + ... + INFO: Training started + ... 
+ INFO: mlair finished after 00:00:24 (hh:mm:ss) + +Example 3 +~~~~~~~~~ + +Let's just apply our trained model to new data. Therefore we keep the window history size parameter but change the stations. +In the run method, we need to disable the trainable and create new model parameters. MLAir will use the model we have +trained before. Note, this only works if the experiment path has not changed or a suitable trained model is placed +inside the experiment path. + +.. code-block:: python + + import mlair + + # our new stations to use + stations = ['DEBY002', 'DEBY079'] + + # same setting for window_history_size + window_history_size = 14 + + # run experiment without training + mlair.run(stations=stations, + window_history_size=window_history_size, + create_new_model=False, + trainable=False) + +We can see from the terminal that no training was performed. Analysis is now made on the new stations. + +.. code-block:: + + INFO: mlair started + ... + INFO: No training has started, because trainable parameter was false. + ... + INFO: mlair finished after 00:00:06 (hh:mm:ss) + + + +Customised workflows and models +------------------------------- + +Custom Workflow +~~~~~~~~~~~~~~~ + +MLAir provides a default workflow. If additional steps are to be performed, you have to append custom run modules to +the workflow. + +.. 
code-block:: python + + import mlair + import logging + + class CustomStage(mlair.RunEnvironment): + """A custom MLAir stage for demonstration.""" + + def __init__(self, test_string): + super().__init__() # always call super init method + self._run(test_string) # call a class method + + def _run(self, test_string): + logging.info("Just running a custom stage.") + logging.info("test_string = " + test_string) + epochs = self.data_store.get("epochs") + logging.info("epochs = " + str(epochs)) + + + # create your custom MLAir workflow + CustomWorkflow = mlair.Workflow() + # provide stages without initialisation + CustomWorkflow.add(mlair.ExperimentSetup, epochs=128) + # add also keyword arguments for a specific stage + CustomWorkflow.add(CustomStage, test_string="Hello World") + # finally execute custom workflow in order of adding + CustomWorkflow.run() + +.. code-block:: + + INFO: mlair started + ... + INFO: ExperimentSetup finished after 00:00:12 (hh:mm:ss) + INFO: CustomStage started + INFO: Just running a custom stage. + INFO: test_string = Hello World + INFO: epochs = 128 + INFO: CustomStage finished after 00:00:01 (hh:mm:ss) + INFO: mlair finished after 00:00:13 (hh:mm:ss) + +Custom Model ~~~~~~~~~~~~ -Data -~~~~ +Each model has to inherit from the abstract model class to ensure a smooth training and evaluation behaviour. It is +required to implement the set model and set compile options methods. The later has to set the loss at least. + +.. code-block:: python + + import keras + from keras.losses import mean_squared_error as mse + from keras.optimizers import SGD + + from mlair.model_modules import AbstractModelClass + + class MyLittleModel(AbstractModelClass): + """ + A customised model with a 1x1 Conv, and 3 Dense layers (32, 16 + window_lead_time). Dropout is used after Conv layer. 
+ """ + def __init__(self, window_history_size, window_lead_time, channels): + super().__init__() + # settings + self.window_history_size = window_history_size + self.window_lead_time = window_lead_time + self.channels = channels + self.dropout_rate = 0.1 + self.activation = keras.layers.PReLU + self.lr = 1e-2 + # apply to model + self.set_model() + self.set_compile_options() + self.set_custom_objects(loss=self.compile_options['loss']) + + def set_model(self): + # add 1 to window_size to include current time step t0 + shape = (self.window_history_size + 1, 1, self.channels) + x_input = keras.layers.Input(shape=shape) + x_in = keras.layers.Conv2D(32, (1, 1), padding='same')(x_input) + x_in = self.activation()(x_in) + x_in = keras.layers.Flatten()(x_in) + x_in = keras.layers.Dropout(self.dropout_rate)(x_in) + x_in = keras.layers.Dense(32)(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dense(16)(x_in) + x_in = self.activation()(x_in) + x_in = keras.layers.Dense(self.window_lead_time)(x_in) + out = self.activation()(x_in) + self.model = keras.Model(inputs=x_input, outputs=[out]) + + def set_compile_options(self): + self.compile_options = {"optimizer": SGD(lr=self.lr), + "loss": mse, + "metrics": ["mse"]} diff --git a/src/__init__.py b/mlair/__init__.py similarity index 61% rename from src/__init__.py rename to mlair/__init__.py index 5b7073ff042f6173fd78362f55d698eb6745552f..7f55e47abd709d5747bf54d89595fa66f5839c64 100644 --- a/src/__init__.py +++ b/mlair/__init__.py @@ -1,12 +1,13 @@ __version_info__ = { 'major': 0, - 'minor': 9, + 'minor': 10, 'micro': 0, } -from src.run_modules import * -from src.workflows import DefaultWorkflow, Workflow - +from mlair.run_modules import RunEnvironment, ExperimentSetup, PreProcessing, ModelSetup, Training, PostProcessing +from mlair.workflows import DefaultWorkflow, Workflow +from mlair.run_script import run +from mlair.model_modules import AbstractModelClass def get_version(): diff --git a/src/configuration/.gitignore 
b/mlair/configuration/.gitignore similarity index 100% rename from src/configuration/.gitignore rename to mlair/configuration/.gitignore diff --git a/src/configuration/__init__.py b/mlair/configuration/__init__.py similarity index 100% rename from src/configuration/__init__.py rename to mlair/configuration/__init__.py diff --git a/src/configuration/defaults.py b/mlair/configuration/defaults.py similarity index 96% rename from src/configuration/defaults.py rename to mlair/configuration/defaults.py index 4bb1ab2eef43ce2230fb2dfd3781322c9fc405cf..a83b3f597e202dde44447a301e6ca10688ff1b79 100644 --- a/src/configuration/defaults.py +++ b/mlair/configuration/defaults.py @@ -29,8 +29,8 @@ DEFAULT_TARGET_VAR = "o3" DEFAULT_TARGET_DIM = "variables" DEFAULT_WINDOW_LEAD_TIME = 3 DEFAULT_DIMENSIONS = {"new_index": ["datetime", "Stations"]} -DEFAULT_INTERPOLATE_DIM = "datetime" -DEFAULT_INTERPOLATE_METHOD = "linear" +DEFAULT_INTERPOLATION_DIM = "datetime" +DEFAULT_INTERPOLATION_METHOD = "linear" DEFAULT_LIMIT_NAN_FILL = 1 DEFAULT_TRAIN_START = "1997-01-01" DEFAULT_TRAIN_END = "2007-12-31" diff --git a/src/configuration/join_settings.py b/mlair/configuration/join_settings.py similarity index 100% rename from src/configuration/join_settings.py rename to mlair/configuration/join_settings.py diff --git a/src/configuration/path_config.py b/mlair/configuration/path_config.py similarity index 69% rename from src/configuration/path_config.py rename to mlair/configuration/path_config.py index 29dcd24e3626aca2ad3f24612399c24469eb3218..9b3d6f250d97d93dd1d06004690885f44de30073 100644 --- a/src/configuration/path_config.py +++ b/mlair/configuration/path_config.py @@ -6,7 +6,8 @@ import re import socket from typing import Tuple -ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +# ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +ROOT_PATH = os.getcwd() def prepare_host(create_new=True, data_path=None, sampling="daily") -> str: @@ 
-23,35 +24,38 @@ def prepare_host(create_new=True, data_path=None, sampling="daily") -> str: :return: full path to data """ - hostname = get_host() - user = getpass.getuser() - runner_regex = re.compile(r"runner-.*-project-2411-concurrent-\d+") - if hostname == "ZAM144": - path = f"/home/{user}/Data/toar_{sampling}/" - elif hostname == "zam347": - path = f"/home/{user}/Data/toar_{sampling}/" - elif hostname == "linux-aa9b": - path = f"/home/{user}/mlair/data/toar_{sampling}/" - elif (len(hostname) > 2) and (hostname[:2] == "jr"): - path = f"/p/project/cjjsc42/{user}/DATA/toar_{sampling}/" - elif (len(hostname) > 2) and (hostname[:2] in ['jw', 'ju'] or hostname[:5] in ['hdfml']): - path = f"/p/project/deepacf/intelliaq/{user}/DATA/toar_{sampling}/" - elif runner_regex.match(hostname) is not None: - path = f"/home/{user}/mlair/data/toar_{sampling}/" - else: - raise OSError(f"unknown host '{hostname}'") - if not os.path.exists(path): + if data_path is None: + hostname = get_host() + user = getpass.getuser() + runner_regex = re.compile(r"runner-.*-project-2411-concurrent-\d+") + if hostname == "ZAM144": + data_path = f"/home/{user}/Data/toar_{sampling}/" + elif hostname == "zam347": + data_path = f"/home/{user}/Data/toar_{sampling}/" + elif hostname == "linux-aa9b": + data_path = f"/home/{user}/mlair/data/toar_{sampling}/" + elif (len(hostname) > 2) and (hostname[:2] == "jr"): + data_path = f"/p/project/cjjsc42/{user}/DATA/toar_{sampling}/" + elif (len(hostname) > 2) and (hostname[:2] in ['jw', 'ju'] or hostname[:5] in ['hdfml']): + data_path = f"/p/project/deepacf/intelliaq/{user}/DATA/toar_{sampling}/" + elif runner_regex.match(hostname) is not None: + data_path = f"/home/{user}/mlair/data/toar_{sampling}/" + else: + data_path = os.path.join(os.getcwd(), "data", sampling) + # raise OSError(f"unknown host '{hostname}'") + + if not os.path.exists(data_path): try: if create_new: - check_path_and_create(path) - return path + check_path_and_create(data_path) + return 
data_path else: raise PermissionError except PermissionError: - raise NotADirectoryError(f"path '{path}' does not exist for host '{hostname}'.") + raise NotADirectoryError(f"path '{data_path}' does not exist for host '{hostname}'.") else: - logging.debug(f"set path to: {path}") - return path + logging.debug(f"set path to: {data_path}") + return data_path def set_experiment_path(name: str, path: str = None) -> str: diff --git a/src/data_handler/__init__.py b/mlair/data_handler/__init__.py similarity index 100% rename from src/data_handler/__init__.py rename to mlair/data_handler/__init__.py diff --git a/src/data_handler/advanced_data_handler.py b/mlair/data_handler/advanced_data_handler.py similarity index 98% rename from src/data_handler/advanced_data_handler.py rename to mlair/data_handler/advanced_data_handler.py index c9c25ca7a6ce765db2eb67d9b6b7d9144e54987a..31244540a81babdf3348e350473a0af3c2f06419 100644 --- a/src/data_handler/advanced_data_handler.py +++ b/mlair/data_handler/advanced_data_handler.py @@ -3,7 +3,7 @@ __author__ = 'Lukas Leufen' __date__ = '2020-07-08' -from src.helpers import to_list, remove_items +from mlair.helpers import to_list, remove_items import numpy as np import xarray as xr import pickle @@ -17,8 +17,8 @@ import copy from typing import Union, List, Tuple, Dict import logging from functools import reduce -from src.data_handler.station_preparation import StationPrep -from src.helpers.join import EmptyQueryResult +from mlair.data_handler.station_preparation import StationPrep +from mlair.helpers.join import EmptyQueryResult number = Union[float, int] diff --git a/src/data_handler/bootstraps.py b/mlair/data_handler/bootstraps.py similarity index 98% rename from src/data_handler/bootstraps.py rename to mlair/data_handler/bootstraps.py index 4ccc1350ee36df49dba0683eea896fc4ed398b60..91603b41822b92e28fbd077c502d84707fff746f 100644 --- a/src/data_handler/bootstraps.py +++ b/mlair/data_handler/bootstraps.py @@ -19,7 +19,7 @@ from itertools 
import chain import numpy as np import xarray as xr -from src.data_handler.advanced_data_handler import AbstractDataPreparation +from mlair.data_handler.advanced_data_handler import AbstractDataPreparation class BootstrapIterator(Iterator): diff --git a/src/data_handler/data_preparation_neighbors.py b/mlair/data_handler/data_preparation_neighbors.py similarity index 91% rename from src/data_handler/data_preparation_neighbors.py rename to mlair/data_handler/data_preparation_neighbors.py index 508716b14d085ab1bb2aaaeb02471480608b6a27..93d21f3ae2cb8a8b287bfc23f38b427bb56ec677 100644 --- a/src/data_handler/data_preparation_neighbors.py +++ b/mlair/data_handler/data_preparation_neighbors.py @@ -3,9 +3,9 @@ __author__ = 'Lukas Leufen' __date__ = '2020-07-17' -from src.helpers import to_list -from src.data_handler.station_preparation import StationPrep -from src.data_handler.advanced_data_handler import DefaultDataPreparation +from mlair.helpers import to_list +from mlair.data_handler.station_preparation import StationPrep +from mlair.data_handler.advanced_data_handler import DefaultDataPreparation import os from typing import Union, List @@ -53,7 +53,7 @@ if __name__ == "__main__": "sampling": 'daily', "target_dim": 'variables', "target_var": 'o3', - "interpolate_dim": 'datetime', + "interpolation_dim": 'datetime', "window_history_size": 7, "window_lead_time": 3, "neighbors": ["DEBW034"], diff --git a/src/data_handler/iterator.py b/mlair/data_handler/iterator.py similarity index 99% rename from src/data_handler/iterator.py rename to mlair/data_handler/iterator.py index 14047ba25bef4d2703d278dcd8047cb8ddff9a3e..49569405a587920da795820d48f8d968a8142cc7 100644 --- a/src/data_handler/iterator.py +++ b/mlair/data_handler/iterator.py @@ -65,7 +65,6 @@ class DataCollection(Iterable): return list(self._mapping.keys()) - class KerasIterator(keras.utils.Sequence): def __init__(self, collection: DataCollection, batch_size: int, batch_path: str, shuffle_batches: bool = False, diff 
--git a/src/data_handler/station_preparation.py b/mlair/data_handler/station_preparation.py similarity index 97% rename from src/data_handler/station_preparation.py rename to mlair/data_handler/station_preparation.py index 42d94e277415c637d4fc9a5262692a6b3150b0a7..57dd60653908d76815742324e4d78c3344a1465f 100644 --- a/src/data_handler/station_preparation.py +++ b/mlair/data_handler/station_preparation.py @@ -13,9 +13,9 @@ import numpy as np import pandas as pd import xarray as xr -from src.configuration import check_path_and_create -from src import helpers -from src.helpers import join, statistics +from mlair.configuration import check_path_and_create +from mlair import helpers +from mlair.helpers import join, statistics # define a more general date type for type hinting date = Union[dt.date, dt.datetime] @@ -39,7 +39,7 @@ class AbstractStationPrep(object): class StationPrep(AbstractStationPrep): def __init__(self, station, data_path, statistics_per_var, station_type, network, sampling, - target_dim, target_var, interpolate_dim, window_history_size, window_lead_time, + target_dim, target_var, interpolation_dim, window_history_size, window_lead_time, overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, min_length: int = 0, start=None, end=None, **kwargs): super().__init__() # path, station, statistics_per_var, transformation, **kwargs) @@ -53,7 +53,7 @@ class StationPrep(AbstractStationPrep): self.sampling = sampling self.target_dim = target_dim self.target_var = target_var - self.interpolate_dim = interpolate_dim + self.interpolation_dim = interpolation_dim self.window_history_size = window_history_size self.window_lead_time = window_lead_time self.overwrite_local_data = overwrite_local_data @@ -99,7 +99,7 @@ class StationPrep(AbstractStationPrep): f"statistics_per_var={self.statistics_per_var}, " \ f"station_type='{self.station_type}', network='{self.network}', " \ f"sampling='{self.sampling}', target_dim='{self.target_dim}', 
target_var='{self.target_var}', " \ - f"interpolate_dim='{self.interpolate_dim}', window_history_size={self.window_history_size}, " \ + f"interpolate_dim='{self.interpolation_dim}', window_history_size={self.window_history_size}, " \ f"window_lead_time={self.window_lead_time}, overwrite_local_data={self.overwrite_local_data}, " \ f"transformation={self._print_transformation_as_string}, **{self.kwargs})" @@ -144,7 +144,7 @@ class StationPrep(AbstractStationPrep): return coords.rename(index={"station_lon": "lon", "station_lat": "lat"}).to_dict()[str(self)] def call_transform(self, inverse=False): - self.transform(dim=self.interpolate_dim, method=self.transformation["method"], + self.transform(dim=self.interpolation_dim, method=self.transformation["method"], mean=self.transformation['mean'], std=self.transformation["std"], min_val=self.transformation["min"], max_val=self.transformation["max"], inverse=inverse @@ -164,10 +164,10 @@ class StationPrep(AbstractStationPrep): self.make_samples() def make_samples(self): - self.make_history_window(self.target_dim, self.window_history_size, self.interpolate_dim) - self.make_labels(self.target_dim, self.target_var, self.interpolate_dim, self.window_lead_time) - self.make_observation(self.target_dim, self.target_var, self.interpolate_dim) - self.remove_nan(self.interpolate_dim) + self.make_history_window(self.target_dim, self.window_history_size, self.interpolation_dim) + self.make_labels(self.target_dim, self.target_var, self.interpolation_dim, self.window_lead_time) + self.make_observation(self.target_dim, self.target_var, self.interpolation_dim) + self.remove_nan(self.interpolation_dim) def read_data_from_disk(self, source_name=""): """ @@ -658,13 +658,13 @@ if __name__ == "__main__": sp = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122', statistics_per_var=statistics_per_var, station_type='background', network='UBA', sampling='daily', target_dim='variables', target_var='o3', - 
interpolate_dim='datetime', window_history_size=7, window_lead_time=3, + interpolation_dim='datetime', window_history_size=7, window_lead_time=3, ) # transformation={'method': 'standardise'}) # sp.set_transformation({'method': 'standardise', 'mean': sp.mean+2, 'std': sp.std+1}) sp2 = StationPrep(data_path='/home/felix/PycharmProjects/mlt_new/data/', station='DEBY122', statistics_per_var=statistics_per_var, station_type='background', network='UBA', sampling='daily', target_dim='variables', target_var='o3', - interpolate_dim='datetime', window_history_size=7, window_lead_time=3, + interpolation_dim='datetime', window_history_size=7, window_lead_time=3, transformation={'method': 'standardise'}) sp2.transform(inverse=True) sp.get_X() diff --git a/src/helpers/__init__.py b/mlair/helpers/__init__.py similarity index 100% rename from src/helpers/__init__.py rename to mlair/helpers/__init__.py diff --git a/src/helpers/datastore.py b/mlair/helpers/datastore.py similarity index 100% rename from src/helpers/datastore.py rename to mlair/helpers/datastore.py diff --git a/src/helpers/helpers.py b/mlair/helpers/helpers.py similarity index 100% rename from src/helpers/helpers.py rename to mlair/helpers/helpers.py diff --git a/src/helpers/join.py b/mlair/helpers/join.py similarity index 98% rename from src/helpers/join.py rename to mlair/helpers/join.py index 7d9c3aad23c402ae63f26bdf998074a86e35ffbf..a3c6876e3ea43ff4d03243430cf6cd791d62dec2 100644 --- a/src/helpers/join.py +++ b/mlair/helpers/join.py @@ -9,8 +9,8 @@ from typing import Iterator, Union, List, Dict import pandas as pd import requests -from src import helpers -from src.configuration.join_settings import join_settings +from mlair import helpers +from mlair.configuration.join_settings import join_settings # join_url_base = 'https://join.fz-juelich.de/services/rest/surfacedata/' str_or_none = Union[str, None] diff --git a/src/helpers/logger.py b/mlair/helpers/logger.py similarity index 100% rename from 
src/helpers/logger.py rename to mlair/helpers/logger.py diff --git a/src/helpers/statistics.py b/mlair/helpers/statistics.py similarity index 100% rename from src/helpers/statistics.py rename to mlair/helpers/statistics.py diff --git a/src/helpers/testing.py b/mlair/helpers/testing.py similarity index 100% rename from src/helpers/testing.py rename to mlair/helpers/testing.py diff --git a/src/helpers/time_tracking.py b/mlair/helpers/time_tracking.py similarity index 100% rename from src/helpers/time_tracking.py rename to mlair/helpers/time_tracking.py diff --git a/mlair/model_modules/GUIDE.md b/mlair/model_modules/GUIDE.md new file mode 100644 index 0000000000000000000000000000000000000000..3cda63538b06a83afe9c0c20d9c6ef46d00633fe --- /dev/null +++ b/mlair/model_modules/GUIDE.md @@ -0,0 +1,49 @@ + +## Model Extensions + +### Inception Blocks + +MLAir provides an easy interface to add extensions. Specifically, the code comes with an extension for inception blocks +as proposed by Szegedy et al. (2014). Those inception blocks are a collection of multiple network towers. A tower is a +collection of successive (standard) layers and generally contains at least a padding layer, and one convolution or a +pooling layer. Additionally such towers can also contain an additional convolutional layer of kernel size 1x1 for +information compression (reduction of filter size), or batch normalisation layers. + +After initialising the inception blocks by using *InceptionModelBase*, one can add an arbitrary number of +individual inception blocks. The initialisation sets all counters for internal naming conventions. + +The inception model requires two dictionaries as inputs specifying the convolutional and the pooling towers, +respectively. The convolutional dictionary contains dictionaries for each individual tower, allowing to use different +reduction filters, kernel and filter sizes of the main convolution and the activation function. 
+ +See a description [here](https://towardsdatascience.com/a-simple-guide-to-the-versions-of-the-inception-network-7fc52b863202) +or take a look at the papers [Going Deeper with Convolutions (Szegedy et al., 2014)](https://arxiv.org/abs/1409.4842) +and [Network In Network (Lin et al., 2014)](https://arxiv.org/abs/1312.4400). + + +### Paddings + +For some network layers like convolutions, it is common to pad the input data to prevent shrinking of dimensions. In +classical image recognition tasks zero paddings are most often used. In the context of meteorology, a zero padding might +create artificial effects on the boundaries. We therefore adopted the symmetric and reflection padding layers from +*TensorFlow*, to be used as *Keras* layers. The layers are named *SymmetricPadding2D* and *ReflectionPadding2D*. Both +layers need the information on *padding* size. We provide a helper function to calculate the padding size given a +convolutional kernel size. + + + +Additionally, we provide the wrapper class *Padding2D*, which combines symmetric, reflection and zero padding. This class +allows to switch between different types of padding while keeping the overall model structure untouched. + + + +This figure shows an example on how to easily apply the wrapper Padding2D and specify the *padding_type* (e.g. +"SymmetricPadding2D" or "ReflectionPadding2D"). The following table lists all padding types which are currently +supported. The padding wrapper can also handle other user specific padding types. 
+ +| padding layer (long name) | short name | +|---------------------------|------------| +| ReflectionPadding2D* | RefPad2D | +| SymmetricPadding2D* | SymPad2D | +| ZeroPadding2D** | ZeroPad2D | +\* implemented in MLAir \** implemented in keras diff --git a/src/model_modules/__init__.py b/mlair/model_modules/__init__.py similarity index 57% rename from src/model_modules/__init__.py rename to mlair/model_modules/__init__.py index 35f4060886036d3f51c24b4480738566ff80a445..ea2067bdfdaacb6290157be681786212b0422812 100644 --- a/src/model_modules/__init__.py +++ b/mlair/model_modules/__init__.py @@ -1 +1,3 @@ """Collection of all modules that are related to a model.""" + +from .model_class import AbstractModelClass diff --git a/src/model_modules/advanced_paddings.py b/mlair/model_modules/advanced_paddings.py similarity index 100% rename from src/model_modules/advanced_paddings.py rename to mlair/model_modules/advanced_paddings.py diff --git a/src/model_modules/flatten.py b/mlair/model_modules/flatten.py similarity index 100% rename from src/model_modules/flatten.py rename to mlair/model_modules/flatten.py diff --git a/src/model_modules/inception_model.py b/mlair/model_modules/inception_model.py similarity index 99% rename from src/model_modules/inception_model.py rename to mlair/model_modules/inception_model.py index 74cd4d806f706a70d554adae468e7fa8c5de153e..d7354c37899bbb7d8f80bc76b4cd9237c7df96dc 100644 --- a/src/model_modules/inception_model.py +++ b/mlair/model_modules/inception_model.py @@ -6,7 +6,7 @@ import logging import keras import keras.layers as layers -from src.model_modules.advanced_paddings import PadUtils, ReflectionPadding2D, Padding2D +from mlair.model_modules.advanced_paddings import PadUtils, ReflectionPadding2D, Padding2D class InceptionModelBase: diff --git a/src/model_modules/keras_extensions.py b/mlair/model_modules/keras_extensions.py similarity index 99% rename from src/model_modules/keras_extensions.py rename to 
mlair/model_modules/keras_extensions.py index 479913811a668d8330a389b2876360f096f57dbf..33358e566ef80f28ee7740531b71d1a83abde115 100644 --- a/src/model_modules/keras_extensions.py +++ b/mlair/model_modules/keras_extensions.py @@ -13,7 +13,7 @@ import numpy as np from keras import backend as K from keras.callbacks import History, ModelCheckpoint, Callback -from src import helpers +from mlair import helpers class HistoryAdvanced(History): diff --git a/src/model_modules/linear_model.py b/mlair/model_modules/linear_model.py similarity index 100% rename from src/model_modules/linear_model.py rename to mlair/model_modules/linear_model.py diff --git a/src/model_modules/loss.py b/mlair/model_modules/loss.py similarity index 100% rename from src/model_modules/loss.py rename to mlair/model_modules/loss.py diff --git a/src/model_modules/model_class.py b/mlair/model_modules/model_class.py similarity index 94% rename from src/model_modules/model_class.py rename to mlair/model_modules/model_class.py index ca54840c8b995a4719041e2a8bc9ccd46351a89f..56e7b4c347a69781854a9cf8ad9a719f7d6ac8b9 100644 --- a/src/model_modules/model_class.py +++ b/mlair/model_modules/model_class.py @@ -2,7 +2,7 @@ Module for neural models to use during experiment. To work properly, each customised model needs to inherit from AbstractModelClass and needs an implementation of the -set_model and set_loss method. +set_model method. In this module, you can find some exemplary model classes that have been build and were running in a experiment. @@ -33,10 +33,11 @@ How to create a customised model? # apply to model self.set_model() - self.set_loss() - self.set_custom_objects(loss=self.loss) + self.set_compile_options() + self.set_custom_objects(loss=self.compile_options['loss']) -* Make sure to add the `super().__init__()` and at least `set_model()` and `set_loss()` to your custom init method. 
+* Make sure to add the `super().__init__()` and at least `set_model()` and `set_compile_options()` to your custom init + method. * If you have custom objects in your model, that are not part of keras, you need to add them to custom objects. To do this, call `set_custom_objects` with arbitrarily kwargs. In the shown example, the loss has been added, because it wasn't a standard loss. Apart from this, we always encourage you to add the loss as custom object, to prevent @@ -60,14 +61,20 @@ How to create a customised model? self.model = keras.Model(inputs=x_input, outputs=[out_main]) * Your are free, how to design your model. Just make sure to save it in the class attribute model. -* Finally, set your custom loss. +* Additionally, set your custom compile options including the loss. .. code-block:: python class MyCustomisedModel(AbstractModelClass): - def set_loss(self): + def set_compile_options(self): + self.initial_lr = 1e-2 + self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9) + self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, + drop=.94, + epochs_drop=10) self.loss = keras.losses.mean_squared_error + self.compile_options = {"metrics": ["mse", "mae"]} * If you have a branched model with multiple outputs, you need either set only a single loss for all branch outputs or to provide the same number of loss functions considering the right order. E.g. @@ -80,7 +87,7 @@ How to create a customised model? ... 
self.model = keras.Model(inputs=x_input, outputs=[out_minor_1, out_minor_2, out_main]) - def set_loss(self): + def set_compile_options(self): self.loss = [keras.losses.mean_absolute_error] + # for out_minor_1 [keras.losses.mean_squared_error] + # for out_minor_2 [keras.losses.mean_squared_error] # for out_main @@ -108,10 +115,9 @@ True """ -import src.model_modules.keras_extensions +import mlair.model_modules.keras_extensions __author__ = "Lukas Leufen, Felix Kleinert" -# __date__ = '2019-12-12' __date__ = '2020-05-12' from abc import ABC @@ -119,9 +125,9 @@ from typing import Any, Callable, Dict import keras import tensorflow as tf -from src.model_modules.inception_model import InceptionModelBase -from src.model_modules.flatten import flatten_tail -from src.model_modules.advanced_paddings import PadUtils, Padding2D +from mlair.model_modules.inception_model import InceptionModelBase +from mlair.model_modules.flatten import flatten_tail +from mlair.model_modules.advanced_paddings import PadUtils, Padding2D class AbstractModelClass(ABC): @@ -387,8 +393,8 @@ class MyLittleModel(AbstractModelClass): def set_compile_options(self): self.initial_lr = 1e-2 self.optimizer = keras.optimizers.adam(lr=self.initial_lr) - self.lr_decay = src.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, - epochs_drop=10) + self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, + epochs_drop=10) self.compile_options = {"loss": [keras.losses.mean_squared_error], "metrics": ["mse", "mae"]} @@ -451,8 +457,8 @@ class MyBranchedModel(AbstractModelClass): def set_compile_options(self): self.initial_lr = 1e-2 self.optimizer = keras.optimizers.SGD(lr=self.initial_lr, momentum=0.9) - self.lr_decay = src.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, - epochs_drop=10) + self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, + 
epochs_drop=10) self.compile_options = {"loss": [keras.losses.mean_absolute_error] + [keras.losses.mean_squared_error] + [ keras.losses.mean_squared_error], "metrics": ["mse", "mae"]} @@ -475,8 +481,8 @@ class MyTowerModel(AbstractModelClass): self.dropout_rate = 1e-2 self.regularizer = keras.regularizers.l2(0.1) self.initial_lr = 1e-2 - self.lr_decay = src.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, - epochs_drop=10) + self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, + epochs_drop=10) self.activation = keras.layers.PReLU # apply to model @@ -573,8 +579,8 @@ class MyPaperModel(AbstractModelClass): self.dropout_rate = .3 self.regularizer = keras.regularizers.l2(0.001) self.initial_lr = 1e-3 - self.lr_decay = src.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, - epochs_drop=10) + self.lr_decay = mlair.model_modules.keras_extensions.LearningRateDecay(base_lr=self.initial_lr, drop=.94, + epochs_drop=10) self.activation = keras.layers.ELU self.padding = "SymPad2D" diff --git a/src/plotting/__init__.py b/mlair/plotting/__init__.py similarity index 100% rename from src/plotting/__init__.py rename to mlair/plotting/__init__.py diff --git a/src/plotting/postprocessing_plotting.py b/mlair/plotting/postprocessing_plotting.py similarity index 98% rename from src/plotting/postprocessing_plotting.py rename to mlair/plotting/postprocessing_plotting.py index 284606a318f28b98ef7208ccbe8ccb27b53eb5b0..6e9a2a401f2990fe1fa7cfddf8711f37eda4bf48 100644 --- a/src/plotting/postprocessing_plotting.py +++ b/mlair/plotting/postprocessing_plotting.py @@ -18,9 +18,9 @@ import seaborn as sns import xarray as xr from matplotlib.backends.backend_pdf import PdfPages -from src import helpers -from src.data_handler.iterator import DataCollection -from src.helpers import TimeTrackingWrapper +from mlair import helpers +from mlair.data_handler.iterator import 
DataCollection +from mlair.helpers import TimeTrackingWrapper logging.getLogger('matplotlib').setLevel(logging.WARNING) @@ -256,14 +256,28 @@ class PlotStationMap(AbstractPlotClass): from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER fig = plt.figure(figsize=(10, 5)) self._ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree()) - self._ax.set_extent([4, 17, 44, 58], crs=ccrs.PlateCarree()) self._gl = self._ax.gridlines(xlocs=range(0, 21, 5), ylocs=range(44, 59, 2), draw_labels=True) self._gl.xformatter = LONGITUDE_FORMATTER self._gl.yformatter = LATITUDE_FORMATTER self._draw_background() self._plot_stations(generators) + self._adjust_extent() plt.tight_layout() + def _adjust_extent(self): + import cartopy.crs as ccrs + + def diff(arr): + return arr[1] - arr[0], arr[3] - arr[2] + + def find_ratio(delta, reference=5): + return max(abs(reference / delta[0]), abs(reference / delta[1])) + + extent = self._ax.get_extent(crs=ccrs.PlateCarree()) + ratio = find_ratio(diff(extent)) + new_extent = extent + np.array([-1, 1, -1, 1]) * ratio + self._ax.set_extent(new_extent, crs=ccrs.PlateCarree()) + @TimeTrackingWrapper class PlotConditionalQuantiles(AbstractPlotClass): diff --git a/src/plotting/tracker_plot.py b/mlair/plotting/tracker_plot.py similarity index 99% rename from src/plotting/tracker_plot.py rename to mlair/plotting/tracker_plot.py index 20db5d9d9f22df548b1d499c4e8e0faa3fbfa1ee..406c32feb1ebda2d32d886051e32778d6c17f5db 100644 --- a/src/plotting/tracker_plot.py +++ b/mlair/plotting/tracker_plot.py @@ -4,7 +4,7 @@ import numpy as np import os from typing import Union, List, Optional, Dict -from src.helpers import to_list +from mlair.helpers import to_list from matplotlib import pyplot as plt, lines as mlines, ticker as ticker from matplotlib.patches import Rectangle diff --git a/src/plotting/training_monitoring.py b/mlair/plotting/training_monitoring.py similarity index 98% rename from src/plotting/training_monitoring.py rename to 
mlair/plotting/training_monitoring.py index 473b966ce52ee7e2885bc14beef2e68b8835b15e..913c11dd8a4e0d23c2bde6864c12f17c65922644 100644 --- a/src/plotting/training_monitoring.py +++ b/mlair/plotting/training_monitoring.py @@ -10,7 +10,7 @@ import matplotlib import matplotlib.pyplot as plt import pandas as pd -from src.model_modules.keras_extensions import LearningRateDecay +from mlair.model_modules.keras_extensions import LearningRateDecay matplotlib.use('Agg') history_object = Union[Dict, keras.callbacks.History] diff --git a/src/run_modules/README.md b/mlair/run_modules/README.md similarity index 100% rename from src/run_modules/README.md rename to mlair/run_modules/README.md diff --git a/mlair/run_modules/__init__.py b/mlair/run_modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ba38d3e90fb5d66c4129f6645ef34b8137e48375 --- /dev/null +++ b/mlair/run_modules/__init__.py @@ -0,0 +1,7 @@ +from mlair.run_modules.experiment_setup import ExperimentSetup +from mlair.run_modules.model_setup import ModelSetup +from mlair.run_modules.partition_check import PartitionCheck +from mlair.run_modules.post_processing import PostProcessing +from mlair.run_modules.pre_processing import PreProcessing +from mlair.run_modules.run_environment import RunEnvironment +from mlair.run_modules.training import Training diff --git a/src/run_modules/experiment_setup.py b/mlair/run_modules/experiment_setup.py similarity index 95% rename from src/run_modules/experiment_setup.py rename to mlair/run_modules/experiment_setup.py index 6fec871327d218cb2f42b84c518807c558c9c53d..9b74c473c3c1bf33efdbff3a8f38ee482250cbed 100644 --- a/src/run_modules/experiment_setup.py +++ b/mlair/run_modules/experiment_setup.py @@ -6,21 +6,21 @@ import logging import os from typing import Union, Dict, Any, List -from src.configuration import path_config -from src import helpers -from src.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, 
DEFAULT_STATION_TYPE, \ +from mlair.configuration import path_config +from mlair import helpers +from mlair.configuration.defaults import DEFAULT_STATIONS, DEFAULT_VAR_ALL_DICT, DEFAULT_NETWORK, DEFAULT_STATION_TYPE, \ DEFAULT_START, DEFAULT_END, DEFAULT_WINDOW_HISTORY_SIZE, DEFAULT_OVERWRITE_LOCAL_DATA, DEFAULT_TRANSFORMATION, \ DEFAULT_HPC_LOGIN_LIST, DEFAULT_HPC_HOST_LIST, DEFAULT_CREATE_NEW_MODEL, DEFAULT_TRAINABLE, \ DEFAULT_FRACTION_OF_TRAINING, DEFAULT_EXTREME_VALUES, DEFAULT_EXTREMES_ON_RIGHT_TAIL_ONLY, DEFAULT_PERMUTE_DATA, \ DEFAULT_BATCH_SIZE, DEFAULT_EPOCHS, DEFAULT_TARGET_VAR, DEFAULT_TARGET_DIM, DEFAULT_WINDOW_LEAD_TIME, \ - DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATE_DIM, DEFAULT_INTERPOLATE_METHOD, DEFAULT_LIMIT_NAN_FILL, \ + DEFAULT_DIMENSIONS, DEFAULT_INTERPOLATION_DIM, DEFAULT_INTERPOLATION_METHOD, DEFAULT_LIMIT_NAN_FILL, \ DEFAULT_TRAIN_START, DEFAULT_TRAIN_END, DEFAULT_TRAIN_MIN_LENGTH, DEFAULT_VAL_START, DEFAULT_VAL_END, \ DEFAULT_VAL_MIN_LENGTH, DEFAULT_TEST_START, DEFAULT_TEST_END, DEFAULT_TEST_MIN_LENGTH, DEFAULT_TRAIN_VAL_MIN_LENGTH, \ DEFAULT_USE_ALL_STATIONS_ON_ALL_DATA_SETS, DEFAULT_EVALUATE_BOOTSTRAPS, DEFAULT_CREATE_NEW_BOOTSTRAPS, \ DEFAULT_NUMBER_OF_BOOTSTRAPS, DEFAULT_PLOT_LIST -from src.data_handler.advanced_data_handler import DefaultDataPreparation -from src.run_modules.run_environment import RunEnvironment -from src.model_modules.model_class import MyLittleModel as VanillaModel +from mlair.data_handler.advanced_data_handler import DefaultDataPreparation +from mlair.run_modules.run_environment import RunEnvironment +from mlair.model_modules.model_class import MyLittleModel as VanillaModel class ExperimentSetup(RunEnvironment): @@ -66,8 +66,8 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default={'new_index': ['datetime', 'Stations']}) - self._set_param("interpolate_dim", interpolate_dim, default='datetime') - self._set_param("interpolate_method", interpolate_method, 
default='linear') + self._set_param("interpolation_dim", interpolation_dim, default='datetime') + self._set_param("interpolation_method", interpolation_method, default='linear') self._set_param("limit_nan_fill", limit_nan_fill, default=1) # train set parameters @@ -140,8 +140,8 @@ class ExperimentSetup(RunEnvironment): :param window_lead_time: number of time steps to predict by model (default 3). Time steps `t_0+1` to `t_0+w` are predicted. :param dimensions: - :param interpolate_dim: - :param interpolate_method: + :param interpolation_dim: + :param interpolation_method: :param limit_nan_fill: :param train_start: :param train_end: @@ -220,8 +220,8 @@ class ExperimentSetup(RunEnvironment): target_dim=None, window_lead_time: int = None, dimensions=None, - interpolate_dim=None, - interpolate_method=None, + interpolation_dim=None, + interpolation_method=None, limit_nan_fill=None, train_start=None, train_end=None, val_start=None, val_end=None, test_start=None, test_end=None, use_all_stations_on_all_data_sets=None, trainable: bool = None, fraction_of_train: float = None, experiment_path=None, plot_path: str = None, forecast_path: str = None, overwrite_local_data = None, sampling: str = "daily", @@ -309,8 +309,8 @@ class ExperimentSetup(RunEnvironment): # interpolation self._set_param("dimensions", dimensions, default=DEFAULT_DIMENSIONS) - self._set_param("interpolate_dim", interpolate_dim, default=DEFAULT_INTERPOLATE_DIM) - self._set_param("interpolate_method", interpolate_method, default=DEFAULT_INTERPOLATE_METHOD) + self._set_param("interpolation_dim", interpolation_dim, default=DEFAULT_INTERPOLATION_DIM) + self._set_param("interpolation_method", interpolation_method, default=DEFAULT_INTERPOLATION_METHOD) self._set_param("limit_nan_fill", limit_nan_fill, default=DEFAULT_LIMIT_NAN_FILL) # train set parameters diff --git a/src/run_modules/model_setup.py b/mlair/run_modules/model_setup.py similarity index 77% rename from src/run_modules/model_setup.py rename to 
mlair/run_modules/model_setup.py index ea6199c9f0371334083604b6b1637542a9b26f7e..8a2951ef336680b9515b1f6538ab3190ef61035c 100644 --- a/src/run_modules/model_setup.py +++ b/mlair/run_modules/model_setup.py @@ -5,12 +5,15 @@ __date__ = '2019-12-02' import logging import os +import re import keras +import pandas as pd import tensorflow as tf -from src.model_modules.keras_extensions import HistoryAdvanced, CallbackHandler -from src.run_modules.run_environment import RunEnvironment +from mlair.model_modules.keras_extensions import HistoryAdvanced, CallbackHandler +from mlair.run_modules.run_environment import RunEnvironment +from mlair.configuration import path_config class ModelSetup(RunEnvironment): @@ -86,6 +89,9 @@ class ModelSetup(RunEnvironment): # compile model self.compile_model() + # report settings + self.report_model() + def _set_shapes(self): """Set input and output shapes from train collection.""" shape = list(map(lambda x: x.shape[1:], self.data_store.get("data_collection", "train")[0].get_X())) @@ -147,3 +153,30 @@ class ModelSetup(RunEnvironment): with tf.device("/cpu:0"): file_name = f"{self.model_name.rsplit('.', 1)[0]}.pdf" keras.utils.plot_model(self.model, to_file=file_name, show_shapes=True, show_layer_names=True) + + def report_model(self): + model_settings = self.model.get_settings() + model_settings.update(self.model.compile_options) + df = pd.DataFrame(columns=["model setting"]) + for k, v in model_settings.items(): + if v is None: + continue + if isinstance(v, list): + v = ",".join(self._clean_name(str(u)) for u in v) + if "<" in str(v): + v = self._clean_name(str(v)) + df.loc[k] = v + df.sort_index(inplace=True) + column_format = "ll" + path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(path) + df.to_latex(os.path.join(path, "model_settings.tex"), na_rep='---', column_format=column_format) + df.to_markdown(open(os.path.join(path, "model_settings.md"), mode="w", encoding='utf-8'), + 
tablefmt="github") + + @staticmethod + def _clean_name(orig_name: str): + mod_name = re.sub(r'^{0}'.format(re.escape("<")), '', orig_name).replace("'", "").split(" ") + mod_name = mod_name[1] if any(map(lambda x: x in mod_name[0], ["class", "function", "method"])) else mod_name[0] + return mod_name[:-1] if mod_name[-1] == ">" else mod_name + diff --git a/src/run_modules/partition_check.py b/mlair/run_modules/partition_check.py similarity index 93% rename from src/run_modules/partition_check.py rename to mlair/run_modules/partition_check.py index 8f4c703e6b94f11905121d93c44dd8bf583abdec..c45f350079756282fbb43a1732d256c960f9e274 100644 --- a/src/run_modules/partition_check.py +++ b/mlair/run_modules/partition_check.py @@ -1,7 +1,7 @@ __author__ = "Felix Kleinert" __date__ = '2020-04-07' -from src.run_modules.run_environment import RunEnvironment +from mlair.run_modules.run_environment import RunEnvironment class PartitionCheck(RunEnvironment): diff --git a/src/run_modules/post_processing.py b/mlair/run_modules/post_processing.py similarity index 97% rename from src/run_modules/post_processing.py rename to mlair/run_modules/post_processing.py index eca3cebbb9a0152faec932c99ebaa5029d9029c3..c781d593d9bf8d8747ebc823fc15038c083ac81a 100644 --- a/src/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -13,14 +13,14 @@ import numpy as np import pandas as pd import xarray as xr -from src.data_handler import BootStraps, KerasIterator -from src.helpers.datastore import NameNotFoundInDataStore -from src.helpers import TimeTracking, statistics, extract_value -from src.model_modules.linear_model import OrdinaryLeastSquaredModel -from src.model_modules.model_class import AbstractModelClass -from src.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \ +from mlair.data_handler import BootStraps, KerasIterator +from mlair.helpers.datastore import NameNotFoundInDataStore +from mlair.helpers import 
TimeTracking, statistics, extract_value +from mlair.model_modules.linear_model import OrdinaryLeastSquaredModel +from mlair.model_modules.model_class import AbstractModelClass +from mlair.plotting.postprocessing_plotting import PlotMonthlySummary, PlotStationMap, PlotClimatologicalSkillScore, \ PlotCompetitiveSkillScore, PlotTimeSeries, PlotBootstrapSkillScore, PlotAvailability, PlotConditionalQuantiles -from src.run_modules.run_environment import RunEnvironment +from mlair.run_modules.run_environment import RunEnvironment class PostProcessing(RunEnvironment): @@ -264,7 +264,7 @@ class PostProcessing(RunEnvironment): path = self.data_store.get("forecast_path") plot_list = self.data_store.get("plot_list", "postprocessing") - time_dimension = self.data_store.get("interpolate_dim") + time_dimension = self.data_store.get("interpolation_dim") if self.bootstrap_skill_scores is not None and "PlotBootstrapSkillScore" in plot_list: PlotBootstrapSkillScore(self.bootstrap_skill_scores, plot_folder=self.plot_path, model_setup="CNN") @@ -317,7 +317,7 @@ class PostProcessing(RunEnvironment): be found inside `forecast_path`. 
""" logging.debug("start make_prediction") - time_dimension = self.data_store.get("interpolate_dim") + time_dimension = self.data_store.get("interpolation_dim") for i, data in enumerate(self.test_data): input_data = data.get_X() target_data = data.get_Y(as_numpy=False) diff --git a/src/run_modules/pre_processing.py b/mlair/run_modules/pre_processing.py similarity index 97% rename from src/run_modules/pre_processing.py rename to mlair/run_modules/pre_processing.py index 4b6de8253a58ce0b65184ae506f198a8a6b17aad..6b8c0f3c7003b194265308d580ba0f2b4df76df1 100644 --- a/src/run_modules/pre_processing.py +++ b/mlair/run_modules/pre_processing.py @@ -10,13 +10,13 @@ from typing import Tuple, Dict, List import numpy as np import pandas as pd -from src.data_handler import DataCollection -from src.helpers import TimeTracking -from src.configuration import path_config -from src.helpers.join import EmptyQueryResult -from src.run_modules.run_environment import RunEnvironment +from mlair.data_handler import DataCollection +from mlair.helpers import TimeTracking +from mlair.configuration import path_config +from mlair.helpers.join import EmptyQueryResult +from mlair.run_modules.run_environment import RunEnvironment -DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolate_dim", "target_dim", "target_var"] +DEFAULT_ARGS_LIST = ["data_path", "stations", "variables", "interpolation_dim", "target_dim", "target_var"] DEFAULT_KWARGS_LIST = ["limit_nan_fill", "window_history_size", "window_lead_time", "statistics_per_var", "min_length", "station_type", "overwrite_local_data", "start", "end", "sampling", "transformation", "extreme_values", "extremes_on_right_tail_only", "network", "data_preparation"] @@ -203,8 +203,8 @@ class PreProcessing(RunEnvironment): loading time are logged in debug mode. :param args: Dictionary with required parameters for DataGenerator class (`data_path`, `network`, `stations`, - `variables`, `interpolate_dim`, `target_dim`, `target_var`). 
- :param kwargs: positional parameters for the DataGenerator class (e.g. `start`, `interpolate_method`, + `variables`, `interpolation_dim`, `target_dim`, `target_var`). + :param kwargs: keyword parameters for the DataGenerator class (e.g. `start`, `interpolation_method`, `window_lead_time`). :param all_stations: All stations to check. :param name: name to display in the logging info message diff --git a/src/run_modules/run_environment.py b/mlair/run_modules/run_environment.py similarity index 86% rename from src/run_modules/run_environment.py rename to mlair/run_modules/run_environment.py index 45d0a4a019b305d477838bd9ec4c5b6f920ac6fb..ecb55282f25c369d6f5eddd81907a7d28ec7d62b 100644 --- a/src/run_modules/run_environment.py +++ b/mlair/run_modules/run_environment.py @@ -9,11 +9,11 @@ import os import shutil import time -from src.helpers.datastore import DataStoreByScope as DataStoreObject -from src.helpers.datastore import NameNotFoundInDataStore -from src.helpers import Logger -from src.helpers import TimeTracking -from src.plotting.tracker_plot import TrackPlot +from mlair.helpers.datastore import DataStoreByScope as DataStoreObject +from mlair.helpers.datastore import NameNotFoundInDataStore +from mlair.helpers import Logger +from mlair.helpers import TimeTracking +from mlair.plotting.tracker_plot import TrackPlot class RunEnvironment(object): @@ -88,12 +88,16 @@ class RunEnvironment(object): # set data store and logger (both are mutable!)
del_by_exit = False - data_store = DataStoreObject() - logger = Logger() + data_store = None + logger = None tracker_list = [] def __init__(self): """Start time tracking automatically and logs as info.""" + if RunEnvironment.data_store is None: + RunEnvironment.data_store = DataStoreObject() + if RunEnvironment.logger is None: + RunEnvironment.logger = Logger() self.time = TimeTracking() logging.info(f"{self.__class__.__name__} started") # atexit.register(self.__del__) @@ -117,7 +121,7 @@ class RunEnvironment(object): try: self.__plot_tracking() self.__save_tracking() - self.__copy_log_file() + self.__move_log_file() except FileNotFoundError: pass self.data_store.clear_data_store() @@ -132,11 +136,15 @@ class RunEnvironment(object): logging.error(exc_val, exc_info=(exc_type, exc_val, exc_tb)) self.__del__() - def __copy_log_file(self): + def __move_log_file(self): try: new_file = self.__find_file_pattern("logging_%03i.log") - logging.info(f"Copy log file to {new_file}") - shutil.copyfile(self.logger.log_file, new_file) + logging.info(f"Move log file to {new_file}") + shutil.move(self.logger.log_file, new_file) + try: + os.rmdir(os.path.dirname(self.logger.log_file)) + except (OSError, FileNotFoundError): + pass except (NameNotFoundInDataStore, FileNotFoundError): pass diff --git a/src/run_modules/training.py b/mlair/run_modules/training.py similarity index 89% rename from src/run_modules/training.py rename to mlair/run_modules/training.py index 8148475c9350d7bf6965b576f4f91ad0eeeabace..f8909e15341f959455b1e8da0b0cb7502bdfa81b 100644 --- a/src/run_modules/training.py +++ b/mlair/run_modules/training.py @@ -11,10 +11,11 @@ from typing import Union import keras from keras.callbacks import Callback, History -from src.data_handler import KerasIterator -from src.model_modules.keras_extensions import CallbackHandler -from src.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate -from src.run_modules.run_environment import RunEnvironment +from 
mlair.data_handler import KerasIterator +from mlair.model_modules.keras_extensions import CallbackHandler +from mlair.plotting.training_monitoring import PlotModelHistory, PlotModelLearningRate +from mlair.run_modules.run_environment import RunEnvironment +from mlair.configuration import path_config class Training(RunEnvironment): @@ -82,6 +83,7 @@ class Training(RunEnvironment): if self._trainable: self.train() self.save_model() + self.report_training() else: logging.info("No training has started, because trainable parameter was false.") @@ -228,3 +230,20 @@ class Training(RunEnvironment): # plot learning rate if lr_sc: PlotModelLearningRate(filename=os.path.join(path, f"{name}_history_learning_rate.pdf"), lr_sc=lr_sc) + + def report_training(self): + data = {"mini batches": len(self.train_set), + "upsampling extremes": self.train_set.upsampling, + "shuffling": self.train_set.shuffle, + "created new model": self._create_new_model, + "epochs": self.epochs, + "batch size": self.batch_size} + import pandas as pd + df = pd.DataFrame.from_dict(data, orient="index", columns=["training setting"]) + df.sort_index(inplace=True) + column_format = "ll" + path = os.path.join(self.data_store.get("experiment_path"), "latex_report") + path_config.check_path_and_create(path) + df.to_latex(os.path.join(path, "training_settings.tex"), na_rep='---', column_format=column_format) + df.to_markdown(open(os.path.join(path, "training_settings.md"), mode="w", encoding='utf-8'), + tablefmt="github") \ No newline at end of file diff --git a/src/run.py b/mlair/run_script.py similarity index 97% rename from src/run.py rename to mlair/run_script.py index 1244c25d1b67d1f80b7da2b1e18210186ac3a9f0..a4451c6bda3cea1d6e1f433750984d1e40b583f0 100644 --- a/src/run.py +++ b/mlair/run_script.py @@ -1,7 +1,7 @@ __author__ = "Lukas Leufen" __date__ = '2020-06-29' -from src.workflows import DefaultWorkflow +from mlair.workflows import DefaultWorkflow import inspect diff --git a/mlair/workflows/__init__.py 
b/mlair/workflows/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..27c060f10975d86aa35c1f2d45e66966002ecd63 --- /dev/null +++ b/mlair/workflows/__init__.py @@ -0,0 +1,2 @@ +from mlair.workflows.abstract_workflow import Workflow +from mlair.workflows.default_workflow import DefaultWorkflow, DefaultWorkflowHPC \ No newline at end of file diff --git a/src/workflows/abstract_workflow.py b/mlair/workflows/abstract_workflow.py similarity index 97% rename from src/workflows/abstract_workflow.py rename to mlair/workflows/abstract_workflow.py index 350008eace4598567779228b1302a83c7375fd06..d3fe480fdfe09393fbf2051d8795735e9217a8ad 100644 --- a/src/workflows/abstract_workflow.py +++ b/mlair/workflows/abstract_workflow.py @@ -5,7 +5,7 @@ __date__ = '2020-06-26' from collections import OrderedDict -from src import RunEnvironment +from mlair import RunEnvironment class Workflow: diff --git a/src/workflows/default_workflow.py b/mlair/workflows/default_workflow.py similarity index 94% rename from src/workflows/default_workflow.py rename to mlair/workflows/default_workflow.py index bbad7428cb4ffa81e968420332caaaca7925fdc5..f42c0389d81f655fb0c8582a15e42acc853f757d 100644 --- a/src/workflows/default_workflow.py +++ b/mlair/workflows/default_workflow.py @@ -4,9 +4,9 @@ __author__ = "Lukas Leufen" __date__ = '2020-06-26' import inspect -from src.helpers import remove_items -from src.run_modules import ExperimentSetup, PreProcessing, PartitionCheck, ModelSetup, Training, PostProcessing -from src.workflows.abstract_workflow import Workflow +from mlair.helpers import remove_items +from mlair.run_modules import ExperimentSetup, PreProcessing, PartitionCheck, ModelSetup, Training, PostProcessing +from mlair.workflows.abstract_workflow import Workflow class DefaultWorkflow(Workflow): diff --git a/run.py b/run.py index a9d8190628e1692c4b2812d3c8790bccd6b1b589..15f30a7ee775948fa744832a464562cd40c3e460 100644 --- a/run.py +++ b/run.py @@ -2,7 +2,7 @@ __author__ = 
"Lukas Leufen" __date__ = '2020-06-29' import argparse -from src.workflows import DefaultWorkflow +from mlair.workflows import DefaultWorkflow def main(parser_args): diff --git a/run_HPC.py b/run_HPC.py index fc2ead406469f0a254f5819e43c1e0d3542bb8d9..d6dbb4dc61e88a1e139b3cbe549bc6a3f2f0ab8a 100644 --- a/run_HPC.py +++ b/run_HPC.py @@ -2,7 +2,7 @@ __author__ = "Lukas Leufen" __date__ = '2020-06-29' import argparse -from src.workflows import DefaultWorkflowHPC +from mlair.workflows import DefaultWorkflowHPC def main(parser_args): diff --git a/run_hourly.py b/run_hourly.py index 682988f6f730d02be713c074dd63fc732e2868dc..b831cf1e1ee733a3c652c6cea364013b44cf2c0d 100644 --- a/run_hourly.py +++ b/run_hourly.py @@ -3,7 +3,7 @@ __date__ = '2019-11-14' import argparse -from src.workflows import DefaultWorkflow +from mlair.workflows import DefaultWorkflow def main(parser_args): diff --git a/run_zam347.py b/run_zam347.py index 2d351a8925e67b0bdfc010e92a3937435e160b2f..9027bec807ad9beafcdac573a70aa32d34491034 100644 --- a/run_zam347.py +++ b/run_zam347.py @@ -5,13 +5,13 @@ import argparse import json import logging -from src.run_modules.experiment_setup import ExperimentSetup -from src.run_modules.model_setup import ModelSetup -from src.run_modules.post_processing import PostProcessing -from src.run_modules.pre_processing import PreProcessing -from src.run_modules.run_environment import RunEnvironment -from src.run_modules.training import Training -from src.workflows import DefaultWorkflowHPC +from mlair.run_modules.experiment_setup import ExperimentSetup +from mlair.run_modules.model_setup import ModelSetup +from mlair.run_modules.post_processing import PostProcessing +from mlair.run_modules.pre_processing import PreProcessing +from mlair.run_modules.run_environment import RunEnvironment +from mlair.run_modules.training import Training +from mlair.workflows import DefaultWorkflowHPC def load_stations(): diff --git a/setup.py b/setup.py index 
8e08e921f5fb728f7b1758e4bb385efc7d71c29b..f708febb5a70c957a91059d840a1f4e140ad35c0 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import setuptools -from src import __version__, __author__, __email__ +from mlair import __version__, __author__, __email__ with open("README.md", "r") as fh: @@ -9,7 +9,7 @@ with open("README.md", "r") as fh: setuptools.setup( - name="mlt", + name="mlair", version=__version__, author=__author__, author_email=__email__, @@ -17,8 +17,7 @@ setuptools.setup( long_description=long_description, long_description_content_type="text/markdown", url="https://gitlab.version.fz-juelich.de/toar/machinelearningtools", - package_dir={'': 'src'}, - packages=setuptools.find_packages(where="src"), + packages=setuptools.find_packages(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", # to be adjusted diff --git a/src/run_modules/__init__.py b/src/run_modules/__init__.py deleted file mode 100644 index 0c70ae4205ff38fdc876538c42c44ca0bc8cb9c0..0000000000000000000000000000000000000000 --- a/src/run_modules/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from src.run_modules.experiment_setup import ExperimentSetup -from src.run_modules.model_setup import ModelSetup -from src.run_modules.partition_check import PartitionCheck -from src.run_modules.post_processing import PostProcessing -from src.run_modules.pre_processing import PreProcessing -from src.run_modules.run_environment import RunEnvironment -from src.run_modules.training import Training diff --git a/src/workflows/__init__.py b/src/workflows/__init__.py deleted file mode 100644 index 57e514cd9ced32fbf1dbb290b1008deffcec52d3..0000000000000000000000000000000000000000 --- a/src/workflows/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from src.workflows.abstract_workflow import Workflow -from src.workflows.default_workflow import DefaultWorkflow, DefaultWorkflowHPC \ No newline at end of file diff --git a/test/test_configuration/test_path_config.py 
b/test/test_configuration/test_path_config.py index 055036ec7809a103ae2320203f262d4beebb01ef..b97763632922fc2aaffaf267cfbc76ff99e25b6f 100644 --- a/test/test_configuration/test_path_config.py +++ b/test/test_configuration/test_path_config.py @@ -4,9 +4,9 @@ import os import mock import pytest -from src.configuration import prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create, \ +from mlair.configuration import prepare_host, set_experiment_name, set_bootstrap_path, check_path_and_create, \ set_experiment_path, ROOT_PATH -from src.helpers import PyTestRegex +from mlair.helpers import PyTestRegex class TestPrepareHost: @@ -25,13 +25,11 @@ class TestPrepareHost: @mock.patch("socket.gethostname", return_value="NotExistingHostName") @mock.patch("getpass.getuser", return_value="zombie21") - def test_error_handling_unknown_host(self, mock_user, mock_host): - with pytest.raises(OSError) as e: - prepare_host() - assert "unknown host 'NotExistingHostName'" in e.value.args[0] + def test_prepare_host_unknown(self, mock_user, mock_host): + assert prepare_host() == os.path.join(os.path.abspath(os.getcwd()), 'data', 'daily') @mock.patch("getpass.getuser", return_value="zombie21") - @mock.patch("src.configuration.path_config.check_path_and_create", side_effect=PermissionError) + @mock.patch("mlair.configuration.path_config.check_path_and_create", side_effect=PermissionError) @mock.patch("os.path.exists", return_value=False) def test_error_handling(self, mock_path_exists, mock_cpath, mock_user): # if "runner-6HmDp9Qd-project-2411-concurrent" not in platform.node(): diff --git a/test/test_data_handler/old_t_bootstraps.py b/test/test_data_handler/old_t_bootstraps.py index 90e446d6a6234a84e7d4dfc0d981365c87c93680..9616ed3f457d74e44e8a9eae5a3ed862fa804011 100644 --- a/test/test_data_handler/old_t_bootstraps.py +++ b/test/test_data_handler/old_t_bootstraps.py @@ -7,7 +7,7 @@ import numpy as np import pytest import xarray as xr -from src.data_handler.bootstraps 
import BootStraps +from mlair.data_handler.bootstraps import BootStraps from src.data_handler import DataPrepJoin @@ -73,7 +73,7 @@ class TestCreateShuffledData: return CreateShuffledData(orig_generator, 20, data_path) @pytest.fixture - @mock.patch("src.data_handling.bootstraps.CreateShuffledData.create_shuffled_data", return_value=None) + @mock.patch("mlair.data_handling.bootstraps.CreateShuffledData.create_shuffled_data", return_value=None) def shuffled_data_no_creation(self, mock_create_shuffle_data, orig_generator, data_path): return CreateShuffledData(orig_generator, 20, data_path) @@ -174,7 +174,7 @@ class TestBootStraps: return BootStraps(orig_generator, data_path, 20) @pytest.fixture - @mock.patch("src.data_handling.bootstraps.CreateShuffledData", return_value=None) + @mock.patch("mlair.data_handling.bootstraps.CreateShuffledData", return_value=None) def bootstrap_no_shuffling(self, mock_create_shuffle_data, orig_generator, data_path): shutil.rmtree(data_path) return BootStraps(orig_generator, data_path, 20) @@ -211,7 +211,7 @@ class TestBootStraps: assert xr.testing.assert_equal(gen.history, expected.sel(variables=var_others)) is None assert gen.shuffled.variables == "o3" - @mock.patch("src.data_handling.data_generator.DataGenerator._load_pickle_data", side_effect=FileNotFoundError) + @mock.patch("mlair.data_handling.data_generator.DataGenerator._load_pickle_data", side_effect=FileNotFoundError) def test_get_generator_different_generator(self, mock_load_pickle, data_path, orig_generator): BootStraps(orig_generator, data_path, 20) # to create orig_generator.window_history_size = 4 diff --git a/test/test_data_handler/old_t_data_generator.py b/test/test_data_handler/old_t_data_generator.py index 4d3939283802fdfd35a9fcc099c6751a99ee8dfb..cd2a849ec2d24af940fcf5731597cc8e9a16f517 100644 --- a/test/test_data_handler/old_t_data_generator.py +++ b/test/test_data_handler/old_t_data_generator.py @@ -6,8 +6,8 @@ import numpy as np import pytest import xarray as xr 
-from src.data_handler import DataPrepJoin -from src.helpers.join import EmptyQueryResult +from mlair.data_handler import DataPrepJoin +from mlair.helpers.join import EmptyQueryResult class TestDataGenerator: @@ -79,10 +79,10 @@ class TestDataGenerator: assert gen.stations == ['DEBW107'] assert gen.variables == ['o3', 'temp'] assert gen.station_type is None - assert gen.interpolate_dim == 'datetime' + assert gen.interpolation_dim == 'datetime' assert gen.target_dim == 'variables' assert gen.target_var == 'o3' - assert gen.interpolate_method == "linear" + assert gen.interpolation_method == "linear" assert gen.limit_nan_fill == 1 assert gen.window_history_size == 7 assert gen.window_lead_time == 4 @@ -92,7 +92,7 @@ class TestDataGenerator: def test_repr(self, gen): path = os.path.join(os.path.dirname(__file__), 'data') assert gen.__repr__().rstrip() == f"DataGenerator(path='{path}', stations=['DEBW107'], " \ - f"variables=['o3', 'temp'], station_type=None, interpolate_dim='datetime', " \ + f"variables=['o3', 'temp'], station_type=None, interpolation_dim='datetime', " \ f"target_dim='variables', target_var='o3', **{{'start': 2010, 'end': 2014}})" \ .rstrip() diff --git a/test/test_data_handler/old_t_data_preparation.py b/test/test_data_handler/old_t_data_preparation.py index 181d883ea7d6b6684042164c2aca1eb350acb8e0..586e17158a93880e2a98bf64189fa947299a64f3 100644 --- a/test/test_data_handler/old_t_data_preparation.py +++ b/test/test_data_handler/old_t_data_preparation.py @@ -8,9 +8,9 @@ import pandas as pd import pytest import xarray as xr -from src.data_handler.data_preparation import AbstractDataPrep -from src.data_handler import DataPrepJoin as DataPrep -from src.helpers.join import EmptyQueryResult +from mlair.data_handler.data_preparation import AbstractDataPrep +from mlair.data_handler import DataPrepJoin as DataPrep +from mlair.helpers.join import EmptyQueryResult class TestAbstractDataPrep: diff --git a/test/test_data_handler/test_iterator.py
b/test/test_data_handler/test_iterator.py index a166df9b2b2f53a878cf823895a8b00439bab574..ff81fc7b89b2cede0f47cdf209e77e373cd0d656 100644 --- a/test/test_data_handler/test_iterator.py +++ b/test/test_data_handler/test_iterator.py @@ -1,7 +1,7 @@ -from src.data_handler.iterator import DataCollection, StandardIterator, KerasIterator -from src.helpers.testing import PyTestAllEqual -from src.model_modules.model_class import MyLittleModel, MyBranchedModel +from mlair.data_handler.iterator import DataCollection, StandardIterator, KerasIterator +from mlair.helpers.testing import PyTestAllEqual +from mlair.model_modules.model_class import MyLittleModel, MyBranchedModel import numpy as np import pytest diff --git a/test/test_datastore.py b/test/test_datastore.py index 9aca1eef35927242df0b5f659eece716f81f6c13..662c90bf04e11b8b4ff9647506c1981c8883f30b 100644 --- a/test/test_datastore.py +++ b/test/test_datastore.py @@ -3,8 +3,8 @@ __date__ = '2019-11-22' import pytest -from src.helpers.datastore import AbstractDataStore, DataStoreByVariable, DataStoreByScope, CorrectScope -from src.helpers.datastore import NameNotFoundInDataStore, NameNotFoundInScope, EmptyScope +from mlair.helpers.datastore import AbstractDataStore, DataStoreByVariable, DataStoreByScope, CorrectScope +from mlair.helpers.datastore import NameNotFoundInDataStore, NameNotFoundInScope, EmptyScope class TestAbstractDataStore: diff --git a/test/test_helpers/test_helpers.py b/test/test_helpers/test_helpers.py index 49051e1017826dbe8b61053799f87d69595f441d..281d60e07463c6b5118f36714d80144443a03050 100644 --- a/test/test_helpers/test_helpers.py +++ b/test/test_helpers/test_helpers.py @@ -5,13 +5,14 @@ import datetime as dt import logging import math import time +import os import mock import pytest -from src.helpers import to_list, dict_to_xarray, float_round, remove_items -from src.helpers import PyTestRegex -from src.helpers import Logger, TimeTracking +from mlair.helpers import to_list, dict_to_xarray, float_round, 
remove_items +from mlair.helpers import PyTestRegex +from mlair.helpers import Logger, TimeTracking class TestToList: @@ -236,8 +237,8 @@ class TestLogger: def test_setup_logging_path_none(self): log_file = Logger.setup_logging_path(None) - assert PyTestRegex( - ".*mlair/logging/logging_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\.log") == log_file + test_regex = os.getcwd() + r"/logging/logging_\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}\.log" + assert PyTestRegex(test_regex) == log_file @mock.patch("os.makedirs", side_effect=None) def test_setup_logging_path_given(self, mock_makedirs): diff --git a/test/test_join.py b/test/test_join.py index 5adc013cfbd446c4feaf4a2b344f07d6f170077d..791723335e16cf2124512629414ebe626bc20e9c 100644 --- a/test/test_join.py +++ b/test/test_join.py @@ -2,9 +2,9 @@ from typing import Iterable import pytest -from src.helpers.join import * -from src.helpers.join import _save_to_pandas, _correct_stat_name, _lower_list -from src.configuration.join_settings import join_settings +from mlair.helpers.join import * +from mlair.helpers.join import _save_to_pandas, _correct_stat_name, _lower_list +from mlair.configuration.join_settings import join_settings class TestJoinUrlBase: diff --git a/test/test_model_modules/test_advanced_paddings.py b/test/test_model_modules/test_advanced_paddings.py index 8c7cae91ad12cc2b06ec82ba64f91c792a620756..8ca81c42c0b807b28c444badba8d92a255341eb4 100644 --- a/test/test_model_modules/test_advanced_paddings.py +++ b/test/test_model_modules/test_advanced_paddings.py @@ -1,7 +1,7 @@ import keras import pytest -from src.model_modules.advanced_paddings import * +from mlair.model_modules.advanced_paddings import * class TestPadUtils: diff --git a/test/test_model_modules/test_flatten_tail.py b/test/test_model_modules/test_flatten_tail.py index 0de138ec2323aea3409d5deadfb26c9741b89f50..623d51c07f6b27c8d6238d8a5189dea33837115e 100644 --- a/test/test_model_modules/test_flatten_tail.py +++ b/test/test_model_modules/test_flatten_tail.py @@ 
-1,6 +1,6 @@ import keras import pytest -from src.model_modules.flatten import flatten_tail, get_activation +from mlair.model_modules.flatten import flatten_tail, get_activation class TestGetActivation: diff --git a/test/test_model_modules/test_inception_model.py b/test/test_model_modules/test_inception_model.py index ca0126a44fa0f8ccd2ed2a7ea79c872c4731fea1..2dfc2c9c1c0510355216769b2ab83152a0a02118 100644 --- a/test/test_model_modules/test_inception_model.py +++ b/test/test_model_modules/test_inception_model.py @@ -1,9 +1,9 @@ import keras import pytest -from src.helpers import PyTestRegex -from src.model_modules.advanced_paddings import ReflectionPadding2D, SymmetricPadding2D -from src.model_modules.inception_model import InceptionModelBase +from mlair.helpers import PyTestRegex +from mlair.model_modules.advanced_paddings import ReflectionPadding2D, SymmetricPadding2D +from mlair.model_modules.inception_model import InceptionModelBase class TestInceptionModelBase: diff --git a/test/test_model_modules/test_keras_extensions.py b/test/test_model_modules/test_keras_extensions.py index 56c60ec43173e9fdd438214862603caba632bc65..78559ee0e54c725d242194133549d8b17699b729 100644 --- a/test/test_model_modules/test_keras_extensions.py +++ b/test/test_model_modules/test_keras_extensions.py @@ -4,8 +4,8 @@ import keras import mock import pytest -from src.model_modules.loss import l_p_loss -from src.model_modules.keras_extensions import * +from mlair.model_modules.loss import l_p_loss +from mlair.model_modules.keras_extensions import * class TestHistoryAdvanced: diff --git a/test/test_model_modules/test_loss.py b/test/test_model_modules/test_loss.py index c47f3f188a4b360bda08470fb00fd1d88a9f754c..e54e0b00de4a71d241f30e0b6b0c1a2e8fa1a19c 100644 --- a/test/test_model_modules/test_loss.py +++ b/test/test_model_modules/test_loss.py @@ -1,7 +1,7 @@ import keras import numpy as np -from src.model_modules.loss import l_p_loss +from mlair.model_modules.loss import l_p_loss class 
TestLoss: diff --git a/test/test_model_modules/test_model_class.py b/test/test_model_modules/test_model_class.py index f64ae62e56db69cf83921854dd4b36057cdc5c79..3e77fd17c4cd8151fe76816abf0bef323adb2e96 100644 --- a/test/test_model_modules/test_model_class.py +++ b/test/test_model_modules/test_model_class.py @@ -1,8 +1,8 @@ import keras import pytest -from src.model_modules.model_class import AbstractModelClass -from src.model_modules.model_class import MyPaperModel +from mlair.model_modules.model_class import AbstractModelClass +from mlair.model_modules.model_class import MyPaperModel class Paddings: diff --git a/test/test_plotting/test_tracker_plot.py b/test/test_plotting/test_tracker_plot.py index 9a92360a819c130c213d06b89a48a896e082adad..196879657452fe12238c990fc419cb0848c9ec9c 100644 --- a/test/test_plotting/test_tracker_plot.py +++ b/test/test_plotting/test_tracker_plot.py @@ -7,8 +7,8 @@ import shutil from matplotlib import pyplot as plt import numpy as np -from src.plotting.tracker_plot import TrackObject, TrackChain, TrackPlot -from src.helpers import PyTestAllEqual +from mlair.plotting.tracker_plot import TrackObject, TrackChain, TrackPlot +from mlair.helpers import PyTestAllEqual class TestTrackObject: diff --git a/test/test_plotting/test_training_monitoring.py b/test/test_plotting/test_training_monitoring.py index 6e5e0abbc5da0978e200f19019700c4dedd14ad0..18009bc19947bd3318c6f1d220d303c1efeec972 100644 --- a/test/test_plotting/test_training_monitoring.py +++ b/test/test_plotting/test_training_monitoring.py @@ -3,8 +3,8 @@ import os import keras import pytest -from src.model_modules.keras_extensions import LearningRateDecay -from src.plotting.training_monitoring import PlotModelLearningRate, PlotModelHistory +from mlair.model_modules.keras_extensions import LearningRateDecay +from mlair.plotting.training_monitoring import PlotModelLearningRate, PlotModelHistory @pytest.fixture diff --git a/test/test_run_modules/test_experiment_setup.py 
b/test/test_run_modules/test_experiment_setup.py index 5b7d517e658de6bd71e1b4190bb5114dc005216e..0f1f7a0cb918b4a1ab4e776fe9f9a563eb244149 100644 --- a/test/test_run_modules/test_experiment_setup.py +++ b/test/test_run_modules/test_experiment_setup.py @@ -4,9 +4,9 @@ import os import pytest -from src.helpers import TimeTracking -from src.configuration.path_config import prepare_host -from src.run_modules.experiment_setup import ExperimentSetup +from mlair.helpers import TimeTracking +from mlair.configuration.path_config import prepare_host +from mlair.run_modules.experiment_setup import ExperimentSetup class TestExperimentSetup: @@ -14,7 +14,7 @@ class TestExperimentSetup: @pytest.fixture def empty_obj(self, caplog): obj = object.__new__(ExperimentSetup) - obj.time = TimeTracking() + super(ExperimentSetup, obj).__init__() caplog.set_level(logging.DEBUG) return obj @@ -43,7 +43,7 @@ class TestExperimentSetup: assert data_store.get("fraction_of_training", "general") == 0.8 # set experiment name assert data_store.get("experiment_name", "general") == "TestExperiment_daily" - path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "TestExperiment_daily")) + path = os.path.abspath(os.path.join(os.getcwd(), "TestExperiment_daily")) assert data_store.get("experiment_path", "general") == path default_statistics_per_var = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', @@ -64,8 +64,8 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 3 # interpolation assert data_store.get("dimensions", "general") == {'new_index': ['datetime', 'Stations']} - assert data_store.get("interpolate_dim", "general") == "datetime" - assert data_store.get("interpolate_method", "general") == "linear" + assert data_store.get("interpolation_dim", "general") == "datetime" + assert data_store.get("interpolation_method", "general") == "linear" assert 
data_store.get("limit_nan_fill", "general") == 1 # train parameters assert data_store.get("start", "general.train") == "1997-01-01" @@ -93,7 +93,7 @@ class TestExperimentSetup: stations=['DEBY053', 'DEBW059', 'DEBW027'], network="INTERNET", station_type="background", variables=["o3", "temp"], start="1999-01-01", end="2001-01-01", window_history_size=4, target_var="relhum", target_dim="target", window_lead_time=10, dimensions="dim1", - interpolate_dim="int_dim", interpolate_method="cubic", limit_nan_fill=5, train_start="2000-01-01", + interpolation_dim="int_dim", interpolation_method="cubic", limit_nan_fill=5, train_start="2000-01-01", train_end="2000-01-02", val_start="2000-01-03", val_end="2000-01-04", test_start="2000-01-05", test_end="2000-01-06", use_all_stations_on_all_data_sets=False, trainable=False, fraction_of_train=0.5, experiment_path=experiment_path, create_new_model=True, val_min_length=20) @@ -125,8 +125,8 @@ class TestExperimentSetup: assert data_store.get("window_lead_time", "general") == 10 # interpolation assert data_store.get("dimensions", "general") == "dim1" - assert data_store.get("interpolate_dim", "general") == "int_dim" - assert data_store.get("interpolate_method", "general") == "cubic" + assert data_store.get("interpolation_dim", "general") == "int_dim" + assert data_store.get("interpolation_method", "general") == "cubic" assert data_store.get("limit_nan_fill", "general") == 5 # train parameters assert data_store.get("start", "general.train") == "2000-01-01" diff --git a/test/test_run_modules/test_model_setup.py b/test/test_run_modules/test_model_setup.py index c92236aecf71caec9fe60c19915583fb2cee9bbb..1b3e43b2bbfda44f1a5b5463e876adc578360ff3 100644 --- a/test/test_run_modules/test_model_setup.py +++ b/test/test_run_modules/test_model_setup.py @@ -4,13 +4,13 @@ import shutil import pytest -from src.data_handler import KerasIterator -from src.data_handler import DataCollection -from src.helpers.datastore import EmptyScope -from 
src.model_modules.keras_extensions import CallbackHandler -from src.model_modules.model_class import AbstractModelClass, MyLittleModel -from src.run_modules.model_setup import ModelSetup -from src.run_modules.run_environment import RunEnvironment +from mlair.data_handler import KerasIterator +from mlair.data_handler import DataCollection +from mlair.helpers.datastore import EmptyScope +from mlair.model_modules.keras_extensions import CallbackHandler +from mlair.model_modules.model_class import AbstractModelClass, MyLittleModel +from mlair.run_modules.model_setup import ModelSetup +from mlair.run_modules.run_environment import RunEnvironment class TestModelSetup: diff --git a/test/test_run_modules/test_partition_check.py b/test/test_run_modules/test_partition_check.py index b04e01d13e9e160553f8ff66af8d97f65aa24bf0..1e576a8ce47c98e395468b76d3496dafa3cc0525 100644 --- a/test/test_run_modules/test_partition_check.py +++ b/test/test_run_modules/test_partition_check.py @@ -2,10 +2,10 @@ import logging import pytest import mock -from src.run_modules.experiment_setup import ExperimentSetup -from src.run_modules.partition_check import PartitionCheck -from src.run_modules.run_environment import RunEnvironment -from src.configuration import get_host +from mlair.run_modules.experiment_setup import ExperimentSetup +from mlair.run_modules.partition_check import PartitionCheck +from mlair.run_modules.run_environment import RunEnvironment +from mlair.configuration import get_host class TestPartitionCheck: diff --git a/test/test_run_modules/test_pre_processing.py b/test/test_run_modules/test_pre_processing.py index d08e3302fd55b8708e964bc5873209cc7d2dbbde..a7d9b0f0274f35d280f2a611cb1102913e7220dd 100644 --- a/test/test_run_modules/test_pre_processing.py +++ b/test/test_run_modules/test_pre_processing.py @@ -2,12 +2,12 @@ import logging import pytest -from src.data_handler import DefaultDataPreparation, DataCollection, AbstractDataPreparation -from src.helpers.datastore import 
NameNotFoundInScope -from src.helpers import PyTestRegex -from src.run_modules.experiment_setup import ExperimentSetup -from src.run_modules.pre_processing import PreProcessing, DEFAULT_ARGS_LIST, DEFAULT_KWARGS_LIST -from src.run_modules.run_environment import RunEnvironment +from mlair.data_handler import DefaultDataPreparation, DataCollection, AbstractDataPreparation +from mlair.helpers.datastore import NameNotFoundInScope +from mlair.helpers import PyTestRegex +from mlair.run_modules.experiment_setup import ExperimentSetup +from mlair.run_modules.pre_processing import PreProcessing, DEFAULT_ARGS_LIST, DEFAULT_KWARGS_LIST +from mlair.run_modules.run_environment import RunEnvironment class TestPreProcessing: diff --git a/test/test_run_modules/test_run_environment.py b/test/test_run_modules/test_run_environment.py index 59bb8535c4dab44e646bd6bc4aa83a8553be4d26..aa385e32673c2bf58db3f5666b2f64076af0193f 100644 --- a/test/test_run_modules/test_run_environment.py +++ b/test/test_run_modules/test_run_environment.py @@ -1,7 +1,7 @@ import logging -from src.helpers import TimeTracking, PyTestRegex -from src.run_modules.run_environment import RunEnvironment +from mlair.helpers import TimeTracking, PyTestRegex +from mlair.run_modules.run_environment import RunEnvironment class TestRunEnvironment: diff --git a/test/test_run_modules/test_training.py b/test/test_run_modules/test_training.py index 5885accc87e9cd1b95cdfbd5c2a4dff65b3a2c18..fddcdfdd9d7ecc73052e0038c8e7692104b249e2 100644 --- a/test/test_run_modules/test_training.py +++ b/test/test_run_modules/test_training.py @@ -9,13 +9,13 @@ import mock import pytest from keras.callbacks import History -from src.data_handler import DataCollection, KerasIterator, DefaultDataPreparation -from src.helpers import PyTestRegex -from src.model_modules.flatten import flatten_tail -from src.model_modules.inception_model import InceptionModelBase -from src.model_modules.keras_extensions import LearningRateDecay, HistoryAdvanced, 
CallbackHandler -from src.run_modules.run_environment import RunEnvironment -from src.run_modules.training import Training +from mlair.data_handler import DataCollection, KerasIterator, DefaultDataPreparation +from mlair.helpers import PyTestRegex +from mlair.model_modules.flatten import flatten_tail +from mlair.model_modules.inception_model import InceptionModelBase +from mlair.model_modules.keras_extensions import LearningRateDecay, HistoryAdvanced, CallbackHandler +from mlair.run_modules.run_environment import RunEnvironment +from mlair.run_modules.training import Training def my_test_model(activation, window_history_size, channels, output_size, dropout_rate, add_minor_branch=False): @@ -74,6 +74,7 @@ class TestTraining: os.makedirs(path_plot) obj.data_store.set("plot_path", path_plot, "general") obj._trainable = True + obj._create_new_model = False yield obj if os.path.exists(path): shutil.rmtree(path) @@ -127,7 +128,7 @@ class TestTraining: data_prep = DefaultDataPreparation.build(['DEBW107'], data_path=os.path.join(os.path.dirname(__file__), 'data'), statistics_per_var=statistics_per_var, station_type="background", network="AIRBASE", sampling="daily", target_dim="variables", - target_var="o3", interpolate_dim="datetime", + target_var="o3", interpolation_dim="datetime", window_history_size=window_history_size, window_lead_time=window_lead_time, name_affix="train") return DataCollection([data_prep]) diff --git a/test/test_statistics.py b/test/test_statistics.py index 3da7a47871f6d92472de268d165d788c343ce394..d4a72674ae89ecd106ff1861aa6ee26567da3243 100644 --- a/test/test_statistics.py +++ b/test/test_statistics.py @@ -3,7 +3,7 @@ import pandas as pd import pytest import xarray as xr -from src.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, centre_apply, \ +from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, centre_apply, \ 
apply_inverse_transformation lazy = pytest.lazy_fixture