diff --git a/README.md b/README.md index 6d07ecec3bd1ab76f6e29dc969e4339e5521593b..0e1df0561d15b743a85b0981b552a1444b6cc38c 100644 --- a/README.md +++ b/README.md @@ -518,3 +518,22 @@ add it to `src/join_settings.py` in the hourly data section. Replace the `TOAR_S value. To make sure, that this **sensitive** data is not uploaded to the remote server, use the following command to prevent git from tracking this file: `git update-index --assume-unchanged src/join_settings.py` + +## Known Issues + +### Problem with multiprocessing + +* CPython and Python's native multiprocessing can crash when the multiprocessing approach is used for preprocessing. This +is caused by an internal limit on the order of 2 GB. When using long periods and therefore very large data, +multiprocessing is not able to handle these data correctly: +```shell +File "mlair/mlair/run_modules/pre_processing.py", line X, in validate_station + dh, s = p.get() +File "multiprocessing/pool.py", line 644, in get + raise self._value +multiprocessing.pool.MaybeEncodingError: Error sending result: '(DEMV012, 'DEMV012')'. 
Reason: 'error("'i' format requires -2147483648 <= number <= 2147483647",)' +``` +* To solve this issue, either update your Python version to >=3.8 (warning: this version is not tested with MLAir) or +apply the patch from this commit +https://github.com/python/cpython/commit/bccacd19fa7b56dcf2fbfab15992b6b94ab6666b or the workaround proposed in this Stack Overflow thread +https://stackoverflow.com/questions/47776486/python-struct-error-i-format-requires-2147483648-number-2147483647 diff --git a/mlair/plotting/data_insight_plotting.py b/mlair/plotting/data_insight_plotting.py index 513f64f2c174d94cb7230b141387c9a850d678cb..335463454a3d3937cd93c739e63ab540f08ffd92 100644 --- a/mlair/plotting/data_insight_plotting.py +++ b/mlair/plotting/data_insight_plotting.py @@ -888,6 +888,8 @@ class PlotClimateFirFilter(AbstractPlotClass): from mlair.helpers.filter import fir_filter_convolve + logging.info(f"start PlotClimateFirFilter for ({name})") + # adjust default plot parameters rc_params = { 'axes.labelsize': 'large', diff --git a/run_climate_filter.py b/run_climate_filter.py new file mode 100755 index 0000000000000000000000000000000000000000..4aacab8817b2f6350de861ef383b4777790bc57c --- /dev/null +++ b/run_climate_filter.py @@ -0,0 +1,52 @@ +__author__ = "Lukas Leufen" +__date__ = '2019-11-14' + +import argparse + +from mlair.workflows import DefaultWorkflow +from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSamplingWithClimateFirFilter + +stats = {'o3': 'dma8eu', 'no': 'dma8eu', 'no2': 'dma8eu', + 'relhum': 'average_values', 'u': 'average_values', 'v': 'average_values', + 'cloudcover': 'average_values', 'pblheight': 'maximum', + 'temp': 'maximum'} +data_origin = {'o3': '', 'no': '', 'no2': '', + 'relhum': 'REA', 'u': 'REA', 'v': 'REA', + 'cloudcover': 'REA', 'pblheight': 'REA', + 'temp': 'REA'} + + +def main(parser_args): + args = dict(stations=["DEBW107", "DEBW013"], + network="UBA", + evaluate_bootstraps=False, plot_list=[], + data_origin=data_origin, 
data_handler=DataHandlerMixedSamplingWithClimateFirFilter, + interpolation_limit=(3, 1), overwrite_local_data=False, + lazy_preprocessing=True, + use_multiprocessing=True, + use_multiprocessing_on_debug=True, + sampling=("hourly", "daily"), + statistics_per_var=stats, + create_new_model=True, train_model=False, epochs=1, + window_history_size=6 * 24 + 16, + window_history_offset=16, + kz_filter_length=[100 * 24, 15 * 24], + kz_filter_iter=[4, 5], + filter_cutoff_period=[7, 0.8], + filter_order=[7, 2], + start="2006-01-01", + train_start="2006-01-01", + end="2011-12-31", + test_end="2011-12-31", + **parser_args.__dict__, + ) + workflow = DefaultWorkflow(**args, start_script=__file__) + workflow.run() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None, + help="set experiment date as string") + args = parser.parse_args(["--experiment_date", "testrun"]) + main(args)