Data too big for memory
Bug
Error description
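MLAir fails because it is not able to allocate enough memory for an array. According to the traceback, the failure occurs while the data handler standardises the data during the transformation step.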
Error message
2021-08-20 15:31:20,863 - ERROR: Unable to allocate 178. MiB for an array with shape (3, 1, 96744, 1, 161) and data type float32 [run_environment.py:__exit__:137]
multiprocessing_on_dill.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/multiprocessing_on_dill/pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/default_data_handler.py", line 360, in f_proc
res = data_handler(station, **sp_keys)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_mixed_sampling.py", line 243, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_mixed_sampling.py", line 108, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_mixed_sampling.py", line 40, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_with_filter.py", line 368, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_with_filter.py", line 165, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_with_filter.py", line 51, in __init__
super().__init__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_single_station.py", line 109, in __init__
self.setup_samples()
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/helpers/time_tracking.py", line 40, in __call__
return self.__wrapped__(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_single_station.py", line 249, in setup_samples
self.call_transform()
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_with_filter.py", line 479, in call_transform
transformation_dim=self.target_dim)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_single_station.py", line 231, in transform
values, new_var_opts = locals()["f_apply" if _apply else "f"](data_var, **var_opts)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/data_handler/data_handler_single_station.py", line 188, in f
return statistics.standardise(data, dim)
File "/p/home/jusers/leufen1/hdfml/intelliaq/mlair_tmp/mlair/mlair/helpers/statistics.py", line 51, in standardise
return (data - data.mean(dim)) / data.std(dim), {"mean": data.mean(dim), "std": data.std(dim),
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/common.py", line 46, in wrapped_func
return self.reduce(func, dim, axis, skipna=skipna, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/dataarray.py", line 2518, in reduce
var = self.variable.reduce(func, dim, axis, keep_attrs, keepdims, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/variable.py", line 1631, in reduce
data = func(self.data, axis=axis, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/duck_array_ops.py", line 337, in f
return func(values, axis=axis, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/nanops.py", line 179, in nanstd
a, axis=axis, dtype=dtype, ddof=ddof
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/duck_array_ops.py", line 56, in f
return wrapped(*args, **kwargs)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/xarray/core/nputils.py", line 227, in f
result = getattr(npmodule, name)(values, axis=axis, **kwargs)
File "<__array_function__ internals>", line 6, in nanstd
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/numpy/lib/nanfunctions.py", line 1665, in nanstd
keepdims=keepdims)
File "<__array_function__ internals>", line 6, in nanvar
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/numpy/lib/nanfunctions.py", line 1512, in nanvar
arr, mask = _replace_nan(a, 0)
File "/p/home/jusers/leufen1/hdfml/intelliaq/demystify-temporal-components/venv_hdfml/lib/python3.6/site-packages/numpy/lib/nanfunctions.py", line 107, in _replace_nan
a = np.array(a, subok=True, copy=True)
numpy.core._exceptions._ArrayMemoryError: Unable to allocate 178. MiB for an array with shape (3, 1, 96744, 1, 161) and data type float32
"""
First guess on error origin
I have no idea why the data have grown that much, but we can try to remove data that are not required for further calculation.
Add a method to the data handler that removes parameters which have already been used (`_data`, `input_data`, `target_data`), followed by garbage collection.