From a9f84005b032b07013e3ff89faf67e76014e5022 Mon Sep 17 00:00:00 2001
From: leufen1 <l.leufen@fz-juelich.de>
Date: Fri, 20 Aug 2021 15:55:55 +0200
Subject: [PATCH] added cleanup, /close #321 when HPC system test works

---
 .../data_handler_single_station.py         |  9 ++++++
 mlair/data_handler/default_data_handler.py |  3 +--
 mlair/helpers/helpers.py                   | 32 +++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/mlair/data_handler/data_handler_single_station.py b/mlair/data_handler/data_handler_single_station.py
index 3392e416..8c22b42f 100644
--- a/mlair/data_handler/data_handler_single_station.py
+++ b/mlair/data_handler/data_handler_single_station.py
@@ -5,6 +5,8 @@ __date__ = '2020-07-20'
 
 import copy
 import datetime as dt
+import gc
+
 import dill
 import hashlib
 import logging
@@ -107,6 +109,13 @@ class DataHandlerSingleStation(AbstractDataHandler):
 
         # create samples
         self.setup_samples()
+        self.clean_up()
+
+    def clean_up(self):
+        self._data = None
+        self.input_data = None
+        self.target_data = None
+        gc.collect()
 
     def __str__(self):
         return self.station[0]
diff --git a/mlair/data_handler/default_data_handler.py b/mlair/data_handler/default_data_handler.py
index a17de954..3157248f 100644
--- a/mlair/data_handler/default_data_handler.py
+++ b/mlair/data_handler/default_data_handler.py
@@ -307,8 +307,7 @@ class DefaultDataHandler(AbstractDataHandler):
         n_process = min([psutil.cpu_count(logical=False), len(set_stations), max_process])  # use only physical cpus
         if n_process > 1 and kwargs.get("use_multiprocessing", True) is True:  # parallel solution
             logging.info("use parallel transformation approach")
-            pool = multiprocessing.Pool(
-                min([psutil.cpu_count(logical=False), len(set_stations), 16]))  # use only physical cpus
+            pool = multiprocessing.Pool(n_process)  # use only physical cpus
             logging.info(f"running {getattr(pool, '_processes')} processes in parallel")
             output = [
                 pool.apply_async(f_proc, args=(cls.data_handler_transformation, station), kwds=sp_keys)
diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py
index 5ddaa3ee..4cc7310d 100644
--- a/mlair/helpers/helpers.py
+++ b/mlair/helpers/helpers.py
@@ -4,6 +4,7 @@ __date__ = '2019-10-21'
 
 import inspect
 import math
+import sys
 
 import numpy as np
 import xarray as xr
@@ -179,3 +180,34 @@ def convert2xrda(arr: Union[xr.DataArray, xr.Dataset, np.ndarray, int, float],
             kwargs.update({'dims': dims, 'coords': coords})
 
     return xr.DataArray(arr, **kwargs)
+
+
+# def convert_size(size_bytes):
+#     if size_bytes == 0:
+#         return "0B"
+#     size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+#     i = int(math.floor(math.log(size_bytes, 1024)))
+#     p = math.pow(1024, i)
+#     s = round(size_bytes / p, 2)
+#     return "%s %s" % (s, size_name[i])
+#
+#
+# def get_size(obj, seen=None):
+#     """Recursively finds the size of objects."""
+#     size = sys.getsizeof(obj)
+#     if seen is None:
+#         seen = set()
+#     obj_id = id(obj)
+#     if obj_id in seen:
+#         return 0
+#     # Important: mark as seen *before* entering recursion to gracefully handle
+#     # self-referential objects
+#     seen.add(obj_id)
+#     if isinstance(obj, dict):
+#         size += sum([get_size(v, seen) for v in obj.values()])
+#         size += sum([get_size(k, seen) for k in obj.keys()])
+#     elif hasattr(obj, '__dict__'):
+#         size += get_size(obj.__dict__, seen)
+#     elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
+#         size += sum([get_size(i, seen) for i in obj])
+#     return size
-- 
GitLab
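
Note on the new clean_up() hook: once setup_samples() has finished, the raw copies
kept on the handler are no longer needed (that is the premise of this cleanup), and
with many DataHandlerSingleStation objects alive at once on the HPC system the
savings add up. Below is a minimal, self-contained sketch of the release-and-collect
pattern the patch introduces; ToyHandler and its payload are made up for
illustration and are not MLAir classes:

    import gc

    class ToyHandler:
        """Hypothetical stand-in for DataHandlerSingleStation."""

        def __init__(self):
            # stands in for the station data loaded while building samples
            self._data = list(range(1_000_000))
            self.input_data = self._data[:]
            self.target_data = self._data[:]
            self.clean_up()  # as in the patch: release right after setup

        def clean_up(self):
            # Drop the references, then trigger a collection so the memory is
            # returned now rather than at some later, unpredictable point.
            self._data = None
            self.input_data = None
            self.target_data = None
            gc.collect()

    handler = ToyHandler()  # survives as a light shell without the raw payload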
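
Note on the Pool sizing in default_data_handler.py: the old code recomputed the
worker bound inline with a hard-coded 16, so a smaller user-supplied max_process
was silently ignored; the patch reuses the already-computed n_process instead. A
condensed sketch of that sizing logic follows; make_pool and n_items are
hypothetical names, and the "or 1" fallback is added here only because
psutil.cpu_count(logical=False) can return None on some platforms:

    import multiprocessing

    import psutil

    def make_pool(n_items, max_process=16):
        # Bound the worker count by physical cores, the amount of work, and
        # the user-supplied cap, then build the pool from that same bound.
        physical = psutil.cpu_count(logical=False) or 1
        n_process = min([physical, n_items, max_process])
        return multiprocessing.Pool(n_process)

    if __name__ == "__main__":
        pool = make_pool(n_items=50, max_process=4)  # never more than 4 workers
        pool.close()
        pool.join()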
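
Note on the still-commented helpers in helpers.py: together they form a rough
recursive memory profiler, presumably used to check what clean_up() actually
frees. A runnable, lightly condensed version with a made-up demo object (byte
counts vary across Python versions):

    import math
    import sys

    def convert_size(size_bytes):
        # Render a byte count human-readably, e.g. 1536 -> "1.5 KB".
        if size_bytes == 0:
            return "0B"
        size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
        i = int(math.floor(math.log(size_bytes, 1024)))
        s = round(size_bytes / math.pow(1024, i), 2)
        return "%s %s" % (s, size_name[i])

    def get_size(obj, seen=None):
        """Recursively sum the size of an object and everything it references."""
        if seen is None:
            seen = set()
        if id(obj) in seen:  # mark-before-recurse guards against cycles
            return 0
        seen.add(id(obj))
        size = sys.getsizeof(obj)
        if isinstance(obj, dict):
            size += sum(get_size(k, seen) + get_size(v, seen) for k, v in obj.items())
        elif hasattr(obj, '__dict__'):
            size += get_size(obj.__dict__, seen)
        elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
            size += sum(get_size(i, seen) for i in obj)
        return size

    demo = {"station": "DEBW107", "values": list(range(10_000))}
    print(convert_size(get_size(demo)))  # a few hundred KB; exact value varies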