diff --git a/mlair/data_handler/iterator.py b/mlair/data_handler/iterator.py index cedb06409a99bf66f4a3a1e695de6059bcd9e143..7aae1837e490d116b8dbeae31c5aeb6b459f9287 100644 --- a/mlair/data_handler/iterator.py +++ b/mlair/data_handler/iterator.py @@ -125,41 +125,6 @@ class KerasIterator(keras.utils.Sequence): """Concatenate two lists of data along axis=0.""" return list(map(lambda *_args: np.concatenate(_args, axis=0), *args)) - def _prepare_batches_orig(self): - """ - Prepare all batches as locally stored files. - - Walk through all elements of collection and split (or merge) data according to the batch size. Too long data - sets are divided into multiple batches. Not fully filled batches are merged with data from the next collection - element. If data is remaining after the last element, it is saved as smaller batch. All batches are enumerated - beginning from 0. A list with all batch numbers is stored in class's parameter indexes. - """ - index = 0 - remaining = None - mod_rank = self._get_model_rank() - for data in self._collection: - logging.debug(f"prepare batches for {str(data)}") - X, _Y = data.get_data(upsampling=self.upsampling) - Y = [_Y[0] for _ in range(mod_rank)] - if self.upsampling: - X, Y = _permute_data(X, Y) - if remaining is not None: - X, Y = self._concatenate(X, remaining[0]), self._concatenate(Y, remaining[1]) - length = X[0].shape[0] - batches = _get_number_of_mini_batches(length, self.batch_size) - for b in range(batches): - batch_X, batch_Y = _get_batch(X, b, self.batch_size), _get_batch(Y, b, self.batch_size) - _save_to_pickle(self._path, X=batch_X, Y=batch_Y, index=index) - index += 1 - if (batches * self.batch_size) < length: # keep remaining to concatenate with next data element - remaining = (_get_batch(X, batches, self.batch_size), _get_batch(Y, batches, self.batch_size)) - else: - remaining = None - if remaining is not None: # add remaining as smaller batch - _save_to_pickle(self._path, X=remaining[0], Y=remaining[1], index=index) - index += 1 - self.indexes = np.arange(0, index).tolist() - def _prepare_batches(self, use_multiprocessing=False, max_process=1) -> None: """ Prepare all batches as locally stored files. @@ -174,7 +139,6 @@ class KerasIterator(keras.utils.Sequence): index = 0 remaining = [] mod_rank = self._get_model_rank() - # max_process = 12 n_process = min([psutil.cpu_count(logical=False), len(self._collection), max_process]) # use only physical cpus if n_process > 1 and use_multiprocessing is True: # parallel solution pool = multiprocessing.Pool(n_process) diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py index 6b3895f3538379ab0a5faed87de072a711c17d5f..1500cdab23fca3058bffc838b6855fd0b3455f3d 100644 --- a/mlair/run_modules/post_processing.py +++ b/mlair/run_modules/post_processing.py @@ -349,7 +349,8 @@ class PostProcessing(RunEnvironment): return d[..., pos] # forecast - with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})"): + with TimeTracking(name=f"{inspect.stack()[0].function} ({bootstrap_type}, {bootstrap_method})", + log_on_enter=True): # extract all requirements from data store number_of_bootstraps = self.data_store.get("n_boots", "feature_importance") dims = [self.uncertainty_estimate_boot_dim, self.index_dim, self.ahead_dim, self.model_type_dim]