diff --git a/src/data_handling/advanced_data_handling.py b/src/data_handling/advanced_data_handling.py index ac9534c605f5670736bc6a9a8211f8970c599488..6fb0c723f7af70941959bf46723c802ebb921139 100644 --- a/src/data_handling/advanced_data_handling.py +++ b/src/data_handling/advanced_data_handling.py @@ -82,13 +82,16 @@ class AbstractDataPreparation: def get_data(self, upsampling=False, as_numpy=False): return self.get_X(upsampling, as_numpy), self.get_Y(upsampling, as_numpy) + def get_coordinates(self): + return None, None + class DefaultDataPreparation(AbstractDataPreparation): _requirements = remove_items(inspect.getfullargspec(StationPrep).args, ["self", "station"]) def __init__(self, id_class, data_path, min_length=0, - extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False): + extreme_values: num_or_list = None, extremes_on_right_tail_only: bool = False, name_affix=None): super().__init__() self.id_class = id_class self.interpolate_dim = "datetime" @@ -97,7 +100,8 @@ class DefaultDataPreparation(AbstractDataPreparation): self._Y = None self._X_extreme = None self._Y_extreme = None - self._save_file = os.path.join(data_path, f"data_preparation_{str(self.id_class)}.pickle") + _name_affix = str(f"{str(self.id_class)}_{name_affix}" if name_affix is not None else id(self)) + self._save_file = os.path.join(data_path, f"data_preparation_{_name_affix}.pickle") self._collection = self._create_collection() self.harmonise_X() self.multiply_extremes(extreme_values, extremes_on_right_tail_only, dim=self.interpolate_dim) @@ -292,6 +296,9 @@ class DefaultDataPreparation(AbstractDataPreparation): std_estimated = std.mean("Stations") return {"scope": scope, "method": method, "mean": mean_estimated, "std": std_estimated} + def get_coordinates(self): + return self.id_class.get_coordinates() + def run_data_prep(): diff --git a/src/data_handling/bootstraps.py b/src/data_handling/bootstraps.py index 
8b0d052fda9611b13fd7cab1f408870942ed4993..4aaa3cba78239e8e160dc7198bb9756baf28bb23 100644 --- a/src/data_handling/bootstraps.py +++ b/src/data_handling/bootstraps.py @@ -14,371 +14,22 @@ __date__ = '2020-02-07' import logging import os -import re -from typing import List, Union, Pattern, Tuple +from collections.abc import Iterator, Iterable +from itertools import chain import dask.array as da -import keras import numpy as np import xarray as xr -from src import helpers -from src.data_handling.data_generator import DataGenerator - - -class BootStrapGenerator(keras.utils.Sequence): - """ - Generator that returns bootstrapped history objects for given boot index while iteration. - - generator for bootstraps as keras sequence inheritance. Initialise with number of boots, the original history, the - shuffled data, all used variables and the current shuffled variable. While iterating over this generator, it returns - the bootstrapped history for given boot index (this is the iterator index) in the same format like the original - history ready to use. Note, that in some cases some samples can contain nan values (in these cases the entire data - row is null, not only single entries). - """ - - def __init__(self, number_of_boots: int, history: xr.DataArray, shuffled: xr.DataArray, variables: List[str], - shuffled_variable: str): - """ - Set up the generator. - - :param number_of_boots: number of bootstrap realisations - :param history: original history (the ground truth) - :param shuffled: the shuffled history - :param variables: list with all variables of interest - :param shuffled_variable: name of the variable that shall be bootstrapped - """ - self.number_of_boots = number_of_boots - self.variables = variables - self.history_orig = history - self.history = history.sel(variables=helpers.remove_items(self.variables, shuffled_variable)) - self.shuffled = shuffled.sel(variables=shuffled_variable) - - def __len__(self) -> int: - """ - Return number of bootstraps. 
- - :return: number of bootstraps - """ - return self.number_of_boots - - def __getitem__(self, index: int) -> xr.DataArray: - """ - Return bootstrapped history for given bootstrap index in same index structure like the original history object. - - :param index: boot index e [0, nboots-1] - :return: bootstrapped history ready to use - """ - logging.debug(f"boot: {index}") - boot_hist = self.history.copy() - boot_hist = boot_hist.combine_first(self.__get_shuffled(index)) - return boot_hist.reindex_like(self.history_orig) - - def __get_shuffled(self, index: int) -> xr.DataArray: - """ - Return shuffled data for given boot index from shuffled attribute. - - :param index: boot index e [0, nboots-1] - :return: shuffled data - """ - shuffled_var = self.shuffled.sel(boots=index).expand_dims("variables").drop("boots") - return shuffled_var.transpose("datetime", "window", "Stations", "variables") - - -class CreateShuffledData: - """ - Verify and create shuffled data for all data contained in given data generator class. - - Starts automatically on initialisation, no further calls are required. Check and new creations are all performed - inside bootstrap_path. - """ - - def __init__(self, data, number_of_bootstraps: int, bootstrap_path: str): - """ - Shuffled data is automatically created in initialisation. - - :param data: data to shuffle - :param number_of_bootstraps: - :param bootstrap_path: Path to find and store the bootstraps - """ - self.data = data - self.number_of_bootstraps = number_of_bootstraps - self.bootstrap_path = bootstrap_path - self.create_shuffled_data() - - def create_shuffled_data(self) -> None: - """ - Create shuffled data. - - Use original test data, add dimension 'boots' with length number of bootstraps and insert randomly selected - variables. If there is a suitable local file for requested window size and number of bootstraps, no additional - file will be created inside this function. 
- """ - logging.info("create / check shuffled bootstrap data") - variables = ["o3", "temp"] - # window = self.data.window_history_size - window = 3 - for station in self.data: - variables = ["o3", "temp"] - window = 3 - valid, nboot, variables, window = self.valid_bootstrap_file(str(station), variables, window) - if not valid: - logging.info(f'create bootstap data for {station}') - hist = station.get_X(as_numpy=False) - file_path = self._set_file_path(station, variables, window, nboot) - hist = list(map(lambda x: x.expand_dims({'boots': range(nboot)}, axis=-1), hist)) - shuffled_variable = [] - chunks = (100, *hist.shape[1:3], hist.shape[-1]) - for i, var in enumerate(hist.coords['variables']): - single_variable = hist.sel(variables=var).values - shuffled_variable.append(self.shuffle(single_variable, chunks=chunks)) - shuffled_variable_da = da.stack(shuffled_variable, axis=-2).rechunk("auto") - shuffled_data = xr.DataArray(shuffled_variable_da, coords=hist.coords, dims=hist.dims) - shuffled_data.to_netcdf(file_path) - - def _set_file_path(self, station: str, variables: str, window: int, nboots: int) -> str: - """ - Set file name. - - Set file name following naming convention <station>_<var1>_<var2>_..._hist<window>_nboots<nboots>_shuffled.nc - and create joined path using bootstrap_path attribute set on initialisation. - - :param station: station name - :param variables: variables already preprocessed as single string with all variables seperated by underscore - :param window: window length - :param nboots: number of boots - :return: full file path - """ - file_name = f"{station}_{'_'.join(sorted(variables))}_hist{window}_nboots{nboots}_shuffled.nc" - return os.path.join(self.bootstrap_path, file_name) - - def valid_bootstrap_file(self, station: str, variables: str, window: int) -> [bool, Union[None, int]]: - """ - Compare local bootstrap file with given settings for station, variables, window and number of bootstraps. 
- - If a match was found, this method returns a tuple (True, None). In any other case, it returns (False, - max_nboot), where max_nboot is the highest boot number found in the local storage. A match is defined so that - the window length is ge than given window size form args and the number of boots is also ge than the given - number of boots from this class. Furthermore, this functions deletes local files, if the match the station - pattern but don't fit the window and bootstrap condition. This is performed, because it is assumed, that the - corresponding file will be created with a longer or at the least same window size and numbers of bootstraps. - - :param station: name of the station to validate - :param variables: all variables already merged in single string seperated by underscore - :param window: required window size - :return: tuple containing information if valid file was found first and second the number of boots that needs to - be used for the new boot creation (this is only relevant, if no valid file was found - otherwise the return - statement is anyway None). 
- """ - regex = re.compile(rf"{station}_(.*)_hist(\d+)_nboots(\d+)_shuffled") - max_nboot = self.number_of_bootstraps - max_variables = set(variables) - max_window = window - for file in os.listdir(self.bootstrap_path): - match = regex.match(file) - if match: - variable_file = set(match.group(1).split("_")) - window_file = int(match.group(2)) - nboot_file = int(match.group(3)) - max_nboot = max([max_nboot, nboot_file]) - max_variables = variable_file.union(variables) - max_window = max([max_window, window_file]) - if (window_file >= window) \ - and (nboot_file >= self.number_of_bootstraps) \ - and variable_file >= set(variables): - return True, None, None, None - else: - os.remove(os.path.join(self.bootstrap_path, file)) - return False, max_nboot, max_variables, max_window - - @staticmethod - def shuffle(data: da.array, chunks: Tuple) -> da.core.Array: - """ - Shuffle randomly from given data (draw elements with replacement). - - :param data: data to shuffle - :param chunks: chunk size for dask - :return: shuffled data as dask core array (not computed yet) - """ - size = data.shape - return da.random.choice(data.reshape(-1, ), size=size, chunks=chunks) - - -class BootStraps: - """ - Main class to perform bootstrap operations. - - This class requires a DataGenerator object and a path, where to find and store all data related to the bootstrap - operation. In initialisation, this class will automatically call the class CreateShuffleData to set up the shuffled - data sets. 
How to use BootStraps: - - * call .get_generator(<station>, <variable>) to get a generator for given station and variable combination that \ - iterates over all bootstrap realisations (as keras sequence) - * call .get_labels(<station>) to get the measured observations in the same format as bootstrap predictions - * call .get_bootstrap_predictions(<station>, <variable>) to get the bootstrapped predictions - * call .get_orig_prediction(<station>) to get the non-bootstrapped predictions (referred as original predictions) - """ - - def __init__(self, data: DataGenerator, bootstrap_path: str, number_of_bootstraps: int = 10): - """ - Automatically check and create (if needed) shuffled data on initialisation. - - :param data: a data generator object to get data / history - :param bootstrap_path: path to find and store the bootstrap data - :param number_of_bootstraps: the number of bootstrap realisations - """ - self.data = data - self.number_of_bootstraps = number_of_bootstraps - self.bootstrap_path = bootstrap_path - CreateShuffledData(data, number_of_bootstraps, bootstrap_path) # Todo: think about how to create the bootstrapped - # data inside the datapreparation class and not on top. get_X(bootstrapped=True) or get_bootstrapped_X. If this - # method is not implemented, skip bootstrapping analysis - - @property - def stations(self) -> List[str]: - """ - Station property inherits directly from data generator object. - - :return: list with all stations - """ - return self.data.stations - - @property - def variables(self) -> List[str]: - """ - Variables property inherits directly from data generator object. - - :return: list with all variables - """ - return self.data.variables - - @property - def window_history_size(self) -> int: - """ - Window history size property inherits directly from data generator object. 
- - :return: the window history size - """ - return self.data.window_history_size - - def get_generator(self, station: str, variable: str) -> BootStrapGenerator: - """ - Return the actual generator to use for the bootstrap evaluation. - - The generator requires information on station and bootstrapped variable. There is only a loop on the bootstrap - realisation and not on stations or variables. - - :param station: name of the station - :param variable: name of the variable to bootstrap - :return: BootStrapGenerator class ready to use. - """ - hist, _ = self.data[station] - shuffled_data = self._load_shuffled_data(station, self.variables).reindex_like(hist) - return BootStrapGenerator(self.number_of_bootstraps, hist, shuffled_data, self.variables, variable) - - def get_labels(self, station: str) -> np.ndarray: - """ - Repeat labels for given key by the number of boots and returns as single array. - - :param station: name of station - :return: repeated labels as single array - """ - labels = self.data[station][1] - return np.tile(labels.data, (self.number_of_bootstraps, 1)) - - def get_orig_prediction(self, path: str, file_name: str, prediction_name: str = "CNN") -> np.ndarray: - """ - Repeat predictions from given file(_name) in path by the number of boots. - - :param path: path to file - :param file_name: file name - :param prediction_name: name of the prediction to select from loaded file (default CNN) - :return: repeated predictions - """ - file = os.path.join(path, file_name) - prediction = xr.open_dataarray(file).sel(type=prediction_name).squeeze() - vals = np.tile(prediction.data, (self.number_of_bootstraps, 1)) - return vals[~np.isnan(vals).any(axis=1), :] - - def _load_shuffled_data(self, station: str, variables: List[str]) -> xr.DataArray: - """ - Load shuffled data from bootstrap path. 
+from src.data_handling.advanced_data_handling import AbstractDataPreparation - Data is stored as '<station>_<var1>_<var2>_..._hist<histsize>_nboots<nboots>_shuffled.nc', e.g. - 'DEBW107_cloudcover_no_no2_temp_u_v_hist13_nboots20_shuffled.nc' - - :param station: name of station - :param variables: list of variables - :return: shuffled data as xarray - """ - file_name = self._get_shuffled_data_file(station, variables) - shuffled_data = xr.open_dataarray(file_name, chunks=100) - return shuffled_data - - def _get_shuffled_data_file(self, station: str, variables: List[str]) -> str: - """ - Look for data file using regular expressions and returns found file or raise FileNotFoundError. - - :param station: name of station - :param variables: name of variables - :return: found file with complete path - """ - files = os.listdir(self.bootstrap_path) - regex = self._create_file_regex(station, variables) - file = self._filter_files(regex, files, self.window_history_size, self.number_of_bootstraps) - if file: - return os.path.join(self.bootstrap_path, file) - else: - raise FileNotFoundError(f"Could not find a file to match pattern {regex}") - - @staticmethod - def _create_file_regex(station: str, variables: List[str]) -> Pattern: - """ - Create regex for given station and variables. - - With this regex, it is possible to look for shuffled data with pattern: - `<station>(_<var>)*_hist(<hist>)_nboots(<nboots>)_shuffled.nc` - - :param station: station name to use as prefix - :param variables: variables to add after station - :return: compiled regular expression - """ - var_regex = "".join([rf"(_\w+)*_{v}(_\w+)*" for v in sorted(variables)]) - regex = re.compile(rf"{station}{var_regex}_hist(\d+)_nboots(\d+)_shuffled\.nc") - return regex - - @staticmethod - def _filter_files(regex: Pattern, files: List[str], window: int, nboot: int) -> Union[str, None]: - """ - Filter list of files by regex. 
- - Regex has to be structured to match the following string structure - `<station>(_<var>)*_hist(<hist>)_nboots(<nboots>)_shuffled.nc`. Hist and nboots values have to be included as - group. All matches are compared to given window and nboot parameters. A valid file must have the same value (or - larger) than these parameters and contain all variables. - - :param regex: compiled regular expression pattern following the style from method description - :param files: list of file names to filter - :param window: minimum length of window to look for - :param nboot: minimal number of boots to search - :return: matching file name or None, if no valid file was found - """ - for f in files: - match = regex.match(f) - if match: - last = match.lastindex - if (int(match.group(last - 1)) >= window) and (int(match.group(last)) >= nboot): - return f - -from collections import Iterator, Iterable -from itertools import chain class BootstrapIterator(Iterator): _position: int = None - def __init__(self, data: "BootStrapsNew"): - assert isinstance(data, BootStrapsNew) + def __init__(self, data: "BootStraps"): + assert isinstance(data, BootStraps) self._data = data self._dimension = data.bootstrap_dimension self._collection = self._data.bootstraps() @@ -429,33 +80,30 @@ class BootstrapIterator(Iterator): size = data.shape return np.random.choice(data.reshape(-1, ), size=size) -class BootStrapsNew(Iterable): + +class BootStraps(Iterable): """ Main class to perform bootstrap operations. - This class requires a DataGenerator object and a path, where to find and store all data related to the bootstrap - operation. In initialisation, this class will automatically call the class CreateShuffleData to set up the shuffled - data sets. How to use BootStraps: + This class requires a data handler following the definition of the AbstractDataPreparation, the number of bootstraps + to create and the dimension along this bootstrapping is performed (default dimension is `variables`). 
- * call .get_generator(<station>, <variable>) to get a generator for given station and variable combination that \ - iterates over all bootstrap realisations (as keras sequence) - * call .get_labels(<station>) to get the measured observations in the same format as bootstrap predictions - * call .get_bootstrap_predictions(<station>, <variable>) to get the bootstrapped predictions - * call .get_orig_prediction(<station>) to get the non-bootstrapped predictions (referred as original predictions) + When iterating on this class, it returns the bootstrapped X, Y and a tuple with (position of variable in X, name of + this variable). The tuple is interesting if X consists of multiple input streams X_i (e.g. two or more stations) + because it shows which variable of which input X_i has been bootstrapped. All bootstrap combinations can be + retrieved by calling the .bootstraps() method. Furthermore, by calling .get_orig_prediction(), this class + repeats the original prediction according to the set number of bootstraps. """ - from src.data_handling.advanced_data_handling import AbstractDataPreparation - def __init__(self, data: AbstractDataPreparation, bootstrap_path: str, number_of_bootstraps: int = 10, + def __init__(self, data: AbstractDataPreparation, number_of_bootstraps: int = 10, bootstrap_dimension: str = "variables"): """ - Automatically check and create (if needed) shuffled data on initialisation. + Create iterable class that is ready to iterate. 
:param data: a data generator object to get data / history - :param bootstrap_path: path to find and store the bootstrap data :param number_of_bootstraps: the number of bootstrap realisations """ self.data = data self.number_of_bootstraps = number_of_bootstraps - self.bootstrap_path = bootstrap_path self.bootstrap_dimension = bootstrap_dimension def __iter__(self): @@ -470,9 +118,6 @@ class BootStrapsNew(Iterable): l.append(list(map(lambda y: (i, y), x.indexes['variables']))) return list(chain(*l)) - - - def get_orig_prediction(self, path: str, file_name: str, prediction_name: str = "CNN") -> np.ndarray: """ Repeat predictions from given file(_name) in path by the number of boots. @@ -505,10 +150,9 @@ if __name__ == "__main__": PreProcessing() data = run_env.data_store.get("generator", "general.test") - path = run_env.data_store.get("bootstrap_path", "general") number_bootstraps = 10 - boots = BootStraps(data, path, number_bootstraps) + boots = BootStraps(data, number_bootstraps) for b in boots.boot_strap_generator(): a, c = b logging.info(f"len is {len(boots.get_boot_strap_meta())}") diff --git a/src/data_handling/data_preparation.py b/src/data_handling/data_preparation.py index 453a203cc80aa950e2d5d0097c6f9bd3c3b15a7d..bff3b9f12f11d481ea70a470a14795d7bce807b5 100644 --- a/src/data_handling/data_preparation.py +++ b/src/data_handling/data_preparation.py @@ -42,7 +42,8 @@ class StationPrep(AbstractStationPrep): def __init__(self, station, data_path, statistics_per_var, station_type, network, sampling, target_dim, target_var, interpolate_dim, window_history_size, window_lead_time, - overwrite_local_data: bool = False, transformation=None, **kwargs): + overwrite_local_data: bool = False, transformation=None, store_data_locally: bool = True, + min_length: int = 0, start=None, end=None, **kwargs): super().__init__() # path, station, statistics_per_var, transformation, **kwargs) self.station = helpers.to_list(station) self.path = os.path.abspath(data_path) @@ -58,6 
+59,10 @@ class StationPrep(AbstractStationPrep): self.window_history_size = window_history_size self.window_lead_time = window_lead_time self.overwrite_local_data = overwrite_local_data + self.store_data_locally = store_data_locally + self.min_length = min_length + self.start = start + self.end = end # internal self.data = None @@ -120,6 +125,10 @@ class StationPrep(AbstractStationPrep): def get_Y(self): return self.get_transposed_label() + def get_coordinates(self): + coords = self.meta.loc[["station_lon", "station_lat"]].astype(float) + return coords.rename(index={"station_lon": "lon", "station_lat": "lat"}).to_dict()[str(self)] + def call_transform(self, inverse=False): self.transform(dim=self.interpolate_dim, method=self.transformation["method"], mean=self.transformation['mean'], std=self.transformation["std"], @@ -158,7 +167,7 @@ class StationPrep(AbstractStationPrep): check_path_and_create(self.path) file_name = self._set_file_name() meta_file = self._set_meta_file_name() - if self.kwargs.get('overwrite_local_data', False): + if self.overwrite_local_data is True: logging.debug(f"overwrite_local_data is true, therefore reload {file_name}{source_name}") if os.path.exists(file_name): os.remove(file_name) @@ -201,7 +210,7 @@ class StationPrep(AbstractStationPrep): # convert df_all to xarray xarr = {k: xr.DataArray(v, dims=['datetime', 'variables']) for k, v in df_all.items()} xarr = xr.Dataset(xarr).to_array(dim='Stations') - if self.kwargs.get('store_data_locally', True): + if self.store_data_locally is True: # save locally as nc/csv file xarr.to_netcdf(path=file_name) meta.to_csv(meta_file) @@ -398,8 +407,7 @@ class StationPrep(AbstractStationPrep): intersect = reduce(np.intersect1d, (non_nan_history.coords[dim].values, non_nan_label.coords[dim].values, non_nan_observation.coords[dim].values)) - min_length = self.kwargs.get("min_length", 0) - if len(intersect) < max(min_length, 1): + if len(intersect) < max(self.min_length, 1): self.history = None self.label = 
None self.observation = None @@ -417,8 +425,8 @@ class StationPrep(AbstractStationPrep): :return: sliced data """ - start = self.kwargs.get('start', data.coords[coord][0].values) - end = self.kwargs.get('end', data.coords[coord][-1].values) + start = self.start if self.start is not None else data.coords[coord][0].values + end = self.end if self.end is not None else data.coords[coord][-1].values return self._slice(data, start, end, coord) @staticmethod diff --git a/src/data_handling/data_preparation_neighbors.py b/src/data_handling/data_preparation_neighbors.py index c38cdf84de276022530a76c1064c1d1595bdeafd..855ba3c04f455171a81f7dc4595f8b8c64409a87 100644 --- a/src/data_handling/data_preparation_neighbors.py +++ b/src/data_handling/data_preparation_neighbors.py @@ -37,6 +37,10 @@ class DataPreparationNeighbors(DefaultDataPreparation): def _create_collection(self): return [self.id_class] + self.neighbors + def get_coordinates(self, include_neighbors=False): + neighbors = list(map(lambda n: n.get_coordinates(), self.neighbors)) if include_neighbors is True else [] + return [super(DataPreparationNeighbors, self).get_coordinates(), *neighbors] + if __name__ == "__main__": diff --git a/src/plotting/postprocessing_plotting.py b/src/plotting/postprocessing_plotting.py index bb3b54dcaa8d598f9c550328e06e1da499150e28..7e282022e02f2d6fc2abef38e9ee190914d63956 100644 --- a/src/plotting/postprocessing_plotting.py +++ b/src/plotting/postprocessing_plotting.py @@ -237,12 +237,10 @@ class PlotStationMap(AbstractPlotClass): import cartopy.crs as ccrs if generators is not None: - for color, gen in generators.items(): - for k, v in enumerate(gen): - station_coords = gen.get_data_generator(k).meta.loc[['station_lon', 'station_lat']] - # station_names = gen.get_data_generator(k).meta.loc[['station_id']] - IDx, IDy = float(station_coords.loc['station_lon'].values), float( - station_coords.loc['station_lat'].values) + for color, data_collection in generators.items(): + for station 
in data_collection: + coords = station.get_coordinates() + IDx, IDy = coords["lon"], coords["lat"] self._ax.plot(IDx, IDy, mfc=color, mec='k', marker='s', markersize=6, transform=ccrs.PlateCarree()) def _plot(self, generators: Dict): @@ -772,8 +770,8 @@ class PlotTimeSeries: def _plot(self, plot_folder): pdf_pages = self._create_pdf_pages(plot_folder) - start, end = self._get_time_range(self._load_data(self._stations[0])) for pos, station in enumerate(self._stations): + start, end = self._get_time_range(self._load_data(self._stations[0])) data = self._load_data(station) fig, axes, factor = self._create_subplots(start, end) nan_list = [] diff --git a/src/run_modules/post_processing.py b/src/run_modules/post_processing.py index f088e535824b22169ba345ccc6fca32ce0fabad3..f63e92ba314c2c69a62210e70335e703217b0cb8 100644 --- a/src/run_modules/post_processing.py +++ b/src/run_modules/post_processing.py @@ -13,8 +13,7 @@ import numpy as np import pandas as pd import xarray as xr -from src.data_handling import BootStraps, Distributor, DataGenerator, DataPrepJoin, KerasIterator -from src.data_handling.bootstraps import BootStrapsNew +from src.data_handling import BootStraps, KerasIterator from src.helpers.datastore import NameNotFoundInDataStore from src.helpers import TimeTracking, statistics from src.model_modules.linear_model import OrdinaryLeastSquaredModel @@ -147,7 +146,7 @@ class PostProcessing(RunEnvironment): for station in self.test_data: logging.info(str(station)) X, Y = None, None - bootstraps = BootStrapsNew(station, bootstrap_path, number_of_bootstraps) + bootstraps = BootStraps(station, number_of_bootstraps) for boot in bootstraps: X, Y, (index, dimension) = boot # make bootstrap predictions @@ -185,9 +184,8 @@ class PostProcessing(RunEnvironment): forecast_path = self.data_store.get("forecast_path") window_lead_time = self.data_store.get("window_lead_time") number_of_bootstraps = self.data_store.get("number_of_bootstraps", "postprocessing") - # bootstraps = 
BootStraps(self.test_data, bootstrap_path, number_of_bootstraps) forecast_file = f"forecasts_norm_%s_test.nc" - bootstraps = BootStrapsNew(self.test_data[0], bootstrap_path, number_of_bootstraps).bootstraps() + bootstraps = BootStraps(self.test_data[0], number_of_bootstraps).bootstraps() skill_scores = statistics.SkillScores(None) score = {} for station in self.test_data: diff --git a/src/run_modules/pre_processing.py b/src/run_modules/pre_processing.py index 491b1530de3f935d5b8409e7f260da3276bc6aad..1b7124d0aded0b0316d2a3df4a6f34d410f8f54a 100644 --- a/src/run_modules/pre_processing.py +++ b/src/run_modules/pre_processing.py @@ -266,7 +266,7 @@ class PreProcessing(RunEnvironment): kwargs = self.data_store.create_args_dict(data_preparation.requirements(), scope=set_name) for station in set_stations: try: - dp = data_preparation.build(station, **kwargs) + dp = data_preparation.build(station, name_affix=set_name, **kwargs) collection.add(dp) valid_stations.append(station) except (AttributeError, EmptyQueryResult):