Skip to content
Snippets Groups Projects
Commit b7034ea6 authored by lukas leufen's avatar lukas leufen
Browse files

keep only the fastest bootstrap method

parent 2c4cd969
No related branches found
No related tags found
3 merge requests!90WIP: new release update,!89Resolve "release branch / CI on gpu",!61Resolve "REFAC: clean-up bootstrap workflow"
Pipeline #32372 passed
......@@ -54,23 +54,20 @@ class PostProcessing(RunEnvironment):
# bootstraps
if self.data_store.get("evaluate_bootstraps", "general.postprocessing"):
with TimeTracking(name="split (refac_1): create_boot_straps_refac_2()"):
bootstrap_path = self.data_store.get("bootstrap_path", "general.postprocessing")
number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")
BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
with TimeTracking(name="split (refac_1): create_boot_straps_refac_2()"):
self.create_boot_straps_refac_2()
self.create_boot_straps()
self.bootstrap_skill_scores = self.calculate_bootstrap_skill_scores()
with TimeTracking(name="merged: combined_boot_forecast_and_skill()"):
self.bootstrap_skill_scores = self.combined_boot_forecast_and_skill()
# skill scores
# self.skill_scores = self.calculate_skill_scores()
self.skill_scores = self.calculate_skill_scores()
# plotting
# self.plot()
self.plot()
def create_boot_straps_refac_2(self):
def create_boot_straps(self):
# forecast
with TimeTracking(name="boot predictions"):
bootstrap_path = self.data_store.get("bootstrap_path", "general")
......@@ -142,62 +139,6 @@ class PostProcessing(RunEnvironment):
score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
return score
def combined_boot_forecast_and_skill(self):
    """
    Create bootstrap predictions and calculate their skill scores in one pass.

    For each station of the bootstrap set, the true labels and the original forecast (read from
    ``forecasts_norm_<station>_test.nc`` inside the forecast path) are wrapped into DataArrays with dims
    ``("index", "ahead", "type")``. For each bootstrapped variable the model then predicts on the station/variable
    generator, the predictions belonging to this combination are selected, merged with labels and original
    forecast, and scored against the reference ``"orig"`` separately for every lead time.

    All settings (``bootstrap_path``, ``forecast_path``, ``window_lead_time``, ``number_of_bootstraps``) are read
    from the data store; the method takes no arguments.

    :return: dictionary with one entry per station; each value is a DataArray with dims ``("boot_var", "ahead")``
        built from a frame that has one row per bootstrapped variable and one column per lead time
    """
    with TimeTracking(name="boot predictions"):
        # extract all requirements from the data store
        bootstrap_path = self.data_store.get("bootstrap_path", "general")
        forecast_path = self.data_store.get("forecast_path", "general")
        window_lead_time = self.data_store.get("window_lead_time", "general")
        number_of_bootstraps = self.data_store.get("number_of_bootstraps", "general.postprocessing")

        bootstraps = BootStraps(self.test_data, bootstrap_path, number_of_bootstraps)
        skill_scores = statistics.SkillScores(None)

        # loop invariants: lead times are counted from 1, all arrays share the same dimension names
        lead_times = range(1, window_lead_time + 1)
        dims = ["index", "ahead", "type"]

        score = {}
        for station in bootstraps.stations:
            with TimeTracking(name=station):
                logging.info(station)

                # true labels of this station, extended by a trailing "type" axis of length 1
                # (this method itself does not write them to file)
                labels = bootstraps.get_labels(station)
                labels = labels.reshape((*labels.shape, 1))
                label_coords = (range(labels.shape[0]), range(1, labels.shape[1] + 1), ["obs"])
                labels = xr.DataArray(labels, coords=label_coords, dims=dims)

                # original (not bootstrapped) forecast, forced into the same shape as the labels
                shape = labels.shape
                orig = bootstraps.get_orig_prediction(forecast_path, f"forecasts_norm_{station}_test.nc")
                orig = orig.reshape(shape)
                orig_coords = (range(shape[0]), range(1, shape[1] + 1), ["orig"])
                orig = xr.DataArray(orig, coords=orig_coords, dims=dims)

                skill = pd.DataFrame(columns=lead_times)
                for var in bootstraps.variables:
                    # only the generator and its number of steps are needed, the first two items are unused
                    _, _, station_bootstrap, steps = bootstraps.get_generator_station_var_wise(station, var)

                    # make bootstrap predictions
                    bootstrap_predictions = self.model.predict_generator(generator=station_bootstrap(),
                                                                         steps=steps,
                                                                         use_multiprocessing=True)
                    # if the model returns a list of outputs, only the last one is used
                    if isinstance(bootstrap_predictions, list):
                        bootstrap_predictions = bootstrap_predictions[-1]

                    # get bootstrap prediction meta data and select the rows of this variable/station combination
                    bootstrap_meta = np.array(bootstraps.get_bootstrap_meta_station_var_wise(station, var))
                    ind = np.all(bootstrap_meta == [var, station], axis=1)
                    n_selected = int(ind.sum())
                    sel = bootstrap_predictions[ind].reshape((n_selected, window_lead_time, 1))
                    boot_coords = (range(n_selected), lead_times, [var])
                    boot_data = xr.DataArray(sel, coords=boot_coords, dims=dims)

                    # combine bootstrap predictions, labels and original forecast along the "type" axis
                    boot_data = boot_data.combine_first(labels).combine_first(orig)

                    # one skill score per lead time, bootstrapped variable against the original forecast
                    boot_scores = [
                        skill_scores.general_skill_score(boot_data.sel(ahead=ahead),
                                                         forecast_name=var,
                                                         reference_name="orig")
                        for ahead in lead_times
                    ]
                    skill.loc[var] = np.array(boot_scores)

                score[station] = xr.DataArray(skill, dims=["boot_var", "ahead"])
    return score
def _load_model(self):
try:
model = self.data_store.get("best_model", "general")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment