Commit 8d343f1b authored by Felix Kleinert's avatar Felix Kleinert

Merge branch 'issue333_feat_test-set-sample-uncertainty' of gitlab.jsc.fz-juelich.de:esde/machine-learning/mlair into issue333_feat_test-set-sample-uncertainty
parents 441714a6 4fa3bf86
7 merge requests: !353 add developments to release v1.5.0, !352 Resolve "release v1.5.0", !351 Lukas issue337 bug ci pipeline fails for docs, !350 Resolve "upgrade code to TensorFlow V2", !342 Include sample-uncertainty to wrf workflow, !337 Resolve "Test Set Sample Uncertainty in PostProcessing", !259 Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #80974 passed
@@ -135,8 +135,11 @@ class PostProcessing(RunEnvironment):
self.plot()
def estimate_sample_uncertainty(self, separate_ahead=False):
#todo: visualize
#todo: write results on disk
"""
Estimate sample uncertainty by using a bootstrap approach. Forecasts are split into individual blocks along
the time axis and randomly drawn with replacement. The resulting distribution of the error indicates the
robustness of each analyzed model and can be used to assess which model might be superior to others.
"""
n_boots = self.data_store.get_default("n_boots", default=1000, scope="uncertainty_estimate")
block_length = self.data_store.get_default("block_length", default="1m")
evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True)
@@ -145,8 +148,37 @@ class PostProcessing(RunEnvironment):
self.uncertainty_estimate = statistics.create_n_bootstrap_realizations(
block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim,
dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots)
self.report_sample_uncertainty()
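# Illustrative sketch (not part of this commit) of the block-bootstrap idea used above, assuming the
# per-block errors are available as a plain numpy array; the actual workflow delegates this step to
# statistics.create_n_bootstrap_realizations, and the names below are assumptions for illustration only.
import numpy as np

def bootstrap_error_sketch(block_mse: np.ndarray, n_boots: int = 1000, seed: int = 0) -> np.ndarray:
    """Return n_boots bootstrap realizations of the mean block error."""
    rng = np.random.default_rng(seed)
    n_blocks = block_mse.shape[0]
    realizations = np.empty(n_boots)
    for i in range(n_boots):
        # draw n_blocks blocks with replacement and average their errors
        idx = rng.integers(0, n_blocks, size=n_blocks)
        realizations[i] = block_mse[idx].mean()
    return realizations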
def report_sample_uncertainty(self, percentiles: list = None):
"""
Store raw results of the uncertainty estimate, calculate aggregate statistics, and store them as raw data as
well as markdown and latex.
"""
report_path = os.path.join(self.data_store.get("experiment_path"), "latex_report")
path_config.check_path_and_create(report_path)
# store raw results as nc
file_name = os.path.join(report_path, "uncertainty_estimate_raw_results.nc")
self.uncertainty_estimate.to_netcdf(path=file_name)
# store statistics
if percentiles is None:
percentiles = [.05, .1, .25, .5, .75, .9, .95]
df_descr = self.uncertainty_estimate.to_pandas().describe(percentiles=percentiles).astype("float32")
column_format = tables.create_column_format_for_tex(df_descr)
file_name = os.path.join(report_path, "uncertainty_estimate_statistics.%s")
tables.save_to_tex(report_path, file_name % "tex", column_format=column_format, df=df_descr)
tables.save_to_md(report_path, file_name % "md", df=df_descr)
df_descr.to_csv(file_name % "csv", sep=";")
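# Sketch (not part of this commit) of how the aggregate statistics above are derived for a plain
# DataFrame of bootstrap realizations; the real method applies pandas describe to
# self.uncertainty_estimate and writes the output via the tables helpers (save_to_tex / save_to_md).
import numpy as np
import pandas as pd

boots = pd.DataFrame({"model": np.random.rand(1000), "competitor": np.random.rand(1000)})
percentiles = [.05, .1, .25, .5, .75, .9, .95]
df_descr = boots.describe(percentiles=percentiles).astype("float32")
df_descr.to_csv("uncertainty_estimate_statistics.csv", sep=";")  # raw csv, analogous to the code above
print(df_descr.to_latex())  # latex view, analogous to tables.save_to_tex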
def calculate_block_mse(self, evaluate_competitors=True, separate_ahead=False, block_length="1m"):
"""
Transform data into blocks along the time axis. Block length can be any frequency like '1m' or '7d'. Data are
only split along the time axis, which means that a single block can be very diverse regarding the number of
stations or the amount of actual data contained. This is intended to analyze the robustness not only over time
but also against the number of observations and the diversity of stations.
"""
path = self.data_store.get("forecast_path")
all_stations = self.data_store.get("stations")
start = self.data_store.get("start", "test")
@@ -155,13 +187,11 @@ class PostProcessing(RunEnvironment):
coll_dim = "station"
collector = []
for station in all_stations:
# test data
external_data = self._get_external_data(station, path)
if external_data is not None:
pass
# load competitors
if evaluate_competitors is True:
competitor = self.load_competitors(station)
combined = self._combine_forecasts(external_data, competitor, dim=self.model_type_dim)
@@ -185,9 +215,7 @@ class PostProcessing(RunEnvironment):
return mse_blocks
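# Sketch (not part of this commit) of the block-wise error aggregation described in the docstring:
# squared errors are grouped into time blocks of a given frequency and averaged per block. The
# dimension name "index" and the toy data are assumptions for illustration only.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2010-01-01", periods=365, freq="D")
squared_errors = xr.DataArray(np.random.rand(365), coords={"index": time}, dims="index")
block_mse_sketch = squared_errors.resample(index="7D").mean()  # one mean error value per 7-day block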
def create_error_array(self, data):
"""
Calculate squared error of all given time series in relation to observation.
"""
"""Calculate squared error of all given time series in relation to observation."""
errors = data.drop_sel({self.model_type_dim: self.observation_indicator})
errors1 = errors - data.sel({self.model_type_dim: self.observation_indicator})
errors2 = errors1 ** 2
......
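# Minimal usage sketch (not part of this commit) of the squared-error computation in create_error_array
# on toy data; the label names "type" and "obs" are assumptions standing in for self.model_type_dim and
# self.observation_indicator.
import numpy as np
import xarray as xr

data = xr.DataArray(np.random.rand(3, 10),
                    coords={"type": ["obs", "model", "competitor"]},
                    dims=("type", "index"))
errors = data.drop_sel({"type": "obs"})             # keep only the forecast-like entries
errors = (errors - data.sel({"type": "obs"})) ** 2  # squared error of each forecast vs. observation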