Skip to content
Snippets Groups Projects
Commit 9833edec authored by lukas leufen's avatar lukas leufen
Browse files

added 4th subset "train_val" that consists of the union of train and val. Is...

Added 4th subset "train_val" that consists of the union of train and val. It is needed for the external skill score calculation. Also added a simple MSE function (no need to install sklearn just for MSE).
parent 4a4ba794
No related branches found
No related tags found
2 merge requests: !37 "include new development", !27 "Lukas issue032 feat plotting postprocessing"
Pipeline #28653 passed with warnings
...@@ -95,6 +95,10 @@ class ExperimentSetup(RunEnvironment): ...@@ -95,6 +95,10 @@ class ExperimentSetup(RunEnvironment):
self._set_param("start", test_start, default="2010-01-01", scope="general.test") self._set_param("start", test_start, default="2010-01-01", scope="general.test")
self._set_param("end", test_end, default="2017-12-31", scope="general.test") self._set_param("end", test_end, default="2017-12-31", scope="general.test")
# train_val parameters
self._set_param("start", self.data_store.get("start", "general.train"), scope="general.train_val")
self._set_param("end", self.data_store.get("end", "general.val"), scope="general.train_val")
# use all stations on all data sets (train, val, test) # use all stations on all data sets (train, val, test)
self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True) self._set_param("use_all_stations_on_all_data_sets", use_all_stations_on_all_data_sets, default=True)
......
...@@ -56,26 +56,28 @@ class PreProcessing(RunEnvironment): ...@@ -56,26 +56,28 @@ class PreProcessing(RunEnvironment):
def split_train_val_test(self): def split_train_val_test(self):
fraction_of_training = self.data_store.get("fraction_of_training", "general") fraction_of_training = self.data_store.get("fraction_of_training", "general")
stations = self.data_store.get("stations", "general") stations = self.data_store.get("stations", "general")
train_index, val_index, test_index = self.split_set_indices(len(stations), fraction_of_training) train_index, val_index, test_index, train_val_index = self.split_set_indices(len(stations), fraction_of_training)
for (ind, scope) in zip([train_index, val_index, test_index], ["train", "val", "test"]): subset_names = ["train", "val", "test", "train_val"]
for (ind, scope) in zip([train_index, val_index, test_index, train_val_index], subset_names):
self.create_set_split(ind, scope) self.create_set_split(ind, scope)
@staticmethod @staticmethod
def split_set_indices(total_length: int, fraction: float) -> Tuple[slice, slice, slice]: def split_set_indices(total_length: int, fraction: float) -> Tuple[slice, slice, slice, slice]:
""" """
create the training, validation and test subset slice indices for given total_length. The test data consists on create the training, validation and test subset slice indices for given total_length. The test data consists on
(1-fraction) of total_length (fraction*len:end). Train and validation data therefore are made from fraction of (1-fraction) of total_length (fraction*len:end). Train and validation data therefore are made from fraction of
total_length (0:fraction*len). Train and validation data is split by the factor 0.8 for train and 0.2 for total_length (0:fraction*len). Train and validation data is split by the factor 0.8 for train and 0.2 for
validation. validation. In addition, split_set_indices returns also the combination of training and validation subset.
:param total_length: list with all objects to split :param total_length: list with all objects to split
:param fraction: ratio between test and union of train/val data :param fraction: ratio between test and union of train/val data
:return: slices for each subset in the order: train, val, test :return: slices for each subset in the order: train, val, test, train_val
""" """
pos_test_split = int(total_length * fraction) pos_test_split = int(total_length * fraction)
train_index = slice(0, int(pos_test_split * 0.8)) train_index = slice(0, int(pos_test_split * 0.8))
val_index = slice(int(pos_test_split * 0.8), pos_test_split) val_index = slice(int(pos_test_split * 0.8), pos_test_split)
test_index = slice(pos_test_split, total_length) test_index = slice(pos_test_split, total_length)
return train_index, val_index, test_index train_val_index = slice(0, pos_test_split)
return train_index, val_index, test_index, train_val_index
def create_set_split(self, index_list, set_name): def create_set_split(self, index_list, set_name):
scope = f"general.{set_name}" scope = f"general.{set_name}"
......
__author__ = 'Lukas Leufen' __author__ = 'Lukas Leufen'
__date__ = '2019-10-23' __date__ = '2019-10-23'
import numpy as np
import xarray as xr import xarray as xr
import pandas as pd import pandas as pd
from typing import Union, Tuple from typing import Union, Tuple
...@@ -70,3 +71,7 @@ def centre_inverse(data: Data, mean: Data) -> Data: ...@@ -70,3 +71,7 @@ def centre_inverse(data: Data, mean: Data) -> Data:
:return: :return:
""" """
return data + mean return data + mean
def mean_squared_error(a, b):
return np.square(a - b).mean()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment