Skip to content
Snippets Groups Projects

Resolve "release v1.4.0"

Merged Ghost User requested to merge release_v1.4.0 into master
4 files
+ 285
103
Compare changes
  • Side-by-side
  • Inline
Files
4
@@ -15,69 +15,156 @@ __date__ = '2020-02-07'
import os
from collections import Iterator, Iterable
from itertools import chain
from typing import Union, List
import numpy as np
import xarray as xr
from mlair.data_handler.abstract_data_handler import AbstractDataHandler
from mlair.helpers.helpers import to_list
class BootstrapIterator(Iterator):
    """
    Common base class of all bootstrap iterators.

    Subclasses have to implement `__next__` (produce one bootstrapped data set per call) and `create_collection`
    (enumerate everything that has to be bootstrapped). This base class only stores the shared state and offers
    helpers to convert and reshape the data and to apply the selected bootstrap method.
    """

    _position: int = None

    def __init__(self, data: "BootStraps", method):
        """
        Store the bootstrap container and build the collection to iterate over.

        :param data: the BootStraps instance this iterator belongs to
        :param method: object providing an `apply(data)` method that performs the actual bootstrapping
        """
        assert isinstance(data, BootStraps)
        self._data = data
        self._dimension = data.bootstrap_dimension
        self.boot_dim = "boots"  # name of the additional dimension that holds the bootstrap realisations
        self._method = method
        self._collection = self.create_collection(self._data.data, self._dimension)
        self._position = 0

    def __next__(self):
        """Return next element or stop iteration."""
        raise NotImplementedError

    @classmethod
    def create_collection(cls, data, dim):
        """
        Return the list of elements to iterate over (to be implemented by subclasses).

        :param data: data object stored in the BootStraps instance
        :param dim: name of the dimension to bootstrap along
        """
        raise NotImplementedError

    def _reshape(self, d):
        """
        Merge the last axis of an array into its first axis.

        The last axis is moved to the front and then flattened together with the former first axis, so an array of
        shape (n, ..., b) becomes (n * b, ...). Lists are processed element-wise (recursively).

        :param d: array or (nested) list of arrays
        :return: reshaped array or list of reshaped arrays
        """
        if isinstance(d, list):
            return [self._reshape(x) for x in d]
        shape = d.shape
        return np.rollaxis(d, -1, 0).reshape(shape[0] * shape[-1], *shape[1:-1])

    def _to_numpy(self, d):
        """
        Return the `.values` attribute of the given object, element-wise (recursively) for lists.

        :param d: object with a `.values` attribute or (nested) list of such objects
        :return: the extracted values with the same list structure as the input
        """
        if isinstance(d, list):
            return [self._to_numpy(x) for x in d]
        return d.values

    def apply_bootstrap_method(self, data: np.ndarray) -> Union[np.ndarray, List[np.ndarray]]:
        """
        Apply predefined bootstrap method from given data.

        If a list is given, the method is applied on the `.values` of each element separately.

        :param data: data to apply bootstrap method on (single array or list of objects with `.values`)
        :return: processed data as numpy array (or list of arrays if a list was given)
        """
        if isinstance(data, list):
            return [self.apply_bootstrap_method(x.values) for x in data]
        return self._method.apply(data)
class BootstrapIteratorSingleInput(BootstrapIterator):
    """
    Bootstrap iterator that manipulates exactly one variable of one input per step.

    The collection consists of tuples (index of input, label along the bootstrap dimension). Each call of
    `__next__` bootstraps only the selected variable of the selected input and leaves all other data untouched.
    """

    _position: int = None

    def __init__(self, *args):
        super().__init__(*args)

    def __next__(self):
        """
        Return next element or stop iteration.

        :return: reshaped inputs, reshaped targets and the tuple (input index, bootstrapped label)
        """
        # Only the collection lookup signals the end of the iteration. An IndexError raised during data processing
        # is a real error and must not be silently turned into StopIteration.
        try:
            index, dimension = self._collection[self._position]
        except IndexError:
            raise StopIteration from None
        nboot = self._data.number_of_bootstraps
        _X, _Y = self._data.data.get_data(as_numpy=False)
        # append a new trailing dimension that holds the bootstrap realisations
        _X = [x.expand_dims({self.boot_dim: range(nboot)}, axis=-1) for x in _X]
        _Y = _Y.expand_dims({self.boot_dim: range(nboot)}, axis=-1)
        single_variable = _X[index].sel({self._dimension: [dimension]})
        bootstrapped_variable = self.apply_bootstrap_method(single_variable.values)
        bootstrapped_data = xr.DataArray(bootstrapped_variable, coords=single_variable.coords,
                                         dims=single_variable.dims)
        # bootstrapped values take precedence, all remaining entries are filled from the original input
        _X[index] = bootstrapped_data.combine_first(_X[index]).reindex_like(_X[index])
        self._position += 1
        _X, _Y = self._to_numpy(_X), self._to_numpy(_Y)
        return self._reshape(_X), self._reshape(_Y), (index, dimension)

    @classmethod
    def create_collection(cls, data, dim):
        """
        Collect all combinations of input index and label along the given dimension.

        :param data: object providing `get_X(as_numpy=False)`; each returned element must offer `.indexes[dim]`
        :param dim: name of the dimension to bootstrap along
        :return: list of tuples (index of input, label)
        """
        return [(i, label) for i, x in enumerate(data.get_X(as_numpy=False)) for label in x.indexes[dim]]
class BootstrapIteratorVariable(BootstrapIterator):
    """
    Bootstrap iterator that manipulates one variable in all inputs at once.

    The collection consists of the labels found along the bootstrap dimension of the inputs. Each call of
    `__next__` selects the current label in every input and bootstraps it there.
    """

    def __init__(self, *args):
        super().__init__(*args)

    def __next__(self):
        """
        Return next element or stop iteration.

        :return: reshaped inputs, reshaped targets and the tuple (None, bootstrapped label)
        """
        # Only the collection lookup signals the end of the iteration. An IndexError raised during data processing
        # is a real error and must not be silently turned into StopIteration.
        try:
            dimension = self._collection[self._position]
        except IndexError:
            raise StopIteration from None
        nboot = self._data.number_of_bootstraps
        _X, _Y = self._data.data.get_data(as_numpy=False)
        # append a new trailing dimension that holds the bootstrap realisations
        _X = [x.expand_dims({self.boot_dim: range(nboot)}, axis=-1) for x in _X]
        _Y = _Y.expand_dims({self.boot_dim: range(nboot)}, axis=-1)
        for index, original in enumerate(_X):
            single_variable = original.sel({self._dimension: [dimension]})
            bootstrapped_variable = self.apply_bootstrap_method(single_variable.values)
            bootstrapped_data = xr.DataArray(bootstrapped_variable, coords=single_variable.coords,
                                             dims=single_variable.dims)
            # bootstrapped values take precedence; restore the dimension order of the original input afterwards
            _X[index] = bootstrapped_data.combine_first(original).transpose(*original.dims)
        self._position += 1
        _X, _Y = self._to_numpy(_X), self._to_numpy(_Y)
        return self._reshape(_X), self._reshape(_Y), (None, dimension)

    @classmethod
    def create_collection(cls, data, dim):
        """
        Collect the unique labels along the given dimension over all inputs.

        Labels are returned in order of first appearance, so the iteration order is reproducible between runs
        (a plain set would not guarantee this for string labels).

        :param data: object providing `get_X(as_numpy=False)`; each returned element must offer `.indexes[dim]`
        :param dim: name of the dimension to bootstrap along
        :return: list of unique labels
        """
        labels = {}
        for x in data.get_X(as_numpy=False):
            labels.update(dict.fromkeys(x.indexes[dim].to_list()))
        return list(labels)
class BootstrapIteratorBranch(BootstrapIterator):
    """
    Placeholder for an iterator that bootstraps an entire input branch at once (not implemented yet).

    `create_collection` is not overridden, so creating an instance already fails with the NotImplementedError
    raised by the base class.
    """

    def __init__(self, *args):
        super().__init__(*args)

    def __next__(self):
        """Return next element or stop iteration (not implemented yet)."""
        # TODO: implement here: permute entire branch at once
        # Raise explicitly: returning None here would never end an iteration.
        raise NotImplementedError
class ShuffleBootstraps:
    """Bootstrap method that redraws all values randomly with replacement."""

    @staticmethod
    def apply(data):
        """
        Draw randomly (with replacement) from all elements of the given data.

        :param data: numpy array to draw from
        :return: numpy array of the same shape as data, filled with randomly drawn elements of data
        """
        # flatten first: np.random.choice only accepts 1-dimensional input
        return np.random.choice(data.reshape(-1), size=data.shape)
class MeanBootstraps:
    """Bootstrap method that replaces all values by a single constant value."""

    def __init__(self, mean):
        """
        Set the constant value.

        :param mean: value that is used to fill the data
        """
        self._mean = mean

    def apply(self, data):
        """
        Return an array shaped like data with every element set to the stored value.

        :param data: numpy array that defines the shape of the result
        :return: numpy array of the same shape as data, filled with the stored value
        """
        return np.ones_like(data) * self._mean
class BootStraps(Iterable):
"""
Main class to perform bootstrap operations.
@@ -89,10 +176,19 @@ class BootStraps(Iterable):
this variable). The tuple is interesting if X consists of multiple input streams X_i (e.g. two or more stations)
because it shows which variable of which input X_i has been bootstrapped. All bootstrap combinations can be
retrieved by calling the .bootstraps() method. Furthermore, by calling the .get_orig_prediction() this class
imitates according to the set number of bootstraps the original prediction
imitates according to the set number of bootstraps the original prediction.
As bootstrap method, this class can currently make use of the ShuffleBootstraps class that uses drawing with
replacement to destroy the variable's information while keeping its statistical properties. Use
`bootstrap_method="shuffle"` to call this method. Another method is the zero mean bootstrapping triggered by
`bootstrap_method="zero_mean"` and performed by the MeanBootstraps class. This method destroys the variable's
information by a mode collapse to a constant value of zero. In case the variable is normalized with a zero mean,
this is equivalent to a mode collapse to the variable's mean value. Statistics in general are not conserved in this
case, but the mean value of course is. A custom mean value for bootstrapping is currently not supported.
"""
def __init__(self, data: AbstractDataHandler, number_of_bootstraps: int = 10,
             bootstrap_dimension: str = "variables", bootstrap_type="singleinput", bootstrap_method="shuffle"):
    """
    Create iterable class to be ready to iter.

    :param data: data handler whose data is bootstrapped
    :param number_of_bootstraps: the number of bootstrap realisations (only used for `bootstrap_method="shuffle"`,
        all other methods use a single realisation)
    :param bootstrap_dimension: name of the dimension to bootstrap along
    :param bootstrap_type: which iterator to use, one of "singleinput", "branch" or "variable"; any other value
        falls back to "singleinput"
    :param bootstrap_method: how to bootstrap, either "shuffle" or "zero_mean"; any other value results in `None`
        being stored as bootstrap method
    """
    self.data = data
    # every method except shuffling is stored with exactly one realisation
    self.number_of_bootstraps = number_of_bootstraps if bootstrap_method == "shuffle" else 1
    self.bootstrap_dimension = bootstrap_dimension
    self.bootstrap_method = {"shuffle": ShuffleBootstraps(),
                             "zero_mean": MeanBootstraps(mean=0)}.get(bootstrap_method)
    self.BootstrapIterator = {"singleinput": BootstrapIteratorSingleInput,
                              "branch": BootstrapIteratorBranch,
                              "variable": BootstrapIteratorVariable}.get(bootstrap_type,
                                                                         BootstrapIteratorSingleInput)
def __iter__(self):
    """Return a new iterator of the configured type, bound to this object and its bootstrap method."""
    return self.BootstrapIterator(self, self.bootstrap_method)
def __len__(self):
return len(self.bootstraps())
return len(self.BootstrapIterator.create_collection(self.data, self.bootstrap_dimension))
def bootstraps(self):
    """
    Return all bootstrap combinations.

    The collection is built by the configured iterator type along the configured bootstrap dimension.

    :return: collection as returned by the iterator's `create_collection`
    """
    return self.BootstrapIterator.create_collection(self.data, self.bootstrap_dimension)
def get_orig_prediction(self, path: str, file_name: str, prediction_name: str = "CNN") -> np.ndarray:
"""
Loading