Skip to content
Snippets Groups Projects
Commit 8d7ceb65 authored by lukas leufen
Browse files

can calculate accurate transformation

parent 63324afa
Branches
Tags
2 merge requests: !50 release for v0.7.0, !49 Lukas issue054 feat transformation on entire dataset
Pipeline #30936 passed
...@@ -4,6 +4,7 @@ __date__ = '2019-11-07' ...@@ -4,6 +4,7 @@ __date__ = '2019-11-07'
import os import os
from typing import Union, List, Tuple, Any, Dict from typing import Union, List, Tuple, Any, Dict
import dask.array as da
import keras import keras
import xarray as xr import xarray as xr
import pickle import pickle
...@@ -114,8 +115,24 @@ class DataGenerator(keras.utils.Sequence): ...@@ -114,8 +115,24 @@ class DataGenerator(keras.utils.Sequence):
return transformation return transformation
def calculate_accurate_transformation(self, method):
    """
    Calculate mean and standard deviation on the combined data of all stations.

    Each station in self.stations is loaded with DataPrep, wrapped lazily in a dask array and concatenated along
    axis 1. The statistics are then reduced over that axis with the nan-aware dask reductions, so that they are
    computed on the joined data instead of being averaged from per-station values.

    :param method: transformation method; "standardise" yields mean and std, "centre" yields only the mean, any
        other value yields neither
    :return: tuple (mean, std); each entry is an xarray.DataArray with the single dimension "variables" or None if
        it is not required by the method. Both entries are None if no station provides any data.
    """
    station_arrays = []
    mean = None
    std = None
    for station in self.stations:
        try:
            data = DataPrep(self.data_path, self.network, station, self.variables, station_type=self.station_type,
                            **self.kwargs)
        except EmptyQueryResult:
            # stations without any data cannot contribute to the statistics
            continue
        # NOTE(review): the chunking assumes a layout of (station, datetime, variables) - confirm against DataPrep
        chunks = (1, 100, data.data.shape[2])
        station_arrays.append(da.from_array(data.data.data, chunks=chunks))
    if not station_arrays:
        # da.concatenate raises on an empty sequence; without data there is nothing to calculate
        return mean, std
    stacked = da.concatenate(station_arrays, axis=1)
    # NOTE(review): labels assume that the variables axis of the data is ordered like sorted(self.variables)
    variables = sorted(self.variables)
    if method in ["standardise", "centre"]:
        mean = da.nanmean(stacked, axis=1).compute()
        mean = xr.DataArray(mean.flatten(), coords={"variables": variables}, dims=["variables"])
        if method == "standardise":
            std = da.nanstd(stacked, axis=1).compute()
            std = xr.DataArray(std.flatten(), coords={"variables": variables}, dims=["variables"])
    return mean, std
def calculate_estimated_transformation(self, method): def calculate_estimated_transformation(self, method):
...@@ -131,7 +148,7 @@ class DataGenerator(keras.utils.Sequence): ...@@ -131,7 +148,7 @@ class DataGenerator(keras.utils.Sequence):
data.transform("datetime", method=method, inverse=True) data.transform("datetime", method=method, inverse=True)
except EmptyQueryResult: except EmptyQueryResult:
continue continue
return mean.mean("Stations") if mean.shape[1] > 0 else "hi", std.mean("Stations") if std.shape[1] > 0 else None return mean.mean("Stations") if mean.shape[1] > 0 else None, std.mean("Stations") if std.shape[1] > 0 else None
def get_data_generator(self, key: Union[str, int] = None, local_tmp_storage: bool = True) -> DataPrep: def get_data_generator(self, key: Union[str, int] = None, local_tmp_storage: bool = True) -> DataPrep:
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment