Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • esde/machine-learning/mlair
1 result
Select Git revision
Show changes
Commits on Source (5)
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
......@@ -114,6 +114,12 @@ class DataHandlerMixedSamplingWithFilterSingleStation(DataHandlerMixedSamplingSi
self._data = list(map(self.load_and_interpolate, [0, 1])) # load input (0) and target (1) data
self.set_inputs_and_targets()
self.apply_kz_filter()
# lazy data loading on first time if possible
# * store the kz data locally in data path under different folder /e.g. kzf_data
# * create a checksum for the name and reuse this data always if checksum fits (this will replace all previous
# steps and save a lot of computation time.
# lazy create of subsets by reusing as much as possible
# * start here when using preprocessed data, select new start and end
if self.do_transformation is True:
self.call_transform()
self.make_samples()
......@@ -204,7 +210,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
time_deltas = np.round(self.time_delta(self.cutoff_period)).astype(int)
start, end = window, 1
res = []
window_array = self.create_index_array(self.window_dim.range(start, end), squeeze_dim=self.target_dim)
window_array = self.create_index_array(self.window_dim, range(start, end), squeeze_dim=self.target_dim)
for delta, filter_name in zip(np.append(time_deltas, 1), data.coords["filter"]):
res_filter = []
data_filter = data.sel({"filter": filter_name})
......@@ -212,7 +218,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
res_filter.append(data_filter.shift({dim: -w * delta}))
res_filter = xr.concat(res_filter, dim=window_array).chunk()
res.append(res_filter)
res = xr.concat(res, dim="filter")
res = xr.concat(res, dim="filter").compute()
return res
def estimate_filter_width(self):
......
......@@ -11,8 +11,10 @@ import pandas as pd
from typing import Union, Tuple, Dict, List
from matplotlib import pyplot as plt
import itertools
import gc
import warnings
from mlair.helpers import to_list
from mlair.helpers import to_list, TimeTracking, TimeTrackingWrapper
Data = Union[xr.DataArray, pd.DataFrame]
......@@ -438,7 +440,7 @@ class SkillScores:
"""Calculate CASE IV."""
AI, BI, CI, data, suffix = self.skill_score_pre_calculations(internal_data, observation_name, forecast_name)
monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, index=data.index)
data = xr.concat([data, monthly_mean_external], dim="type")
data = xr.concat([data, monthly_mean_external], dim="type").dropna(dim="index")
mean, sigma = suffix["mean"], suffix["sigma"]
mean_external = monthly_mean_external.mean()
sigma_external = np.sqrt(monthly_mean_external.var())
......@@ -608,6 +610,48 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
else:
return None
@TimeTrackingWrapper
def kz_filter_new(self, df, wl, itr):
    """Apply one Kolmogorov-Zurbenko low-pass filter stage to ``df``.

    Iteratively smooths ``df`` with a centered rolling window of length
    ``wl`` along ``self.filter_dim``, repeating the pass ``itr`` times so
    that only the low-frequency part of the time series remains. For the
    mean/max/min methods the rolling object is materialized via
    ``construct`` and rechunked before the reduction, which is much
    faster; median and percentile cannot take that path and, depending on
    data and window size, may be slow.

    Args:
        df: data to filter; an xarray object supporting ``rolling``
            (assumed dask-backed — ``chunk``/``compute`` are used).
        wl (int): window length of a single pass.
        itr (int): number of filter iterations.

    Returns:
        Filtered copy of ``df``; the input object is left untouched.
    """
    warnings.filterwarnings("ignore")
    # Work on a deep copy so the caller's data is never mutated.
    df_itr = df.__deepcopy__()
    # Require at least 70% of the window to be present so values near the
    # series edges are not dominated by a handful of samples.
    kwargs = {"min_periods": int(0.7 * wl),
              "center": True,
              self.filter_dim: wl}
    for _ in range(itr):
        rolling = df_itr.chunk().rolling(**kwargs)
        if self.method not in ["percentile", "median"]:
            # construct + rechunk lets dask reduce over the window
            # dimension lazily, which is considerably faster than the
            # generic rolling reduction.
            rolling = rolling.construct("construct").chunk("auto")
        if self.method == "median":
            df_mv_avg_tmp = rolling.median()
        elif self.method == "percentile":
            df_mv_avg_tmp = rolling.quantile(self.percentile)
        elif self.method == "max":
            df_mv_avg_tmp = rolling.max("construct")
        elif self.method == "min":
            df_mv_avg_tmp = rolling.min("construct")
        else:
            df_mv_avg_tmp = rolling.mean("construct")
        df_itr = df_mv_avg_tmp.compute()
        # Drop the intermediate dask graph before the next pass to keep
        # peak memory down.
        del df_mv_avg_tmp, rolling
        gc.collect()
    return df_itr
@TimeTrackingWrapper
def kz_filter(self, df, wl, itr):
"""
It passes the low frequency time series.
......@@ -616,15 +660,18 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
wl(int): a window length
itr(int): a number of iteration
"""
import warnings
warnings.filterwarnings("ignore")
df_itr = df.__deepcopy__()
try:
kwargs = {"min_periods": 1,
kwargs = {"min_periods": int(0.7 * wl),
"center": True,
self.filter_dim: wl}
iter_vars = df_itr.coords["variables"].values
for var in iter_vars:
df_itr_var = df_itr.sel(variables=[var]).chunk()
df_itr_var = df_itr.sel(variables=[var])
for _ in np.arange(0, itr):
df_itr_var = df_itr_var.chunk()
rolling = df_itr_var.rolling(**kwargs)
if self.method == "median":
df_mv_avg_tmp = rolling.median()
......@@ -637,7 +684,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
else:
df_mv_avg_tmp = rolling.mean()
df_itr_var = df_mv_avg_tmp.compute()
df_itr = df_itr.drop_sel(variables=var).combine_first(df_itr_var)
df_itr.loc[{"variables": [var]}] = df_itr_var
return df_itr
except ValueError:
raise ValueError
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......