Commit 7cc89dec authored by lukas leufen

Merge branch 'lukas_issue290_bug_fix-separation-of-scales-data-handler' into 'lukas_issue292_feat_data-preprocessing'

# Conflicts:
#   mlair/data_handler/data_handler_mixed_sampling.py
parents f9c10fe3 891e208f
Pipeline #63274 passed
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
+ bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
+ bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
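The only change in these requirements hunks is the new bottleneck pin. xarray (and pandas) automatically use bottleneck, when it is installed, to accelerate rolling-window reductions such as mean, min and max, which is relevant for the rolling-based filter changes further down. A minimal illustration (toy data and window size, illustrative only):

```python
import numpy as np
import xarray as xr

# bottleneck is picked up transparently by xarray for rolling reductions;
# no code change is needed, only the installed package
da = xr.DataArray(np.random.rand(1000), dims="datetime")
smoothed = da.rolling(datetime=25, center=True, min_periods=17).mean()
```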
......@@ -226,7 +226,7 @@ class DataHandlerSeparationOfScalesSingleStation(DataHandlerMixedSamplingWithFil
res_filter.append(data_filter.shift({dim: -w * delta}))
res_filter = xr.concat(res_filter, dim=window_array).chunk()
res.append(res_filter)
res = xr.concat(res, dim="filter")
res = xr.concat(res, dim="filter").compute()
return res
def estimate_filter_width(self):
......
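The change in this hunk is the trailing .compute(): concatenating chunked arrays produces a lazy, dask-backed result, and evaluating it once here avoids re-running the whole shift/concat graph every time the result is accessed later. A rough sketch of the difference, using hypothetical toy data instead of the handler's real inputs:

```python
import numpy as np
import xarray as xr

# stand-ins for the shifted, chunked filter results (requires dask)
parts = [xr.DataArray(np.random.rand(4, 3), dims=("datetime", "window")).chunk()
         for _ in range(2)]

lazy = xr.concat(parts, dim="filter")             # still a lazy dask graph
eager = xr.concat(parts, dim="filter").compute()  # evaluated once, kept in memory
```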
......@@ -11,8 +11,10 @@ import pandas as pd
from typing import Union, Tuple, Dict, List
from matplotlib import pyplot as plt
import itertools
+ import gc
+ import warnings
- from mlair.helpers import to_list
+ from mlair.helpers import to_list, TimeTracking, TimeTrackingWrapper
Data = Union[xr.DataArray, pd.DataFrame]
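The new imports are used below: gc and warnings inside the added kz_filter_new method, and TimeTrackingWrapper as a decorator that reports how long each filter call takes. As a rough idea of what such a decorator does (a hand-written stand-in, not MLAir's actual TimeTrackingWrapper implementation):

```python
import functools
import logging
import time


def time_tracking_wrapper(func):
    """Hypothetical stand-in for a TimeTrackingWrapper-like decorator: log the wrapped call's runtime."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        try:
            return func(*args, **kwargs)
        finally:
            logging.info("%s finished after %.2f s", func.__name__, time.time() - start)
    return wrapper
```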
......@@ -438,7 +440,7 @@ class SkillScores:
"""Calculate CASE IV."""
AI, BI, CI, data, suffix = self.skill_score_pre_calculations(internal_data, observation_name, forecast_name)
monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, index=data.index)
- data = xr.concat([data, monthly_mean_external], dim="type")
+ data = xr.concat([data, monthly_mean_external], dim="type").dropna(dim="index")
mean, sigma = suffix["mean"], suffix["sigma"]
mean_external = monthly_mean_external.mean()
sigma_external = np.sqrt(monthly_mean_external.var())
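The added dropna removes every timestamp for which any of the concatenated series is missing, so the CASE IV skill score is computed on a common, gap-free index instead of propagating NaNs from the monthly climatology. A small self-contained sketch of the effect (coordinate names match the code above, values are made up):

```python
import numpy as np
import pandas as pd
import xarray as xr

index = pd.date_range("2020-01-01", periods=5)
forecasts = xr.DataArray(np.random.rand(5, 2), dims=("index", "type"),
                         coords={"index": index, "type": ["obs", "CNN"]})
monthly_mean = xr.DataArray([[0.4], [np.nan], [0.5], [0.6], [np.nan]],
                            dims=("index", "type"),
                            coords={"index": index, "type": ["clim"]})

data = xr.concat([forecasts, monthly_mean], dim="type")
clean = data.dropna(dim="index")  # drops the two timestamps where the monthly mean is NaN
```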
......@@ -608,6 +610,48 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
else:
return None
@TimeTrackingWrapper
def kz_filter_new(self, df, wl, itr):
"""
It passes the low frequency time series.
If filter method is from mean, max, min this method will call construct and rechunk before the actual
calculation to improve performance. If filter method is either median or percentile this approach is not
applicable and depending on the data and window size, this method can become slow.
Args:
wl(int): a window length
itr(int): a number of iteration
"""
warnings.filterwarnings("ignore")
df_itr = df.__deepcopy__()
try:
kwargs = {"min_periods": int(0.7 * wl),
"center": True,
self.filter_dim: wl}
for i in np.arange(0, itr):
print(i)
rolling = df_itr.chunk().rolling(**kwargs)
if self.method not in ["percentile", "median"]:
rolling = rolling.construct("construct").chunk("auto")
if self.method == "median":
df_mv_avg_tmp = rolling.median()
elif self.method == "percentile":
df_mv_avg_tmp = rolling.quantile(self.percentile)
elif self.method == "max":
df_mv_avg_tmp = rolling.max("construct")
elif self.method == "min":
df_mv_avg_tmp = rolling.min("construct")
else:
df_mv_avg_tmp = rolling.mean("construct")
df_itr = df_mv_avg_tmp.compute()
del df_mv_avg_tmp, rolling
gc.collect()
return df_itr
except ValueError:
raise ValueError
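The new kz_filter_new follows the approach described in its docstring: the rolling object is built once per iteration and, for the reductions that support it (mean, max, min), an explicit window dimension is created via construct and rechunked before reducing along it, which keeps the work dask-friendly; median and percentile fall back to the plain rolling reduction. A rough standalone sketch of that pattern (toy data, illustrative window length):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(365), dims="datetime").chunk({"datetime": 100})
wl = 13
rolling = da.rolling(datetime=wl, center=True, min_periods=int(0.7 * wl))

# mean/max/min: materialise the window as a new "construct" dimension, then reduce along it
windowed = rolling.construct("construct").chunk("auto")
smoothed_mean = windowed.mean("construct").compute()

# median/percentile: no construct shortcut, so the plain rolling reduction is used,
# which can become slow for large windows
smoothed_median = rolling.median().compute()
```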
@TimeTrackingWrapper
def kz_filter(self, df, wl, itr):
"""
It passes the low frequency time series.
......@@ -616,15 +660,18 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
wl (int): window length
itr (int): number of iterations
"""
- import warnings
warnings.filterwarnings("ignore")
df_itr = df.__deepcopy__()
try:
kwargs = {"min_periods": 1,
kwargs = {"min_periods": int(0.7 * wl),
"center": True,
self.filter_dim: wl}
iter_vars = df_itr.coords["variables"].values
for var in iter_vars:
- df_itr_var = df_itr.sel(variables=[var]).chunk()
+ df_itr_var = df_itr.sel(variables=[var])
for _ in np.arange(0, itr):
+ df_itr_var = df_itr_var.chunk()
rolling = df_itr_var.rolling(**kwargs)
if self.method == "median":
df_mv_avg_tmp = rolling.median()
......@@ -637,7 +684,7 @@ class KolmogorovZurbenkoFilterMovingWindow(KolmogorovZurbenkoBaseClass):
else:
df_mv_avg_tmp = rolling.mean()
df_itr_var = df_mv_avg_tmp.compute()
- df_itr = df_itr.drop_sel(variables=var).combine_first(df_itr_var)
+ df_itr.loc[{"variables": [var]}] = df_itr_var
return df_itr
except ValueError:
raise ValueError
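Besides chunking per iteration and raising min_periods, the updated kz_filter writes the filtered variable back with a .loc assignment instead of drop_sel(...).combine_first(...), which avoids rebuilding the full array for every variable. The two approaches side by side, on made-up data with the same coordinate names:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(10, 2), dims=("datetime", "variables"),
                  coords={"variables": ["o3", "temp"]})
filtered = da.sel(variables=["o3"]) * 0.5  # stand-in for one low-pass filtered variable

# old approach: drop the variable, then merge the filtered copy back in
merged = da.drop_sel(variables="o3").combine_first(filtered)

# new approach: assign the filtered values in place, keeping the original layout
da.loc[{"variables": ["o3"]}] = filtered
```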
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
+ bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......
......@@ -2,6 +2,7 @@ absl-py==0.11.0
appdirs==1.4.4
astor==0.8.1
attrs==20.3.0
+ bottleneck==1.3.2
cached-property==1.5.2
certifi==2020.12.5
cftime==1.4.1
......