new plot: ws vs perf
IDEA
If advection is of major importance, there should be a relation between wind speed and the occurring error. Try to visualize this relation in a plot.
implementation
@leufen1 already implemented some snippets. However, the implementation is not really stable, as it depends strongly on the available input variables, their representation, and the data handler used. Still, the work done so far may be a good starting point for another attempt.
current status
- Just noted down as an idea (with some initial implementation attempts)
- Reconsider this plot at another stage
code snippets
@TimeTrackingWrapper
class PlotWindToErrorRelation(AbstractPlotClass):  # pragma: no cover
    """
    Collect forecast errors together with wind information for a set of stations.

    For every station the stored forecast file is opened, the error (model minus observation) is
    calculated and combined with the wind variables taken from the station's inputs. The result is
    one data array per wind variable in which all stations are stacked along the time dimension.

    NOTE(review): the actual drawing is not implemented yet, `_plot` only extracts the flattened
    error and wind values.
    """

    def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                 time_dim="datetime", variables_dim="variables", index_dim="index", wind_var="v",
                 model_name: str = "NN", window_dim="window", ahead_dim="ahead",
                 model_indicator: str = "nn", obs_indicator: str = "obs", forecast_path=".",
                 forecast_file=r"forecasts_%s_test.nc"):
        """
        Store the configuration, prepare the data of all stations and trigger the plot routine.

        :param data: iterable of stations; each element is used to build the forecast file name and
            must offer `get_X(as_numpy=False)` (NOTE(review): the annotation says `xr.DataArray`,
            but the usage looks like a collection of data handlers - confirm)
        :param plot_folder: forwarded to the parent class together with the plot name
        :param model_type_dim: dimension of the forecast file separating model and observation
        :param time_dim: name of the time dimension used internally
        :param variables_dim: name of the variables dimension
        :param index_dim: dimension of the forecast file that is renamed to `time_dim`
        :param wind_var: name of a single wind variable or an iterable of names; a plain string
            is always treated as one variable name
        :param model_name: accepted for compatibility, currently not used
        :param window_dim: name of the window dimension of the inputs (entry 0 is selected)
        :param ahead_dim: name of the lead time dimension of the forecasts
        :param model_indicator: entry along `model_type_dim` that holds the model forecast
        :param obs_indicator: entry along `model_type_dim` that holds the observation
        :param forecast_path: directory containing the forecast files
        :param forecast_file: file name pattern with one `%s` placeholder for the station
        """
        super().__init__(plot_folder, "wind_to_error_relation")
        self._forecast_path = forecast_path
        self._forecast_file = forecast_file
        self._model_name = model_indicator
        self._obs_name = obs_indicator
        self._model_type_dim = model_type_dim
        # A bare string must not be iterated character by character ("ws" is one variable, not
        # "w" and "s"), therefore always keep an ordered list of unique variable names.
        self._wind_var = [wind_var] if isinstance(wind_var, str) else list(dict.fromkeys(wind_var))
        self._time_dim = time_dim
        self._index_dim = index_dim
        self._ahead_dim = ahead_dim
        self._var_dim = variables_dim
        self._window_dim = window_dim
        data_coll = self._prepare_data(data)
        self._plot(data_coll)

    def _plot(self, data):
        """
        Extract flattened error and wind values for each wind variable.

        :param data: mapping from wind variable name to the combined data array as returned by
            `_prepare_data`
        """
        for plot_var, raw_data in data.items():
            # use the configured variables dimension instead of a hard coded name
            errors = raw_data.sel({self._var_dim: "error"}).to_pandas().to_numpy().flatten()
            wind = raw_data.sel({self._var_dim: plot_var}).to_pandas().to_numpy().flatten()
            # TODO: draw the relation between `wind` and `errors`, nothing is plotted so far

    def _prepare_data(self, data):
        """
        Build one data array per wind variable containing the values of all stations.

        :param data: iterable of stations (see `__init__`)
        :return: dictionary with wind variable names as keys and the stacked arrays as values
        """
        per_variable = {}
        for station in data:
            logging.debug(f"... preprocess station {station}")
            station_forecast = self._load_data(station)
            station_error = self._calc_error(station_forecast)
            station_wind = self._load_wind(station, station_error)
            station_combined = xr.concat([station_error, station_wind], dim=self._var_dim)
            for var in station_wind.coords[self._var_dim].values:
                selection = station_combined.sel({self._var_dim: [var, "error"]})
                selection = selection.squeeze(drop=True).dropna(self._time_dim)
                per_variable.setdefault(var, []).append(selection)

        data_coll = {}
        for var, station_arrays in per_variable.items():
            # Replace the time stamps by a running index so that the stations can be stacked one
            # after another without any alignment along the time axis.
            n_start = 0
            renumbered = []
            for d in station_arrays:
                n_steps = len(d.coords[self._time_dim])
                d.coords[self._time_dim] = np.arange(n_start, n_start + n_steps)
                renumbered.append(d)
                # counting the steps also works for stations without any remaining time step
                n_start += n_steps
            data_coll[var] = xr.concat(renumbered, dim=self._time_dim)
        return data_coll

    def _load_data(self, station):
        """
        Load model forecast and observation of a single station from its forecast file.

        :param station: station used to fill the placeholder of the forecast file pattern
        :return: data restricted to model and observation with `index_dim` renamed to `time_dim`
        """
        file_name = os.path.join(self._forecast_path, self._forecast_file % station)
        with xr.open_dataarray(file_name) as d:
            selected = d.sel({self._model_type_dim: [self._model_name, self._obs_name]})
            selected = selected.rename({self._index_dim: self._time_dim})
            # read the values into memory before the file is closed on leaving the context
            return selected.load()

    def _harmonize_data(self, wind, error):
        """
        Restrict wind and error data to the time steps available in both.

        :param wind: array with a `time_dim` coordinate
        :param error: array with a `time_dim` coordinate
        :return: tuple of wind and error selected on the common time steps
        """
        wind_times = wind.coords[self._time_dim].values
        error_times = error.coords[self._time_dim].values
        intersect = np.intersect1d(wind_times, error_times)
        return wind.sel({self._time_dim: intersect}), error.sel({self._time_dim: intersect})

    def _calc_error(self, data):
        """
        Calculate the error as model forecast minus observation.

        :param data: array holding model and observation along `model_type_dim`
        :return: error with a new leading variables dimension containing the entry "error"
        """
        forecast = data.sel({self._model_type_dim: self._model_name})
        observation = data.sel({self._model_type_dim: self._obs_name})
        return (forecast - observation).expand_dims({self._var_dim: ["error"]})

    def _load_wind(self, data, error_data):
        """
        Extract the requested wind variables from the inputs of a station.

        Only the first element returned by `get_X` is used and only window position 0 is selected.
        If both "u" and "v" are selected, their absolute value is appended as variable "ws".

        :param data: station object offering `get_X(as_numpy=False)`
        :param error_data: error array, its `ahead_dim` coordinate is copied to the wind data
        :return: wind data expanded along `ahead_dim`
        """
        first_input = data.get_X(as_numpy=False)[0]
        available = set(first_input.coords[self._var_dim].values.tolist())
        # keep the order of the request to get a reproducible variable order
        wind_vars = [var for var in self._wind_var if var in available]
        d0wind = first_input.sel({self._var_dim: wind_vars, self._window_dim: 0}, drop=True)
        if {"u", "v"}.issubset(wind_vars):
            u_wind = d0wind.sel({self._var_dim: "u"})
            v_wind = d0wind.sel({self._var_dim: "v"})
            wind_abs = np.sqrt(u_wind ** 2 + v_wind ** 2).expand_dims({self._var_dim: ["ws"]})
            d0wind = xr.concat([d0wind, wind_abs], dim=self._var_dim)
        # repeat the wind for each lead time to match the layout of the error data
        ahead_values = error_data.coords[self._ahead_dim].values
        return d0wind.expand_dims({self._ahead_dim: ahead_values})