new plot: ws vs perf

IDEA

If advection is of major importance, there should be a relation between wind speed and the occurring error. Try to visualize this relation in a plot.

implementation

@leufen1 already implemented some snippets. However, the implementation is not really stable, as it depends strongly on the available input variables, their representation, and the data handler in use. Still, this existing work may be a good starting point for another attempt.

current status

Just noted down as an idea (with some initial implementation tries)
Reconsider this plot at another stage

code snippets



@TimeTrackingWrapper
class PlotWindToErrorRelation(AbstractPlotClass):  # pragma: no cover
    """
    Collect forecast errors and wind information per station to relate both quantities.

    For each station, the forecast file is read, the error (model minus observation) is calculated and combined with
    the wind variables taken from the station's input data. All stations are then stacked along the time dimension.

    NOTE: this class is work in progress. :meth:`_plot` extracts the value pairs (wind, error) but does not draw or
    store a figure yet.
    """

    def __init__(self, data: xr.DataArray, plot_folder: str = ".", model_type_dim: str = "type",
                 time_dim="datetime", variables_dim="variables", index_dim="index", wind_var="v",
                 model_name: str = "NN", window_dim="window", ahead_dim="ahead",
                 model_indicator: str = "nn", obs_indicator: str = "obs", forecast_path=".",
                 forecast_file=r"forecasts_%s_test.nc"):
        """
        Set up all attributes, prepare the data and trigger plotting.

        :param data: iterable of stations; each element is used to build the forecast file name (via string
            formatting) and must offer ``get_X(as_numpy=False)`` (presumably a collection of data handlers - verify
            against caller)
        :param plot_folder: forwarded to the parent class constructor
        :param model_type_dim: name of the dimension that separates model forecast and observation
        :param time_dim: name of the time dimension used internally
        :param variables_dim: name of the variables dimension
        :param index_dim: name of the index dimension in the forecast file, renamed to ``time_dim`` on load
        :param wind_var: name of a single wind variable or an iterable of names
        :param model_name: currently not used by this class
        :param window_dim: name of the window dimension in the input data; only entry ``0`` is used
        :param ahead_dim: name of the lead time dimension in the forecast file
        :param model_indicator: coordinate on ``model_type_dim`` that selects the model forecast
        :param obs_indicator: coordinate on ``model_type_dim`` that selects the observation
        :param forecast_path: directory of the forecast files
        :param forecast_file: file name pattern with a single ``%s`` placeholder that is filled with the station
        """
        super().__init__(plot_folder, "wind_to_error_relation")
        self._forecast_path = forecast_path
        self._forecast_file = forecast_file
        # the indicator (not `model_name`) is the coordinate used to select the forecast along the model type dim
        self._model_name = model_indicator
        self._obs_name = obs_indicator
        self._model_type_dim = model_type_dim
        self._wind_var = wind_var
        self._time_dim = time_dim
        self._index_dim = index_dim
        self._ahead_dim = ahead_dim
        self._var_dim = variables_dim
        self._window_dim = window_dim
        data_coll = self._prepare_data(data)
        self._plot(data_coll)

    def _plot(self, data):
        """
        Extract flattened error and wind values for each collected wind variable.

        TODO: no figure is created yet, the extracted arrays are not used any further.

        :param data: mapping from wind variable name to the combined data as returned by :meth:`_prepare_data`
        """
        for plot_var, raw_data in data.items():
            # use the configured variables dimension instead of a hard-coded dimension name
            errors = raw_data.sel({self._var_dim: "error"}).to_pandas().to_numpy().flatten()
            wind = raw_data.sel({self._var_dim: plot_var}).to_pandas().to_numpy().flatten()

    def _prepare_data(self, data):
        """
        Build one combined array of error and wind for each wind variable over all stations.

        :param data: iterable of stations (see constructor)
        :return: dictionary that maps each wind variable name to an array holding this variable and ``"error"``
            along the variables dimension, stacked over all stations along the time dimension
        """
        data_coll = {}
        for station in data:
            logging.debug(f"... preprocess station {station}")
            station_forecast = self._load_data(station)
            station_error = self._calc_error(station_forecast)
            station_wind = self._load_wind(station, station_error)
            station_combined = xr.concat([station_error, station_wind], dim=self._var_dim)
            for var in station_wind.coords[self._var_dim].values:
                pair = station_combined.sel({self._var_dim: [var, "error"]})
                data_coll.setdefault(var, []).append(pair.squeeze(drop=True).dropna(self._time_dim))

        for var, station_chunks in data_coll.items():
            # Replace the time stamps by a running integer index, so that stations with overlapping time stamps can
            # be stacked along the time dimension without duplicated coordinates.
            n_start = 0
            reindexed = []
            for chunk in station_chunks:
                n_steps = len(chunk.coords[self._time_dim])
                chunk.coords[self._time_dim] = np.arange(n_start, n_start + n_steps)
                reindexed.append(chunk)
                # advance by the number of steps; taking the max of the coordinate fails for empty stations
                n_start += n_steps
            data_coll[var] = xr.concat(reindexed, dim=self._time_dim)
        return data_coll

    def _load_data(self, station):
        """
        Load model forecast and observation of a single station from its forecast file.

        :param station: station used to fill the placeholder of the forecast file name
        :return: forecast data restricted to model and observation with the index dimension renamed to the time
            dimension
        """
        file_name = os.path.join(self._forecast_path, self._forecast_file % station)
        with xr.open_dataarray(file_name) as d:
            selection = d.sel({self._model_type_dim: [self._model_name, self._obs_name]})
            # Load eagerly, so that the returned array does not depend on the file handle that is closed when
            # leaving the context manager.
            return selection.rename({self._index_dim: self._time_dim}).load()

    def _harmonize_data(self, wind, error):
        """
        Restrict wind and error to the time steps that are present in both arrays.

        :param wind: wind data with a time dimension
        :param error: error data with a time dimension
        :return: tuple of (wind, error) selected on the common time steps
        """
        intersect = np.intersect1d(wind.coords[self._time_dim].values, error.coords[self._time_dim].values)
        return wind.sel({self._time_dim: intersect}), error.sel({self._time_dim: intersect})

    def _calc_error(self, data):
        """
        Calculate the error as model forecast minus observation.

        :param data: forecast data containing model and observation along the model type dimension
        :return: error with a new variables dimension that holds the single entry ``"error"``
        """
        error = data.sel({self._model_type_dim: self._model_name}) - data.sel({self._model_type_dim: self._obs_name})
        error = error.expand_dims({self._var_dim: ["error"]})
        return error

    def _load_wind(self, data, error_data):
        """
        Extract the wind variables from the inputs of a station and expand them along the lead time dimension.

        If both components ``"u"`` and ``"v"`` are selected, the absolute wind speed is added as variable ``"ws"``.

        :param data: station object that provides ``get_X(as_numpy=False)``; the first element of the returned
            collection is used (presumably the first input branch - TODO confirm)
        :param error_data: error data, only used to read the coordinates of the lead time dimension
        :return: wind data at window position 0 for all requested and available wind variables
        """
        d = data.get_X(as_numpy=False)
        d0 = d[0]
        # A plain string is a single variable name. Passing it directly to set() would split it into characters,
        # which only works by accident for one-letter names.
        requested = [self._wind_var] if isinstance(self._wind_var, str) else list(self._wind_var)
        # sort to get a deterministic order of variables
        wind_vars = sorted(set(requested).intersection(d0.coords[self._var_dim].values))
        d0wind = d0.sel({self._var_dim: wind_vars, self._window_dim: 0}, drop=True)
        if {"u", "v"}.issubset(wind_vars):
            wind_abs = np.sqrt(d0wind.sel({self._var_dim: "u"}) ** 2 + d0wind.sel({self._var_dim: "v"}) ** 2)
            wind_abs = wind_abs.expand_dims({self._var_dim: ["ws"]})
            d0wind = xr.concat([d0wind, wind_abs], dim=self._var_dim)
        # repeat the wind for each lead time to match the shape of the error
        expand_dim_vals = error_data.coords[self._ahead_dim].values
        d0wind = d0wind.expand_dims({self._ahead_dim: expand_dim_vals})
        return d0wind