Skip to content
Snippets Groups Projects
Commit 9973cc26 authored by leufen1's avatar leufen1
Browse files

competitive skill scores can now be calculated for an arbitrary number of models

parent 1a3d9079
Branches
Tags
3 merge requests!253include current develop,!252Resolve "release v1.3.0",!196Resolve "Competitor Models"
......@@ -8,8 +8,9 @@ __date__ = '2019-10-23'
import numpy as np
import xarray as xr
import pandas as pd
from typing import Union, Tuple, Dict
from typing import Union, Tuple, Dict, List
from matplotlib import pyplot as plt
import itertools
from mlair.helpers import to_list, remove_items
......@@ -178,10 +179,33 @@ class SkillScores:
skill_scores_clim = skill_scores_class.climatological_skill_scores(external_data, window_lead_time=3)
"""
models_default = ["cnn", "persi", "ols"]
def __init__(self, internal_data: Data):
def __init__(self, internal_data: Data, models=None, observation_name="obs"):
"""Set internal data."""
self.internal_data = internal_data
self.models = self.set_model_names(models)
self.observation_name = observation_name
def set_model_names(self, models: List[str]) -> List[str]:
"""Either use given models or use defaults."""
if models is None:
models = self.models_default
return self._reorder(models)
@staticmethod
def _reorder(model_list: List[str]) -> List[str]:
"""Set elements persi and obs at the very end of given list."""
for element in ["persi", "obs"]:
if element in model_list:
model_list.append(model_list.pop(model_list.index(element)))
return model_list
def get_model_name_combinations(self):
"""Return all combinations of two models as tuple and string."""
combinations = list(itertools.combinations(self.models, 2))
combination_strings = [f"{first}-{second}" for (first, second) in combinations]
return combinations, combination_strings
def skill_scores(self, window_lead_time: int) -> pd.DataFrame:
"""
......@@ -192,16 +216,19 @@ class SkillScores:
:return: skill score for each comparison and forecast step
"""
ahead_names = list(range(1, window_lead_time + 1))
skill_score = pd.DataFrame(index=['cnn-persi', 'ols-persi', 'cnn-ols', 'cnn-competitor'])
combinations, combination_strings = self.get_model_name_combinations()
skill_score = pd.DataFrame(index=combination_strings)
for iahead in ahead_names:
data = self.internal_data.sel(ahead=iahead)
skill_score[iahead] = [self.general_skill_score(data, forecast_name="CNN", reference_name="persi"),
self.general_skill_score(data, forecast_name="OLS", reference_name="persi"),
self.general_skill_score(data, forecast_name="CNN", reference_name="OLS"),
self.general_skill_score(data, forecast_name="CNN", reference_name="competitor")]
skill_score[iahead] = [self.general_skill_score(data,
forecast_name=first,
reference_name=second,
observation_name=self.observation_name)
for (first, second) in combinations]
return skill_score
def climatological_skill_scores(self, external_data: Data, window_lead_time: int) -> xr.DataArray:
def climatological_skill_scores(self, external_data: Data, window_lead_time: int,
forecast_name: str = "cnn") -> xr.DataArray:
"""
Calculate climatological skill scores according to Murphy (1988).
......@@ -210,6 +237,7 @@ class SkillScores:
:param external_data: external data
:param window_lead_time: interested time step of forecast horizon to select data
:param forecast_name: name of the forecast to use for this calculation (must be available in `data`)
:return: all CASES as well as all terms
"""
......@@ -225,30 +253,28 @@ class SkillScores:
data = self.internal_data.sel(ahead=iahead)
skill_score.loc[["CASE I", "AI", "BI", "CI"], iahead] = np.stack(self._climatological_skill_score(
data, mu_type=1, forecast_name="CNN").values.flatten())
data, mu_type=1, forecast_name=forecast_name, observation_name=self.observation_name).values.flatten())
skill_score.loc[["CASE II", "AII", "BII"], iahead] = np.stack(self._climatological_skill_score(
data, mu_type=2, forecast_name="CNN").values.flatten())
data, mu_type=2, forecast_name=forecast_name, observation_name=self.observation_name).values.flatten())
if external_data is not None:
skill_score.loc[["CASE III", "AIII"], iahead] = np.stack(self._climatological_skill_score(
data, mu_type=3, forecast_name="CNN",
data, mu_type=3, forecast_name=forecast_name, observation_name=self.observation_name,
external_data=external_data).values.flatten())
skill_score.loc[["CASE IV", "AIV", "BIV", "CIV"], iahead] = np.stack(self._climatological_skill_score(
data, mu_type=4, forecast_name="CNN",
data, mu_type=4, forecast_name=forecast_name, observation_name=self.observation_name,
external_data=external_data).values.flatten())
return skill_score
def _climatological_skill_score(self, data, mu_type=1, observation_name="obs", forecast_name="CNN",
external_data=None):
def _climatological_skill_score(self, data, observation_name, forecast_name, mu_type=1, external_data=None):
kwargs = {"external_data": external_data} if external_data is not None else {}
return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
@staticmethod
def general_skill_score(data: Data, observation_name: str = "obs", forecast_name: str = "CNN",
reference_name: str = "persi") -> np.ndarray:
def general_skill_score(data: Data, observation_name: str, forecast_name: str, reference_name: str) -> np.ndarray:
r"""
Calculate general skill score based on mean squared error.
......@@ -285,49 +311,51 @@ class SkillScores:
:returns: Terms AI, BI, and CI, internal data without nans and mean, variance, correlation and its p-value
"""
data = data.loc[..., [observation_name, forecast_name]].drop("ahead")
data = data.sel(type=[observation_name, forecast_name]).drop("ahead")
data = data.dropna("index")
mean = data.mean("index")
sigma = np.sqrt(data.var("index"))
r, p = stats.pearsonr(data.loc[..., forecast_name], data.loc[..., observation_name])
r, p = stats.pearsonr(*[data.sel(type=n).values.flatten() for n in [forecast_name, observation_name]])
AI = np.array(r ** 2)
BI = ((r - (sigma.loc[..., forecast_name] / sigma.loc[..., observation_name])) ** 2).values
CI = (((mean.loc[..., forecast_name] - mean.loc[..., observation_name]) / sigma.loc[
..., observation_name]) ** 2).values
BI = ((r - (sigma.sel(type=forecast_name, drop=True) / sigma.sel(type=observation_name,
drop=True))) ** 2).values
CI = (((mean.sel(type=forecast_name, drop=True) - mean.sel(type=observation_name, drop=True)) / sigma.sel(
type=observation_name, drop=True)) ** 2).values
suffix = {"mean": mean, "sigma": sigma, "r": r, "p": p}
return AI, BI, CI, data, suffix
def skill_score_mu_case_1(self, data, observation_name="obs", forecast_name="CNN"):
def skill_score_mu_case_1(self, data, observation_name, forecast_name):
"""Calculate CASE I."""
AI, BI, CI, data, _ = self.skill_score_pre_calculations(data, observation_name, forecast_name)
skill_score = np.array(AI - BI - CI)
return pd.DataFrame({"skill_score": [skill_score], "AI": [AI], "BI": [BI], "CI": [CI]}).to_xarray().to_array()
def skill_score_mu_case_2(self, data, observation_name="obs", forecast_name="CNN"):
def skill_score_mu_case_2(self, data, observation_name, forecast_name):
"""Calculate CASE II."""
AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
monthly_mean = self.create_monthly_mean_from_daily_data(data)
data = xr.concat([data, monthly_mean], dim="type")
sigma = suffix["sigma"]
sigma_monthly = np.sqrt(monthly_mean.var())
r, p = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
r, p = stats.pearsonr(*[data.sel(type=n).values.flatten() for n in [observation_name, observation_name + "X"]])
AII = np.array(r ** 2)
BII = ((r - sigma_monthly / sigma.loc[observation_name]) ** 2).values
BII = ((r - sigma_monthly / sigma.sel(type=observation_name, drop=True)) ** 2).values
skill_score = np.array((AI - BI - CI - AII + BII) / (1 - AII + BII))
return pd.DataFrame({"skill_score": [skill_score], "AII": [AII], "BII": [BII]}).to_xarray().to_array()
def skill_score_mu_case_3(self, data, observation_name="obs", forecast_name="CNN", external_data=None):
def skill_score_mu_case_3(self, data, observation_name, forecast_name, external_data=None):
"""Calculate CASE III."""
AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
mean, sigma = suffix["mean"], suffix["sigma"]
AIII = (((external_data.mean().values - mean.loc[observation_name]) / sigma.loc[observation_name]) ** 2).values
AIII = (((external_data.mean().values - mean.sel(type=observation_name, drop=True)) / sigma.sel(
type=observation_name, drop=True)) ** 2).values
skill_score = np.array((AI - BI - CI + AIII) / 1 + AIII)
return pd.DataFrame({"skill_score": [skill_score], "AIII": [AIII]}).to_xarray().to_array()
def skill_score_mu_case_4(self, data, observation_name="obs", forecast_name="CNN", external_data=None):
def skill_score_mu_case_4(self, data, observation_name, forecast_name, external_data=None):
"""Calculate CASE IV."""
AI, BI, CI, data, suffix = self.skill_score_pre_calculations(data, observation_name, forecast_name)
monthly_mean_external = self.create_monthly_mean_from_daily_data(external_data, columns=data.type.values,
......@@ -338,12 +366,13 @@ class SkillScores:
mean_external = monthly_mean_external.mean()
sigma_external = np.sqrt(monthly_mean_external.var())
# r_mu, p_mu = stats.spearmanr(data.loc[..., [observation_name, observation_name+'X']])
r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
r_mu, p_mu = stats.pearsonr(
*[data.sel(type=n).values.flatten() for n in [observation_name, observation_name + "X"]])
AIV = np.array(r_mu ** 2)
BIV = ((r_mu - sigma_external / sigma.loc[observation_name]) ** 2).values
CIV = (((mean_external - mean.loc[observation_name]) / sigma.loc[observation_name]) ** 2).values
BIV = ((r_mu - sigma_external / sigma.sel(type=observation_name, drop=True)) ** 2).values
CIV = (((mean_external - mean.sel(type=observation_name, drop=True)) / sigma.sel(type=observation_name,
drop=True)) ** 2).values
skill_score = np.array((AI - BI - CI - AIV + BIV + CIV) / (1 - AIV + BIV + CIV))
return pd.DataFrame(
{"skill_score": [skill_score], "AIV": [AIV], "BIV": [BIV], "CIV": CIV}).to_xarray().to_array()
......@@ -369,7 +398,7 @@ class SkillScores:
mu = data.groupby("index.month").mean()
for month in mu.month:
monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values
monthly_mean[monthly_mean.index.dt.month == month, :] = mu[mu.month == month].values.flatten()
return monthly_mean
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment