diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py index ee07d7674c73439254cec5eb9f046fb540f3e9a3..9415e0fcb0557f792d1cc0679868caff97e316d3 100644 --- a/mlair/helpers/statistics.py +++ b/mlair/helpers/statistics.py @@ -214,6 +214,11 @@ def mean_absolute_error(a, b, dim=None): return np.abs(a - b).mean(dim) +def mean_error(a, b, dim=None): + """Calculate mean error where a is forecast and b the reference (e.g. observation).""" + return a.mean(dim) - b.mean(dim) + + def index_of_agreement(a, b, dim=None): """Calculate index of agreement (IOA) where a is the forecast and b the reference (e.g. observation).""" num = (np.square(b - a)).sum(dim) @@ -235,7 +240,7 @@ def modified_normalized_mean_bias(a, b, dim=None): def calculate_error_metrics(a, b, dim): - """Calculate MSE, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation. + """Calculate MSE, ME, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation. :param a: forecast data to calculate metrics for :param b: reference (e.g. observation) @@ -244,22 +249,25 @@ def calculate_error_metrics(a, b, dim): :returns: dict with results for all metrics indicated by lowercase metric short name """ mse = mean_squared_error(a, b, dim) + me = mean_error(a, b, dim) rmse = np.sqrt(mse) mae = mean_absolute_error(a, b, dim) ioa = index_of_agreement(a, b, dim) mnmb = modified_normalized_mean_bias(a, b, dim) n = (a - b).notnull().sum(dim) - results = {"mse": mse, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n} + results = {"mse": mse, "me": me, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n} return {k: squeeze_coords(v) for k, v in results.items()} def get_error_metrics_units(base_unit): - return {"mse": f"{base_unit}$^2$", "rmse": base_unit, "mae": base_unit, "ioa": None, "mnmb": None, "n": None} + return {"mse": f"{base_unit}$^2$", "me": base_unit, "rmse": base_unit, "mae": base_unit, "ioa": None, "mnmb": None, + "n": None} def get_error_metrics_long_name(): - return {"mse": "mean squared error", "rmse": "root mean squared error", "mae": "mean absolute error", - "ioa": "index of agreement", "mnmb": "modified normalized mean bias", "n": "count"} + return {"mse": "mean squared error", "me": "mean error", "rmse": "root mean squared error", + "mae": "mean absolute error", "ioa": "index of agreement", "mnmb": "modified normalized mean bias", + "n": "count"} def mann_whitney_u_test(data: pd.DataFrame, reference_col_name: str, **kwargs): diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py index 6f1952a9c1df2b16a2e298b433d732d9e69200d4..5ff6f7ee526960a90755f09e5c090735e2b532bb 100644 --- a/test/test_helpers/test_statistics.py +++ b/test/test_helpers/test_statistics.py @@ -375,6 +375,7 @@ class TestCalculateErrorMetrics: x_array1 = xr.DataArray(d1, coords=coords, dims=coords.keys()) x_array2 = xr.DataArray(d2, coords=coords, dims=coords.keys()) expected = {"mse": xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), + "me": xr.DataArray(np.array([-1./3, -2./3, 0, 0, -1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), "rmse": np.sqrt(xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"])), "mae": xr.DataArray(np.array([1, 4./3, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), "ioa": xr.DataArray(np.array([0.3721, 0.4255, 1, 1, 0.4706]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]), @@ -383,6 +384,7 @@ class TestCalculateErrorMetrics: assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "index"), 3) is True expected = {"mse": xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), + "me": xr.DataArray(np.array([-0.8, -0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), "rmse": np.sqrt(xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])), "mae": xr.DataArray(np.array([0.8, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]), "ioa": xr.DataArray(np.array([0.8478, 0.9333, 0.9629]), coords={"index": [0, 1, 2]}, dims=["index"]),