diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 5f3aa45161530ff7d425ccbc7625dd7e081d8839..ad344ab3178933424357c61c16fad6fba7e0592f 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -213,6 +213,11 @@ def mean_absolute_error(a, b, dim=None):
     return np.abs(a - b).mean(dim)
 
 
+def mean_error(a, b, dim=None):
+    """Calculate mean error where a is forecast and b the reference (e.g. observation)."""
+    return a.mean(dim) - b.mean(dim)
+
+
 def index_of_agreement(a, b, dim=None):
     """Calculate index of agreement (IOA) where a is the forecast and b the reference (e.g. observation)."""
     num = (np.square(b - a)).sum(dim)
@@ -234,7 +239,7 @@ def modified_normalized_mean_bias(a, b, dim=None):
 
 
 def calculate_error_metrics(a, b, dim):
-    """Calculate MSE, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation.
+    """Calculate MSE, ME, RMSE, MAE, IOA, and MNMB. Additionally, return number of used values for calculation.
 
     :param a: forecast data to calculate metrics for
     :param b: reference (e.g. observation)
@@ -243,12 +248,13 @@ def calculate_error_metrics(a, b, dim):
     :returns: dict with results for all metrics indicated by lowercase metric short name
     """
     mse = mean_squared_error(a, b, dim)
+    me = mean_error(a, b, dim)
     rmse = np.sqrt(mse)
     mae = mean_absolute_error(a, b, dim)
     ioa = index_of_agreement(a, b, dim)
     mnmb = modified_normalized_mean_bias(a, b, dim)
     n = (a - b).notnull().sum(dim)
-    return {"mse": mse, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n}
+    return {"mse": mse, "me": me, "rmse": rmse, "mae": mae, "ioa": ioa, "mnmb": mnmb, "n": n}
 
 
 def mann_whitney_u_test(data: pd.DataFrame, reference_col_name: str, **kwargs):
diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py
index 6f1952a9c1df2b16a2e298b433d732d9e69200d4..5ff6f7ee526960a90755f09e5c090735e2b532bb 100644
--- a/test/test_helpers/test_statistics.py
+++ b/test/test_helpers/test_statistics.py
@@ -375,6 +375,7 @@ class TestCalculateErrorMetrics:
         x_array1 = xr.DataArray(d1, coords=coords, dims=coords.keys())
         x_array2 = xr.DataArray(d2, coords=coords, dims=coords.keys())
         expected = {"mse": xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
+                    "me": xr.DataArray(np.array([-1./3, -2./3, 0, 0, -1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
                     "rmse": np.sqrt(xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"])),
                     "mae": xr.DataArray(np.array([1, 4./3, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
                     "ioa": xr.DataArray(np.array([0.3721, 0.4255, 1, 1, 0.4706]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
@@ -383,6 +384,7 @@ class TestCalculateErrorMetrics:
         assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "index"), 3) is True
 
         expected = {"mse": xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]),
+                    "me": xr.DataArray(np.array([-0.8, -0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]),
                     "rmse": np.sqrt(xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])),
                     "mae": xr.DataArray(np.array([0.8, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]),
                     "ioa": xr.DataArray(np.array([0.8478, 0.9333, 0.9629]), coords={"index": [0, 1, 2]}, dims=["index"]),