diff --git a/mlair/helpers/statistics.py b/mlair/helpers/statistics.py
index 57143c9aed0e730c81adbed33f7ba62fea39b298..69d58766f9d54727cfd8e057ce965e7054037d89 100644
--- a/mlair/helpers/statistics.py
+++ b/mlair/helpers/statistics.py
@@ -211,13 +211,28 @@ def mean_absolute_error(a, b, dim=None):
     return np.abs(a - b).mean(dim)
 
 
+def index_of_agreement(a, b, dim=None):
+    """Calculate index of agreement (IOA) where a is the forecast and b the reference (e.g. observation)."""
+    num = (np.square(b - a)).sum(dim)
+    b_mean = (b * np.ones(1)).mean(dim)
+    den = (np.square(np.abs(b - b_mean) + np.abs(a - b_mean))).sum(dim)
+    frac = num / den
+    # issue with 0/0 division for exactly equal arrays
+    if isinstance(frac, (int, float)):
+        frac = 0 if num == 0 else frac
+    else:
+        frac[num == 0] = 0
+    return 1 - frac
+
+
 def calculate_error_metrics(a, b, dim):
-    """Calculate MSE, RMSE, and MAE. Additionally return number of used values for calculation."""
+    """Calculate MSE, RMSE, MAE, and IOA. Aditionally, return number of used values for calculation."""
     mse = mean_squared_error(a, b, dim)
     rmse = np.sqrt(mse)
     mae = mean_absolute_error(a, b, dim)
+    ioa = index_of_agreement(a, b, dim)
     n = (a - b).notnull().sum(dim)
-    return {"mse": mse, "rmse": rmse, "mae": mae, "n": n}
+    return {"mse": mse, "rmse": rmse, "mae": mae, "ioa": ioa, "n": n}
 
 
 def mann_whitney_u_test(data: pd.DataFrame, reference_col_name: str, **kwargs):
diff --git a/mlair/helpers/testing.py b/mlair/helpers/testing.py
index 9820b4956dac09e213df3b9addc317a00ee381b8..eb8982ae3625cfccedf894717eebf299faffb3ee 100644
--- a/mlair/helpers/testing.py
+++ b/mlair/helpers/testing.py
@@ -105,7 +105,10 @@ def get_all_args(*args, remove=None, add=None):
     return res
 
 
-def check_nested_equality(obj1, obj2):
+def check_nested_equality(obj1, obj2, precision=None):
+    """Check for equality in nested structures. Use precision to indicate number of decimals to check for consistency"""
+
+    assert precision is None or isinstance(precision, int)
 
     try:
         print(f"check type {type(obj1)} and {type(obj2)}")
@@ -116,22 +119,38 @@ def check_nested_equality(obj1, obj2):
             assert len(obj1) == len(obj2)
             for pos in range(len(obj1)):
                 print(f"check pos {obj1[pos]} and {obj2[pos]}")
-                assert check_nested_equality(obj1[pos], obj2[pos]) is True
+                assert check_nested_equality(obj1[pos], obj2[pos], precision) is True
         elif isinstance(obj1, dict):
             print(f"check keys {obj1.keys()} and {obj2.keys()}")
             assert sorted(obj1.keys()) == sorted(obj2.keys())
             for k in obj1.keys():
                 print(f"check pos {obj1[k]} and {obj2[k]}")
-                assert check_nested_equality(obj1[k], obj2[k]) is True
+                assert check_nested_equality(obj1[k], obj2[k], precision) is True
         elif isinstance(obj1, xr.DataArray):
-            print(f"check xr {obj1} and {obj2}")
-            assert xr.testing.assert_equal(obj1, obj2) is None
+            if precision is None:
+                print(f"check xr {obj1} and {obj2}")
+                assert xr.testing.assert_equal(obj1, obj2) is None
+            else:
+                print(f"check xr {obj1} and {obj2} with precision {precision}")
+                assert xr.testing.assert_allclose(obj1, obj2, atol=10**(-precision)) is None
         elif isinstance(obj1, np.ndarray):
-            print(f"check np {obj1} and {obj2}")
-            assert np.testing.assert_array_equal(obj1, obj2) is None
+            if precision is None:
+                print(f"check np {obj1} and {obj2}")
+                assert np.testing.assert_array_equal(obj1, obj2) is None
+            else:
+                print(f"check np {obj1} and {obj2} with precision {precision}")
+                assert np.testing.assert_array_almost_equal(obj1, obj2, decimal=precision) is None
         else:
-            print(f"check equal {obj1} and {obj2}")
-            assert obj1 == obj2
+            if isinstance(obj1, (int, float)) and isinstance(obj2, (int, float)):
+                if precision is None:
+                    print(f"check number equal {obj1} and {obj2}")
+                    assert np.testing.assert_equal(obj1, obj2) is None
+                else:
+                    print(f"check number equal {obj1} and {obj2} with precision {precision}")
+                    assert np.testing.assert_almost_equal(obj1, obj2, decimal=precision) is None
+            else:
+                print(f"check equal {obj1} and {obj2}")
+                assert obj1 == obj2
     except AssertionError:
         return False
     return True
diff --git a/mlair/run_modules/post_processing.py b/mlair/run_modules/post_processing.py
index 8f6bf05d29b2534e4918d72aa59f91aace0ec982..827a53ce869af7d7935f375c6c5007624f40cc66 100644
--- a/mlair/run_modules/post_processing.py
+++ b/mlair/run_modules/post_processing.py
@@ -86,6 +86,7 @@ class PostProcessing(RunEnvironment):
         self.skill_scores = None
         self.feature_importance_skill_scores = None
         self.uncertainty_estimate = None
+        self.block_mse_per_station = None
         self.competitor_path = self.data_store.get("competitor_path")
         self.competitors = to_list(self.data_store.get_default("competitors", default=[]))
         self.forecast_indicator = "nn"
@@ -148,8 +149,10 @@ class PostProcessing(RunEnvironment):
         block_length = self.data_store.get_default("block_length", default="1m", scope="uncertainty_estimate")
         evaluate_competitors = self.data_store.get_default("evaluate_competitors", default=True,
                                                            scope="uncertainty_estimate")
-        block_mse = self.calculate_block_mse(evaluate_competitors=evaluate_competitors, separate_ahead=separate_ahead,
-                                             block_length=block_length)
+        block_mse, block_mse_per_station = self.calculate_block_mse(evaluate_competitors=evaluate_competitors,
+                                                                    separate_ahead=separate_ahead,
+                                                                    block_length=block_length)
+        self.block_mse_per_station = block_mse_per_station
         self.uncertainty_estimate = statistics.create_n_bootstrap_realizations(
             block_mse, dim_name_time=self.index_dim, dim_name_model=self.model_type_dim,
             dim_name_boots=self.uncertainty_estimate_boot_dim, n_boots=n_boots)
@@ -227,12 +230,16 @@ class PostProcessing(RunEnvironment):
                 # calc mse for each block (single station)
                 mse = errors.resample(indexer={index_dim: block_length}).mean(skipna=True)
                 collector.append(mse.assign_coords({coll_dim: station}))
+
+        # combine all mse blocks
+        mse_blocks_per_station = xr.concat(collector, dim=coll_dim)
         # calc mse for each block (average over all stations)
-        mse_blocks = xr.concat(collector, dim=coll_dim).mean(dim=coll_dim, skipna=True)
+        mse_blocks = mse_blocks_per_station.mean(dim=coll_dim, skipna=True)
         # average also on ahead steps
         if separate_ahead is False:
             mse_blocks = mse_blocks.mean(dim=self.ahead_dim, skipna=True)
-        return mse_blocks
+            mse_blocks_per_station = mse_blocks_per_station.mean(dim=self.ahead_dim, skipna=True)
+        return mse_blocks, mse_blocks_per_station
 
     def create_error_array(self, data):
         """Calculate squared error of all given time series in relation to observation."""
diff --git a/test/test_helpers/test_statistics.py b/test/test_helpers/test_statistics.py
index a3f645937258604c2dbbda07b36a58d83e879065..06b77935cc06cdf0e2dfe5865e9e134f22095074 100644
--- a/test/test_helpers/test_statistics.py
+++ b/test/test_helpers/test_statistics.py
@@ -6,7 +6,7 @@ import xarray as xr
 from mlair.helpers.statistics import standardise, standardise_inverse, standardise_apply, centre, centre_inverse, \
     centre_apply, apply_inverse_transformation, min_max, min_max_inverse, min_max_apply, log, log_inverse, log_apply, \
     create_single_bootstrap_realization, calculate_average, create_n_bootstrap_realizations, mean_squared_error, \
-    mean_absolute_error, calculate_error_metrics
+    mean_absolute_error, calculate_error_metrics, index_of_agreement
 from mlair.helpers.testing import check_nested_equality
 
 lazy = pytest.lazy_fixture
@@ -297,6 +297,32 @@ class TestMeanAbsoluteError:
         assert xr.testing.assert_equal(mean_absolute_error(x_array1, x_array2, "value"), expected) is None
 
 
+class TestIndexOfAgreement:
+
+    def test_index_of_agreement(self):
+        d1 = np.array([1, 2, 3, 4, 5])
+        d2 = np.array([1, 2, 3, 4, 5])
+        assert index_of_agreement(d1, d2) == 1
+        d1 = np.array([1, 2, 3, 4, 7])
+        assert np.testing.assert_almost_equal(index_of_agreement(d1, d2), 0.9333, 3) is None
+        d1 = np.array([3, 4, 5, 6, 7])
+        assert np.testing.assert_almost_equal(index_of_agreement(d1, d2), 0.687, 3) is None
+
+    def test_index_of_agreement_xarray(self):
+        d1 = np.array([np.array([1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])])
+        d2 = np.array([np.array([2, 4, 3, 4, 6]), np.array([2, 3, 3, 4, 5]), np.array([0, 1, 3, 4, 5])])
+        shape = d1.shape
+        coords = {'index': range(shape[0]), 'value': range(shape[1])}
+        x_array1 = xr.DataArray(d1, coords=coords, dims=coords.keys())
+        x_array2 = xr.DataArray(d2, coords=coords, dims=coords.keys())
+        expected = xr.DataArray(np.array([1, 1, 1]), coords={"index": [0, 1, 2]}, dims=["index"])
+        res = index_of_agreement(x_array1, x_array1, dim="value")
+        assert xr.testing.assert_equal(res, expected) is None
+        expected = xr.DataArray(np.array([0.8478, 0.9333, 0.9629]), coords={"index": [0, 1, 2]}, dims=["index"])
+        res = index_of_agreement(x_array1, x_array2, dim="value")
+        assert xr.testing.assert_allclose(res, expected, atol=10**-2) is None
+
+
 class TestCalculateErrorMetrics:
 
     def test_calculate_error_metrics(self):
@@ -309,20 +335,13 @@ class TestCalculateErrorMetrics:
         expected = {"mse": xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
                     "rmse": np.sqrt(xr.DataArray(np.array([1, 2, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"])),
                     "mae": xr.DataArray(np.array([1, 4./3, 0, 0, 1./3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
+                    "ioa": xr.DataArray(np.array([0.3721, 0.4255, 1, 1, 0.4706]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"]),
                     "n": xr.DataArray(np.array([3, 3, 3, 3, 3]), coords={"value": [0, 1, 2, 3, 4]}, dims=["value"])}
-        assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "index")) is True
+        assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "index"), 3) is True
 
         expected = {"mse": xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]),
                     "rmse": np.sqrt(xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])),
                     "mae": xr.DataArray(np.array([0.8, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"]),
+                    "ioa": xr.DataArray(np.array([0.8478, 0.9333, 0.9629]), coords={"index": [0, 1, 2]}, dims=["index"]),
                     "n": xr.DataArray(np.array([5, 5, 5]), coords={"index": [0, 1, 2]}, dims=["index"])}
-        assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "value")) is True
-
-
-
-        # expected = xr.DataArray(np.array([1.2, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])
-        # assert xr.testing.assert_equal(mean_squared_error(x_array1, x_array2, "value"), expected) is None
-        #
-        #
-        # expected = xr.DataArray(np.array([0.8, 0.4, 0.4]), coords={"index": [0, 1, 2]}, dims=["index"])
-        # assert xr.testing.assert_equal(mean_absolute_error(x_array1, x_array2, "value"), expected) is None
\ No newline at end of file
+        assert check_nested_equality(expected, calculate_error_metrics(x_array1, x_array2, "value"), 3) is True
diff --git a/test/test_helpers/test_testing_helpers.py b/test/test_helpers/test_testing_helpers.py
index 9b888a91a7c88a31764bd272632b1aab8e6e170f..8a4bdb92e41f14a8680ea797dcd74db74bd95c9c 100644
--- a/test/test_helpers/test_testing_helpers.py
+++ b/test/test_helpers/test_testing_helpers.py
@@ -58,16 +58,25 @@ class TestNestedEquality:
         assert check_nested_equality("3", 3) is False
         assert check_nested_equality("3", "3") is True
         assert check_nested_equality(None, None) is True
+        assert check_nested_equality(3.92, 3.9, 1) is True
+        assert check_nested_equality(3.92, 3.9, 2) is False
 
     def test_nested_equality_xarray(self):
         obj1 = xr.DataArray(np.random.randn(2, 3), dims=('x', 'y'), coords={'x': [10, 20], 'y': [0, 10, 20]})
         obj2 = xr.ones_like(obj1) * obj1
         assert check_nested_equality(obj1, obj2) is True
+        obj2 = obj2 * 1.0001
+        assert check_nested_equality(obj1, obj2) is False
+        assert check_nested_equality(obj1, obj2, 3) is True
 
     def test_nested_equality_numpy(self):
         obj1 = np.random.randn(2, 3)
         obj2 = obj1 * 1
         assert check_nested_equality(obj1, obj2) is True
+        obj2 = obj2 * 1.001
+        assert check_nested_equality(obj1, obj2) is False
+        assert check_nested_equality(obj1, obj2, 5) is False
+        assert check_nested_equality(obj1, obj2, 1) is True
 
     def test_nested_equality_list_tuple(self):
         assert check_nested_equality([3, 3], [3, 3]) is True