Skip to content

BUG: NaN error during PostProcessing

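PostProcessing fails with `ValueError: array must not contain infs or NaNs` when using AIRBASE data. The error is raised by `scipy.stats.pearsonr` during the climatological skill score calculation (see the traceback below). Run the following script to reproduce the error: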

__author__ = "Lukas Leufen"
__date__ = '2019-11-14'

import argparse

from mlair.workflows import DefaultWorkflow
from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling

def load_stations():
    """Load the station list from the AIRBASE supplement JSON file.

    :return: the deserialized content of the JSON file, or None if the file does not exist
    """
    import json

    # Alternative station list, kept for reference:
    # station_file = 'supplement/station_list_north_german_plain_rural_UBA.json'
    station_file = 'supplement/station_list_north_german_plain_rural_AIRBASE.json'
    try:
        handle = open(station_file, 'r')
    except FileNotFoundError:
        # A missing station file is not an error here; the caller receives None instead.
        return None
    with handle:
        return json.load(handle)

# Statistic name per variable; handed to the workflow as `statistics_per_var` in main().
# Variables that share a statistic are grouped; key order matches the original literal.
stats = {
    **dict.fromkeys(('o3', 'no', 'no2'), 'dma8eu'),
    **dict.fromkeys(('relhum', 'u', 'v', 'cloudcover'), 'average_values'),
    **dict.fromkeys(('pblheight', 'temp'), 'maximum'),
}

# Data origin per variable; handed to the workflow as `data_origin` in main().
# The three chemical variables use an empty origin string, all other variables use 'REA'.
data_origin = {
    **dict.fromkeys(('o3', 'no', 'no2'), ''),
    **dict.fromkeys(('relhum', 'u', 'v', 'cloudcover', 'pblheight', 'temp'), 'REA'),
}

def main(parser_args):
    """Set up a DefaultWorkflow for the AIRBASE network and run it.

    :param parser_args: parsed command-line arguments; every entry of its ``__dict__`` is
        forwarded to DefaultWorkflow as an additional keyword argument
    """
    settings = dict(
        stations=load_stations(),
        network="AIRBASE",
        evaluate_bootstraps=False,
        plot_list=["PlotAvailability", "PlotStationMap"],
        data_origin=data_origin,
        data_handler=DataHandlerMixedSampling,
        interpolation_limit=1,
        overwrite_local_data=True,
        sampling="daily",
        sampling_inputs="hourly",
        statistics_per_var=stats,
        create_new_model=True,
        train_model=True,
        epochs=2,
        window_history_size=48,
    )
    # Both mappings are unpacked in the same call so that a command-line argument colliding
    # with one of the fixed settings still raises a TypeError instead of overriding it.
    experiment = DefaultWorkflow(**settings, **parser_args.__dict__)
    experiment.run()


if __name__ == "__main__":
    # Command-line entry point: the only option is the experiment date, which main()
    # forwards to the workflow together with the fixed settings.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--experiment_date',
        type=str,
        default=None,
        metavar='--exp_date',
        help="set experiment date as string",
    )
    main(cli.parse_args())

Error message:

Traceback (most recent call last):
  File "run_T1B_experiments.py", line 49, in <module>
    main(args)
  File "run_T1B_experiments.py", line 41, in main
    workflow.run()
  File "/home/l.leufen/mlair/mlair/workflows/abstract_workflow.py", line 32, in run
    stage(**self._registry_kwargs[pos])
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 81, in __init__
    self._run()
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 101, in _run
    self.skill_scores = self.calculate_skill_scores()
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 563, in calculate_skill_scores
    self.window_lead_time)
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 239, in climatological_skill_scores
    external_data=external_data).values.flatten())
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 246, in _climatological_skill_score
    return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 341, in skill_score_mu_case_4
    r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/stats/stats.py", line 3531, in pearsonr
    normym = linalg.norm(ym)
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/linalg/misc.py", line 142, in norm
    a = np.asarray_chkfinite(a)
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/numpy/lib/function_base.py", line 499, in asarray_chkfinite
    "array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs