BUG: NaN error during PostProcessing
There is an error regarding NaNs during PostProcessing when using AIRBASE data. Run the following to reproduce the error:

```python
__author__ = "Lukas Leufen"
__date__ = '2019-11-14'

import argparse

from mlair.workflows import DefaultWorkflow
from mlair.data_handler.data_handler_mixed_sampling import DataHandlerMixedSampling


def load_stations():
    import json
    try:
        # filename = 'supplement/station_list_north_german_plain_rural_UBA.json'
        filename = 'supplement/station_list_north_german_plain_rural_AIRBASE.json'
        with open(filename, 'r') as jfile:
            stations = json.load(jfile)
    except FileNotFoundError:
        stations = None
    return stations


stats = {'o3': 'dma8eu', 'no': 'dma8eu', 'no2': 'dma8eu',
         'relhum': 'average_values', 'u': 'average_values', 'v': 'average_values',
         'cloudcover': 'average_values', 'pblheight': 'maximum', 'temp': 'maximum'}
data_origin = {'o3': '', 'no': '', 'no2': '',
               'relhum': 'REA', 'u': 'REA', 'v': 'REA',
               'cloudcover': 'REA', 'pblheight': 'REA', 'temp': 'REA'}


def main(parser_args):
    workflow = DefaultWorkflow(stations=load_stations(),
                               network="AIRBASE",
                               evaluate_bootstraps=False,
                               plot_list=["PlotAvailability", "PlotStationMap"],
                               data_origin=data_origin,
                               data_handler=DataHandlerMixedSampling,
                               interpolation_limit=1,
                               overwrite_local_data=True,
                               sampling="daily",
                               sampling_inputs="hourly",
                               statistics_per_var=stats,
                               create_new_model=True,
                               train_model=True,
                               epochs=2,
                               window_history_size=48,
                               **parser_args.__dict__)
    workflow.run()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
                        help="set experiment date as string")
    args = parser.parse_args()
    main(args)
```

Error message:

```
Traceback (most recent call last):
  File "run_T1B_experiments.py", line 49, in <module>
    main(args)
  File "run_T1B_experiments.py", line 41, in main
    workflow.run()
  File "/home/l.leufen/mlair/mlair/workflows/abstract_workflow.py", line 32, in run
    stage(**self._registry_kwargs[pos])
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 81, in __init__
    self._run()
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 101, in _run
    self.skill_scores = self.calculate_skill_scores()
  File "/home/l.leufen/mlair/mlair/run_modules/post_processing.py", line 563, in calculate_skill_scores
    self.window_lead_time)
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 239, in climatological_skill_scores
    external_data=external_data).values.flatten())
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 246, in _climatological_skill_score
    return self.__getattribute__(f"skill_score_mu_case_{mu_type}")(data, observation_name, forecast_name, **kwargs)
  File "/home/l.leufen/mlair/mlair/helpers/statistics.py", line 341, in skill_score_mu_case_4
    r_mu, p_mu = stats.pearsonr(data.loc[..., observation_name], data.loc[..., observation_name + "X"])
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/stats/stats.py", line 3531, in pearsonr
    normym = linalg.norm(ym)
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/scipy/linalg/misc.py", line 142, in norm
    a = np.asarray_chkfinite(a)
  File "/home/l.leufen/mlair/venv/lib/python3.6/site-packages/numpy/lib/function_base.py", line 499, in asarray_chkfinite
    "array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs
```
issue