diff --git a/mlair/data_handler/data_handler_wrf_chem.py b/mlair/data_handler/data_handler_wrf_chem.py index df1e958d6acb7b0b5d5801f99a7077a3ab0a9a13..36fbb0c7e473eb2fddbc7253c3344eda7e112e6a 100644 --- a/mlair/data_handler/data_handler_wrf_chem.py +++ b/mlair/data_handler/data_handler_wrf_chem.py @@ -66,6 +66,7 @@ class BaseWrfChemDataLoader: staged_rotation_opts: Dict = DEFAULT_STAGED_ROTATION_opts, vars_to_rotate: Tuple[Tuple[Tuple[str, str], Tuple[str, str]]] = DEFAULT_VARS_TO_ROTATE, staged_dimension_mapping=None, stag_ending='_stag', + date_format_of_nc_file=None, ): # super().__init__() @@ -89,6 +90,9 @@ class BaseWrfChemDataLoader: self.start_time = start_time self.end_time = end_time + if date_format_of_nc_file is None: + date_format_of_nc_file = "%Y-%m-%d" + self.date_format_of_nc_file = date_format_of_nc_file # if rechunk_values is None: # self.rechunk_values = {self.time_dim_name: 1} @@ -128,14 +132,14 @@ class BaseWrfChemDataLoader: if (self.start_time is None) and (self.end_time is None): return os.path.join(self.data_path, self.common_file_starter + '*') elif (self.start_time is not None) and (self.end_time is not None): - path_list = [] + path_list = set() for day in pd.date_range(self.start_time, self.end_time): - path_list += sorted(glob.glob(os.path.join(self.data_path, - self.common_file_starter) + '*' + day.strftime( - "%Y-%m-%d") + '*' - )) + path_list.add(sorted(glob.glob(os.path.join(self.data_path, + self.common_file_starter) + '*' + day.strftime( + self.date_format_of_nc_file) + '*' + ))[0]) - return path_list + return list(path_list) else: raise ValueError(f"`start_time' and `end_time' must both be given or None.") @@ -150,7 +154,7 @@ class BaseWrfChemDataLoader: parallel=True, decode_cf=False) else: data = xr.open_mfdataset(paths=self.dataset_search_str, combine='nested', concat_dim=self.time_dim_name, - parallel=True, decode_cf=False, preprocess=self.preprocess_fkt_for_loader) + parallel=True, decode_cf=False, preprocess=self.preprocess_fkt_for_loader, ) data = xr.decode_cf(data) # if self.variables is not None: # data = self.preprocess_fkt_for_loader(data) @@ -447,7 +451,9 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): def __init__(self, *args, external_coords_file=None, var_logical_z_coord_selector=None, targetvar_logical_z_coord_selector=None, - rechunk_values=None, **kwargs): + rechunk_values=None, + date_format_of_nc_file=None, + **kwargs): self.external_coords_file = external_coords_file self.var_logical_z_coord_selector = self._ret_z_coord_select_if_valid(var_logical_z_coord_selector, as_input=True) @@ -458,6 +464,7 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): self.targetvar_logical_z_coord_selector) self.__loader = None self.rechunk_values = rechunk_values + self.date_format_of_nc_file = date_format_of_nc_file super().__init__(*args, **kwargs) @staticmethod @@ -513,6 +520,7 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): z_coord_selector=self._joint_z_coord_selector, start_time=self.start, end_time=self.end, + date_format_of_nc_file=self.date_format_of_nc_file ) self.__loader = loader