diff --git a/mlair/data_handler/data_handler_wrf_chem.py b/mlair/data_handler/data_handler_wrf_chem.py
index 4bf0ca1e0d517d97fb9ddb1aa7d36d762fa69541..15837977c7a0c8f937daa697075134ab9d44b7d5 100644
--- a/mlair/data_handler/data_handler_wrf_chem.py
+++ b/mlair/data_handler/data_handler_wrf_chem.py
@@ -77,7 +77,7 @@ class BaseWrfChemDataLoader:
                  staged_rotation_opts: Dict = DEFAULT_STAGED_ROTATION_opts,
                  vars_to_rotate: Tuple[Tuple[Tuple[str, str], Tuple[str, str]]] = DEFAULT_VARS_TO_ROTATE,
                  staged_dimension_mapping=None, stag_ending='_stag',
-                 date_format_of_nc_file=None,
+                 date_format_of_nc_file=None, vars_for_unit_conv: Dict = None
                  ):
         """
         Initialisze data loader
@@ -141,6 +141,10 @@ class BaseWrfChemDataLoader:
         else:
             self.staged_dimension_mapping = staged_dimension_mapping
 
+        self.vars_for_unit_conv = vars_for_unit_conv
+        # exponents n of the supported parts-per units (1 part in 10**n); used by convert_chem
+        self._parts_per_exponents = {'ppmv': 6, 'ppbv': 9, 'pptv': 12, 'ppqv': 15}
+
         # internal
         self._X = None
         self._Y = None
@@ -189,6 +193,19 @@ class BaseWrfChemDataLoader:
         else:
             raise ValueError(f"`start_time' and `end_time' must both be given or None.")
 
+    def convert_chem(self, data, from_unit, to_unit):
+        """
+        Convert data between the parts-per units listed in `self._parts_per_exponents`.
+
+        :param data: values given in `from_unit`
+        :param from_unit: unit of `data`; must be a key of `self._parts_per_exponents`
+        :param to_unit: target unit; must be a key of `self._parts_per_exponents`
+        :return: `data` multiplied by 10**(exponent of `to_unit` - exponent of `from_unit`)
+        :raises KeyError: if one of the units is not a key of `self._parts_per_exponents`
+        """
+        convert_exponent = self._parts_per_exponents[to_unit] - self._parts_per_exponents[from_unit]
+        return data * 10**convert_exponent
+
     @TimeTrackingWrapper
     def open_data(self):
         """
@@ -492,6 +509,9 @@ class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader):
         self.apply_staged_transormation()
         self._set_geoinfos()
 
+        if self.vars_for_unit_conv is not None:
+            self.convert_chemical_units()
+
         if self.lazy is False:
             self.reset_data_by_other(self.apply_toarstats())
         else:
@@ -544,6 +564,20 @@ class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader):
         hash = "".join([str(self.__getattribute__(e)) for e in self._hash_list()]).encode()
         return hashlib.md5(hash).hexdigest()
 
+    def convert_chemical_units(self):
+        """
+        Convert all variables listed in `self.vars_for_unit_conv` to their target units.
+
+        `self.vars_for_unit_conv` maps variable names to target units. The current unit is read from the
+        variable's `units` attribute, which is set to the target unit after the conversion.
+        """
+        with xr.set_options(keep_attrs=True):
+            for var, to_unit in self.vars_for_unit_conv.items():
+                from_unit = self.data[var].attrs['units']
+                data = self.convert_chem(self.data[var], from_unit, to_unit)
+                data.attrs['units'] = to_unit
+                self.data[var] = data
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.data.close()
         gc.collect()
@@ -816,6 +850,8 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation):
                  time_zone=None,
                  target_time_type=None,
                  input_output_sampling4toarstats : tuple = None,
+                 experiment_path: str = None,
+                 vars_for_unit_conv: Dict = None,
                  **kwargs):
         self.external_coords_file = external_coords_file
         self.var_logical_z_coord_selector = self._return_z_coord_select_if_valid(var_logical_z_coord_selector,
@@ -833,6 +869,8 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation):
         self.time_zone = time_zone
         self.target_time_type = target_time_type
         self.input_output_sampling4toarstats = input_output_sampling4toarstats
+        self.experiment_path = experiment_path
+        self.vars_for_unit_conv = vars_for_unit_conv
         super().__init__(*args, **kwargs)
 
     @staticmethod
@@ -920,6 +958,7 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation):
             target_time_type=self.target_time_type,
             station=self.station,
             lazy_preprocessing=True,
+            vars_for_unit_conv=self.vars_for_unit_conv,
         )
         self.__loader = loader
 
@@ -1209,8 +1248,40 @@ class DataHandlerSectorGrid(DataHandlerSingleGridColumn):
                                      wind_sector_edge_dim_name=self.wind_sector_edge_dim_name)
         self._added_vars = []
         self.wind_dir_name = None
+        self._wind_upstream_sector_by_name = None
         super().__init__(*args, **kwargs)
 
+    def get_wind_upstream_sector_by_name(self):
+        return self.wind_upstream_sector_by_name
+
+    @property
+    def wind_upstream_sector_by_name(self):
+        return self._wind_upstream_sector_by_name
+
+    @wind_upstream_sector_by_name.setter
+    def wind_upstream_sector_by_name(self, wind_upstream_sector_by_name: xr.DataArray):
+        self._wind_upstream_sector_by_name = wind_upstream_sector_by_name
+
+    def _store_wind_upstream_sector_by_name(self):
+        """
+        Store the upstream wind sector allocation as netCDF file below `<experiment_path>/data`.
+
+        Nothing is stored if no `experiment_path` is set.
+        """
+        if self.experiment_path is None:
+            # `experiment_path` defaults to None; os.path.join would raise a TypeError in that case
+            return
+        data_path = os.path.join(self.experiment_path, "data")
+        os.makedirs(data_path, exist_ok=True)
+        file_name = os.path.join(data_path,
+                                 f"{self.station[0]}_{self.start}_{self.end}_upstream_wind_sector.nc")
+        wind_upstream_sector_by_name = self.wind_upstream_sector_by_name
+        # promote coordinates that are not yet dimensions to dimensions (public API instead of `coords._names`)
+        dims_to_expand = list(set(wind_upstream_sector_by_name.coords) - set(wind_upstream_sector_by_name.dims))
+        wind_upstream_sector_by_name = wind_upstream_sector_by_name.expand_dims(dims_to_expand)
+        wind_upstream_sector_by_name = wind_upstream_sector_by_name.to_dataset(self.iter_dim)
+        wind_upstream_sector_by_name.to_netcdf(file_name)
+
     @TimeTrackingWrapper
     def extract_data_from_loader(self, loader):
         wind_dir_name = self._get_wind_dir_var_name(loader)
@@ -1274,12 +1345,7 @@ class DataHandlerSectorGrid(DataHandlerSingleGridColumn):
     @TimeTrackingWrapper
     def modify_history(self):
         if self.transformation_is_applied:
-            ws_edges = self.get_applied_transdormation_on_wind_sector_edges()
-            wind_dir_of_interest = self.compute_wind_dir_of_interest()
-            sector_allocation = self.windsector.get_sect_of_value(value=wind_dir_of_interest, external_edges=ws_edges)
-            sector_allocation = sector_allocation.squeeze()
-            existing_sectors = np.unique(sector_allocation.data)
-            sector_history, sector_history_var_names = self.setup_history_like_xr_and_var_names()
+            existing_sectors, sector_allocation, sector_history, sector_history_var_names = self.prepare_sector_allocation_and_history()
             with self.loader as loader, TimeTracking(name="loader in modify history"):
                 # setup sector history
                 grid_data = self.preselect_and_transform_neighbouring_data_based_on_radius(loader)
@@ -1295,6 +1361,24 @@ class DataHandlerSectorGrid(DataHandlerSingleGridColumn):
         else:
             return self.history
 
+    def prepare_sector_allocation_and_history(self):
+        """
+        Allocate the wind direction of interest to wind sectors and set up the sector history.
+
+        The squeezed sector allocation is kept as `wind_upstream_sector_by_name` and handed to
+        `_store_wind_upstream_sector_by_name`.
+
+        :return: existing sectors, sector allocation, sector history and the sector history's variable names
+        """
+        ws_edges = self.get_applied_transdormation_on_wind_sector_edges()
+        wind_dir_of_interest = self.compute_wind_dir_of_interest()
+        sector_allocation = self.windsector.get_sect_of_value(value=wind_dir_of_interest, external_edges=ws_edges)
+        sector_allocation = sector_allocation.squeeze()
+        existing_sectors = np.unique(sector_allocation.data)
+        sector_history, sector_history_var_names = self.setup_history_like_xr_and_var_names()
+        self.wind_upstream_sector_by_name = sector_allocation
+        self._store_wind_upstream_sector_by_name()
+        return existing_sectors, sector_allocation, sector_history, sector_history_var_names
+
     def setup_history_like_xr_and_var_names(self, var_name_suffix="Sect"):
         """
         Returns ones_like xarray from self.history and list of variable names which can be modified by passing a
@@ -1406,11 +1490,7 @@ class DataHandler3SectorGrid(DataHandlerSectorGrid):
     @TimeTrackingWrapper
     def modify_history(self):
         if self.transformation_is_applied:
-            ws_edges = self.get_applied_transdormation_on_wind_sector_edges()
-            wind_dir_of_interest = self.compute_wind_dir_of_interest()
-            sector_allocation = self.windsector.get_sect_of_value(value=wind_dir_of_interest, external_edges=ws_edges)
-            existing_sectors = np.unique(sector_allocation.data)
-            sector_history, sector_history_var_names = self.setup_history_like_xr_and_var_names()
+            existing_sectors, sector_allocation, sector_history, sector_history_var_names = self.prepare_sector_allocation_and_history()
             sector_history_left, sector_history_var_names_left = self.setup_history_like_xr_and_var_names(var_name_suffix="SectLeft")
             sector_history_right, sector_history_var_names_right = self.setup_history_like_xr_and_var_names(var_name_suffix="SectRight")
             with self.loader as loader, TimeTracking(name="loader in modify history"):