diff --git a/mlair/data_handler/data_handler_wrf_chem.py b/mlair/data_handler/data_handler_wrf_chem.py index c89442c0b4e19a0f523c89c31e3d7abd3c58065b..902d55ffe069dcc4937e644de5d7e6f72195ef65 100644 --- a/mlair/data_handler/data_handler_wrf_chem.py +++ b/mlair/data_handler/data_handler_wrf_chem.py @@ -311,18 +311,6 @@ class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader): self._nearest_coords = None self.external_coords_file = external_coords_file - # self.open_data() - # - # if self.physical_t_coord_name != self.time_dim_name: - # self.assign_coords( - # {self.physical_t_coord_name: (self.time_dim_name, self._data[self.physical_t_coord_name].values)}) - # - # self._set_dims_as_coords() - # if external_coords_file is not None: - # self._apply_external_coordinates() - # self.apply_staged_transormation() - # self.rechunk_data(self.rechunk_values) - # self._set_geoinfos() logging.debug("SingleGridColumnWrfChemDataLoader Initialised") def __enter__(self): @@ -344,7 +332,6 @@ class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader): self.data.close() gc.collect() - @TimeTrackingWrapper def _set_geoinfos(self): # identify nearest coords self._set_nearest_icoords(dim=[self.logical_x_coord_name, self.logical_y_coord_name]) @@ -505,24 +492,18 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): except ValueError as e: raise ValueError(f"Pass an iterable with two items; (station, path)") lat, lon = self.coord_str2coords(station) - with TimeTracking(name="Initialise loader (sgcWRFdh)"): - # preprocess_fkt_for_loader = self.preprocess_fkt_for_loader() - - loader = SingleGridColumnWrfChemDataLoader((lat, lon), - data_path=path, - external_coords_file=self.external_coords_file, - time_dim_name=self.time_dim, - rechunk_values=self.rechunk_values, - variables=self.variables, - z_coord_selector=self._joint_z_coord_selector, - start_time=self.start, - end_time=self.end, - - # preprocess_open_mfdataset=preprocess_fkt_for_loader, - ) + loader = SingleGridColumnWrfChemDataLoader((lat, lon), + data_path=path, + external_coords_file=self.external_coords_file, + time_dim_name=self.time_dim, + rechunk_values=self.rechunk_values, + variables=self.variables, + z_coord_selector=self._joint_z_coord_selector, + start_time=self.start, + end_time=self.end, + ) self.__loader = loader - @TimeTrackingWrapper def load_data(self, path, station, statistics_per_var, sampling, station_type=None, network=None, store_data_locally=False, data_origin: Dict = None, start=None, end=None): @@ -540,11 +521,9 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): # transpose dataarray: set first three fixed and keep remaining as is data = data.transpose(self.iter_dim, self.time_dim, self.target_dim, ...) - with ProgressBar(), TimeTracking(name="DataHandlerSingleGridColumn.load_data: compute data for slice_prep"): + with ProgressBar(), TimeTracking(name=f"{self.station}: compute data for slice_prep"): data = dask.compute(self._slice_prep(data, start=start, end=end))[0] - # ToDo - # data should somehow look like this: - # < xarray.DataArray(Stations: 1, datetime: 7670, variables: 9) (From DataHandlerSingleStation) + # ToDo add metadata meta = None return data, meta @@ -581,13 +560,6 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): """ return self.label.squeeze([self.iter_dim, self.target_dim]).transpose(self.time_dim, self.window_dim, ...).copy() - # def set_inputs_and_targets(self): - # # inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)}) - # # targets = self._data.sel( - # # {self.target_dim: helpers.to_list(self.target_var)}) # ToDo: is it right to expand this dim?? - # # self.input_data = inputs - # # self.target_data = targets - # raise NotImplementedError @TimeTrackingWrapper def make_samples(self): self.make_history_window(self.target_dim, self.window_history_size, self.time_dim) @@ -643,22 +615,6 @@ class DataHandlerSingleGridColumn(DataHandlerSingleStation): """ return all(self._transformation) - # @TimeTrackingWrapper - # def setup_samples(self): - # """ - # Setup samples. This method prepares and creates samples X, and labels Y. - # """ - # data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling, - # self.station_type, self.network, self.store_data_locally, self.data_origin, - # self.start, self.end) - # self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method, - # limit=self.interpolation_limit) - # # self.set_inputs_and_targets() - # # if self.do_transformation is True: - # # self.call_transform() - # # self.make_samples() - # raise NotImplementedError - class DataHandlerWRF(DefaultDataHandler): """Data handler using DataHandlerSingleGridColumn.""" @@ -766,23 +722,14 @@ class DataHandlerSectorGrid(DataHandlerSingleGridColumn): drop=True).values sector_history.loc[{self.time_dim: time_index_of_sector}] = sec_data_history.sel( {self.time_dim: time_index_of_sector}).values - # sector_history = xr.where(sector_allocation.squeeze() == sect, - # sec_data_history.sel({self.time_dim: sector_history[self.time_dim]}), - # sector_history * 1.) - sector_history = sector_history.assign_coords({self.target_dim: sector_history_var_names}) - # with ProgressBar(): - # logging.info(f"compute `sector_history' for modify_history") - # sector_history = sector_history.compute() - combined_history = xr.concat([self.history, sector_history], dim=self.target_dim) return combined_history else: return self.history - @TimeTrackingWrapper def get_section_data_from_circle(self, grid_data, loader, sect, compute=True): sec_data = grid_data.where( self.windsector.is_in_sector(sect, loader.geo_infos.bearing.drop('points').squeeze())) @@ -820,29 +767,20 @@ class DataHandlerSectorGrid(DataHandlerSingleGridColumn): else: return grid_data - @TimeTrackingWrapper - def apply_aggregation_method_on_sector_data(self, data, loader): + @staticmethod + def apply_aggregation_method_on_sector_data(data, loader): data = data.mean(dim=(loader.logical_x_coord_name, loader.logical_y_coord_name)) return data - @TimeTrackingWrapper def compute_wind_dir_of_interest(self): wind_dir_of_intrest = self.history.sel({self.target_dim: self.wind_dir_name, self.window_dim: 0}) return wind_dir_of_intrest - @TimeTrackingWrapper def get_applied_transdormation_on_wind_sector_edges(self): ws_edges = self._get_left_and_right_wind_sector_edges(return_as='xr.da', dim=self.wind_sector_edge_dim_name) ws_edges = self.apply_transformation_on_data(ws_edges) return ws_edges - # def set_inputs_and_targets(self): - # inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables) + helpers.to_list(self.sector_dim_name)}) - # targets = self._data.sel( - # {self.target_dim: helpers.to_list(self.target_var)}) # ToDo: is it right to expand this dim?? - # self.input_data = inputs - # self.target_data = targets - class DataHandlerMainSectWRF(DefaultDataHandler): """Data handler using DataHandlerSectorGrid."""