diff --git a/mlair/data_handler/data_handler_wrf_chem.py b/mlair/data_handler/data_handler_wrf_chem.py index 2c90835f070a28bd30c24e4ebcffa48fac167d4d..59348b30413c87ec3838af3ea333a294e98cc7f3 100644 --- a/mlair/data_handler/data_handler_wrf_chem.py +++ b/mlair/data_handler/data_handler_wrf_chem.py @@ -10,16 +10,18 @@ import dask.array as da import os from mlair.helpers.geofunctions import haversine_dist from mlair.helpers.helpers import convert2xrda, remove_items +from mlair.helpers import TimeTrackingWrapper from typing import Tuple, Union, List, Dict from mlair.data_handler.abstract_data_handler import AbstractDataHandler +from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation import logging import cartopy.crs as ccrs float_np_xr = Union[float, np.ndarray, xr.DataArray, xr.Dataset] -class WrfChemDataHandler(AbstractDataHandler): +class BaseWrfChemDataLoader: DEFAULT_LOGICAL_TIME_COORD_NAME = 'Time' DEFAULT_LOGICAL_X_COORD_NAME = 'x' DEFAULT_LOGICAL_Y_COORD_NAME = 'y' @@ -32,11 +34,13 @@ class WrfChemDataHandler(AbstractDataHandler): DEFAULT_RECHUNK = {"XTIME": 1, "y": 36, "x": 40} def __init__(self, data_path: str, common_file_starter: str, time_dim_name: str = DEFAULT_LOGICAL_TIME_COORD_NAME, - rechunk_values: Dict = None, logical_x_coord_name: str = DEFAULT_LOGICAL_X_COORD_NAME, + rechunk_values: Dict = None, + logical_x_coord_name: str = DEFAULT_LOGICAL_X_COORD_NAME, logical_y_coord_name: str = DEFAULT_LOGICAL_Y_COORD_NAME, logical_z_coord_name: str = DEFAULT_LOGICAL_Z_COORD_NAME, physical_x_coord_name: str = DEFAULT_PHYSICAL_X_COORD_NAME, - physical_y_coord_name: str = DEFAULT_PHYSICAL_Y_COORD_NAME + physical_y_coord_name: str = DEFAULT_PHYSICAL_Y_COORD_NAME, + physical_t_coord_name: str = DEFAULT_PHYSICAL_TIME_COORD_NAME ): super().__init__() self.data_path = data_path @@ -49,6 +53,7 @@ class WrfChemDataHandler(AbstractDataHandler): self.physical_x_coord_name = physical_x_coord_name self.physical_y_coord_name = 
physical_y_coord_name + self.physical_t_coord_name = physical_t_coord_name if rechunk_values is None: self.rechunk_values = {self.time_dim_name: 1} @@ -75,6 +80,13 @@ class WrfChemDataHandler(AbstractDataHandler): # data = data.assign_coords({'XTIME': data.XTIME.values}) self._data = data + def assign_coords(self, coords, **coords_kwargs): + """ + Assign coords to BaseWrfChemDataLoader._data + + """ + self._data = self._data.assign_coords(coords, **coords_kwargs) + def rechunk_data(self, chunks=None, name_prefix='xarray-', token=None, lock=False): self._data = self._data.chunk(chunks=chunks, name_prefix=name_prefix, token=token, lock=lock) @@ -114,7 +126,7 @@ class WrfChemDataHandler(AbstractDataHandler): return dist.argmin(dim) -class DataHandlerSingleGridCoulumn(WrfChemDataHandler): +class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader): DEFAULT_MODEL = "WRF-Chem" DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', @@ -146,6 +158,8 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler): self.external_coords_file = external_coords_file self.open_data() + self.assign_coords( + {self.physical_t_coord_name: (self.time_dim_name, self._data[self.physical_t_coord_name].values)}) if external_coords_file is not None: self._apply_external_coordinates() self.rechunk_data(self.rechunk_values) @@ -212,6 +226,62 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler): return {k: list(v.values) for k, v in self._nearest_coords.items()} +class DataHandlerSingleGridCoulumn2(SingleGridColumnWrfChemDataLoader, DataHandlerSingleStation): + + def __init__(self, common_file_starter, wrf_kwargs=None, **kwargs): + super().__init__() + + + def load_data(self, path, station, statistics_per_var, sampling, station_type=None, network=None, + store_data_locally=False, data_origin: Dict = None, start = None, end = None): + data = 
SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr), + data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009_?/', + common_file_starter='wrfout_d0', + time_dim_name='Time', + logical_x_coord_name='west_east', + logical_y_coord_name='south_north', + logical_z_coord_name='bottom_top', + rechunk_values={'Time': 1, 'bottom_top': 2}, + external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc', + ) + + #ToDo + data, meta = None, None + raise NotImplementedError + #return data, meta + + def set_inputs_and_targets(self): + # inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)}) + # targets = self._data.sel( + # {self.target_dim: helpers.to_list(self.target_var)}) # ToDo: is it right to expand this dim?? + # self.input_data = inputs + # self.target_data = targets + raise NotImplementedError + + def make_samples(self): + # self.make_history_window(self.target_dim, self.window_history_size, self.time_dim) + # self.make_labels(self.target_dim, self.target_var, self.time_dim, self.window_lead_time) + # self.make_observation(self.target_dim, self.target_var, self.time_dim) + # self.remove_nan(self.time_dim) + raise NotImplementedError + + @TimeTrackingWrapper + def setup_samples(self): + """ + Setup samples. This method prepares and creates samples X, and labels Y. 
+ """ + # data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling, + # self.station_type, self.network, self.store_data_locally, self.data_origin, + # self.start, self.end) + # self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method, + # limit=self.interpolation_limit) + # self.set_inputs_and_targets() + # if self.do_transformation is True: + # self.call_transform() + # self.make_samples() + raise NotImplementedError + + if __name__ == '__main__': def plot_map_proj(data, xlim=None, ylim=None, filename=None, point=None): @@ -246,18 +316,18 @@ if __name__ == '__main__': use_first_dummy_dataset = True if use_first_dummy_dataset: - wrf_new = DataHandlerSingleGridCoulumn((lat_xr, lon_xr), - data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009/', - common_file_starter='wrfout_d0', - time_dim_name='Time', - logical_x_coord_name='west_east', - logical_y_coord_name='south_north', - logical_z_coord_name='bottom_top', - rechunk_values={'Time': 1, 'bottom_top': 2}, - external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc', - ) - - # wrf_gridcol = DataHandlerSingleGridCoulumn((lat_xr, lon_xr), + wrf_new = SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr), + data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009_?/', + common_file_starter='wrfout_d0', + time_dim_name='Time', + logical_x_coord_name='west_east', + logical_y_coord_name='south_north', + logical_z_coord_name='bottom_top', + rechunk_values={'Time': 1, 'bottom_top': 2}, + external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc', + ) + + # wrf_gridcol = SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr), # data_path='/home/felix/Data/WRF-Chem/', # common_file_starter='wrfout_d01_2010-', # time_dim_name='Time', @@ -300,10 +370,10 @@ if __name__ == '__main__': ######################### # Larger 4D data use_second_dummy_dataset = False if use_second_dummy_dataset: - wrf_dh_4d = 
WrfChemDataHandler(data_path='/home/felix/Data/WRF-Chem/upload_aura/2009/2009', - common_file_starter='wrfout_d01_2009', - time_dim_name='Time', - ) + wrf_dh_4d = BaseWrfChemDataLoader(data_path='/home/felix/Data/WRF-Chem/upload_aura/2009/2009', + common_file_starter='wrfout_d01_2009', + time_dim_name='Time', + ) wrf_dh_4d.open_data() wrf_dh_4d.rechunk_data({"Time": 1, "bottom_top": 34, "south_north": 36, "west_east": 40}) lat_np = np.array([50.73333])