Skip to content
Snippets Groups Projects
Commit ad30d036 authored by Felix Kleinert
Browse files

Draft: inh. from DataHandlerSingleStation

parent ebd31407
Branches
Tags
1 merge request: !259 Draft: Resolve "WRF-Datahandler should inherit from SingleStationDatahandler"
Pipeline #61724 passed
...@@ -10,16 +10,18 @@ import dask.array as da ...@@ -10,16 +10,18 @@ import dask.array as da
import os import os
from mlair.helpers.geofunctions import haversine_dist from mlair.helpers.geofunctions import haversine_dist
from mlair.helpers.helpers import convert2xrda, remove_items from mlair.helpers.helpers import convert2xrda, remove_items
from mlair.helpers import TimeTrackingWrapper
from typing import Tuple, Union, List, Dict from typing import Tuple, Union, List, Dict
from mlair.data_handler.abstract_data_handler import AbstractDataHandler from mlair.data_handler.abstract_data_handler import AbstractDataHandler
from mlair.data_handler.data_handler_single_station import DataHandlerSingleStation
import logging import logging
import cartopy.crs as ccrs import cartopy.crs as ccrs
float_np_xr = Union[float, np.ndarray, xr.DataArray, xr.Dataset] float_np_xr = Union[float, np.ndarray, xr.DataArray, xr.Dataset]
class WrfChemDataHandler(AbstractDataHandler): class BaseWrfChemDataLoader:
DEFAULT_LOGICAL_TIME_COORD_NAME = 'Time' DEFAULT_LOGICAL_TIME_COORD_NAME = 'Time'
DEFAULT_LOGICAL_X_COORD_NAME = 'x' DEFAULT_LOGICAL_X_COORD_NAME = 'x'
DEFAULT_LOGICAL_Y_COORD_NAME = 'y' DEFAULT_LOGICAL_Y_COORD_NAME = 'y'
...@@ -32,11 +34,13 @@ class WrfChemDataHandler(AbstractDataHandler): ...@@ -32,11 +34,13 @@ class WrfChemDataHandler(AbstractDataHandler):
DEFAULT_RECHUNK = {"XTIME": 1, "y": 36, "x": 40} DEFAULT_RECHUNK = {"XTIME": 1, "y": 36, "x": 40}
def __init__(self, data_path: str, common_file_starter: str, time_dim_name: str = DEFAULT_LOGICAL_TIME_COORD_NAME, def __init__(self, data_path: str, common_file_starter: str, time_dim_name: str = DEFAULT_LOGICAL_TIME_COORD_NAME,
rechunk_values: Dict = None, logical_x_coord_name: str = DEFAULT_LOGICAL_X_COORD_NAME, rechunk_values: Dict = None,
logical_x_coord_name: str = DEFAULT_LOGICAL_X_COORD_NAME,
logical_y_coord_name: str = DEFAULT_LOGICAL_Y_COORD_NAME, logical_y_coord_name: str = DEFAULT_LOGICAL_Y_COORD_NAME,
logical_z_coord_name: str = DEFAULT_LOGICAL_Z_COORD_NAME, logical_z_coord_name: str = DEFAULT_LOGICAL_Z_COORD_NAME,
physical_x_coord_name: str = DEFAULT_PHYSICAL_X_COORD_NAME, physical_x_coord_name: str = DEFAULT_PHYSICAL_X_COORD_NAME,
physical_y_coord_name: str = DEFAULT_PHYSICAL_Y_COORD_NAME physical_y_coord_name: str = DEFAULT_PHYSICAL_Y_COORD_NAME,
physical_t_coord_name: str = DEFAULT_PHYSICAL_TIME_COORD_NAME
): ):
super().__init__() super().__init__()
self.data_path = data_path self.data_path = data_path
...@@ -49,6 +53,7 @@ class WrfChemDataHandler(AbstractDataHandler): ...@@ -49,6 +53,7 @@ class WrfChemDataHandler(AbstractDataHandler):
self.physical_x_coord_name = physical_x_coord_name self.physical_x_coord_name = physical_x_coord_name
self.physical_y_coord_name = physical_y_coord_name self.physical_y_coord_name = physical_y_coord_name
self.physical_t_coord_name = physical_t_coord_name
if rechunk_values is None: if rechunk_values is None:
self.rechunk_values = {self.time_dim_name: 1} self.rechunk_values = {self.time_dim_name: 1}
...@@ -75,6 +80,13 @@ class WrfChemDataHandler(AbstractDataHandler): ...@@ -75,6 +80,13 @@ class WrfChemDataHandler(AbstractDataHandler):
# data = data.assign_coords({'XTIME': data.XTIME.values}) # data = data.assign_coords({'XTIME': data.XTIME.values})
self._data = data self._data = data
def assign_coords(self, coords, **coords_kwargs):
    """
    Attach coordinates to the data held by this loader.

    Both arguments are forwarded unchanged to ``self._data.assign_coords`` and
    the object returned by that call is stored back in ``self._data``.

    :param coords: forwarded as the first positional argument
    :param coords_kwargs: forwarded as keyword arguments
    """
    updated = self._data.assign_coords(coords, **coords_kwargs)
    self._data = updated
def rechunk_data(self, chunks=None, name_prefix='xarray-', token=None, lock=False): def rechunk_data(self, chunks=None, name_prefix='xarray-', token=None, lock=False):
self._data = self._data.chunk(chunks=chunks, name_prefix=name_prefix, token=token, lock=lock) self._data = self._data.chunk(chunks=chunks, name_prefix=name_prefix, token=token, lock=lock)
...@@ -114,7 +126,7 @@ class WrfChemDataHandler(AbstractDataHandler): ...@@ -114,7 +126,7 @@ class WrfChemDataHandler(AbstractDataHandler):
return dist.argmin(dim) return dist.argmin(dim)
class DataHandlerSingleGridCoulumn(WrfChemDataHandler): class SingleGridColumnWrfChemDataLoader(BaseWrfChemDataLoader):
DEFAULT_MODEL = "WRF-Chem" DEFAULT_MODEL = "WRF-Chem"
DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values', DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum', 'u': 'average_values',
'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values', 'v': 'average_values', 'no': 'dma8eu', 'no2': 'dma8eu', 'cloudcover': 'average_values',
...@@ -146,6 +158,8 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler): ...@@ -146,6 +158,8 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
self.external_coords_file = external_coords_file self.external_coords_file = external_coords_file
self.open_data() self.open_data()
self.assign_coords(
{self.physical_t_coord_name: (self.time_dim_name, self._data[self.physical_t_coord_name].values)})
if external_coords_file is not None: if external_coords_file is not None:
self._apply_external_coordinates() self._apply_external_coordinates()
self.rechunk_data(self.rechunk_values) self.rechunk_data(self.rechunk_values)
...@@ -212,6 +226,62 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler): ...@@ -212,6 +226,62 @@ class DataHandlerSingleGridCoulumn(WrfChemDataHandler):
return {k: list(v.values) for k, v in self._nearest_coords.items()} return {k: list(v.values) for k, v in self._nearest_coords.items()}
class DataHandlerSingleGridCoulumn2(SingleGridColumnWrfChemDataLoader, DataHandlerSingleStation):
    """
    Draft data handler inheriting from SingleGridColumnWrfChemDataLoader and DataHandlerSingleStation.

    Work in progress: load_data, set_inputs_and_targets, make_samples and setup_samples all end in
    ``raise NotImplementedError``. The commented-out lines in those methods sketch the intended implementation.
    """

    def __init__(self, common_file_starter, wrf_kwargs=None, **kwargs):
        # NOTE(review): common_file_starter, wrf_kwargs and kwargs are accepted but never used, and the parent
        # initialiser is invoked without any arguments -- confirm the parent classes can be constructed that way.
        super().__init__()

    def load_data(self, path, station, statistics_per_var, sampling, station_type=None, network=None,
                  store_data_locally=False, data_origin: Dict = None, start = None, end = None):
        """
        Placeholder for loading the data of a single grid column.

        None of the parameters is used yet.

        :raises NotImplementedError: once the prototype loader call below has returned
        """
        # Prototype call with hard-coded local paths and dimension names.
        # NOTE(review): lat_xr and lon_xr are not defined in this method or class; presumably they refer to the
        # names used in the ``__main__`` section of this module -- confirm before calling this from elsewhere.
        data = SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr),
                                                 data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009_?/',
                                                 common_file_starter='wrfout_d0',
                                                 time_dim_name='Time',
                                                 logical_x_coord_name='west_east',
                                                 logical_y_coord_name='south_north',
                                                 logical_z_coord_name='bottom_top',
                                                 rechunk_values={'Time': 1, 'bottom_top': 2},
                                                 external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc',
                                                 )
        # TODO: derive data and meta from the loader; for now the loader bound to ``data`` above is discarded.
        data, meta = None, None
        raise NotImplementedError
        # return data, meta

    def set_inputs_and_targets(self):
        """Not implemented yet; the commented lines below sketch the intended selection of inputs and targets."""
        # inputs = self._data.sel({self.target_dim: helpers.to_list(self.variables)})
        # targets = self._data.sel(
        #     {self.target_dim: helpers.to_list(self.target_var)})  # ToDo: is it right to expand this dim??
        # self.input_data = inputs
        # self.target_data = targets
        raise NotImplementedError

    def make_samples(self):
        """Not implemented yet; the commented lines below sketch the intended sample creation steps."""
        # self.make_history_window(self.target_dim, self.window_history_size, self.time_dim)
        # self.make_labels(self.target_dim, self.target_var, self.time_dim, self.window_lead_time)
        # self.make_observation(self.target_dim, self.target_var, self.time_dim)
        # self.remove_nan(self.time_dim)
        raise NotImplementedError

    @TimeTrackingWrapper
    def setup_samples(self):
        """
        Setup samples. This method is meant to prepare and create samples X, and labels Y.

        Not implemented yet: the intended sequence of steps is kept as commented-out code below.
        """
        # data, self.meta = self.load_data(self.path, self.station, self.statistics_per_var, self.sampling,
        #                                  self.station_type, self.network, self.store_data_locally, self.data_origin,
        #                                  self.start, self.end)
        # self._data = self.interpolate(data, dim=self.time_dim, method=self.interpolation_method,
        #                               limit=self.interpolation_limit)
        # self.set_inputs_and_targets()
        # if self.do_transformation is True:
        #     self.call_transform()
        # self.make_samples()
        raise NotImplementedError
if __name__ == '__main__': if __name__ == '__main__':
def plot_map_proj(data, xlim=None, ylim=None, filename=None, point=None): def plot_map_proj(data, xlim=None, ylim=None, filename=None, point=None):
...@@ -246,8 +316,8 @@ if __name__ == '__main__': ...@@ -246,8 +316,8 @@ if __name__ == '__main__':
use_first_dummy_dataset = True use_first_dummy_dataset = True
if use_first_dummy_dataset: if use_first_dummy_dataset:
wrf_new = DataHandlerSingleGridCoulumn((lat_xr, lon_xr), wrf_new = SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr),
data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009/', data_path='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/2009_?/',
common_file_starter='wrfout_d0', common_file_starter='wrfout_d0',
time_dim_name='Time', time_dim_name='Time',
logical_x_coord_name='west_east', logical_x_coord_name='west_east',
...@@ -257,7 +327,7 @@ if __name__ == '__main__': ...@@ -257,7 +327,7 @@ if __name__ == '__main__':
external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc', external_coords_file='/home/felix/Data/WRF-Chem/upload_aura_2021-02-24/coords.nc',
) )
# wrf_gridcol = DataHandlerSingleGridCoulumn((lat_xr, lon_xr), # wrf_gridcol = SingleGridColumnWrfChemDataLoader((lat_xr, lon_xr),
# data_path='/home/felix/Data/WRF-Chem/', # data_path='/home/felix/Data/WRF-Chem/',
# common_file_starter='wrfout_d01_2010-', # common_file_starter='wrfout_d01_2010-',
# time_dim_name='Time', # time_dim_name='Time',
...@@ -300,7 +370,7 @@ if __name__ == '__main__': ...@@ -300,7 +370,7 @@ if __name__ == '__main__':
######################### # Larger 4D data ######################### # Larger 4D data
use_second_dummy_dataset = False use_second_dummy_dataset = False
if use_second_dummy_dataset: if use_second_dummy_dataset:
wrf_dh_4d = WrfChemDataHandler(data_path='/home/felix/Data/WRF-Chem/upload_aura/2009/2009', wrf_dh_4d = BaseWrfChemDataLoader(data_path='/home/felix/Data/WRF-Chem/upload_aura/2009/2009',
common_file_starter='wrfout_d01_2009', common_file_starter='wrfout_d01_2009',
time_dim_name='Time', time_dim_name='Time',
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment