diff --git a/toargridding/grids.py b/toargridding/grids.py index cb0f946a72b124c965872ee798086bc3072ea2e4..83eb164ca5e264a4d202179102f37050fd5ce29e 100644 --- a/toargridding/grids.py +++ b/toargridding/grids.py @@ -6,6 +6,9 @@ import xarray as xr import pandas as pd import numpy as np +from typing import Dict +from pandas.core.groupby import DataFrameGroupBy + from toargridding.metadata import ( Variables, Coordinates, @@ -22,6 +25,8 @@ GridType = Enum("GridType", ["regular"]) class GridDefinition(ABC): """factory and base class for definition of different grids + usage: GridDefinition.construct( GridType, **dict( parameter = value ) ) + The dict must contain all parameters required for the creation of the desired GridType """ cell_index_name = "cell_index" @@ -32,6 +37,9 @@ class GridDefinition(ABC): @staticmethod def construct(grid_type: GridType, **kwargs): """creation of requested grid type + + usage: GridDefinition.construct( GridType, **dict( parameter = value ) ) + The dict must contain all parameters required for the creation of the desired GridType """ match (grid_type): case GridType.regular: @@ -41,7 +49,7 @@ class GridDefinition(ABC): @property @abstractmethod - def description(self): + def description(self)->str: """description of this grid """ pass @@ -67,11 +75,32 @@ class GridDefinition(ABC): class RegularGrid(GridDefinition): """definition of a regular grid with longitude and latitude. + + The grid covers the complete globe and is defined by providing resolution for latitude (lat_resolution) and longitude (lon_resolution) + + Attributes: + ---------- + lat: + latitude coordinate axis. 
+ dims: + names of the dimensions of the data """ Coord = namedtuple("Coord", ["lat", "lon"]) def __init__(self, lat_resolution, lon_resolution): + """constructor from resolutions + + Parameters: + ---------- + lat_resolution: + resolution for latitude in degree + lon_resolution: + resolution for longitude in degree + """ + super().__init__() # TODO make sure only sensible resolutions @@ -95,10 +124,23 @@ class RegularGrid(GridDefinition): self._as_i_index = np.arange(spatial_size).reshape(spatial_shape).T @property - def description(self): + def description(self)->str: + """get description of grid + """ + return f"regular global grid with lat/lon resolutions ({self.lat.step}, {self.lon.step})" def as_xarray(self, data: AnalysisRequestResult) -> xr.Dataset: + """gridding of a request to the TOAR database + + groups the stations into the cells of the grid and calculates mean and standard deviation for each cell. + + Parameters: + ---------- + data: + results of the request, including data, station coordinates and metadata of request + """ + data_grouped_by_cell = self.group_data_by_cell( data.stations_data, data.stations_coords ) @@ -107,7 +149,19 @@ class RegularGrid(GridDefinition): return dataset - def group_data_by_cell(self, data: pd.DataFrame, coords: pd.DataFrame): + def group_data_by_cell(self, data: pd.DataFrame, coords: pd.DataFrame) -> DataFrameGroupBy: + """grouping of stations into cells + + This function converts the lat/lon coordinates of the stations into cell indices and groups stations belonging to one cell. 
+ + Parameters: + ---------- + data: + station data + coords: + station coordinates + """ + cell_indices = self.as_cell_index(coords) # will convert cell_indices to float as some nans ar present @@ -117,7 +171,17 @@ class RegularGrid(GridDefinition): return data_with_indices.groupby(GridDefinition.cell_index_name) - def get_cell_statistics(self, groups) -> dict[Variables, pd.DataFrame]: + def get_cell_statistics(self, groups : DataFrameGroupBy) -> dict[str, pd.DataFrame]: + """calculation of mean, std and number of stations per cell + + Parameters: + ---------- + groups: + time series data grouped by stations in a cell + return: + dictionary with calculated quantities + """ + stats = { Variables.mean: groups.mean(), Variables.std: groups.std(), @@ -126,7 +190,19 @@ class RegularGrid(GridDefinition): return stats - def create_dataset(self, cell_statistics: pd.DataFrame, metadata: Metadata): + def create_dataset(self, cell_statistics : Dict, metadata: Metadata) -> xr.Dataset: + """creation of data set and filling with results from the gridding + + Parameters: + ---------- + cell_statistics: + values obtained for each cell. In the beginning these are mean, std and number of stations + metadata: + metadata of the request + return: + xarray dataset with coordinates and variables following the CF convention + """ + time = Coordinate.from_data( metadata.time.as_cf_index(), Coordinates.time, @@ -143,12 +219,39 @@ class RegularGrid(GridDefinition): return gridded_ds - def get_data_array_dict(self, time, aggregated_data, variable, metadata): + def get_data_array_dict(self, time : Coordinate, aggregated_data : pd.DataFrame, variable : Variables, metadata : Metadata) -> Dict[str, xr.DataArray]: + """conversion of data to a dict for assigning them to the Dataset + + Parameters: + ---------- + time: + temporal coordinate + aggregated_data: + obtained results per grid cell + variable: + variable to be added. 
This allows access to CF-conformant metadata + metadata: + metadata of request + """ + gridded_data = self.create_gridded_data(time, aggregated_data) gridded_variable = Variable.from_data(gridded_data, variable, metadata) return {variable.name: gridded_variable.as_data_array(self.dims)} - def create_gridded_data(self, time, grouped_timeseries): + def create_gridded_data(self, time : Coordinate, grouped_timeseries : pd.DataFrame)->np.array: + """converts the available cell data to a full lat/lon-temporal data cube. + + Parameters: + ---------- + time: + temporal coordinate + grouped_timeseries: + data frame with station position and data + return: + 3D-array with axes time, latitude and longitude. Fields without data are nan (fill_value defined in GridDefinition init) + """ + + #CAVE: This function might involve black magic... values = np.empty((time.size, self.lat.size, self.lon.size)) values[...] = self.fill_value @@ -159,7 +262,10 @@ class RegularGrid(GridDefinition): return values - def as_cell_index(self, coords): + def as_cell_index(self, coords : pd.DataFrame) -> pd.Series: + """converts coordinates of stations into x and y indices of the regular grid + """ + id_x = self.coord_to_index(coords[self.lat.name], self.lat.min, self.lat.step) id_y = self.coord_to_index(coords[self.lon.name], self.lon.min, self.lon.step) @@ -167,12 +273,35 @@ class RegularGrid(GridDefinition): return pd.Series(id_i, index=id_x.index) - def coord_to_index(self, coord, x0_axis, d_axis): + def coord_to_index(self, coord : pd.Series, x0_axis : float, d_axis : float) -> np.array: + """converts a coordinate into a bin index on one axis + + Parameters: + ---------- + coord: + coordinate for conversion + x0_axis: + offset of the axis + d_axis: + resolution of the axis + """ + return (np.ceil((coord / d_axis) - 0.5) - x0_axis / d_axis).astype(int) - def get_empty_grid( - self, time: Variable, metadata: pd.DataFrame - ) -> xr.Dataset: # TODO make CF-compliant => docs + def get_empty_grid(self, 
time: Variable, metadata: Metadata) -> xr.Dataset: # TODO make CF-compliant => docs + """creation of an empty dataset without data + + Sets up a dataset with its three axes: time, longitude and latitude. + Adds global metadata to the dataset. + + Parameters: + ---------- + time: + temporal coordinate + metadata: + information on request + """ + coords = { Variables.time.name: time.as_data_array(), Variables.latitude.name: self.lat.as_data_array(), diff --git a/toargridding/variables.py b/toargridding/variables.py index eb798ee660b01d4c3a0730a7dad484e866e404b7..5445cf543363e2f1228a9bebc0c452c9b22f2422 100644 --- a/toargridding/variables.py +++ b/toargridding/variables.py @@ -31,7 +31,7 @@ class Variable: encoding: dict[str, str] @classmethod - def from_data(cls, data, variable: Variables, metadata: Metadata | None, **kwargs): + def from_data(cls, data : np.array, variable: Variables, metadata: Metadata | None, **kwargs): """construction from analysis results """ cf_metadata = get_cf_metadata(variable, metadata=metadata)