diff --git a/src/toargridding/grids.py b/src/toargridding/grids.py index 745599dfb9395b60de1a85d3261cd4d243fa54d5..afc47ed16d1b371f3b629506eb52840642991854 100644 --- a/src/toargridding/grids.py +++ b/src/toargridding/grids.py @@ -63,8 +63,8 @@ class GridDefinition(ABC): """ @abstractmethod - def as_xarray(timeseries: dict[str, pd.DataFrame], metadata: pd.DataFrame) -> dict[str, xr.Dataset]: - """conversion of panda Dataframes to an xarray dataset + def as_xarray(self, data: AnalysisRequestResult) -> xr.Dataset: + """gridding of a request to the TOAR database This includes the required setup to store the results as netCDF file according to CF (https://cfconventions.org/cf-conventions/cf-conventions.html) """ @@ -105,8 +105,12 @@ class RegularGrid(GridDefinition): super().__init__() # TODO make sure only sensible resolutions - self.lat = Coordinate.from_resolution(Coordinates.latitude, lat_resolution, min=-90, max=90, wraps=False) - self.lon = Coordinate.from_resolution(Coordinates.longitude, lon_resolution, min=-180, max=180, wraps=True) + self.lat = Coordinate.from_resolution( + Coordinates.latitude, lat_resolution, min=-90, max=90, wraps=False + ) + self.lon = Coordinate.from_resolution( + Coordinates.longitude, lon_resolution, min=-180, max=180, wraps=True + ) spatial_shape = (self.lon.size, self.lat.size) spatial_size = self.lon.size * self.lat.size self.dims = [ @@ -115,7 +119,9 @@ class RegularGrid(GridDefinition): Coordinates.longitude.name, ] - self._as_xy_index = np.dstack(np.meshgrid(range(self.lat.size), range(self.lon.size))).reshape(-1, 2) + self._as_xy_index = np.dstack( + np.meshgrid(range(self.lat.size), range(self.lon.size)) + ).reshape(-1, 2) self._as_i_index = np.arange(spatial_size).reshape(spatial_shape).T @property @@ -135,13 +141,17 @@ class RegularGrid(GridDefinition): results of the request, including data, station coordinates and metadata of request """ - data_grouped_by_cell = self.group_data_by_cell(data.stations_data, data.stations_coords) + data_grouped_by_cell = self.group_data_by_cell( + data.stations_data, data.stations_coords + ) cell_statistics = self.get_cell_statistics(data_grouped_by_cell) dataset = self.create_dataset(cell_statistics, data.metadata) return dataset - def group_data_by_cell(self, data: pd.DataFrame, coords: pd.DataFrame) -> DataFrameGroupBy: + def group_data_by_cell( + self, data: pd.DataFrame, coords: pd.DataFrame + ) -> DataFrameGroupBy: """grouping of stations into cells This function converts the lat/lon coordinates of the stations into cell indices and groups stations belonging to one cell. @@ -157,7 +167,9 @@ class RegularGrid(GridDefinition): cell_indices = self.as_cell_index(coords) # will convert cell_indices to float as some nans ar present - data_with_indices = data.join(cell_indices.to_frame(GridDefinition.cell_index_name), how="outer") + data_with_indices = data.join( + cell_indices.to_frame(GridDefinition.cell_index_name), how="outer" + ) return data_with_indices.groupby(GridDefinition.cell_index_name) @@ -180,7 +192,9 @@ class RegularGrid(GridDefinition): return stats - def create_dataset(self, cell_statistics: dict[str, pd.DataFrame], metadata: Metadata) -> xr.Dataset: + def create_dataset( + self, cell_statistics: dict[str, pd.DataFrame], metadata: Metadata + ) -> xr.Dataset: """creation of data set and filling with results from the gridding Parameters: @@ -202,7 +216,9 @@ class RegularGrid(GridDefinition): gridded_ds = self.get_empty_grid(time, metadata) for variable, aggregated_data in cell_statistics.items(): - data_array_dict = self.get_data_array_dict(time, aggregated_data, variable, metadata) + data_array_dict = self.get_data_array_dict( + time, aggregated_data, variable, metadata + ) gridded_ds = gridded_ds.assign(data_array_dict) return gridded_ds @@ -232,7 +248,9 @@ class RegularGrid(GridDefinition): gridded_variable = Variable.from_data(gridded_data, variable, metadata) return {variable.name: gridded_variable.as_data_array(self.dims)} - def create_gridded_data(self, time: Coordinate, grouped_timeseries: pd.DataFrame) -> np.array: + def create_gridded_data( + self, time: Coordinate, grouped_timeseries: pd.DataFrame + ) -> np.array: """converts the available cell data to a full lat/lon-temporal data cube. Parameters: @@ -250,7 +268,9 @@ class RegularGrid(GridDefinition): values[...] = self.fill_value index = self._as_xy_index[grouped_timeseries.index.astype(int)] - values[:, index.T[0], index.T[1]] = grouped_timeseries.values.reshape(-1, time.size).T + values[:, index.T[0], index.T[1]] = grouped_timeseries.values.reshape( + -1, time.size + ).T return values @@ -258,13 +278,17 @@ class RegularGrid(GridDefinition): """converts coordinates of stations into x and y indices of the regular grid""" id_x = self.coord_to_index(coords[self.lat.name], self.lat.min, self.lat.step) - id_y = self.coord_to_index(coords[self.lon.name], self.lon.min, self.lon.step, len(self.lon.data)) + id_y = self.coord_to_index( + coords[self.lon.name], self.lon.min, self.lon.step, len(self.lon.data) + ) id_i = self._as_i_index[id_x, id_y] return pd.Series(id_i, index=id_x.index) - def coord_to_index(self, coord: pd.Series, x0_axis: float, d_axis: float, maxBin4Wrap: int = None) -> np.array: + def coord_to_index( + self, coord: pd.Series, x0_axis: float, d_axis: float, maxBin4Wrap: int = None + ) -> np.array: """converts a coordinate into a bin index on one axis Parameters: @@ -284,7 +308,9 @@ class RegularGrid(GridDefinition): ids[ids < 0] += maxBin4Wrap return ids - def get_empty_grid(self, time: Variable, metadata: Metadata) -> xr.Dataset: # TODO make CF-compliant => docs + def get_empty_grid( + self, time: Variable, metadata: Metadata + ) -> xr.Dataset: # TODO make CF-compliant => docs """creation of an empty dataset without data Sets up a dataset with its three axis: time, longitude and latitude.