diff --git a/examples/produce_data_withOptional.ipynb b/examples/produce_data_withOptional.ipynb
index d1b00316e99824957c942b132670014777433b8e..e73b77e5e862fb2688f4c3ec22ded9b5f554f11c 100644
--- a/examples/produce_data_withOptional.ipynb
+++ b/examples/produce_data_withOptional.ipynb
@@ -2,9 +2,22 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'kwargs' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtoar_rest_client\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnalysisServiceDownload, Connection\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgrids\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RegularGrid\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgridding\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_gridded_toar_data\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetadata\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TimeSample\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdefaultLogging\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m toargridding_defaultLogging\n",
+      "File \u001b[0;32m~/Eigene Daten/FZJ/JSC/workingDirectories/TOAR/toargridding/src/toargridding/gridding.py:21\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtoargridding\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtoar_rest_client\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnalysisService\n\u001b[1;32m 11\u001b[0m GriddedResult \u001b[38;5;241m=\u001b[39m namedtuple(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGriddedResult\u001b[39m\u001b[38;5;124m\"\u001b[39m, [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_gridded_toar_data\u001b[39m(\n\u001b[1;32m 15\u001b[0m analysis_service: AnalysisService,\n\u001b[1;32m 16\u001b[0m grid: GridDefinition,\n\u001b[1;32m 17\u001b[0m time: TimeSample,\n\u001b[1;32m 18\u001b[0m variables: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m],\n\u001b[1;32m 19\u001b[0m stats: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m],\n\u001b[1;32m 20\u001b[0m contributors_path : Path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[43mkwargs\u001b[49m,\n\u001b[1;32m 22\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mlist\u001b[39m[xr\u001b[38;5;241m.\u001b[39mDataset], \u001b[38;5;28mlist\u001b[39m[Metadata]]:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"API to download data as xarrays\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \n\u001b[1;32m 25\u001b[0m \u001b[38;5;124;03m The function creates all combinations of the variable and stats list\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;124;03m Gridded datasets for each combination of variables and stats and appropriate metadata for each dataset.\u001b[39;00m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 45\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 46\u001b[0m Metadata\u001b[38;5;241m.\u001b[39mconstruct(standard_name\u001b[38;5;241m=\u001b[39mvar, time\u001b[38;5;241m=\u001b[39mtime, stat\u001b[38;5;241m=\u001b[39mstat, moreOptions\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m var, stat \u001b[38;5;129;01min\u001b[39;00m product(variables, stats)\n\u001b[1;32m 48\u001b[0m ]\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'kwargs' is not defined"
+     ]
+    }
+   ],
    "source": [
     "import logging\n",
     "from datetime import datetime as dt\n",
@@ -100,6 +113,7 @@
     "        time=config.time,\n",
     "        variables=config.variables,\n",
     "        stats=config.stats,\n",
+    "        contributors_path=result_basepath,\n",
     "        **config.moreOptions\n",
     "    )\n",
     "\n",
@@ -125,7 +139,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.11.7"
   }
  },
  "nbformat": 4,
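The committed output above records an import-time failure from an intermediate state of src/toargridding/gridding.py: without a comma between the new contributors_path parameter and **kwargs (traceback lines 20/21), Python reads "None **kwargs" as the default-value expression "None ** kwargs" and evaluates the name kwargs while executing the def statement. A minimal sketch of the pitfall, using a toy function rather than the package's code:

from pathlib import Path

# Missing comma: "None **kwargs" is parsed as the default value "None ** kwargs",
# which is evaluated at definition time and raises the NameError recorded above.
try:
    exec("def broken(contributors_path: Path = None **kwargs): pass")
except NameError as err:
    print(err)  # name 'kwargs' is not defined

# With the comma (as the hunk in src/toargridding/gridding.py below has it),
# **kwargs is an ordinary catch-all parameter:
def fixed(contributors_path: Path = None, **kwargs):
    return kwargs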
diff --git a/examples/quality_controll.ipynb b/examples/quality_controll.ipynb
index b3ad13bcb96baf064ec9e4699977d89057db4c68..35e29fa5bce22277cbf0c75bb22e86b83444360a 100644
--- a/examples/quality_controll.ipynb
+++ b/examples/quality_controll.ipynb
@@ -28,6 +28,12 @@
     "from toargridding.metadata import Metadata, TimeSample, AnalysisRequestResult, Coordinates\n",
     "from toargridding.variables import Coordinate\n",
     "\n",
+    "import logging\n",
+    "from toargridding.defaultLogging import toargridding_defaultLogging\n",
+    "#setup of logging\n",
+    "logger = toargridding_defaultLogging()\n",
+    "logger.addShellLogger(logging.DEBUG)\n",
+    "logger.logExceptions()\n",
     "\n",
     "endpoint = \"https://toar-data.fz-juelich.de/api/v2/analysis/statistics/\"\n",
     "#starts in directory [path/to/toargridding]/tests\n",
@@ -36,10 +42,10 @@
     "cache_dir = toargridding_base_path / \"cache\"\n",
     "data_download_dir = toargridding_base_path / \"results\"\n",
     "\n",
-    "analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir)\n",
+    "analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir, use_downloaded=False)\n",
     "my_grid = RegularGrid(1.9, 2.5)\n",
     "\n",
-    "time = TimeSample(dt(2016,1,1), dt(2016,12,31), \"monthly\")\n",
+    "time = TimeSample(dt(2016,1,1), dt(2016,2,28), \"daily\")\n",
     "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", time, \"mean\")\n"
    ]
   },
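The notebook now routes debug output through the package's logging helper. Its internals are not part of this diff; in rough stdlib terms, a shell logger at DEBUG level plus exception logging could be set up as sketched here (the behaviour of toargridding_defaultLogging is assumed, not shown in this change):

import logging
import sys

# attach a DEBUG-level handler that writes to the shell
shell_handler = logging.StreamHandler()
shell_handler.setLevel(logging.DEBUG)
package_logger = logging.getLogger("toargridding")
package_logger.setLevel(logging.DEBUG)
package_logger.addHandler(shell_handler)

# route uncaught exceptions through the logger instead of bare stderr
def _log_uncaught(exc_type, exc_value, exc_traceback):
    package_logger.critical("uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))

sys.excepthook = _log_uncaught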
diff --git a/src/toargridding/contributors.py b/src/toargridding/contributors.py
index 11703f66c0f2dd234ba47b6a2b74939b4ea74c7e..4bb85a9b9eae4d3659eb3e78ac6d293885ba973e 100644
--- a/src/toargridding/contributors.py
+++ b/src/toargridding/contributors.py
@@ -1,23 +1,29 @@
 from pandas.core.groupby import DataFrameGroupBy
 from typing import Iterable
+from pathlib import Path
 
 #TODO: maybe create an abstract base class and separate the offline and service implementations
 class contributionsManager:
-    def __init__(self, requestID, endpoint="not yet defined"):
+    def __init__(self, requestID, contributors_path : Path = None, endpoint=""):
         self.requestID = requestID
         self.timeseriesIDs = set()
         self.endpoint = endpoint
-        self.runsAsService = False
+        self.runsAsService = True
+        if contributors_path is not None:
+            self.runsAsService = False
+            self.contributors_path = contributors_path
 
     def setup_contributors_endpoint_for_metadata(self):
         if self.runsAsService:
             return self.setup_contributors_service()
         else:
-            return self.setup_contributors_offline()
-    def setup_contributors_offline(self) -> str:
-        # write time series IDs to file
-        # save file
-        return f"This dataset has been created in standalone mode. To retrieve the contributors from for this dataset execute: curl {self.endpoint} -file {self.requestID}.contributors"
+            return self.setup_contributors_id_file()
+    def setup_contributors_id_file(self) -> str:
+        ext = "contributors"
+        with open(self.contributors_path / f"{self.requestID}.{ext}", "w") as f:
+            for id in self.timeseriesIDs:
+                f.write(f"{id}\n")
+        return f"curl {self.endpoint} -file {self.requestID}.{ext}"
    def setup_contributors_service(self) -> str:
        # TODO: missing implementation
        raise NotImplementedError("This has not been implemented as this package is not yet operated as a service.")
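With this change contributionsManager has two modes: service mode (the default, still raising NotImplementedError) and an offline mode selected by passing contributors_path. A short sketch of the offline flow as defined above; the request ID is a placeholder, the target directory is assumed to exist, and in the package the ID set is filled during gridding via extract_contributors rather than by hand:

from pathlib import Path
from toargridding.contributors import contributionsManager

manager = contributionsManager("example-request-id", contributors_path=Path("results"))
manager.timeseriesIDs.update([1234, 5678])  # normally collected by extract_contributors

# writes results/example-request-id.contributors (one time series ID per line)
# and returns the hint stored in the dataset attributes; endpoint defaults to ""
hint = manager.setup_contributors_endpoint_for_metadata()
print(hint)  # curl  -file example-request-id.contributors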
diff --git a/src/toargridding/gridding.py b/src/toargridding/gridding.py
index fcd5e33647b8f8d4b73e5441bc0fc2b6e79ea0c5..94e8dfea7477f747036ebf2ab2e56f5fdf836c7a 100644
--- a/src/toargridding/gridding.py
+++ b/src/toargridding/gridding.py
@@ -1,5 +1,6 @@
 from collections import namedtuple
 from itertools import product
+from pathlib import Path
 
 import xarray as xr
 
@@ -16,6 +17,7 @@ def get_gridded_toar_data(
     time: TimeSample,
     variables: list[str],
     stats: list[str],
+    contributors_path : Path = None,
     **kwargs,
 ) -> tuple[list[xr.Dataset], list[Metadata]]:
     """API to download data as xarrays
@@ -48,6 +50,7 @@ def get_gridded_toar_data(
     datasets = []
     for metadata in metadatas:  # standard_name ?
         data = analysis_service.get_data(metadata)
+        #TODO add processing of contributors
         ds = grid.as_xarray(data)
         datasets.append(ds)
 
diff --git a/src/toargridding/grids.py b/src/toargridding/grids.py
index 1fe0731ff7d765b837e4f46061cc662605e87283..1e7a003e135cefdde19cae56b6d5feaa408218f6 100644
--- a/src/toargridding/grids.py
+++ b/src/toargridding/grids.py
@@ -139,7 +139,7 @@ class RegularGrid(GridDefinition):
         data_grouped_by_cell = self.group_data_by_cell(data.stations_data, data.stations_coords)
         data.contributions.extract_contributors(data_grouped_by_cell)
         cell_statistics = self.get_cell_statistics(data_grouped_by_cell)
-        dataset = self.create_dataset(cell_statistics, data.metadata, data.contributions)
+        dataset = self.create_dataset(cell_statistics, data.metadata)
 
         return dataset
 
@@ -202,7 +202,7 @@ class RegularGrid(GridDefinition):
             step=metadata.time.sampling,
         )
 
-        gridded_ds = self.get_empty_grid(time, metadata, contributions)
+        gridded_ds = self.get_empty_grid(time, metadata)
         for variable, aggregated_data in cell_statistics.items():
             data_array_dict = self.get_data_array_dict(time, aggregated_data, variable, metadata)
             gridded_ds = gridded_ds.assign(data_array_dict)
@@ -286,7 +286,7 @@ class RegularGrid(GridDefinition):
         ids[ids < 0] += maxBin4Wrap
         return ids
 
-    def get_empty_grid(self, time: Variable, metadata: Metadata, contributions: contributionsManager) -> xr.Dataset:  # TODO make CF-compliant => docs
+    def get_empty_grid(self, time: Variable, metadata: Metadata) -> xr.Dataset:  # TODO make CF-compliant => docs
         """creation of an empty dataset without data
 
         Sets up a dataset with its three axes: time, longitude and latitude.
@@ -306,7 +306,7 @@ class RegularGrid(GridDefinition):
             Variables.longitude.name: self.lon.as_data_array(),
         }
 
-        ds = xr.Dataset(coords=coords, attrs=get_global_attributes(metadata, contributions))
+        ds = xr.Dataset(coords=coords, attrs=get_global_attributes(metadata))
 
         return ds
 
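Note that grids.py still calls data.contributions.extract_contributors(...) as unchanged context, although the contributions field of AnalysisRequestResult is removed further down in this diff, so gridding would currently fail with an AttributeError. One conceivable way to resolve the "#TODO add processing of contributors" inside the loop of get_gridded_toar_data, sketched only from the pieces this diff introduces and not the authors' final design (the loop's variables are the function's own parameters and locals):

from toargridding.contributors import contributionsManager

for metadata in metadatas:
    data = analysis_service.get_data(metadata)
    contributions = contributionsManager(metadata.get_id(), contributors_path=contributors_path)
    data.contributions = contributions  # restore the attribute grids.py still reads
    # get_global_attributes() now requires this field before gridding builds the dataset
    metadata.contributors_metadata_field = contributions.setup_contributors_endpoint_for_metadata()
    ds = grid.as_xarray(data)  # fills contributions.timeseriesIDs via extract_contributors
    contributions.setup_contributors_endpoint_for_metadata()  # rewrite the ID file, now populated
    datasets.append(ds)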
""" + if metadata.contributors_metadata_field is None: + raise ValueError("metadata.contributors_metadata_field must be set before calling get_global_attributes") + dynamic_cf_attributes = { "id": metadata.get_id(), "title": metadata.get_title(), @@ -222,7 +226,7 @@ def get_global_attributes(metadata: Metadata, contributions: contributionsManage # "time_coverage_duration": 0, # TODO insert durations # "time_coverage_resolution": 0, "product_version": f"version of toargridding {importlib.metadata.version( __package__ or __name__ )}", - "contributors": contributions.setup_contributors_endpoint_for_metadata(), + "contributors": metadata.contributors_metadata_field, } for key, value in metadata.moreOptions.items(): if key not in dynamic_cf_attributes: diff --git a/src/toargridding/toar_rest_client.py b/src/toargridding/toar_rest_client.py index b1992387fbd9e97fffa1203393335169c51c7f7c..3561b8a34a3797ec47814f49ac7fb3dc8e0d38b0 100644 --- a/src/toargridding/toar_rest_client.py +++ b/src/toargridding/toar_rest_client.py @@ -431,8 +431,7 @@ class AnalysisService: timeseries, timeseries_metadata = self.get_timeseries_and_metadata(metadata) coords = self.get_clean_coords(timeseries_metadata) timeseries = self.get_clean_timeseries(timeseries, metadata) - contributions = contributionsManager(metadata.get_id()) - return AnalysisRequestResult(timeseries, coords, metadata, contributions=contributions) + return AnalysisRequestResult(timeseries, coords, metadata) def get_timeseries_and_metadata(self, metadata: Metadata) -> tuple[pd.DataFrame, pd.DataFrame]: """obtain data and metadata from TOAR database @@ -443,11 +442,13 @@ class AnalysisService: query_options = QueryOptions.from_metadata(metadata) result = self.connection.get(query_options) + print(result.request) + print(result.request.url) timeseries, timeseries_metadata = self.load_data(result.content, metadata) return timeseries, timeseries_metadata def get_clean_coords(self, timeseries_metadata: pd.DataFrame): - """remove all stations with invalid coordinates + """remove all stations with invalid coordinates and drop unused metadata invalid coordinates are NaN, none etc. return: stations with valid coordinates @@ -579,6 +580,7 @@ class AnalysisServiceDownload(AnalysisService): if needs_fresh_download: logger.info("Performing request to TOAR DB") response = self.connection.get(query_options) + print(response.request.url) with open(filename, "w+b") as downloaded_file: downloaded_file.write(response.content) else: