"""Manual check script: list the valid country codes and count the stations of each.

The script talks to the TOAR database over HTTP and prints its findings;
it does not assert anything.
"""
from toargridding.metadata_utilities import countryCodes
import requests

codeLookup = countryCodes()
countries = codeLookup.getValidVocabular(controlName="Country Code", varName="country")
print(countries)
print(len(countries))

print("stations per country code")
for code in countries:
    # NOTE(review): "limit": "None" presumably switches off the server-side
    # result limit so that every station is counted - confirm against the
    # TOAR REST API documentation.
    query = {"country": code, "limit": "None"}
    reply = requests.get(codeLookup.stationMetaEndpoint, params=query)
    reply.raise_for_status()
    stationCount = len(reply.json())
    print(f"\t{code}: {stationCount}")
# %% imports
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path

from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample

from toargridding.metadata_utilities import countryCodes

# %% creation of the requests: one request per country that has stations
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# see page 18 in https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf
varName = "country"
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

# Every country gets its own TimeSample and lists, exactly one Config each.
configs = {
    f"test_ta{country}": Config(
        grid=grid,
        time=TimeSample(start=dt(2000, 1, 1), end=dt(2018, 12, 31), sampling="daily"),
        variables=["mole_fraction_of_ozone_in_air"],  # variable name
        stats=["dma8epa_strict"],
        moreOptions={varName: country},  # forwarded as keyword argument to the gridding call
    )
    for country in validCountries
}

# %% request, download and store the gridded data
# CAVE (note taken over from the notebook this one is derived from): a request
# is processed on the server of the TOAR database and took about 45 minutes
# per requested year - TODO confirm the duration for per-country requests.
# Re-running this cell continues a pending request and downloads the data
# once they are ready; the download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
for directory in (cache_basepath, result_basepath):
    directory.mkdir(exist_ok=True)

analysis_service = AnalysisServiceDownload(
    stats_endpoint=stats_endpoint,
    cache_dir=cache_basepath,
    sample_dir=result_basepath,
    use_downloaded=True,
)

Connection.DEBUG = True

# Poll every 5 minutes and wait up to 60 minutes per request before the
# client pauses; the original 30 minutes are too short for ~45 minute requests.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

createdFiles = []

for configName, request in configs.items():
    print(f"\nProcessing {configName}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=request.grid,
        time=request.time,
        variables=request.variables,
        stats=request.stats,
        **request.moreOptions
    )

    for dataset, metadata in zip(datasets, metadatas):
        datasetId = metadata.get_id()
        outName = result_basepath / f"{datasetId}_{request.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())

# %%
# TODO: now we only need to combine all the obtained results...
# One entry of a controlled vocabulary as delivered by the REST API.
ControlVoc = namedtuple("ControlVoc", ["ID", "short", "long"])


class countryCodes:
    """Find the values of a station metadata field that actually select stations.

    Quick utility written with country codes in mind: knowing the valid values
    allows splitting one analysis request to the TOAR database into one
    request per value.

    Arguments:
    ----------
    controlVocEndpoint:
        endpoint listing all values of a variable of the controlled vocabulary
    stationMetaEndpoint:
        endpoint listing the stations that match a search pattern. It is used
        to check that a metadata value gives access to at least one station.
    timeout:
        seconds to wait for each HTTP response before requests gives up;
        None waits without limit.
    """

    def __init__(
        self,
        controlVocEndpoint="https://toar-data.fz-juelich.de/api/v2/controlled_vocabulary/",
        stationMetaEndpoint="https://toar-data.fz-juelich.de/api/v2/stationmeta/",
        timeout=60,
    ):
        self.controlVocEndpoint = self._withTrailingSlash(controlVocEndpoint)
        self.stationMetaEndpoint = self._withTrailingSlash(stationMetaEndpoint)
        self.timeout = timeout

    @staticmethod
    def _withTrailingSlash(endpoint):
        """Return endpoint with a "/" appended unless it already ends with one."""
        # endswith() also copes with an empty string, where indexing [-1] raises.
        return endpoint if endpoint.endswith("/") else endpoint + "/"

    def getValidVocabular(self, controlName, varName):
        """Get all values of a variable for which at least one station exists.

        Arguments:
        ----------
        controlName:
            name of the parameter in the controlled vocabulary
        varName:
            name of the same parameter in a request to the station endpoint

        return:
            list with the short names of all values that select at least one
            station, to be used within requests to the TOAR database

        raises:
            requests.HTTPError if the server answers a request with an error
            status; requests.Timeout if a response takes longer than
            self.timeout seconds.
        """
        validCodes = []
        # One request is sent per vocabulary entry; a session keeps the
        # connection to the server open instead of reconnecting every time.
        with requests.Session() as session:
            # all possible values of the given variable
            response = session.get(
                f"{self.controlVocEndpoint}{controlName}", timeout=self.timeout
            )
            response.raise_for_status()

            for voc in self.convertLists(response):
                # A single hit is enough to know that the value is in use.
                params = {varName: voc.short, "limit": 1}
                stations = session.get(
                    self.stationMetaEndpoint, params=params, timeout=self.timeout
                )
                stations.raise_for_status()
                if stations.json():
                    validCodes.append(voc.short)
        return validCodes

    def convertLists(self, response) -> list[ControlVoc]:
        """Convert a controlled-vocabulary response into ControlVoc tuples.

        Every element of response.json() must consist of exactly three
        items (ID, short name, long name); otherwise unpacking raises a
        ValueError.
        """
        return [
            ControlVoc(vocId, short, long)
            for vocId, short, long in response.json()
        ]