Commit adbaf2a0 authored by Carsten Hinz

added exclusion of N/A from contributors

parent 61b4f45a
Related merge requests: !11 Creation of first beta release version, !10 change in metadata of notebook, !9 change in metadata of notebook
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.defaultLogging import toargridding_defaultLogging
# setup of logging:
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# a log file needs to be set explicitly:
logger.addRotatingLogFile(Path("log/produce_data_withOptional.log"))
#logger.addSysLogger(logging.DEBUG)
```
%% Cell type:code id: tags:
``` python
# creation of the requests.
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# moreOptions is implemented as a dict to pass additional arguments to the query to the REST API.
# For example, the field toar1_category with its possible values Urban, RuralLowElevation,
# RuralHighElevation and Unclassified can be added (see page 18 in
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf),
# or type_of_area with the values Urban, Suburban and Rural (page 20).
details4Query = {
    #"toar1_category" : "Urban", # uncomment if wished:-)
    #"toar1_category" : "RuralLowElevation", # uncomment if wished:-)
    #"toar1_category" : "RuralHighElevation", # uncomment if wished:-)
    #"type_of_area" : "Urban", # also test Rural, Suburban
    "type_of_area" : "Rural", # also test Urban, Suburban
    #"type_of_area" : "Suburban", # also test Urban, Rural
}

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for year in range(0, 19):
    valid_data = Config(
        grid,
        TimeSample(start=dt(2000 + year, 1, 1), end=dt(2000 + year, 12, 31), sampling="daily"), # possibly adapt the range:-)
        #TimeSample(start=dt(2000 + year, 1, 1), end=dt(2000 + year, 12, 31), sampling="monthly"),
        ["mole_fraction_of_ozone_in_air"], # variable name
        #["mean", "dma8epax"], # would run one request after the other...
        ["dma8epa_strict"],
        #["mean"],
        details4Query,
    )
    configs[f"test_ta{year}"] = valid_data
```
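Each entry in `configs` bundles the grid, sampling period, variable, statistic and extra query options for one year. A quick way to verify the setup before submitting anything is to print the configurations; the sketch below assumes `TimeSample` exposes its `start` and `end` constructor arguments as attributes, which this notebook does not confirm:

``` python
# Hedged sanity check: list the configured requests before running them.
# Attribute access on TimeSample (cfg.time.start / cfg.time.end) is an assumption.
for name, cfg in configs.items():
    print(name, cfg.time.start, "to", cfg.time.end, cfg.stats, cfg.moreOptions)
```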
%% Cell type:code id: tags:
``` python
# CAVE: this cell runs about 45 minutes per requested year, as the processing is done
# on the servers of the TOAR database. We therefore increase the waiting time to 1 h per request.
# A restart of the cell resumes the requests to the REST API if the results are not yet
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)

# Maybe adapt the interval for polling the results and the total duration before the client
# pauses the requests. As a request takes about 45 min, waiting 60 min before timing out
# is more suitable than the original 30 min.
analysis_service.connection.set_request_times(interval_min=5, max_wait_minutes=60)

for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        contributors_path=result_basepath,
        **config.moreOptions,
    )
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc")
        print(metadata.get_id())
```
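Once the loop finishes, each statistic ends up as a NetCDF file in `results/`. A hedged way to inspect one of them is via xarray, which is not imported by this notebook and is an extra dependency here:

``` python
# Hedged example: open the first produced NetCDF file to inspect its contents.
# Assumes the loop above wrote at least one file; otherwise next() raises StopIteration.
import xarray as xr
from pathlib import Path

first_file = next(Path("results").glob("*.nc"))
with xr.open_dataset(first_file) as ds:
    print(first_file.name)
    print(ds)
```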
The accompanying source change (diff excerpt):

```diff
@@ -61,7 +61,8 @@ class contributions_manager_by_name(contributionsManager):
         for id in ids:
             names = self.id_to_names(id)
             for name in names:
-                self.timeseriesIDs.add(name)
+                if name != "N/A":
+                    self.timeseriesIDs.add(name)
     def id_to_names(self, id : int) -> list[str]:
         for pos in range(10):
             req_res = requests.get(f"https://toar-data.fz-juelich.de/api/v2/timeseries/{id}", timeout=40)
```
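The change itself is small: contributor names resolved from timeseries IDs are still collected into a set, but names equal to "N/A" (presumably a placeholder for unknown contributors) are now skipped. A minimal standalone sketch of the new behavior, with made-up names:

``` python
# Minimal sketch of the filtering introduced by this commit (names are illustrative):
resolved_names = ["Jane Doe", "N/A", "John Smith", "N/A"]

contributors = set()
for name in resolved_names:
    if name != "N/A":  # placeholder entries are no longer added
        contributors.add(name)

print(contributors)  # {'Jane Doe', 'John Smith'}
```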