Commit 47c5f96d authored by Carsten Hinz

setup logging for notebooks

parent f054ef40
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.defaultLogging import toargridding_defaultLogging

# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_manyStations.log"))
```
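%% Cell type:markdown id: tags:
The helper class above presumably wraps the standard `logging` module. For orientation, here is a minimal stdlib-only sketch of an equivalent setup; it is an assumption about what `addShellLogger`, `addRotatingLogFile` and `logExceptions` do internally, not the actual implementation.
%% Cell type:code id: tags:
``` python
# Hypothetical stdlib-only equivalent of the setup above (assumption:
# a StreamHandler, a RotatingFileHandler and a sys.excepthook wrapper).
import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path

log = logging.getLogger("toargridding")
log.setLevel(logging.DEBUG)

log.addHandler(logging.StreamHandler())  # "shell logger": print records to the console

Path("log").mkdir(exist_ok=True)
log.addHandler(RotatingFileHandler("log/produce_data_manyStations.log",
                                   maxBytes=1_000_000, backupCount=3))  # bounded log size

def _log_uncaught(exc_type, exc_value, exc_tb):
    # route uncaught exceptions into the log instead of losing them
    log.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_tb))

sys.excepthook = _log_uncaught
```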
%% Cell type:code id: tags:
``` python
# creation of the request
# note: no moreOptions field here, as this notebook passes no extra query options
Config = namedtuple("Config", ["grid", "time", "variables", "stats"])

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for year in range(0, 19):
    valid_data = Config(
        grid,
        TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="daily"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],  # statistic
    )
    configs[f"test_ta{year}"] = valid_data
```
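%% Cell type:markdown id: tags:
A quick sanity check of the generated configurations; `cfg.stats` and `cfg.variables` are the namedtuple fields defined above.
%% Cell type:code id: tags:
``` python
# one config per year, keyed by a test name
for name, cfg in configs.items():
    print(f"{name}: stats={cfg.stats} variables={cfg.variables}")
```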
%% Cell type:code id: tags:
``` python
# CAVE: each request takes over 30 min per requested year. This cell therefore needs to be
# executed repeatedly to check whether the results are ready for download.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Here we adapt the duration before a request is abandoned.
# The default is 30 minutes; we poll every 45 minutes and wait up to 12 h per request.
analysis_service.connection.setRequestTimes(interval_min=45, maxWait_min=12*60)
for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
    )
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc")
        print(metadata.get_id())
```
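%% Cell type:markdown id: tags:
`interval_min` and `maxWait_min` amount to a polling loop. The following is a hypothetical sketch of the pattern that the `Connection` class presumably implements internally; `poll_until_ready` and `check_ready` are illustrative names, not part of toargridding.
%% Cell type:code id: tags:
``` python
import time

def poll_until_ready(check_ready, interval_min: int, maxWait_min: int) -> bool:
    """Call check_ready() every interval_min minutes until it returns True
    or maxWait_min minutes have passed."""
    waited = 0
    while waited < maxWait_min:
        if check_ready():
            return True          # results are ready for download
        time.sleep(interval_min * 60)
        waited += interval_min
    return False                 # give up; a later restart of the cell resumes the request
```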
......
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.defaultLogging import toargridding_defaultLogging
# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_withOptional.log"))
#logger.addSysLogger(logging.DEBUG)
```
%% Cell type:code id: tags:
``` python
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# moreOptions is a dict that adds additional arguments to the query to the REST API.
# For example, the field toar1_category with its possible values Urban, RuralLowElevation,
# RuralHighElevation and Unclassified can be added (see page 18 of
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf),
# or type_of_area with the values urban, suburban and rural (page 20).
details4Query = {
    #"toar1_category" : "Urban",              # uncomment if desired
    #"toar1_category" : "RuralLowElevation",  # uncomment if desired
    #"toar1_category" : "RuralHighElevation", # uncomment if desired
    #"type_of_area" : "Urban",                # alternatives: Rural, Suburban
    "type_of_area" : "Rural",
    #"type_of_area" : "Suburban",
}

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for year in range(0, 19):
    valid_data = Config(
        grid,
        #TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="daily"),   # adapt the range if needed
        TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="monthly"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        #["mean", "dma8epax"],  # would start one request after the other
        #["dma8epa_strict"],
        ["mean"],
        details4Query,
    )
    configs[f"test_ta{year}"] = valid_data
```
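%% Cell type:markdown id: tags:
The `moreOptions` dict reaches the query through keyword-argument expansion (`**config.moreOptions` in the next cell). The stand-in function below illustrates the mechanism; `build_query` is hypothetical and not part of toargridding.
%% Cell type:code id: tags:
``` python
# illustration of **-expansion: the dict entries become extra query parameters
def build_query(variable, stat, **filters):
    params = {"variable": variable, "statistic": stat}
    params.update(filters)  # e.g. type_of_area="Rural"
    return params

print(build_query("mole_fraction_of_ozone_in_air", "mean", **details4Query))
# {'variable': 'mole_fraction_of_ozone_in_air', 'statistic': 'mean', 'type_of_area': 'Rural'}
```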
%% Cell type:code id: tags:
``` python
# CAVE: this cell runs about 45 minutes per requested year; therefore we increase the
# waiting duration to 1 h per request.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Adapt the polling interval and the total duration before the client abandons a request.
# As the requests take about 45 min, waiting up to 60 min before timing out is more
# suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        **config.moreOptions,
    )
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc")
        print(metadata.get_id())
```
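%% Cell type:markdown id: tags:
The `use_downloaded=True` flag makes reruns cheap: results that are already on disk are reused instead of being requested again. The helper below is a hypothetical illustration of that idea, not the client's actual implementation.
%% Cell type:code id: tags:
``` python
from pathlib import Path

def fetch_cached(target: Path, download) -> Path:
    # reuse a previously downloaded file if it exists, otherwise fetch it
    if target.exists():
        return target
    download(target)
    return target
```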
......
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection, EmptyDataError
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
from toargridding.defaultLogging import toargridding_defaultLogging
# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_withOptional_country.log"))
#logger.addSysLogger(logging.DEBUG)
```
%% Cell type:code id: tags:
``` python
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])
# see page 18 of https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf

varName = "country"
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for country in validCountries:
    valid_data = Config(
        grid,
        TimeSample( start=dt(2000,1,1), end=dt(2018,12,31), sampling="daily"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],
        {varName: country},
    )
    configs[f"test_ta{country}"] = valid_data
```
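%% Cell type:markdown id: tags:
Since the loop above creates one request per country code, it is worth checking how many requests will be launched before starting them.
%% Cell type:code id: tags:
``` python
# one request per country code
print(f"{len(configs)} country requests, first few: {list(configs)[:5]}")
```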
%% Cell type:code id: tags:
``` python
# CAVE: these requests can run for a long time; therefore we increase the waiting
# duration to 1 h per request.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Adapt the polling interval and the total duration before the client abandons a request.
# As the requests take about 45 min, waiting up to 60 min before timing out is more
# suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

createdFiles = []

for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    except EmptyDataError:
        # skip countries for which the request returns no data
        print("failed for", name)
        continue
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())
```
%% Cell type:code id: tags:
``` python
## TODO: now we only need to combine all the obtained results... (one possible approach is sketched below)
```
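%% Cell type:markdown id: tags:
One possible way to tackle the TODO above is to stack the per-country files along a new dimension with xarray. This sketch assumes that all files in `createdFiles` share the same grid and time coordinates; whether a `country` dimension is the desired form of aggregation depends on the analysis.
%% Cell type:code id: tags:
``` python
import xarray as xr

# stack the per-country results along a new "country" dimension
datasets = [xr.open_dataset(fname) for fname in createdFiles]
combined = xr.concat(datasets, dim="country")
combined.to_netcdf(result_basepath / "combined_countries.nc")
```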
......