Commit 47c5f96d authored by Carsten Hinz

setup logging for notebooks

parent f054ef40
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.defaultLogging import toargridding_defaultLogging

# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_manyStations.log"))
```
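%% Cell type:markdown id: tags:
The helper class above presumably wraps the standard `logging` module. For orientation, here is a minimal stdlib-only sketch of an equivalent setup; it is an assumption about what `addShellLogger`, `addRotatingLogFile` and `logExceptions` do internally, not the actual implementation.
%% Cell type:code id: tags:
``` python
# Hypothetical stdlib-only equivalent of the setup above (assumption:
# a StreamHandler, a RotatingFileHandler and a sys.excepthook wrapper).
import logging
import sys
from logging.handlers import RotatingFileHandler
from pathlib import Path

log = logging.getLogger("toargridding")
log.setLevel(logging.DEBUG)

log.addHandler(logging.StreamHandler())  # "shell logger": print records to the console

Path("log").mkdir(exist_ok=True)
log.addHandler(RotatingFileHandler("log/produce_data_manyStations.log",
                                   maxBytes=1_000_000, backupCount=3))  # bounded log size

def _log_uncaught(exc_type, exc_value, exc_tb):
    # route uncaught exceptions into the log instead of losing them
    log.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_tb))

sys.excepthook = _log_uncaught
```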
%% Cell type:code id: tags:
``` python
# creation of the request
# note: no moreOptions field here, as this notebook passes no extra query options
Config = namedtuple("Config", ["grid", "time", "variables", "stats"])

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for year in range(0, 19):
    valid_data = Config(
        grid,
        TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="daily"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],  # statistic
    )
    configs[f"test_ta{year}"] = valid_data
```
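%% Cell type:markdown id: tags:
A quick sanity check of the generated configurations; `cfg.stats` and `cfg.variables` are the namedtuple fields defined above.
%% Cell type:code id: tags:
``` python
# one config per year, keyed by a test name
for name, cfg in configs.items():
    print(f"{name}: stats={cfg.stats} variables={cfg.variables}")
```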
%% Cell type:code id: tags:
``` python
# CAVE: each request takes over 30 min per requested year. This cell therefore needs to be
# executed repeatedly to check whether the results are ready for download.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Here we adapt the duration before a request is abandoned.
# The default is 30 minutes; we poll every 45 minutes and wait up to 12 h per request.
analysis_service.connection.setRequestTimes(interval_min=45, maxWait_min=12*60)
for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
    )
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc")
        print(metadata.get_id())
```
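%% Cell type:markdown id: tags:
`interval_min` and `maxWait_min` amount to a polling loop. The following is a hypothetical sketch of the pattern that the `Connection` class presumably implements internally; `poll_until_ready` and `check_ready` are illustrative names, not part of toargridding.
%% Cell type:code id: tags:
``` python
import time

def poll_until_ready(check_ready, interval_min: int, maxWait_min: int) -> bool:
    """Call check_ready() every interval_min minutes until it returns True
    or maxWait_min minutes have passed."""
    waited = 0
    while waited < maxWait_min:
        if check_ready():
            return True          # results are ready for download
        time.sleep(interval_min * 60)
        waited += interval_min
    return False                 # give up; a later restart of the cell resumes the request
```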
......
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.defaultLogging import toargridding_defaultLogging
# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_withOptional.log"))
#logger.addSysLogger(logging.DEBUG)
```
%% Cell type:code id: tags:
``` python
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])

# moreOptions is a dict that adds additional arguments to the query to the REST API.
# For example, the field toar1_category with its possible values Urban, RuralLowElevation,
# RuralHighElevation and Unclassified can be added (see page 18 of
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf),
# or type_of_area with the values urban, suburban and rural (page 20).
details4Query = {
    #"toar1_category" : "Urban",              # uncomment if desired
    #"toar1_category" : "RuralLowElevation",  # uncomment if desired
    #"toar1_category" : "RuralHighElevation", # uncomment if desired
    #"type_of_area" : "Urban",                # alternatives: Rural, Suburban
    "type_of_area" : "Rural",
    #"type_of_area" : "Suburban",
}

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for year in range(0, 19):
    valid_data = Config(
        grid,
        #TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="daily"),   # adapt the range if needed
        TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling="monthly"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        #["mean", "dma8epax"],  # would start one request after the other
        #["dma8epa_strict"],
        ["mean"],
        details4Query,
    )
    configs[f"test_ta{year}"] = valid_data
```
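%% Cell type:markdown id: tags:
The `moreOptions` dict reaches the query through keyword-argument expansion (`**config.moreOptions` in the next cell). The stand-in function below illustrates the mechanism; `build_query` is hypothetical and not part of toargridding.
%% Cell type:code id: tags:
``` python
# illustration of **-expansion: the dict entries become extra query parameters
def build_query(variable, stat, **filters):
    params = {"variable": variable, "statistic": stat}
    params.update(filters)  # e.g. type_of_area="Rural"
    return params

print(build_query("mole_fraction_of_ozone_in_air", "mean", **details4Query))
# {'variable': 'mole_fraction_of_ozone_in_air', 'statistic': 'mean', 'type_of_area': 'Rural'}
```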
%% Cell type:code id: tags:
``` python
# CAVE: this cell runs about 45 minutes per requested year; therefore we increase the
# waiting duration to 1 h per request.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Adapt the polling interval and the total duration before the client abandons a request.
# As the requests take about 45 min, waiting up to 60 min before timing out is more
# suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        **config.moreOptions,
    )
    for dataset, metadata in zip(datasets, metadatas):
        dataset.to_netcdf(result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc")
        print(metadata.get_id())
```
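%% Cell type:markdown id: tags:
The `use_downloaded=True` flag makes reruns cheap: results that are already on disk are reused instead of being requested again. The helper below is a hypothetical illustration of that idea, not the client's actual implementation.
%% Cell type:code id: tags:
``` python
from pathlib import Path

def fetch_cached(target: Path, download) -> Path:
    # reuse a previously downloaded file if it exists, otherwise fetch it
    if target.exists():
        return target
    download(target)
    return target
```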
......
%% Cell type:code id: tags:
``` python
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection, EmptyDataError
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
from toargridding.defaultLogging import toargridding_defaultLogging
# setup of logging
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# __file__ is not defined inside a notebook, so the logfile has to be set explicitly
logger.addRotatingLogFile(Path("log/produce_data_withOptional_country.log"))
#logger.addSysLogger(logging.DEBUG)
```
%% Cell type:code id: tags:
``` python
# creation of the request
Config = namedtuple("Config", ["grid", "time", "variables", "stats", "moreOptions"])
# see page 18 of https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf

varName = "country"
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = dict()
for country in validCountries:
    valid_data = Config(
        grid,
        TimeSample( start=dt(2000,1,1), end=dt(2018,12,31), sampling="daily"),  # adapt the range if needed
        ["mole_fraction_of_ozone_in_air"],  # variable name
        ["dma8epa_strict"],
        {varName: country},
    )
    configs[f"test_ta{country}"] = valid_data
```
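%% Cell type:markdown id: tags:
Since the loop above creates one request per country code, it is worth checking how many requests will be launched before starting them.
%% Cell type:code id: tags:
``` python
# one request per country code
print(f"{len(configs)} country requests, first few: {list(configs)[:5]}")
```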
%% Cell type:code id: tags:
``` python
# CAVE: these requests can run for a long time; therefore we increase the waiting
# duration to 1 h per request.
# The processing is done on the servers of the TOAR database.
# Restarting the cell resumes the requests to the REST API once the requested data are
# ready for download. The download itself can also take a few minutes.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)
# Adapt the polling interval and the total duration before the client abandons a request.
# As the requests take about 45 min, waiting up to 60 min before timing out is more
# suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

createdFiles = []

for name, config in configs.items():
    print(f"\nProcessing {name}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            **config.moreOptions,
        )
    except EmptyDataError:
        # skip countries for which the request returns no data
        print("failed for", name)
        continue
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())
```
%% Cell type:code id: tags:
``` python
## TODO: now we only need to combine all the obtained results... (one possible approach is sketched below)
```
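%% Cell type:markdown id: tags:
One possible way to tackle the TODO above is to stack the per-country files along a new dimension with xarray. This sketch assumes that all files in `createdFiles` share the same grid and time coordinates; whether a `country` dimension is the desired form of aggregation depends on the analysis.
%% Cell type:code id: tags:
``` python
import xarray as xr

# stack the per-country results along a new "country" dimension
datasets = [xr.open_dataset(fname) for fname in createdFiles]
combined = xr.concat(datasets, dim="country")
combined.to_netcdf(result_basepath / "combined_countries.nc")
```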
......