Import the required packages and set up logging.

In [None]:
import logging
from datetime import datetime as dt
from collections import namedtuple
from pathlib import Path

from toargridding.toar_rest_client import AnalysisServiceDownload, Connection
from toargridding.grids import RegularGrid
from toargridding.gridding import get_gridded_toar_data
from toargridding.metadata import TimeSample

from toargridding.defaultLogging import toargridding_defaultLogging


from toargridding.defaultLogging import toargridding_defaultLogging
# Set up logging for this notebook: messages of level DEBUG and above are
# sent to the shell logger, exception logging is enabled via logExceptions(),
# and a rotating log file is attached.
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# The log file has to be set explicitly. Its parent directory is created
# beforehand so that attaching the file does not depend on "log/" already
# existing (same approach as for the cache and result directories).
log_file = Path("log/produce_data_manyStations.log")
log_file.parent.mkdir(parents=True, exist_ok=True)
logger.addRotatingLogFile(log_file)

Create the configurations for the requests to the analysis service. The full period is split into yearly requests.

In [None]:
# Build one request configuration per calendar year.

Config = namedtuple("Config", ["grid", "time", "variables", "stats"])

grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = {}
# For demonstration purposes two years (2000 and 2001) are sufficient;
# enlarge the range, e.g. range(19), to cover a longer period.
for year in range(2):
    valid_data = Config(
        grid=grid,
        # one full calendar year, sampled daily
        time=TimeSample(
            start=dt(2000 + year, 1, 1),
            end=dt(2000 + year, 12, 31),
            sampling="daily",
        ),
        variables=["mole_fraction_of_ozone_in_air"],  # variable name
        stats=["dma8epa_strict"],  # requested statistic
    )
    configs[f"test_ta{year}"] = valid_data


## Execution of toargridding
Caution: a request takes more than 30 minutes per requested year. Therefore, this cell may need to be executed repeatedly at different times to check whether the results are ready for download.
The processing is done on the server of the TOAR database.
Restarting the cell continues the pending request to the REST API. The downloaded data are cached locally to speed up subsequent analyses.
The download itself can also take a few minutes.

In [None]:

# Endpoint of the TOAR analysis service and the local working directories:
# "cache" is passed as cache_dir and "results" as sample_dir; the gridded
# netCDF files are written to "results" as well.
stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(
    stats_endpoint=stats_endpoint,
    cache_dir=cache_basepath,
    sample_dir=result_basepath,
    use_downloaded=True,
)

# Adjust the request timing (values in minutes) before a request is stopped:
# interval_min=45 and max_wait_minutes=12 * 60, i.e. up to 12 hours for one
# request. NOTE(review): the original notebook states that the default is
# 30 minutes - confirm against the toargridding documentation.
analysis_service.connection.set_request_times(interval_min=45, max_wait_minutes=12 * 60)

# Process every configuration and store each resulting dataset as netCDF.
for config_name, config in configs.items():
    print(f"\nProcessing {config_name}:")
    print("--------------------")
    datasets, metadatas = get_gridded_toar_data(
        analysis_service=analysis_service,
        grid=config.grid,
        time=config.time,
        variables=config.variables,
        stats=config.stats,
        contributors_path=result_basepath,
    )

    for dataset, metadata in zip(datasets, metadatas):
        # The output file name combines the metadata id and the grid id.
        dataset_id = metadata.get_id()
        dataset.to_netcdf(result_basepath / f"{dataset_id}_{config.grid.get_id()}.nc")
        print(dataset_id)