In [None]:
import logging
from collections import namedtuple
from datetime import datetime as dt
from pathlib import Path

from toargridding.defaultLogging import toargridding_defaultLogging
from toargridding.gridding import get_gridded_toar_data
from toargridding.grids import RegularGrid
from toargridding.metadata import TimeSample
from toargridding.metadata_utilities import countryCodes
from toargridding.toar_rest_client import AnalysisServiceDownload, Connection, EmptyDataError
# Set up logging for this notebook: messages go to the shell and to a rotating log file.
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()
# `__file__` only exists when this code is executed as a script; a Jupyter kernel
# does not define it. Register the script-named log file only when it is available,
# so the cell no longer fails with a NameError inside a notebook.
if "__file__" in globals():
    logger.addRotatingLogFile_scriptName(__file__)
# Inside a notebook the log file has to be set explicitly.
log_file = Path("log/produce_data_withOptional_country.log")
# Make sure the target directory exists before the file handler is registered.
# NOTE(review): harmless no-op if addRotatingLogFile already creates it - confirm.
log_file.parent.mkdir(parents=True, exist_ok=True)
logger.addRotatingLogFile(log_file)
# Optional: additionally forward log messages to the system logger.
#logger.addSysLogger(logging.DEBUG)

In [None]:
# Build one request configuration per valid country code.

# Container for everything a single request needs; "moreOptions" carries
# additional keyword arguments that are forwarded with the request.
Config = namedtuple("Config", ["grid", "time", "variables", "stats","moreOptions"])

# The available request options are described on page 18 of
# https://toar-data.fz-juelich.de/sphinx/TOAR_UG_Vol03_Database/build/latex/toardatabase--userguide.pdf

# Name of the optional request argument used to restrict a request to one country.
varName = "country"
stationCountries = countryCodes()
validCountries = stationCountries.getValidVocabular(controlName="Country Code", varName=varName)

# A single grid definition is shared by all configurations.
grid = RegularGrid(lat_resolution=1.9, lon_resolution=2.5)

configs = {}
for country in validCountries:
    # Daily sampling over the full period; adapt the range if required.
    time_sample = TimeSample(start=dt(2000,1,1), end=dt(2018,12,31), sampling="daily")
    valid_data = Config(
        grid=grid,
        time=time_sample,
        variables=["mole_fraction_of_ozone_in_air"],
        stats=["dma8epa_strict"],
        moreOptions={varName: country},
    )
    configs[f"test_ta{country}"] = valid_data


In [None]:
# CAVE: this cell runs about 45 minutes per requested year, therefore the waiting
# duration is increased to 1 h per request.
# The processing is done on the server of the TOAR database.
# Restarting the cell continues the request to the REST API if the requested data
# are ready for download. The download itself can also take a few minutes.

stats_endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
cache_basepath = Path("cache")
result_basepath = Path("results")
cache_basepath.mkdir(exist_ok=True)
result_basepath.mkdir(exist_ok=True)
analysis_service = AnalysisServiceDownload(
    stats_endpoint=stats_endpoint,
    cache_dir=cache_basepath,
    sample_dir=result_basepath,
    use_downloaded=True,
)

# Adapt the polling interval and the total waiting time before the client pauses
# the requests if needed. As a request takes about 45 min, waiting 60 min before
# timing out is more suitable than the original 30 min.
analysis_service.connection.setRequestTimes(interval_min=5, maxWait_min=60)

# Paths of all netCDF files written by this cell.
createdFiles = []

for config_name, config in configs.items():
    print(f"\nProcessing {config_name}:")
    print("--------------------")
    try:
        datasets, metadatas = get_gridded_toar_data(
            analysis_service=analysis_service,
            grid=config.grid,
            time=config.time,
            variables=config.variables,
            stats=config.stats,
            # The trailing comma is essential: without it the next line is parsed as
            # `result_basepath ** config.moreOptions` (a TypeError) instead of
            # unpacking the additional request options as keyword arguments.
            contributors_path=result_basepath,
            **config.moreOptions,
        )
    except EmptyDataError:
        # Report the configuration that raised EmptyDataError and continue with the next one.
        print("failed for ", config_name)
        continue

    # Write one netCDF file per returned dataset, named after metadata and grid id.
    for dataset, metadata in zip(datasets, metadatas):
        outName = result_basepath / f"{metadata.get_id()}_{config.grid.get_id()}.nc"
        dataset.to_netcdf(outName)
        createdFiles.append(outName)
        print(metadata.get_id())

In [None]:
##TODO: now we only need to combine all the obtained results...
