02_produce_data_manyStations.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "inclusion of packages and setting up logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "from datetime import datetime as dt\n",
    "from collections import namedtuple\n",
    "from pathlib import Path\n",
    "\n",
    "from toargridding.toar_rest_client import AnalysisServiceDownload, Connection\n",
    "from toargridding.grids import RegularGrid\n",
    "from toargridding.gridding import get_gridded_toar_data\n",
    "from toargridding.metadata import TimeSample\n",
    "\n",
    "from toargridding.defaultLogging import toargridding_defaultLogging\n",
    "\n",
    "\n",
    "from toargridding.defaultLogging import toargridding_defaultLogging\n",
    "#setup of logging\n",
    "logger = toargridding_defaultLogging()\n",
    "logger.addShellLogger(logging.DEBUG)\n",
    "logger.logExceptions()\n",
    "logger.addRotatingLogFile(Path(\"log/produce_data_manyStations.log\"))#we need to explicitly set a logfile"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "creation of configurations for the requests to the analysis service. The full duration is split in yearly requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#creation of request.\n",
    "\n",
    "Config = namedtuple(\"Config\", [\"grid\", \"time\", \"variables\", \"stats\"])\n",
    "\n",
    "grid = RegularGrid( lat_resolution=1.9, lon_resolution=2.5, )\n",
    "\n",
    "configs = dict()\n",
    "##for educational reasons the extraction of only two years is fine:-)\n",
    "#for year in range (0,19):\n",
    "for year in range (0,2):\n",
    "    valid_data = Config(\n",
    "        grid,\n",
    "        TimeSample( start=dt(2000+year,1,1), end=dt(2000+year,12,31), sampling=\"daily\"),#possibly adopt range:-)\n",
    "        [\"mole_fraction_of_ozone_in_air\"],#variable name\n",
    "        [ \"dma8epa_strict\" ]# change to dma8epa_strict\n",
    "        \n",
    "    )\n",
    "    \n",
    "    configs[f\"test_ta{year}\"] = valid_data\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## execution of toargridding. \n",
    "CAVE: the request takes over 30min per requested year. Therefore this cell needs to be executed at different times to check, if the results are ready for download.\n",
    "the processing is done on the server of the TOAR database.\n",
    "A restart of the cell continues the request to the REST API. The data are stored cached on the system to speed up the following analysis.\n",
    "The download can also take a few minutes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "stats_endpoint = \"https://toar-data.fz-juelich.de/api/v2/analysis/statistics/\"\n",
    "cache_basepath = Path(\"cache\")\n",
    "result_basepath = Path(\"results\")\n",
    "cache_basepath.mkdir(exist_ok=True)\n",
    "result_basepath.mkdir(exist_ok=True)\n",
    "analysis_service = AnalysisServiceDownload(stats_endpoint=stats_endpoint, cache_dir=cache_basepath, sample_dir=result_basepath, use_downloaded=True)\n",
    "\n",
    "\n",
    "#here we adopt the durations before, a request is stopped.\n",
    "#the default value is 30 minutes. \n",
    "#waiting up to 3h for one request\n",
    "analysis_service.connection.set_request_times(interval_min=45, max_wait_minutes=12*60)\n",
    "\n",
    "for person, config in configs.items():\n",
    "    print(f\"\\nProcessing {person}:\")\n",
    "    print(f\"--------------------\")\n",
    "    datasets, metadatas = get_gridded_toar_data(\n",
    "        analysis_service=analysis_service,\n",
    "        grid=config.grid,\n",
    "        time=config.time,\n",
    "        variables=config.variables,\n",
    "        stats=config.stats,\n",
    "        contributors_path=result_basepath\n",
    "    )\n",
    "\n",
    "    for dataset, metadata in zip(datasets, metadatas):\n",
    "        dataset.to_netcdf(result_basepath / f\"{metadata.get_id()}_{config.grid.get_id()}.nc\")\n",
    "        print(metadata.get_id())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "toargridding-8RVrxzmn-py3.11",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}