Skip to content
Snippets Groups Projects
contributor_metadata.ipynb 5.51 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Investigation of Metadata\n",
    "This notebook illustrates possible ways to include contributors in the metadata of a dataset. \n",
    "The default way for an offline execution is the creation of a contributors file.\n",
    "\n",
    "Adding all timeseries IDs to the metadata field is possible, but it neither looks nice nor is it supported by the contributors endpoint.\n",
    "The conversion into names is a simple approach: it does not include programs, does not sort organizations and persons, and does not include the affiliations in the organizations.\n",
    "This request takes quite a long time and is therefore not recommended."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download Example Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime as dt\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "from toargridding.grids import RegularGrid\n",
    "from toargridding.toar_rest_client import (\n",
    "    AnalysisServiceDownload,\n",
    "    STATION_LAT,\n",
    "    STATION_LON,\n",
    ")\n",
    "from toargridding.metadata import Metadata, TimeSample, AnalysisRequestResult, Coordinates\n",
    "from toargridding.variables import Coordinate\n",
    "\n",
    "from toargridding.contributors import contributions_manager_by_id, contributions_manager_by_name\n",
    "\n",
    "import logging\n",
    "from toargridding.defaultLogging import toargridding_defaultLogging\n",
    "# setup of logging: shell output at DEBUG level\n",
    "logger = toargridding_defaultLogging()\n",
    "logger.addShellLogger(logging.DEBUG)\n",
    "logger.logExceptions()\n",
    "\n",
    "# TOAR analysis service endpoint used for all requests in this notebook\n",
    "endpoint = \"https://toar-data.fz-juelich.de/api/v2/analysis/statistics/\"\n",
    "# starts in directory [path/to/toargridding]/tests\n",
    "# adapt toargridding_base_path for your machine if necessary.\n",
    "toargridding_base_path = Path(\".\")\n",
    "cache_dir = toargridding_base_path / \"cache\"\n",
    "data_download_dir = toargridding_base_path / \"results\"\n",
    "# make sure both directories exist before anything is written into them;\n",
    "# the calls are idempotent, so re-running this cell is safe\n",
    "cache_dir.mkdir(parents=True, exist_ok=True)\n",
    "data_download_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# use_downloaded=True: presumably re-uses results already present in data_download_dir (confirm in toargridding docs)\n",
    "analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir, use_downloaded=True)\n",
    "my_grid = RegularGrid(1.9, 2.5)\n",
    "\n",
    "# request: daily mean of ozone from 2016-01-01 to 2016-02-28\n",
    "time = TimeSample(dt(2016,1,1), dt(2016,2,28), \"daily\")\n",
    "metadata = Metadata.construct(\"mole_fraction_of_ozone_in_air\", time, \"mean\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Contributors as dedicated file (Default for offline operation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this cell can run for longer than 30 minutes\n",
    "data = analysis_service.get_data(metadata)\n",
    "\n",
    "# collect the contributors from the station data, set up the contributors endpoint\n",
    "# and write the result to the metadata before the dataset is created\n",
    "contrib = contributions_manager_by_id(metadata.get_id(), data_download_dir)\n",
    "contrib.extract_contributors_from_data_frame(data.stations_data)\n",
    "metadata.contributors_metadata_field = contrib.setup_contributors_endpoint_for_metadata()\n",
    "ds = my_grid.as_xarray(data)\n",
    "# store dataset\n",
    "ds.to_netcdf(data_download_dir / f\"endpoint+id-file_{metadata.get_id()}_{my_grid.get_id()}.nc\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Contributors as single line request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this cell can run for longer than 30 minutes\n",
    "data = analysis_service.get_data(metadata)\n",
    "\n",
    "# collect the contributors from the station data and write the result to the metadata;\n",
    "# inline_mode is switched on so that the request goes into the metadata field itself\n",
    "# (see the heading of this section) -- exact format is defined by toargridding.contributors\n",
    "contrib = contributions_manager_by_id(metadata.get_id(), data_download_dir)\n",
    "contrib.inline_mode = True\n",
    "contrib.extract_contributors_from_data_frame(data.stations_data)\n",
    "metadata.contributors_metadata_field = contrib.setup_contributors_endpoint_for_metadata()\n",
    "ds = my_grid.as_xarray(data)\n",
    "# store dataset\n",
    "ds.to_netcdf(data_download_dir / f\"request_in_field_{metadata.get_id()}_{my_grid.get_id()}.nc\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Contributors by name within field"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: this cell can run for longer than 30 minutes\n",
    "data = analysis_service.get_data(metadata)\n",
    "\n",
    "# collect the contributors from the station data with the by-name manager\n",
    "# and write the result to the metadata (inline, i.e. within the metadata field)\n",
    "contrib = contributions_manager_by_name(metadata.get_id(), data_download_dir)\n",
    "contrib.inline_mode = True\n",
    "contrib.extract_contributors_from_data_frame(data.stations_data)\n",
    "metadata.contributors_metadata_field = contrib.setup_contributors_endpoint_for_metadata()\n",
    "ds = my_grid.as_xarray(data)\n",
    "# store dataset\n",
    "ds.to_netcdf(data_download_dir / f\"by_name_{metadata.get_id()}_{my_grid.get_id()}.nc\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "toargridding-g-KQ1Hyq-py3.10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}