# Investigation of Metadata
This notebook illustrates possible ways to include contributors in the metadata of a dataset.
The default way for an offline execution is the creation of a contributors file.

Adding all timeseries IDs to the metadata field is possible, but it neither looks nice nor is it supported by the contributors endpoint.
The conversion into names is a simple approach: it does not include programs, it does not sort organizations and persons, and it does not include the affiliations in the organizations.
This request takes quite a long time and is therefore not advised.

### Download Example Dataset

In [None]:
from datetime import datetime as dt
from pathlib import Path

import pandas as pd

from toargridding.grids import RegularGrid
from toargridding.toar_rest_client import (
    AnalysisServiceDownload,
    STATION_LAT,
    STATION_LON,
)
from toargridding.metadata import Metadata, TimeSample, AnalysisRequestResult, Coordinates
from toargridding.variables import Coordinate

from toargridding.contributors import contributions_manager_by_id, contributions_manager_by_name

import logging
from toargridding.defaultLogging import toargridding_defaultLogging
# Setup of logging: attach a shell logger at DEBUG level and enable the
# exception logging provided by the toargridding default logger.
logger = toargridding_defaultLogging()
logger.addShellLogger(logging.DEBUG)
logger.logExceptions()

# Statistics endpoint of the TOAR analysis service.
endpoint = "https://toar-data.fz-juelich.de/api/v2/analysis/statistics/"
# Paths are relative to the current working directory; the notebook is
# expected to start in [path/to/toargridding]/tests.
# Adapt toargridding_base_path for your machine if necessary.
toargridding_base_path = Path(".")
cache_dir = toargridding_base_path / "cache"
data_download_dir = toargridding_base_path / "results"
# Make sure both directories exist: they are handed to the download service
# and the resulting netCDF files are written into data_download_dir.
cache_dir.mkdir(parents=True, exist_ok=True)
data_download_dir.mkdir(parents=True, exist_ok=True)

analysis_service = AnalysisServiceDownload(endpoint, cache_dir, data_download_dir, use_downloaded=True)
# NOTE(review): the two arguments are presumably the grid resolution in
# degrees (latitude, longitude) - confirm against RegularGrid.
my_grid = RegularGrid(1.9, 2.5)

# Requested data: daily sampling from 2016-01-01 to 2016-02-28, statistic "mean" of ozone.
time = TimeSample(dt(2016,1,1), dt(2016,2,28), "daily")
metadata = Metadata.construct("mole_fraction_of_ozone_in_air", time, "mean")


# Contributors as dedicated file (Default for offline operation)

In [None]:
# NOTE: this cell can run for longer than 30 minutes
data = analysis_service.get_data(metadata)

# Collect the contributors from the stations data frame and store the
# resulting contributors entry in the metadata.
contrib = contributions_manager_by_id(metadata.get_id(), data_download_dir)
contrib.extract_contributors_from_data_frame(data.stations_data)
contributors_field = contrib.setup_contributors_endpoint_for_metadata()
metadata.contributors_metadata_field = contributors_field
ds = my_grid.as_xarray(data)

# Store the dataset as a netCDF file in the download directory.
output_name = f"endpoint+id-file_{metadata.get_id()}_{my_grid.get_id()}.nc"
ds.to_netcdf(data_download_dir / output_name)

# Contributors as single line request

In [None]:
# NOTE: this cell can run for longer than 30 minutes
data = analysis_service.get_data(metadata)

# Collect the contributors by ID with inline mode enabled and store the
# resulting contributors entry in the metadata.
contrib = contributions_manager_by_id(metadata.get_id(), data_download_dir)
contrib.inline_mode = True
contrib.extract_contributors_from_data_frame(data.stations_data)
contributors_field = contrib.setup_contributors_endpoint_for_metadata()
metadata.contributors_metadata_field = contributors_field
ds = my_grid.as_xarray(data)

# Store the dataset as a netCDF file in the download directory.
output_name = f"request_in_field_{metadata.get_id()}_{my_grid.get_id()}.nc"
ds.to_netcdf(data_download_dir / output_name)

# Contributors by name within field

In [None]:
# NOTE: this cell can run for longer than 30 minutes
data = analysis_service.get_data(metadata)

# Collect the contributors by name with inline mode enabled and store the
# resulting contributors entry in the metadata.
contrib = contributions_manager_by_name(metadata.get_id(), data_download_dir)
contrib.inline_mode = True
contrib.extract_contributors_from_data_frame(data.stations_data)
contributors_field = contrib.setup_contributors_endpoint_for_metadata()
metadata.contributors_metadata_field = contributors_field
ds = my_grid.as_xarray(data)

# Store the dataset as a netCDF file in the download directory.
output_name = f"by_name_{metadata.get_id()}_{my_grid.get_id()}.nc"
ds.to_netcdf(data_download_dir / output_name)