-
Sabine Schröder authoredSabine Schröder authored
get_timeseries_meta.sh 18.73 KiB
#!/bin/bash
# example script for transfering ozone timeseries from UBA stations
# (country='Germany' and network_name='UBA')
# station_numid (now: station_id) has changed --> see log file!
# also id will change for timeseries --> log mapping: old_id <-> new_id
#
# TBD:
#
# matching fields:
# old | new | new table
# ====================================|======================================|==========================|
# station_numid | station_id | timeseries |
# parameter_label | - (to be newly defined: label) | timeseries |
# parameter_name | variable_id | timeseries |
# parameter_attribute | additional_metadata | timeseries |???
# | - should some of it also go to label?| |???
# | - REA should go to source=modeled! | |???
# parameter_sampling_type | additional_metadata | timeseries |???
# parameter_measurement_method | measurement_method | timeseries |
# parameter_original_units | additional_metadata | timeseries |???
# parameter_calibration | pointing to annotations | timeseries_annotations |???
# parameter_contributor_shortname | pointing to role (contrib,org) | contacts/timeseries_roles|
# parameter_contributor | pointing to role (contrib,org) | contacts/timeseries_roles|
# parameter_contributor_country | pointing to role (contrib,org) | contacts/timeseries_roles|
# parameter_dataset_type | sampling_frequency | timeseries |
# parameter_status | additional_metadata | timeseries |???
# comments | pointing to annotations | timeseries_annotations |???
# creation_date | date_added | timeseries |
# modification_date | date_modified | timeseries |
# data_start_date | data_start_date | timeseries |
# data_end_date | data_end_date | timeseries |
# parameter_pi | pointing to role (PI, person) | contacts/timeseries_roles|
# parameter_pi_email | pointing to role (PI, person) | contacts/timeseries_roles|
# parameter_instrument_manufacturer | additional_metadata | timeseries |???
# parameter_instrument_model | additional_metadata | timeseries |???
#
# converting from timestamp without timezone to timestamp with timezone
# converting sampling_frequency to controlled vocabulary (SF_vocabulary) -- UBA: 0 (Hourly)
# converting source to controlled vocabulary (DS_vocabulary) -- UBA (see below)
# converting access_rights to controlled vocabulary (DA_vocabulary) -- UBA: 0 (ByAttribution)
# converting aggregation to controlled vocabulary (AT_vocabulary) -- UBA: 1 (Mean1Of2)
# converting measurement_method to controlled vocabulary (MM_vocabulary)
# converting kind to controlled vocabulary (OK_vocabulary) -- UBA: 1 (Government)
# converting role to controlled vocabulary (RC_vocabulary) -- UBA: 1 (PrincipalInvestigator) and 3 (Contributor)
#
# new (not available in old DB):
# - order (now set to 1)
# - access_rights (now set to 0 (ByAttribution) -- DA_vocabulary)
# - aggregation (UBA: 0 (Mean), if label == REA, 1 (Mean1Of2) else -- AT_vocabulary)
# - sampling height (now set to 2 (m))
# - programme_id (now set to 0 (None-programme))
# - source (UBA: 0 (Model), if label == REA, 1 (Measurement) else -- MM_vocabulary)
# It was double-checked that all data with parameter_contributor_shortname='MIUB' have label='REA'!
# - kind (UBA: 1 (Government) -- OK_vocabulary)
# - role (UBA: 1 (PrincipalInvestigator) and 3 (Contributor) -- RC_vocabulary)
# - status (UBA: 2 (unknown for PI) and 0 (active for Contributor) -- RS_vocabulary)
#
# for UBA: use parameter_attribute for LABEL information
# --> measurements: LABEL=''
# --> models: LABEL='REA'
# ==> they have to be distinguishable (AND unique constraint on (station_id,variable_id,label)
#
# author: s.schroeder@fz-juelich.de
# date: 2020-07-05
PARAMETER_NAME=temp
SAMPLING=hourly
ORDERED_COLUMNS="id,station_numid,parameter_label,parameter_name,parameter_attribute,parameter_sampling_type,\
parameter_measurement_method,parameter_original_units,parameter_calibration,parameter_contributor_shortname,\
parameter_contributor,parameter_contributor_country,parameter_dataset_type,parameter_status,comments,\
creation_date,modification_date,data_start_date,data_end_date,parameter_pi,parameter_pi_email,\
parameter_instrument_manufacturer,parameter_instrument_model"
#just for now (see above)
ORDER=1
ACCESS_RIGHTS=0
SAMPLING_HEIGHT=2
SAMPLING_FREQUENCY=0
PROGRAMME_ID=0
KIND=1
PI_ROLE=1
PI_STATUS=2
CONTRIBUTOR_ROLE=3
CONTRIBUTOR_STATUS=0
AGGREGATION=1
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM variables\
WHERE name='$PARAMETER_NAME';"`
VARIABLE_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
while read -a line
do
OLD_STATIONID=${line[0]}
NEW_STATIONID=${line[1]}
COUNTER=0
while IFS='|' read -a row
do
if [ $COUNTER -eq 2 ]
then
ID=${row[0]}
echo "ID: $ID"
STATION_NUMID=${row[1]}
echo "STATION_NUMID: $STATION_NUMID"
PARAMETER_LABEL=${row[2]}
echo "PARAMETER_LABEL: $PARAMETER_LABEL"
# We already set PARAMETER_NAME manually!
# The following two lines can be used when debugging...
# (and we do not want to overwrite PARAMETER_NAME with untrimmed value)
# PARAMETER_NAME=${row[3]}
# echo "PARAMETER_NAME: $PARAMETER_NAME"
PARAMETER_ATTRIBUTE=${row[4]}
echo "PARAMETER_ATTRIBUTE: $PARAMETER_ATTRIBUTE"
PARAMETER_SAMPLING_TYPE=${row[5]}
echo "PARAMETER_SAMPLING_TYPE: $PARAMETER_SAMPLING_TYPE"
PARAMETER_MEASUREMENT_METHOD=${row[6]}
echo "PARAMETER_MEASUREMENT_METHOD: $PARAMETER_MEASUREMENT_METHOD"
PARAMETER_ORIGINAL_UNITS=${row[7]}
echo "PARAMETER_ORIGINAL_UNITS: $PARAMETER_ORIGINAL_UNITS"
PARAMETER_CALIBRATION=${row[8]}
echo "PARAMETER_CALIBRATION: $PARAMETER_CALIBRATION"
PARAMETER_CONTRIBUTOR_SHORTNAME=${row[9]}
echo "PARAMETER_CONTRIBUTOR_SHORTNAME: $PARAMETER_CONTRIBUTOR_SHORTNAME"
PARAMETER_CONTRIBUTOR=${row[10]}
echo "PARAMETER_CONTRIBUTOR: $PARAMETER_CONTRIBUTOR"
PARAMETER_CONTRIBUTOR_COUNTRY=${row[11]}
echo "PARAMETER_CONTRIBUTOR_COUNTRY: $PARAMETER_CONTRIBUTOR_COUNTRY"
PARAMETER_DATASET_TYPE=${row[12]}
echo "PARAMETER_DATASET_TYPE: $PARAMETER_DATASET_TYPE"
PARAMETER_STATUS=${row[13]}
echo "PARAMETER_STATUS: $PARAMETER_STATUS"
COMMENTS=${row[14]}
echo "COMMENTS: $COMMENTS"
CREATION_DATE=${row[15]}
echo "CREATION_DATE: $CREATION_DATE"
MODIFICATION_DATE=${row[16]}
echo "MODIFICATION_DATE: $MODIFICATION_DATE"
DATA_START_DATE=${row[17]}
echo "DATA_START_DATE: $DATA_START_DATE"
DATA_END_DATE=${row[18]}
echo "DATA_END_DATE: $DATA_END_DATE"
PARAMETER_PI=${row[19]}
echo "PARAMETER_PI: $PARAMETER_PI"
PARAMETER_PI_EMAIL=${row[20]}
echo "PARAMETER_PI_EMAIL: $PARAMETER_PI_EMAIL"
PARAMETER_INSTRUMENT_MANUFACTURER=${row[21]}
echo "PARAMETER_INSTRUMENT_MANUFACTURER: $PARAMETER_INSTRUMENT_MANUFACTURER"
PARAMETER_INSTRUMENT_MODEL=${row[22]}
echo "PARAMETER_INSTRUMENT_MODEL: $PARAMETER_INSTRUMENT_MODEL"
fi
let COUNTER=COUNTER+1
done < <(echo "SELECT ${ORDERED_COLUMNS} FROM parameter_series WHERE station_numid=${OLD_STATIONID} AND parameter_name='${PARAMETER_NAME}' AND parameter_dataset_type='${SAMPLING}'" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder)
if [ "$ID" != "(0 rows)" ]
then
# 1. contacts/timeseries_roles
# ============================
# 1. a) persons
# =============
EMAIL=`echo -n "${PARAMETER_PI_EMAIL//[[:space:]]/}"`
NAME=`echo -e $PARAMETER_PI | awk '{$1=$1};1'`
PHONE=''
ISPRIVATE=True
# at least for UBA data: email='unknown', name='unknown' given for undefined
# ==> to be checked for other providers!!!
PERSON_ID=-1
if [ "$EMAIL" != "unknown" ] || [ "$NAME" != "unknown" ]
then
# check, whether person exists already in database
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM persons\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM persons\
WHERE name='$NAME' AND EMAIL='$EMAIL';"`
PERS_EXISTED=1
PERSON_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
if [ "$PERSON_ID" == "(0" ]
then
PERS_EXISTED=0
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO persons\
(name,email,phone,isprivate) VALUES
('$NAME','$EMAIL','$PHONE',$ISPRIVATE)\
RETURNING id;"`
PERSON_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
fi
# 1. b) organisations
# ===================
NAME=`echo -n "${PARAMETER_CONTRIBUTOR_SHORTNAME//[[:space:]]/}"`
LONGNAME=`echo -e $PARAMETER_CONTRIBUTOR | awk '{$1=$1};1'`
COUNTRY=`echo -e $PARAMETER_CONTRIBUTOR_COUNTRY | awk '{$1=$1};1'`
CITY=''
POSTCODE=''
STREET_ADDRESS=''
HOMEPAGE=''
ORGANISATION_ID=-1
if [ "$NAME" != "unknown" ] || [ "$LONGNAME" != "unknown" ]
then
# check, whether organisation exists already in database
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM organisations\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM organisations\
WHERE name='$NAME' AND longname='$LONGNAME';"`
ORG_EXISTED=1
ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
if [ "$ORGANISATION_ID" == "(0" ]
then
ORG_EXISTED=0
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO organisations\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO organisations\
(name,longname,kind,city,postcode,street_address,country,homepage) VALUES \
('$NAME','$LONGNAME',$KIND,'$CITY','$POSTCODE','$STREET_ADDRESS','$COUNTRY','$HOMEPAGE') \
RETURNING id;"`
ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
fi
# 1. c) contacts
# ==============
if [ $PERSON_ID -ne -1 ]
then
if [ $PERS_EXISTED -eq 1 ]
then
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM contacts\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM contacts\
WHERE person_id=$PERSON_ID;"`
else
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\
(person_id,organisation_id) VALUES \
($PERSON_ID,0) \
RETURNING id;"`
fi
PI_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
if [ $ORGANISATION_ID -ne -1 ]
then
if [ $ORG_EXISTED -eq 1 ]
then
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM contacts\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM contacts\
WHERE organisation_id=$ORGANISATION_ID;"`
else
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\
(person_id,organisation_id) VALUES \
(0,$ORGANISATION_ID) \
RETURNING id;"`
fi
CONTRIBUTOR_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
# 1. d) timeseries_roles
# ======================
if [ $PERSON_ID -ne -1 ]
then
# check, whether timeseries_role already exists
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM timeseries_roles\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM timeseries_roles\
WHERE role=$PI_ROLE AND status=$PI_STATUS AND contact_id=$PI_ID;"`
ROLE1=$(echo $res | cut -f2 | cut -f3 -d' ')
if [ "$ROLE1" == "(0" ]
then
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\
(role,status,contact_id) VALUES \
($PI_ROLE,$PI_STATUS,$PI_ID)\
RETURNING id;"`
ROLE1=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
fi
if [ $ORGANISATION_ID -ne -1 ]
then
# check, whether timeseries_role already exists
# res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM timeseries_roles\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM timeseries_roles\
WHERE role=$CONTRIBUTOR_ROLE AND status=$CONTRIBUTOR_STATUS AND contact_id=$CONTRIBUTOR_ID;"`
ROLE2=$(echo $res | cut -f2 | cut -f3 -d' ')
if [ "$ROLE2" == "(0" ]
then
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\
(role,status,contact_id) VALUES \
($CONTRIBUTOR_ROLE,$CONTRIBUTOR_STATUS,$CONTRIBUTOR_ID) \
RETURNING id;"`
ROLE2=$(echo $res | cut -f2 | cut -f3 -d' ')
fi
fi
# 2. timeseries_annotations
# =========================
# parameter_calibration: none given for UBA measured ozone
# comments: none given for UBA measured ozone
# 3. timeseries
# =============
# "order" (column name of table timeseries) is a reserved word of PostgreSQL!!!
# fill additional_metadata
# accessing special fields within PostgreSQL is then possible via (f. ex.)
# select additional_metadata->>'original_units' from timeseries;
TRIMMED_PARAMETER_ATTRIBUTE=`echo -n "${PARAMETER_ATTRIBUTE//[[:space:]]/}"`
TRIMMED_PARAMETER_SAMPLING_TYPE=`echo -n "${PARAMETER_SAMPLING_TYPE//[[:space:]]/}"`
TRIMMED_PARAMETER_ORIGINAL_UNITS=`echo -e $PARAMETER_ORIGINAL_UNITS | awk '{$1=$1};1'`
TRIMMED_PARAMETER_STATUS=`echo -n "${PARAMETER_STATUS//[[:space:]]/}"`
TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER=`echo -e $PARAMETER_INSTRUMENT_MANUFACTURER | awk '{$1=$1};1'`
TRIMMED_PARAMETER_INSTRUMENT_MODEL=`echo -e $PARAMETER_INSTRUMENT_MODEL | awk '{$1=$1};1'`
ADDITIONAL_METADATA="{\"parameter_attribute\":\"$TRIMMED_PARAMETER_ATTRIBUTE\",\
\"parameter_sampling_type\":\"$TRIMMED_PARAMETER_SAMPLING_TYPE\",\
\"parameter_original_units\":\"$TRIMMED_PARAMETER_ORIGINAL_UNITS\",\
\"parameter_status\":\"$TRIMMED_PARAMETER_STATUS\",\
\"parameter_instrument_manufacturer\":\"$TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER\",\
\"parameter_instrument_model\":\"$TRIMMED_PARAMETER_INSTRUMENT_MODEL\"}"
DATE_ADDED=$CREATION_DATE
DATE_MODIFIED=$MODIFICATION_DATE
# what about lower_case?????
if [[ $PARAMETER_MEASUREMENT_METHOD == *"UV"* ]]
then
MEASUREMENT_METHOD=0
else
MEASUREMENT_METHOD=1
fi
SOURCE=1
if [ "$TRIMMED_PARAMETER_ATTRIBUTE" == "REA" ]
then
SOURCE=0
fi
LABEL=$TRIMMED_PARAMETER_ATTRIBUTE
# res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries \
res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries\
(label,\"order\",access_rights,sampling_frequency,aggregation,data_start_date,data_end_date,sampling_height,\
additional_metadata,date_added,date_modified,station_id,variable_id,source,measurement_method,programme_id) VALUES \
('$LABEL',$ORDER,$ACCESS_RIGHTS,$SAMPLING_FREQUENCY,$AGGREGATION,'$DATA_START_DATE','$DATA_END_DATE',$SAMPLING_HEIGHT,\
'$ADDITIONAL_METADATA','$DATE_ADDED','$DATE_MODIFIED',$NEW_STATIONID,$VARIABLE_ID,$SOURCE,$MEASUREMENT_METHOD,$PROGRAMME_ID) \
RETURNING id;"`
# log new id
NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
echo "$ID $NEW_ID $PARAMETER_NAME $SAMPLING" >>"timeseries_oldID_newID_${PARAMETER_NAME}_${SAMPLING}.txt"
# 4. many-to-many relations
# =========================
# 4. a) timeseries_timeseries_roles
# =================================
if [ $PERSON_ID -ne -1 ]
then
# psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_timeseries_roles\
psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_timeseries_roles\
(timeseries_id,role_id) VALUES \
($NEW_ID,$ROLE1);"
fi
if [ $ORGANISATION_ID -ne -1 ]
then
# psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_timeseries_roles\
psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_timeseries_roles\
(timeseries_id,role_id) VALUES \
($NEW_ID,$ROLE2);"
fi
# 4. b) timeseries_timeseries_annotations
# =======================================
# for UBA: none (see above)
fi
done < stations_oldID_newID.txt