#!/bin/bash # example script for transfering ozone timeseries from UBA stations # (country='Germany' and network_name='UBA') # station_numid (now: station_id) has changed --> see log file! # also id will change for timeseries --> log mapping: old_id <-> new_id # # TBD: # # matching fields: # old | new | new table # ====================================|======================================|==========================| # station_numid | station_id | timeseries | # parameter_label | - (to be newly defined: label) | timeseries | # parameter_name | variable_id | timeseries | # parameter_attribute | additional_metadata | timeseries |??? # | - should some of it also go to label?| |??? # | - REA should go to source=modeled! | |??? # parameter_sampling_type | additional_metadata | timeseries |??? # parameter_measurement_method | measurement_method | timeseries | # parameter_original_units | additional_metadata | timeseries |??? # parameter_calibration | pointing to annotations | timeseries_annotations |??? # parameter_contributor_shortname | pointing to role (contrib,org) | contacts/timeseries_roles| # parameter_contributor | pointing to role (contrib,org) | contacts/timeseries_roles| # parameter_contributor_country | pointing to role (contrib,org) | contacts/timeseries_roles| # parameter_dataset_type | sampling_frequency | timeseries | # parameter_status | additional_metadata | timeseries |??? # comments | pointing to annotations | timeseries_annotations |??? # creation_date | date_added | timeseries | # modification_date | date_modified | timeseries | # data_start_date | data_start_date | timeseries | # data_end_date | data_end_date | timeseries | # parameter_pi | pointing to role (PI, person) | contacts/timeseries_roles| # parameter_pi_email | pointing to role (PI, person) | contacts/timeseries_roles| # parameter_instrument_manufacturer | additional_metadata | timeseries |??? # parameter_instrument_model | additional_metadata | timeseries |??? # # converting from timestamp without timezone to timestamp with timezone # converting sampling_frequency to controlled vocabulary (SF_vocabulary) -- UBA: 0 (Hourly) # converting source to controlled vocabulary (DS_vocabulary) -- UBA (see below) # converting access_rights to controlled vocabulary (DA_vocabulary) -- UBA: 0 (ByAttribution) # converting aggregation to controlled vocabulary (AT_vocabulary) -- UBA: 1 (Mean1Of2) # converting measurement_method to controlled vocabulary (MM_vocabulary) # converting kind to controlled vocabulary (OK_vocabulary) -- UBA: 1 (Government) # converting role to controlled vocabulary (RC_vocabulary) -- UBA: 1 (PrincipalInvestigator) and 3 (Contributor) # # new (not available in old DB): # - order (now set to 1) # - access_rights (now set to 0 (ByAttribution) -- DA_vocabulary) # - aggregation (UBA: 0 (Mean), if label == REA, 1 (Mean1Of2) else -- AT_vocabulary) # - sampling height (now set to 2 (m)) # - programme_id (now set to 0 (None-programme)) # - source (UBA: 0 (Model), if label == REA, 1 (Measurement) else -- MM_vocabulary) # It was double-checked that all data with parameter_contributor_shortname='MIUB' have label='REA'! # - kind (UBA: 1 (Government) -- OK_vocabulary) # - role (UBA: 1 (PrincipalInvestigator) and 3 (Contributor) -- RC_vocabulary) # - status (UBA: 2 (unknown for PI) and 0 (active for Contributor) -- RS_vocabulary) # # for UBA: use parameter_attribute for LABEL information # --> measurements: LABEL='' # --> models: LABEL='REA' # ==> they have to be distinguishable (AND unique constraint on (station_id,variable_id,label) # # author: s.schroeder@fz-juelich.de # date: 2020-07-05 PARAMETER_NAME=temp SAMPLING=hourly ORDERED_COLUMNS="id,station_numid,parameter_label,parameter_name,parameter_attribute,parameter_sampling_type,\ parameter_measurement_method,parameter_original_units,parameter_calibration,parameter_contributor_shortname,\ parameter_contributor,parameter_contributor_country,parameter_dataset_type,parameter_status,comments,\ creation_date,modification_date,data_start_date,data_end_date,parameter_pi,parameter_pi_email,\ parameter_instrument_manufacturer,parameter_instrument_model" #just for now (see above) ORDER=1 ACCESS_RIGHTS=0 SAMPLING_HEIGHT=2 SAMPLING_FREQUENCY=0 PROGRAMME_ID=0 KIND=1 PI_ROLE=1 PI_STATUS=2 CONTRIBUTOR_ROLE=3 CONTRIBUTOR_STATUS=0 AGGREGATION=1 res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM variables\ WHERE name='$PARAMETER_NAME';"` VARIABLE_ID=$(echo $res | cut -f2 | cut -f3 -d' ') while read -a line do OLD_STATIONID=${line[0]} NEW_STATIONID=${line[1]} COUNTER=0 while IFS='|' read -a row do if [ $COUNTER -eq 2 ] then ID=${row[0]} echo "ID: $ID" STATION_NUMID=${row[1]} echo "STATION_NUMID: $STATION_NUMID" PARAMETER_LABEL=${row[2]} echo "PARAMETER_LABEL: $PARAMETER_LABEL" # We already set PARAMETER_NAME manually! # The following two lines can be used when debugging... # (and we do not want to overwrite PARAMETER_NAME with untrimmed value) # PARAMETER_NAME=${row[3]} # echo "PARAMETER_NAME: $PARAMETER_NAME" PARAMETER_ATTRIBUTE=${row[4]} echo "PARAMETER_ATTRIBUTE: $PARAMETER_ATTRIBUTE" PARAMETER_SAMPLING_TYPE=${row[5]} echo "PARAMETER_SAMPLING_TYPE: $PARAMETER_SAMPLING_TYPE" PARAMETER_MEASUREMENT_METHOD=${row[6]} echo "PARAMETER_MEASUREMENT_METHOD: $PARAMETER_MEASUREMENT_METHOD" PARAMETER_ORIGINAL_UNITS=${row[7]} echo "PARAMETER_ORIGINAL_UNITS: $PARAMETER_ORIGINAL_UNITS" PARAMETER_CALIBRATION=${row[8]} echo "PARAMETER_CALIBRATION: $PARAMETER_CALIBRATION" PARAMETER_CONTRIBUTOR_SHORTNAME=${row[9]} echo "PARAMETER_CONTRIBUTOR_SHORTNAME: $PARAMETER_CONTRIBUTOR_SHORTNAME" PARAMETER_CONTRIBUTOR=${row[10]} echo "PARAMETER_CONTRIBUTOR: $PARAMETER_CONTRIBUTOR" PARAMETER_CONTRIBUTOR_COUNTRY=${row[11]} echo "PARAMETER_CONTRIBUTOR_COUNTRY: $PARAMETER_CONTRIBUTOR_COUNTRY" PARAMETER_DATASET_TYPE=${row[12]} echo "PARAMETER_DATASET_TYPE: $PARAMETER_DATASET_TYPE" PARAMETER_STATUS=${row[13]} echo "PARAMETER_STATUS: $PARAMETER_STATUS" COMMENTS=${row[14]} echo "COMMENTS: $COMMENTS" CREATION_DATE=${row[15]} echo "CREATION_DATE: $CREATION_DATE" MODIFICATION_DATE=${row[16]} echo "MODIFICATION_DATE: $MODIFICATION_DATE" DATA_START_DATE=${row[17]} echo "DATA_START_DATE: $DATA_START_DATE" DATA_END_DATE=${row[18]} echo "DATA_END_DATE: $DATA_END_DATE" PARAMETER_PI=${row[19]} echo "PARAMETER_PI: $PARAMETER_PI" PARAMETER_PI_EMAIL=${row[20]} echo "PARAMETER_PI_EMAIL: $PARAMETER_PI_EMAIL" PARAMETER_INSTRUMENT_MANUFACTURER=${row[21]} echo "PARAMETER_INSTRUMENT_MANUFACTURER: $PARAMETER_INSTRUMENT_MANUFACTURER" PARAMETER_INSTRUMENT_MODEL=${row[22]} echo "PARAMETER_INSTRUMENT_MODEL: $PARAMETER_INSTRUMENT_MODEL" fi let COUNTER=COUNTER+1 done < <(echo "SELECT ${ORDERED_COLUMNS} FROM parameter_series WHERE station_numid=${OLD_STATIONID} AND parameter_name='${PARAMETER_NAME}' AND parameter_dataset_type='${SAMPLING}'" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder) if [ "$ID" != "(0 rows)" ] then # 1. contacts/timeseries_roles # ============================ # 1. a) persons # ============= EMAIL=`echo -n "${PARAMETER_PI_EMAIL//[[:space:]]/}"` NAME=`echo -e $PARAMETER_PI | awk '{$1=$1};1'` PHONE='' ISPRIVATE=True # at least for UBA data: email='unknown', name='unknown' given for undefined # ==> to be checked for other providers!!! PERSON_ID=-1 if [ "$EMAIL" != "unknown" ] || [ "$NAME" != "unknown" ] then # check, whether person exists already in database # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM persons\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM persons\ WHERE name='$NAME' AND EMAIL='$EMAIL';"` PERS_EXISTED=1 PERSON_ID=$(echo $res | cut -f2 | cut -f3 -d' ') if [ "$PERSON_ID" == "(0" ] then PERS_EXISTED=0 res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO persons\ (name,email,phone,isprivate) VALUES ('$NAME','$EMAIL','$PHONE',$ISPRIVATE)\ RETURNING id;"` PERSON_ID=$(echo $res | cut -f2 | cut -f3 -d' ') fi fi # 1. b) organisations # =================== NAME=`echo -n "${PARAMETER_CONTRIBUTOR_SHORTNAME//[[:space:]]/}"` LONGNAME=`echo -e $PARAMETER_CONTRIBUTOR | awk '{$1=$1};1'` COUNTRY=`echo -e $PARAMETER_CONTRIBUTOR_COUNTRY | awk '{$1=$1};1'` CITY='' POSTCODE='' STREET_ADDRESS='' HOMEPAGE='' ORGANISATION_ID=-1 if [ "$NAME" != "unknown" ] || [ "$LONGNAME" != "unknown" ] then # check, whether organisation exists already in database # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM organisations\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM organisations\ WHERE name='$NAME' AND longname='$LONGNAME';"` ORG_EXISTED=1 ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ') if [ "$ORGANISATION_ID" == "(0" ] then ORG_EXISTED=0 # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO organisations\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO organisations\ (name,longname,kind,city,postcode,street_address,country,homepage) VALUES \ ('$NAME','$LONGNAME',$KIND,'$CITY','$POSTCODE','$STREET_ADDRESS','$COUNTRY','$HOMEPAGE') \ RETURNING id;"` ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ') fi fi # 1. c) contacts # ============== if [ $PERSON_ID -ne -1 ] then if [ $PERS_EXISTED -eq 1 ] then # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM contacts\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM contacts\ WHERE person_id=$PERSON_ID;"` else # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\ (person_id,organisation_id) VALUES \ ($PERSON_ID,0) \ RETURNING id;"` fi PI_ID=$(echo $res | cut -f2 | cut -f3 -d' ') fi if [ $ORGANISATION_ID -ne -1 ] then if [ $ORG_EXISTED -eq 1 ] then # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM contacts\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM contacts\ WHERE organisation_id=$ORGANISATION_ID;"` else # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\ (person_id,organisation_id) VALUES \ (0,$ORGANISATION_ID) \ RETURNING id;"` fi CONTRIBUTOR_ID=$(echo $res | cut -f2 | cut -f3 -d' ') fi # 1. d) timeseries_roles # ====================== if [ $PERSON_ID -ne -1 ] then # check, whether timeseries_role already exists # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM timeseries_roles\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM timeseries_roles\ WHERE role=$PI_ROLE AND status=$PI_STATUS AND contact_id=$PI_ID;"` ROLE1=$(echo $res | cut -f2 | cut -f3 -d' ') if [ "$ROLE1" == "(0" ] then # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\ (role,status,contact_id) VALUES \ ($PI_ROLE,$PI_STATUS,$PI_ID)\ RETURNING id;"` ROLE1=$(echo $res | cut -f2 | cut -f3 -d' ') fi fi if [ $ORGANISATION_ID -ne -1 ] then # check, whether timeseries_role already exists # res=`psql -h localhost -d toardb -U toaradmin -c "SELECT id FROM timeseries_roles\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "SELECT id FROM timeseries_roles\ WHERE role=$CONTRIBUTOR_ROLE AND status=$CONTRIBUTOR_STATUS AND contact_id=$CONTRIBUTOR_ID;"` ROLE2=$(echo $res | cut -f2 | cut -f3 -d' ') if [ "$ROLE2" == "(0" ] then # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\ (role,status,contact_id) VALUES \ ($CONTRIBUTOR_ROLE,$CONTRIBUTOR_STATUS,$CONTRIBUTOR_ID) \ RETURNING id;"` ROLE2=$(echo $res | cut -f2 | cut -f3 -d' ') fi fi # 2. timeseries_annotations # ========================= # parameter_calibration: none given for UBA measured ozone # comments: none given for UBA measured ozone # 3. timeseries # ============= # "order" (column name of table timeseries) is a reserved word of PostgreSQL!!! # fill additional_metadata # accessing special fields within PostgreSQL is then possible via (f. ex.) # select additional_metadata->>'original_units' from timeseries; TRIMMED_PARAMETER_ATTRIBUTE=`echo -n "${PARAMETER_ATTRIBUTE//[[:space:]]/}"` TRIMMED_PARAMETER_SAMPLING_TYPE=`echo -n "${PARAMETER_SAMPLING_TYPE//[[:space:]]/}"` TRIMMED_PARAMETER_ORIGINAL_UNITS=`echo -e $PARAMETER_ORIGINAL_UNITS | awk '{$1=$1};1'` TRIMMED_PARAMETER_STATUS=`echo -n "${PARAMETER_STATUS//[[:space:]]/}"` TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER=`echo -e $PARAMETER_INSTRUMENT_MANUFACTURER | awk '{$1=$1};1'` TRIMMED_PARAMETER_INSTRUMENT_MODEL=`echo -e $PARAMETER_INSTRUMENT_MODEL | awk '{$1=$1};1'` ADDITIONAL_METADATA="{\"parameter_attribute\":\"$TRIMMED_PARAMETER_ATTRIBUTE\",\ \"parameter_sampling_type\":\"$TRIMMED_PARAMETER_SAMPLING_TYPE\",\ \"parameter_original_units\":\"$TRIMMED_PARAMETER_ORIGINAL_UNITS\",\ \"parameter_status\":\"$TRIMMED_PARAMETER_STATUS\",\ \"parameter_instrument_manufacturer\":\"$TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER\",\ \"parameter_instrument_model\":\"$TRIMMED_PARAMETER_INSTRUMENT_MODEL\"}" DATE_ADDED=$CREATION_DATE DATE_MODIFIED=$MODIFICATION_DATE # what about lower_case????? if [[ $PARAMETER_MEASUREMENT_METHOD == *"UV"* ]] then MEASUREMENT_METHOD=0 else MEASUREMENT_METHOD=1 fi SOURCE=1 if [ "$TRIMMED_PARAMETER_ATTRIBUTE" == "REA" ] then SOURCE=0 fi LABEL=$TRIMMED_PARAMETER_ATTRIBUTE # res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries \ res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries\ (label,\"order\",access_rights,sampling_frequency,aggregation,data_start_date,data_end_date,sampling_height,\ additional_metadata,date_added,date_modified,station_id,variable_id,source,measurement_method,programme_id) VALUES \ ('$LABEL',$ORDER,$ACCESS_RIGHTS,$SAMPLING_FREQUENCY,$AGGREGATION,'$DATA_START_DATE','$DATA_END_DATE',$SAMPLING_HEIGHT,\ '$ADDITIONAL_METADATA','$DATE_ADDED','$DATE_MODIFIED',$NEW_STATIONID,$VARIABLE_ID,$SOURCE,$MEASUREMENT_METHOD,$PROGRAMME_ID) \ RETURNING id;"` # log new id NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ') echo "$ID $NEW_ID $PARAMETER_NAME $SAMPLING" >>"timeseries_oldID_newID_${PARAMETER_NAME}_${SAMPLING}.txt" # 4. many-to-many relations # ========================= # 4. a) timeseries_timeseries_roles # ================================= if [ $PERSON_ID -ne -1 ] then # psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_timeseries_roles\ psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_timeseries_roles\ (timeseries_id,role_id) VALUES \ ($NEW_ID,$ROLE1);" fi if [ $ORGANISATION_ID -ne -1 ] then # psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_timeseries_roles\ psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_timeseries_roles\ (timeseries_id,role_id) VALUES \ ($NEW_ID,$ROLE2);" fi # 4. b) timeseries_timeseries_annotations # ======================================= # for UBA: none (see above) fi done < stations_oldID_newID.txt