diff --git a/imports_from_TOAR1/get_timeseries_meta.sh b/imports_from_TOAR1/get_timeseries_meta.sh
index 1570d55477a985f24e9c5ed8e8db230a77cae6ea..2534f7d4c82026da6b9cb2cd205985c5858fa0da 100755
--- a/imports_from_TOAR1/get_timeseries_meta.sh
+++ b/imports_from_TOAR1/get_timeseries_meta.sh
@@ -1,46 +1,295 @@
 #!/bin/bash
-# example script (for country='China' and network_name='OTHER')
+# example script for transferring ozone timeseries from UBA stations
+# (country='Germany' and network_name='UBA')
+# station_numid (now: station_id) has changed --> see log file!
+# the timeseries ids will also change --> log the mapping: old_id <-> new_id
 #
-# new design of timeseries:
-# Table "public.timeseries"
-# Column | Type | Collation | Nullable | Default
-#------------------------------+------------------------+-----------+----------+----------------------------------------
-#id | integer | | not null | nextval('timeseries_id_seq'::regclass)
-#label | character varying(128) | | not null |
-#order | integer | | not null |
-#access_rights | integer | | not null |
-#sampling_frequency | integer | | not null |
-#aggregation | integer | | not null |
-#data_start_date | timestamp with time zone | | not null |
-#data_end_date | timestamp with time zone | | not null |
-#measurement_method | character varying(128) | | not null |
-#sampling_height | double precision | | not null |
-#additional_metadata | jsonb | | not null |
-#date_added | timestamp with time zone | | not null |
-#date_modified | timestamp with time zone | | not null |
-#station_id | integer | | |
-#variable_id | integer | | |
+# TBD:
+#
+# matching fields:
+# old                                 | new                                   | new table                |
+# ====================================|=======================================|==========================|
+# station_numid                       | station_id                            | timeseries               |
+# parameter_label                     | - (to be newly defined: label)        | timeseries               |
+# parameter_name                      | variable_id                           | timeseries               |
+# parameter_attribute                 | additional_metadata                   | timeseries               |???
+#                                     | - should some of it also go to label? |                          |???
+#                                     | - REA should go to source=modeled!    |                          |???
+# parameter_sampling_type             | additional_metadata                   | timeseries               |???
+# parameter_measurement_method        | measurement_method                    | timeseries               |
+# parameter_original_units            | additional_metadata                   | timeseries               |???
+# parameter_calibration               | pointing to annotations               | timeseries_annotations   |???
+# parameter_contributor_shortname     | pointing to role (contrib,org)        | contacts/timeseries_roles|
+# parameter_contributor               | pointing to role (contrib,org)        | contacts/timeseries_roles|
+# parameter_contributor_country       | pointing to role (contrib,org)        | contacts/timeseries_roles|
+# parameter_dataset_type              | sampling_frequency                    | timeseries               |
+# parameter_status                    | additional_metadata                   | timeseries               |???
+# comments                            | pointing to annotations               | timeseries_annotations   |???
+# creation_date                       | date_added                            | timeseries               |
+# modification_date                   | date_modified                         | timeseries               |
+# data_start_date                     | data_start_date                       | timeseries               |
+# data_end_date                       | data_end_date                         | timeseries               |
+# parameter_pi                        | pointing to role (PI, person)         | contacts/timeseries_roles|
+# parameter_pi_email                  | pointing to role (PI, person)         | contacts/timeseries_roles|
+# parameter_instrument_manufacturer   | additional_metadata                   | timeseries               |???
+# parameter_instrument_model          | additional_metadata                   | timeseries               |???
+#
+# converting from timestamp without timezone to timestamp with timezone
+# converting sampling_frequency to controlled vocabulary (SF_vocabulary) -- UBA: 0 (Hourly)
+# converting source to controlled vocabulary (DS_vocabulary) -- UBA (see below)
+# converting access_rights to controlled vocabulary (DA_vocabulary) -- UBA: 0 (ByAttribution)
+# converting aggregation to controlled vocabulary (AT_vocabulary) -- UBA: 1 (Mean1Of2)
+# converting measurement_method to controlled vocabulary (MM_vocabulary)
+# converting kind to controlled vocabulary (OK_vocabulary) -- UBA: 1 (Government)
+# converting role to controlled vocabulary (RC_vocabulary) -- UBA: 1 (PrincipalInvestigator) and 3 (Contributor)
 #
 # new (not available in old DB):
-# - sampling_height (just for now: put 10 (m) -- just invented!)
-# - sampling_frequency (just for now: put "hourly" (because I know!!!) -- this is choice 0)
-# - aggregation (just for now: put "mean: 1h" (because I know!!!) -- this is choice 0)
-#!!!!!!!!!!!!! did we lose this information: # - calibration_report (just for now: put ' ' (because nothing is known))
-# - evaluation_report (just for now: put ' ' (because nothing is known))
-# - access_rights (was: parameter_status (now set embargoed data (formerly flagged by 1) to flag 2! -- script is hard-wired at the moment!)
-# - change_record (just for now: 'test insertion' -- should at least be expanded by NOW() -- script is hard-wired at the moment!)
-# - variable_id (was: parameter_name and no link to id of separate table) (just for now: ozone (9) -- because I know))
-# attention! should label really be completely UNIQUE (this has not been before! -- for now: add id to it!)
-# label: it has been discussion about, what to put here: last discussion was about parameter_contributor_shortname
-#
+# - order (now set to 1)
+# - access_rights (now set to 0 (ByAttribution) -- DA_vocabulary)
+# - aggregation (UBA: 0 (Mean) if label == REA, else 1 (Mean1Of2) -- AT_vocabulary)
+# - sampling_height (now set to 2 (m))
+# - programme_id (now set to 0 (None-programme))
+# - source (UBA: 0 (Model) if label == REA, else 1 (Measurement) -- DS_vocabulary)
+# - kind (UBA: 1 (Government) -- OK_vocabulary)
+# - role (UBA: 1 (PrincipalInvestigator) and 3 (Contributor) -- RC_vocabulary)
+# - status (UBA: 2 (unknown for PI) and 0 (active for Contributor) -- RS_vocabulary)
 #
 # author: s.schroeder@fz-juelich.de
-# date: 2020-06-05
+# date: 2020-07-05
+
+ORDERED_COLUMNS="id,station_numid,parameter_label,parameter_name,parameter_attribute,parameter_sampling_type,\
+    parameter_measurement_method,parameter_original_units,parameter_calibration,parameter_contributor_shortname,\
+    parameter_contributor,parameter_contributor_country,parameter_dataset_type,parameter_status,comments,\
+    creation_date,modification_date,data_start_date,data_end_date,parameter_pi,parameter_pi_email,\
+    parameter_instrument_manufacturer,parameter_instrument_model"
+# just for now (see above)
+ORDER=1
+ACCESS_RIGHTS=0
+SAMPLING_HEIGHT=2
+SAMPLING_FREQUENCY=0
+PROGRAMME_ID=0
+KIND=1
+PI_ROLE=1
+PI_STATUS=2
+CONTRIBUTOR_ROLE=3
+CONTRIBUTOR_STATUS=0
+LABEL=''
+VARIABLE_ID=5
+AGGREGATION=1
+SOURCE=1
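+
+# The station id mapping is read from stations_oldID_newID.txt (see the redirection at the
+# end of the outer loop below). Editor's sketch, not part of the original import: the file
+# is assumed to hold one whitespace-separated pair "old_station_numid new_station_id" per
+# line; a guard like the following would stop the script early if the station import has
+# not been run yet:
+#
+# if [ ! -f stations_oldID_newID.txt ]
+# then
+#     echo "stations_oldID_newID.txt not found -- run the station import first" >&2
+#     exit 1
+# fi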
$PARAMETER_NAME" + PARAMETER_ATTRIBUTE=${row[4]} + echo "PARAMETER_ATTRIBUTE: $PARAMETER_ATTRIBUTE" + PARAMETER_SAMPLING_TYPE=${row[5]} + echo "PARAMETER_SAMPLING_TYPE: $PARAMETER_SAMPLING_TYPE" + PARAMETER_MEASUREMENT_METHOD=${row[6]} + echo "PARAMETER_MEASUREMENT_METHOD: $PARAMETER_MEASUREMENT_METHOD" + PARAMETER_ORIGINAL_UNITS=${row[7]} + echo "PARAMETER_ORIGINAL_UNITS: $PARAMETER_ORIGINAL_UNITS" + PARAMETER_CALIBRATION=${row[8]} + echo "PARAMETER_CALIBRATION: $PARAMETER_CALIBRATION" + PARAMETER_CONTRIBUTOR_SHORTNAME=${row[9]} + echo "PARAMETER_CONTRIBUTOR_SHORTNAME: $PARAMETER_CONTRIBUTOR_SHORTNAME" + PARAMETER_CONTRIBUTOR=${row[10]} + echo "PARAMETER_CONTRIBUTOR: $PARAMETER_CONTRIBUTOR" + PARAMETER_CONTRIBUTOR_COUNTRY=${row[11]} + echo "PARAMETER_CONTRIBUTOR_COUNTRY: $PARAMETER_CONTRIBUTOR_COUNTRY" + PARAMETER_DATASET_TYPE=${row[12]} + echo "PARAMETER_DATASET_TYPE: $PARAMETER_DATASET_TYPE" + PARAMETER_STATUS=${row[13]} + echo "PARAMETER_STATUS: $PARAMETER_STATUS" + COMMENTS=${row[14]} + echo "COMMENTS: $COMMENTS" + CREATION_DATE=${row[15]} + echo "CREATION_DATE: $CREATION_DATE" + MODIFICATION_DATE=${row[16]} + echo "MODIFICATION_DATE: $MODIFICATION_DATE" + DATA_START_DATE=${row[17]} + echo "DATA_START_DATE: $DATA_START_DATE" + DATA_END_DATE=${row[18]} + echo "DATA_END_DATE: $DATA_END_DATE" + PARAMETER_PI=${row[19]} + echo "PARAMETER_PI: $PARAMETER_PI" + PARAMETER_PI_EMAIL=${row[20]} + echo "PARAMETER_PI_EMAIL: $PARAMETER_PI_EMAIL" + PARAMETER_INSTRUMENT_MANUFACTURER=${row[21]} + echo "PARAMETER_INSTRUMENT_MANUFACTURER: $PARAMETER_INSTRUMENT_MANUFACTURER" + PARAMETER_INSTRUMENT_MODEL=${row[22]} + echo "PARAMETER_INSTRUMENT_MODEL: $PARAMETER_INSTRUMENT_MODEL" + fi + let COUNTER=COUNTER+1 + done < <(echo "SELECT ${ORDERED_COLUMNS} FROM parameter_series WHERE station_numid=${OLD_STATIONID} AND parameter_name='o3'" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder) + + # 1. contacts/timeseries_roles + # ============================ + + # 1. a) persons + # ============= + + # no personal contacts given for UBA data! + # only model data from REA give personal contacts! + # ozone data is only measured from UBA! +# EMAIL=`echo -n "${PARAMETER_PI_EMAIL//[[:space:]]/}"` +# NAME=`echo -n "${PARAMETER_PI//[[:space:]]/}"` +# PHONE='' +# ISPRIVATE=True + +# res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO persons\ +# (name,email,phone,isprivate) VALUES +# ('$NAME','$EMAIL','$PHONE',$ISPRIVATE)\ +# RETURNING id;"` +# PERSON_ID=$(echo $res | cut -f2 | cut -f3 -d' ') + PERSON_ID=-1 + + # 1. b) organisations + # =================== + + NAME=`echo -n "${PARAMETER_CONTRIBUTOR_SHORTNAME//[[:space:]]/}"` + LONGNAME=`echo -n "${PARAMETER_CONTRIBUTOR//[[:space:]]/}"` + COUNTRY=`echo -n "${PARAMETER_CONTRIBUTOR_COUNTRY//[[:space:]]/}"` + CITY='' + POSTCODE='' + STREET_ADDRESS='' + HOMEPAGE='' + +# res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO organisations\ + + intentionally commented without comment! + script should not be run without + check, whether organisation exists already in contacts!!! + + res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO organisations\ + (name,longname,kind,city,postcode,street_address,country,homepage) VALUES \ + ('$NAME','$LONGNAME',$KIND,'$CITY','$POSTCODE','$STREET_ADDRESS','$COUNTRY','$HOMEPAGE') \ + RETURNING id;"` + ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ') + + # 1. 
+    res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO organisations\
+         (name,longname,kind,city,postcode,street_address,country,homepage) VALUES \
+         ('$NAME','$LONGNAME',$KIND,'$CITY','$POSTCODE','$STREET_ADDRESS','$COUNTRY','$HOMEPAGE') \
+         RETURNING id;"`
+    ORGANISATION_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
+
+    # 1. c) contacts
+    # ==============
+
+    if [ $PERSON_ID -ne -1 ]
+    then
+#        res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\
+        res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\
+             (person_id,organisation_id) VALUES \
+             ($PERSON_ID,0) \
+             RETURNING id;"`
+        PI_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
+    fi
+
+    if [ $ORGANISATION_ID -ne -1 ]
+    then
+#        res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO contacts\
+        res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO contacts\
+             (person_id,organisation_id) VALUES \
+             (0,$ORGANISATION_ID) \
+             RETURNING id;"`
+        CONTRIBUTOR_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
+    fi
+
+    # 1. d) timeseries_roles
+    # ======================
+
+    if [ $PERSON_ID -ne -1 ]
+    then
+#        res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\
+        res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\
+             (role,status,contact_id) VALUES \
+             ($PI_ROLE,$PI_STATUS,$PI_ID)\
+             RETURNING id;"`
+        ROLE1=$(echo $res | cut -f2 | cut -f3 -d' ')
+    fi
+    if [ $ORGANISATION_ID -ne -1 ]
+    then
+#        res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_roles\
+        res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_roles\
+             (role,status,contact_id) VALUES \
+             ($CONTRIBUTOR_ROLE,$CONTRIBUTOR_STATUS,$CONTRIBUTOR_ID) \
+             RETURNING id;"`
+        ROLE2=$(echo $res | cut -f2 | cut -f3 -d' ')
+    fi
+
+    # 2. timeseries_annotations
+    # =========================
+
+    # parameter_calibration: none given for UBA measured ozone
+    # comments: none given for UBA measured ozone
+
+    # 3. timeseries
+    # =============
+
+    # "order" (column name of table timeseries) is a reserved word of PostgreSQL!!!
+
+    # fill additional_metadata
+    # individual fields can later be accessed within PostgreSQL via, e.g.,
+    # select additional_metadata->>'parameter_original_units' from timeseries;
+    TRIMMED_PARAMETER_ATTRIBUTE=`echo -n "${PARAMETER_ATTRIBUTE//[[:space:]]/}"`
+    TRIMMED_PARAMETER_SAMPLING_TYPE=`echo -n "${PARAMETER_SAMPLING_TYPE//[[:space:]]/}"`
+    TRIMMED_PARAMETER_ORIGINAL_UNITS=`echo -n "${PARAMETER_ORIGINAL_UNITS//[[:space:]]/}"`
+    TRIMMED_PARAMETER_STATUS=`echo -n "${PARAMETER_STATUS//[[:space:]]/}"`
+    TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER=`echo -n "${PARAMETER_INSTRUMENT_MANUFACTURER//[[:space:]]/}"`
+    TRIMMED_PARAMETER_INSTRUMENT_MODEL=`echo -n "${PARAMETER_INSTRUMENT_MODEL//[[:space:]]/}"`
+    ADDITIONAL_METADATA="{\"parameter_attribute\":\"$TRIMMED_PARAMETER_ATTRIBUTE\",\
+        \"parameter_sampling_type\":\"$TRIMMED_PARAMETER_SAMPLING_TYPE\",\
+        \"parameter_original_units\":\"$TRIMMED_PARAMETER_ORIGINAL_UNITS\",\
+        \"parameter_status\":\"$TRIMMED_PARAMETER_STATUS\",\
+        \"parameter_instrument_manufacturer\":\"$TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER\",\
+        \"parameter_instrument_model\":\"$TRIMMED_PARAMETER_INSTRUMENT_MODEL\"}"
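+    # Editor's sketch (not part of the original import): if any of the trimmed fields can
+    # contain quotes or backslashes, the JSON document is safer to build with jq, which
+    # escapes the values properly (assumes jq is installed):
+    #
+    # ADDITIONAL_METADATA=$(jq -c -n \
+    #     --arg attribute "$TRIMMED_PARAMETER_ATTRIBUTE" \
+    #     --arg sampling_type "$TRIMMED_PARAMETER_SAMPLING_TYPE" \
+    #     --arg original_units "$TRIMMED_PARAMETER_ORIGINAL_UNITS" \
+    #     --arg status "$TRIMMED_PARAMETER_STATUS" \
+    #     --arg manufacturer "$TRIMMED_PARAMETER_INSTRUMENT_MANUFACTURER" \
+    #     --arg model "$TRIMMED_PARAMETER_INSTRUMENT_MODEL" \
+    #     '{parameter_attribute: $attribute,
+    #       parameter_sampling_type: $sampling_type,
+    #       parameter_original_units: $original_units,
+    #       parameter_status: $status,
+    #       parameter_instrument_manufacturer: $manufacturer,
+    #       parameter_instrument_model: $model}')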
+    DATE_ADDED=$CREATION_DATE
+    DATE_MODIFIED=$MODIFICATION_DATE
+    # map the old free-text method onto the MM_vocabulary code: methods containing "UV" --> 0, everything else --> 1
+    if [[ $PARAMETER_MEASUREMENT_METHOD == *"UV"* ]]
+    then
+        MEASUREMENT_METHOD=0
+    else
+        MEASUREMENT_METHOD=1
+    fi
+#    res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries\
+    res=`psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries \
+         (label,\"order\",access_rights,sampling_frequency,aggregation,data_start_date,data_end_date,sampling_height,\
+          additional_metadata,date_added,date_modified,station_id,variable_id,source,measurement_method,programme_id) VALUES \
+         ('$LABEL',$ORDER,$ACCESS_RIGHTS,$SAMPLING_FREQUENCY,$AGGREGATION,'$DATA_START_DATE','$DATA_END_DATE',$SAMPLING_HEIGHT,\
+          '$ADDITIONAL_METADATA','$DATE_ADDED','$DATE_MODIFIED',$NEW_STATIONID,$VARIABLE_ID,$SOURCE,$MEASUREMENT_METHOD,$PROGRAMME_ID) \
+         RETURNING id;"`
+    # log new id
+    NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
+    echo "$ID $NEW_ID" >>timeseries_oldID_newID.txt
+
+    # 4. many-to-many relations
+    # =========================
+
+    # 4. a) timeseries_timeseries_roles
+    # =================================
+
+    # for UBA: only Contributor (see above)
+#    psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO timeseries_timeseries_roles\
+    psql -h localhost -d toardb -U toaradmin -c "INSERT INTO timeseries_timeseries_roles\
+         (timeseries_id,role_id) VALUES \
+         ($NEW_ID,$ROLE2);"
+
+
+    # 4. b) timeseries_timeseries_annotations
+    # =======================================
-psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(id,'_',parameter_contributor_shortname), 10, 0, 0, parameter_measurement_method, parameter_original_units, '', '' , 2, comments, 'test insertion', 1, station_numid, 9 from parameter_series p, stations s WHERE p.station_numid=s.numid AND s.station_country='China' AND s.network_name='OTHER' AND p.parameter_name='o3') TO 'timeseries_China_20200605_export.txt'"
+    # for UBA: none (see above)
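+    # Editor's sketch (not part of the original import): the old/new timeseries id mapping
+    # logged to timeseries_oldID_newID.txt above has the same two-column layout as
+    # stations_oldID_newID.txt, so a later import step could read it the same way, e.g.
+    #
+    # while read -a pair
+    # do
+    #     OLD_TIMESERIES_ID=${pair[0]}
+    #     NEW_TIMESERIES_ID=${pair[1]}
+    #     echo "timeseries $OLD_TIMESERIES_ID is now $NEW_TIMESERIES_ID"
+    # done < timeseries_oldID_newID.txt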
-#now translate old DB to new design (at the moment: just fake data!)
-cp timeseries_China_20200605_export.txt timeseries_China_20200605_import.txt
+    # NOTE: the script exits here after processing the first station mapping --
+    # the remaining lines of stations_oldID_newID.txt are not processed yet
+    exit
-psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'timeseries_China_20200605_import.txt'"
-psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"
+done < stations_oldID_newID.txt
diff --git a/imports_from_TOAR1/get_timeseries_meta_v0.1.sh b/imports_from_TOAR1/get_timeseries_meta_v0.1.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1570d55477a985f24e9c5ed8e8db230a77cae6ea
--- /dev/null
+++ b/imports_from_TOAR1/get_timeseries_meta_v0.1.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# example script (for country='China' and network_name='OTHER')
+#
+# new design of timeseries:
+# Table "public.timeseries"
+# Column | Type | Collation | Nullable | Default
+#------------------------------+------------------------+-----------+----------+----------------------------------------
+#id | integer | | not null | nextval('timeseries_id_seq'::regclass)
+#label | character varying(128) | | not null |
+#order | integer | | not null |
+#access_rights | integer | | not null |
+#sampling_frequency | integer | | not null |
+#aggregation | integer | | not null |
+#data_start_date | timestamp with time zone | | not null |
+#data_end_date | timestamp with time zone | | not null |
+#measurement_method | character varying(128) | | not null |
+#sampling_height | double precision | | not null |
+#additional_metadata | jsonb | | not null |
+#date_added | timestamp with time zone | | not null |
+#date_modified | timestamp with time zone | | not null |
+#station_id | integer | | |
+#variable_id | integer | | |
+#
+# new (not available in old DB):
+# - sampling_height (just for now: put 10 (m) -- just invented!)
+# - sampling_frequency (just for now: put "hourly" (because I know!!!) -- this is choice 0)
+# - aggregation (just for now: put "mean: 1h" (because I know!!!) -- this is choice 0)
+#!!!!!!!!!!!!! did we lose this information: # - calibration_report (just for now: put ' ' (because nothing is known))
+# - evaluation_report (just for now: put ' ' (because nothing is known))
+# - access_rights (was: parameter_status (now set embargoed data (formerly flagged by 1) to flag 2! -- script is hard-wired at the moment!)
+# - change_record (just for now: 'test insertion' -- should at least be expanded by NOW() -- script is hard-wired at the moment!)
+# - variable_id (was: parameter_name and no link to id of separate table) (just for now: ozone (9) -- because I know))
+# attention! should label really be completely UNIQUE (this has not been before! -- for now: add id to it!)
+# label: it has been discussion about, what to put here: last discussion was about parameter_contributor_shortname
+#
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(id,'_',parameter_contributor_shortname), 10, 0, 0, parameter_measurement_method, parameter_original_units, '', '' , 2, comments, 'test insertion', 1, station_numid, 9 from parameter_series p, stations s WHERE p.station_numid=s.numid AND s.station_country='China' AND s.network_name='OTHER' AND p.parameter_name='o3') TO 'timeseries_China_20200605_export.txt'"
+
+#now translate old DB to new design (at the moment: just fake data!)
+cp timeseries_China_20200605_export.txt timeseries_China_20200605_import.txt
+
+psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'timeseries_China_20200605_import.txt'"
+psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"