From fa67a688836fe2cfcf8c5d48ac288b8d2ef8c100 Mon Sep 17 00:00:00 2001
From: schroeder5 <s.schroeder@fz-juelich.de>
Date: Sun, 5 Jul 2020 20:30:58 +0200
Subject: [PATCH] #6: transferring UBA stations from old database to new database (prototype version)

---
 imports_from_TOAR1/get_stations_meta.sh | 313 ++++++++++++++++--------
 1 file changed, 208 insertions(+), 105 deletions(-)

diff --git a/imports_from_TOAR1/get_stations_meta.sh b/imports_from_TOAR1/get_stations_meta.sh
index 44d8326..e99a97d 100755
--- a/imports_from_TOAR1/get_stations_meta.sh
+++ b/imports_from_TOAR1/get_stations_meta.sh
@@ -13,6 +13,8 @@
 # (same holds for timezones!)
 # TBD: rename nightlight --> avg_nightlight
 # (does this also hold for population density?)
+# TBD: coordinate_validator_id --> 1 (sschroeder) --> for now OK (we do not have an authentication method in place)
+# TBD: for UBA data: Where to put "UBA"?
 #
 # matching fields:
 # old | new | new table
@@ -58,27 +60,49 @@
 # converting old station_coordinates to PostGIS POINT structure
 # converting type_of_environment to controlled vocabulary (ST_vocabulary)
 # converting type_of_area to controlled vocabulary (TA_vocabulary)
-# converting climatic_zone to controlled vocabulary (CZ_vocabulary)
 # converting coordinate_validation_status to controlled vocabulary (CV_vocabulary)
-# converting toar1_category to controlled vocabulary (TC_vocabulary)
-# converting htap_region_tier1 to controlled vocabulary (TR_vocabulary)
-# converting dominant_landcover_year2012 to controlled vocabulary (DL_vocabulary)
+#
+# The following fields from old database already use the same values as the controlled vocabulary:
+# - climatic_zone: CZ_vocabulary
+# - toar1_category: TC_vocabulary
+# - htap_region_tier1: TR_vocabulary
+# - dominant_landcover_year2012: DL_vocabulary
+# UBA data: station_climatic_zone (old DB) is either -1, 1, 2, 3, or 4
+# UBA data: station_toar_category (old DB) is either -1, 0, 1, 2, or 3
+# UBA data: station_htap_region (old DB) is either -1, 2, or 4
+# UBA data: station_dominant_landcover (old DB) is either -1, 0, 1, 5, 10, 11, 12, 13, 14
 #
 # new (not available in old DB):
 # - coordinate_validation_date (now set to NOW())
-# - station_coordinate_validator_id (now set to 1 (superuser: sschroeder))
+# - coordinate_validator_id (now set to 1 (superuser: sschroeder))
 #
 # author: s.schroeder@fz-juelich.de
 # date: 2020-07-05
 
-# just for now: do one station after the other (with manual checks)
-# surface_observations_toar=> select min(numid), max(numid) from stations WHERE network_name='UBA';
-# min | max
-# ------+-------
-# 3336 | 19316
+# Hash tables (associative arrays)
+
+# Be careful to declare the hash table as associative array (strings as indices allowed)!
+# otherwise (tested from https://sysware.computer/linux/scripte_variablen_arrays.html):
+# If the index used to address an array element is not a natural number but a negative value (e.g. -1)
+# or a fraction (e.g. 0.5), this results in an error message. If the index is a string, it is treated as the value 0,
+# so that the array element with index 0 is accessed.
+# If no index is given, i.e. only the name of the array is used, the element with index 0 is accessed as well.
+
+# UBA data: station_coordinate_status (old DB) is either -1 or 0:
+declare -A station_coordinate_status
+# station_coordinate_status=-1 (not checked) --> coordinate_validation_status=0 ('not checked')
+# station_coordinate_status=0 (verified by google earth or other means) --> coordinate_validation_status=1 ('verified')
+# OTHER VALUES TO BE DISCUSSED!!!
+station_coordinate_status=(["-1"]="0" ["0"]="1")
+
+# UBA data: station_type (old DB) is either background, unknown, industrial or traffic
+declare -A station_type
+station_type=(["unknown"]="0" ["background"]="1" ["traffic"]="2" ["industrial"]="3" ["other"]="4")
+
+# UBA data: station_type_of_area (old DB) is either rural, urban, suburban, or unknown
+declare -A station_type_of_area
+station_type_of_area=(["unknown"]="0" ["urban"]="1" ["suburban"]="2" ["rural"]="3" ["remote"]="4")
 
-#psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select numid from stations WHERE network_name='UBA' ORDER BY numid) TO 'UBA_stations.txt'"
-#for STATION in `cat UBA_stations.txt`
 ORDERED_COLUMNS="numid,network_name,station_id,station_local_id,station_type,station_type_of_area,station_category,\
 station_name,station_country,station_state,station_lon,station_lat,station_alt,station_timezone,\
 station_nightlight_5km,station_climatic_zone,station_wheat_production,station_rice_production,\
@@ -87,103 +111,182 @@ ORDERED_COLUMNS="numid,network_name,station_id,station_local_id,station_type,sta
 station_etopo_relative_alt,station_dominant_landcover,station_landcover_description,station_max_nightlight_25km,\
 station_max_population_density_25km,station_nightlight_1km,station_population_density,google_resolution,\
 station_comments,station_max_population_density_5km"
-for STATION in 3336
+
+#just for now (see above)
+COORDINATE_VALIDATOR_ID=1
+
+# a single quote; used to double embedded quotes of text values before they are put into SQL string literals
+SQ="'"
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select numid from stations WHERE network_name='UBA' ORDER BY numid) TO 'UBA_stations.txt'"
+for STATION in `cat UBA_stations.txt`
 do
   COUNTER=0
   while IFS='|' read -a row
   do
-  if [ $COUNTER -eq 2 ]
+      if [ $COUNTER -eq 2 ]
+      then
+          NUMID=${row[0]}
+#         echo "NUMID: $NUMID"
+          NETWORK_NAME=${row[1]}
+#         echo "NETWORK_NAME: $NETWORK_NAME"
+          STATION_ID=${row[2]}
+#         echo "STATION_ID: $STATION_ID"
+          STATION_LOCAL_ID=${row[3]}
+#         echo "STATION_LOCAL_ID: $STATION_LOCAL_ID"
+          STATION_TYPE=${row[4]}
+#         echo "STATION_TYPE: $STATION_TYPE"
+          STATION_TYPE_OF_AREA=${row[5]}
+#         echo "STATION_TYPE_OF_AREA: $STATION_TYPE_OF_AREA"
+          STATION_CATEGORY=${row[6]}
+#         echo "STATION_CATEGORY: $STATION_CATEGORY"
+          STATION_NAME=${row[7]}
+#         echo "STATION_NAME: $STATION_NAME"
+          STATION_COUNTRY=${row[8]}
+#         echo "STATION_COUNTRY: $STATION_COUNTRY"
+          STATION_STATE=${row[9]}
+#         echo "STATION_STATE: $STATION_STATE"
+          STATION_LON=${row[10]}
+#         echo "STATION_LON: $STATION_LON"
+          STATION_LAT=${row[11]}
+#         echo "STATION_LAT: $STATION_LAT"
+          STATION_ALT=${row[12]}
+#         echo "STATION_ALT: $STATION_ALT"
+          STATION_TIMEZONE=${row[13]}
+#         echo "STATION_TIMEZONE: $STATION_TIMEZONE"
+          STATION_NIGHTLIGHT_5KM=${row[14]}
+#         echo "STATION_NIGHTLIGHT_5KM: $STATION_NIGHTLIGHT_5KM"
+          STATION_CLIMATIC_ZONE=${row[15]}
+#         echo "STATION_CLIMATIC_ZONE: $STATION_CLIMATIC_ZONE"
+          STATION_WHEAT_PRODUCTION=${row[16]}
+#         echo "STATION_WHEAT_PRODUCTION: $STATION_WHEAT_PRODUCTION"
+          STATION_RICE_PRODUCTION=${row[17]}
+#         echo "STATION_RICE_PRODUCTION: $STATION_RICE_PRODUCTION"
+          STATION_NOX_EMISSIONS=${row[18]}
+#         echo "STATION_NOX_EMISSIONS: $STATION_NOX_EMISSIONS"
+          STATION_OMI_NO2_COLUMN=${row[19]}
+#         echo "STATION_OMI_NO2_COLUMN: $STATION_OMI_NO2_COLUMN"
+          STATION_TOAR_CATEGORY=${row[20]}
+#         echo "STATION_TOAR_CATEGORY: $STATION_TOAR_CATEGORY"
+          STATION_HTAP_REGION=${row[21]}
+#         echo "STATION_HTAP_REGION: $STATION_HTAP_REGION"
+          STATION_REPORTED_ALT=${row[22]}
+#         echo "STATION_REPORTED_ALT: $STATION_REPORTED_ALT"
+          STATION_ALT_FLAG=${row[23]}
+#         echo "STATION_ALT_FLAG: $STATION_ALT_FLAG"
+          STATION_COORDINATE_STATUS=${row[24]}
+#         echo "STATION_COORDINATE_STATUS: $STATION_COORDINATE_STATUS"
+          STATION_GOOGLE_ALT=${row[25]}
+#         echo "STATION_GOOGLE_ALT: $STATION_GOOGLE_ALT"
+          STATION_ETOPO_ALT=${row[26]}
+#         echo "STATION_ETOPO_ALT: $STATION_ETOPO_ALT"
+          STATION_ETOPO_MIN_ALT_5KM=${row[27]}
+#         echo "STATION_ETOPO_MIN_ALT_5KM: $STATION_ETOPO_MIN_ALT_5KM"
+          STATION_ETOPO_RELATIVE_ALT=${row[28]}
+#         echo "STATION_ETOPO_RELATIVE_ALT: $STATION_ETOPO_RELATIVE_ALT"
+          STATION_DOMINANT_LANDCOVER=${row[29]}
+#         echo "STATION_DOMINANT_LANDCOVER: $STATION_DOMINANT_LANDCOVER"
+          STATION_LANDCOVER_DESCRIPTION=${row[30]}
+#         echo "STATION_LANDCOVER_DESCRIPTION: $STATION_LANDCOVER_DESCRIPTION"
+          STATION_MAX_NIGHTLIGHT_25KM=${row[31]}
+#         echo "STATION_MAX_NIGHTLIGHT_25KM: $STATION_MAX_NIGHTLIGHT_25KM"
+          STATION_MAX_POPULATION_DENSITY_25KM=${row[32]}
+#         echo "STATION_MAX_POPULATION_DENSITY_25KM: $STATION_MAX_POPULATION_DENSITY_25KM"
+          STATION_NIGHTLIGHT_1KM=${row[33]}
+#         echo "STATION_NIGHTLIGHT_1KM: $STATION_NIGHTLIGHT_1KM"
+          STATION_POPULATION_DENSITY=${row[34]}
+#         echo "STATION_POPULATION_DENSITY: $STATION_POPULATION_DENSITY"
+          GOOGLE_RESOLUTION=${row[35]}
+#         echo "GOOGLE_RESOLUTION: $GOOGLE_RESOLUTION"
+          STATION_COMMENTS=${row[36]}
+#         echo "STATION_COMMENTS: $STATION_COMMENTS"
+          STATION_MAX_POPULATION_DENSITY_5KM=${row[37]}
+#         echo "STATION_MAX_POPULATION_DENSITY_5KM: $STATION_MAX_POPULATION_DENSITY_5KM"
+      fi
+      let COUNTER=COUNTER+1
+  done < <(echo "SELECT ${ORDERED_COLUMNS} FROM stations WHERE numid=${STATION}" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder)
+
+  # the variables are not reset per station: if the query failed or returned no row, they still hold
+  # the values of the previous station, which must not be inserted a second time --> skip the station
+  if [ "${NUMID//[[:space:]]/}" != "$STATION" ]
+  then
+      echo "station $STATION: could not be read from old database (station skipped)" >&2
+      continue
+  fi
+
+  # 1. stationmeta_core
+  # ===================
+
+  TRIMMED_VAR=`echo -n "${STATION_COORDINATE_STATUS//[[:space:]]/}"`
+  COORDINATE_VALIDATION_STATUS=${station_coordinate_status[$TRIMMED_VAR]}
+
+  TRIMMED_VAR=`echo -n "${STATION_TYPE//[[:space:]]/}"`
+  TYPE_OF_ENVIRONMENT=${station_type[$TRIMMED_VAR]}
+
+  TRIMMED_VAR=`echo -n "${STATION_TYPE_OF_AREA//[[:space:]]/}"`
+  TYPE_OF_AREA=${station_type_of_area[$TRIMMED_VAR]}
+
+  # an old value without counterpart in the hash tables yields an empty string,
+  # which would result in an invalid INSERT statement --> skip the station
+  if [ -z "$COORDINATE_VALIDATION_STATUS" ] || [ -z "$TYPE_OF_ENVIRONMENT" ] || [ -z "$TYPE_OF_AREA" ]
+  then
+      echo "station $STATION: value without counterpart in the controlled vocabularies (station skipped)" >&2
+      continue
+  fi
+
+  # trim codes
+  TRIMMED_ID=`echo -n "${STATION_ID//[[:space:]]/}"`
+  TRIMMED_LID=`echo -n "${STATION_LOCAL_ID//[[:space:]]/}"`
+  if [ "$TRIMMED_ID" = "$TRIMMED_LID" ]
   then
-      NUMID=${row[0]}
-#     echo "NUMID: $NUMID"
-      NETWORK_NAME=${row[1]}
-#     echo "NETWORK_NAME: $NETWORK_NAME"
-      STATION_ID=${row[2]}
-#     echo "STATION_ID: $STATION_ID"
-      STATION_LOCAL_ID=${row[3]}
-#     echo "STATION_LOCAL_ID: $STATION_LOCAL_ID"
-      STATION_TYPE=${row[4]}
-#     echo "STATION_TYPE: $STATION_TYPE"
-      STATION_TYPE_OF_AREA=${row[5]}
-#     echo "STATION_TYPE_OF_AREA: $STATION_TYPE_OF_AREA"
-      STATION_CATEGORY=${row[6]}
-#     echo "STATION_CATEGORY: $STATION_CATEGORY"
-      STATION_NAME=${row[7]}
-#     echo "STATION_NAME: $STATION_NAME"
-      STATION_COUNTRY=${row[8]}
-#     echo "STATION_COUNTRY: $STATION_COUNTRY"
-      STATION_STATE=${row[9]}
-#     echo "STATION_STATE: $STATION_STATE"
-      STATION_LON=${row[10]}
-#     echo "STATION_LON: $STATION_LON"
-      STATION_LAT=${row[11]}
-#     echo "STATION_LAT: $STATION_LAT"
-      STATION_ALT=${row[12]}
-#     echo "STATION_ALT: $STATION_ALT"
-      STATION_TIMEZONE=${row[13]}
-#     echo "STATION_TIMEZONE: $STATION_TIMEZONE"
-      STATION_NIGHTLIGHT_5KM=${row[14]}
-#     echo "STATION_NIGHTLIGHT_5KM: $STATION_NIGHTLIGHT_5KM"
-      STATION_CLIMATIC_ZONE=${row[15]}
-#     echo "STATION_CLIMATIC_ZONE: $STATION_CLIMATIC_ZONE"
-      STATION_WHEAT_PRODUCTION=${row[16]}
-#     echo "STATION_WHEAT_PRODUCTION: $STATION_WHEAT_PRODUCTION"
-      STATION_RICE_PRODUCTION=${row[17]}
-#     echo "STATION_RICE_PRODUCTION: $STATION_RICE_PRODUCTION"
-      STATION_NOX_EMISSIONS=${row[18]}
-#     echo "STATION_NOX_EMISSIONS: $STATION_NOX_EMISSIONS"
-      STATION_OMI_NO2_COLUMN=${row[19]}
-#     echo "STATION_OMI_NO2_COLUMN: $STATION_OMI_NO2_COLUMN"
-      STATION_TOAR_CATEGORY=${row[20]}
-#     echo "STATION_TOAR_CATEGORY: $STATION_TOAR_CATEGORY"
-      STATION_HTAP_REGION=${row[21]}
-#     echo "STATION_HTAP_REGION: $STATION_HTAP_REGION"
-      STATION_REPORTED_ALT=${row[22]}
-#     echo "STATION_REPORTED_ALT: $STATION_REPORTED_ALT"
-      STATION_ALT_FLAG=${row[23]}
-#     echo "STATION_ALT_FLAG: $STATION_ALT_FLAG"
-      STATION_COORDINATE_STATUS=${row[24]}
-#     echo "STATION_COORDINATE_STATUS: $STATION_COORDINATE_STATUS"
-      STATION_GOOGLE_ALT=${row[25]}
-#     echo "STATION_GOOGLE_ALT: $STATION_GOOGLE_ALT"
-      STATION_ETOPO_ALT=${row[26]}
-#     echo "STATION_ETOPO_ALT: $STATION_ETOPO_ALT"
-      STATION_ETOPO_MIN_ALT_5KM=${row[27]}
-#     echo "STATION_ETOPO_MIN_ALT_5KM: $STATION_ETOPO_MIN_ALT_5KM"
-      STATION_ETOPO_RELATIVE_ALT=${row[28]}
-#     echo "STATION_ETOPO_RELATIVE_ALT: $STATION_ETOPO_RELATIVE_ALT"
-      STATION_DOMINANT_LANDCOVER=${row[29]}
-#     echo "STATION_DOMINANT_LANDCOVER: $STATION_DOMINANT_LANDCOVER"
-      STATION_LANDCOVER_DESCRIPTION=${row[30]}
-#     echo "STATION_LANDCOVER_DESCRIPTION: $STATION_LANDCOVER_DESCRIPTION"
-      STATION_MAX_NIGHTLIGHT_25KM=${row[31]}
-#     echo "STATION_MAX_NIGHTLIGHT_25KM: $STATION_MAX_NIGHTLIGHT_25KM"
-      STATION_MAX_POPULATION_DENSITY_25KM=${row[32]}
-#     echo "STATION_MAX_POPULATION_DENSITY_25KM: $STATION_MAX_POPULATION_DENSITY_25KM"
-      STATION_NIGHTLIGHT_1KM=${row[33]}
-#     echo "STATION_NIGHTLIGHT_1KM: $STATION_NIGHTLIGHT_1KM"
-      STATION_POPULATION_DENSITY=${row[34]}
-#     echo "STATION_POPULATION_DENSITY: $STATION_POPULATION_DENSITY"
-      GOOGLE_RESOLUTION=${row[35]}
-#     echo "GOOGLE_RESOLUTION: $GOOGLE_RESOLUTION"
-      STATION_COMMENTS=${row[36]}
-#     echo "STATION_COMMENTS: $STATION_COMMENTS"
-      STATION_MAX_POPULATION_DENSITY_5KM=${row[37]}
-#     echo "STATION_MAX_POPULATION_DENSITY_5KM: $STATION_MAX_POPULATION_DENSITY_5KM"
+      CODES="$TRIMMED_ID"
+  else
+      CODES="$TRIMMED_ID, $TRIMMED_LID"
   fi
-  let COUNTER=COUNTER+1
-done < <(echo "SELECT ${ORDERED_COLUMNS} FROM stations WHERE numid=${STATION}" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder)
-# res=`psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder -c "SELECT * FROM stations WHERE numid=${STATION}"`
-# echo $res
-# MIN_DATETIME=$(echo $res | cut -f2 -d'|' | cut -f4,5 -d' ')
-# HELPMAX=$(echo $res | cut -f3 -d'|' | cut -f1 -d'(')
-# MAX_DATETIME=$(echo $HELPMAX)
-# psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder -c "UPDATE parameter_series SET data_start_date='${MIN_DATETIME}', data_end_date='${MAX_DATETIME}' WHERE id=${SERIES}"
-done
 
-# first test on test database
-#psql -h localhost -U toaradmin toardb -W -c "\COPY stationmeta_core FROM 'stations_China_20200605_import.txt'"
-#psql -h localhost -U toaradmin toardb -W -c "UPDATE stationmeta_core SET country=TRIM(country),codes=TRIM(codes),name=TRIM(name),state=TRIM(state);"
-# operational database
-##psql -h zam10116.zam.kfa-juelich.de -U s.schroeder toardb_v2 -W -c "\COPY stationmeta_core FROM 'stations_China_20200605_import.txt'"
-##psql -h zam10116.zam.kfa-juelich.de -U s.schroeder toardb_v2 -W -c "UPDATE stationmeta_core SET country=TRIM(country),codes=TRIM(codes),name=TRIM(name),state=TRIM(state);"
+  # fill additional_metadata
+  # accessing special fields within PostgreSQL is then possible via (e.g.)
+  # select additional_metadata->>'station_reported_alt' from stationmeta_core;
+  TRIMMED_STATION_REPORTED_ALT=`echo -n "${STATION_REPORTED_ALT//[[:space:]]/}"`
+  TRIMMED_STATION_ALT_FLAG=`echo -n "${STATION_ALT_FLAG//[[:space:]]/}"`
+  TRIMMED_STATION_GOOGLE_ALT=`echo -n "${STATION_GOOGLE_ALT//[[:space:]]/}"`
+  TRIMMED_GOOGLE_RESOLUTION=`echo -n "${GOOGLE_RESOLUTION//[[:space:]]/}"`
+  # the description is free text: only strip leading/trailing whitespace, keep the blanks between the words
+  TRIMMED_STATION_LANDCOVER_DESCRIPTION=`echo -n "$STATION_LANDCOVER_DESCRIPTION" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'`
+  TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM=`echo -n "${STATION_MAX_POPULATION_DENSITY_5KM//[[:space:]]/}"`
+  ADDITIONAL_METADATA="{\"station_reported_alt\":\"$TRIMMED_STATION_REPORTED_ALT\",\
+                        \"station_alt_flag\":\"$TRIMMED_STATION_ALT_FLAG\",\
+                        \"station_google_alt\":\"$TRIMMED_STATION_GOOGLE_ALT\",\
+                        \"google_resolution\":\"$TRIMMED_GOOGLE_RESOLUTION\",\
+                        \"station_landcover_description\":\"$TRIMMED_STATION_LANDCOVER_DESCRIPTION\",\
+                        \"station_max_population_density_5km\":\"$TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM\"}"
+  res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_core\
+       (codes,name,coordinates,country,state,coordinate_validation_status,coordinate_validation_date,type_of_environment,\
+        type_of_area,timezone,additional_metadata,coordinate_validator_id) VALUES \
+       ('{$CODES}',TRIM('${STATION_NAME//$SQ/$SQ$SQ}'),ST_SetSRID(ST_MakePoint($STATION_LON,$STATION_LAT,$STATION_ALT),4326),TRIM('${STATION_COUNTRY//$SQ/$SQ$SQ}'),\
+        TRIM('${STATION_STATE//$SQ/$SQ$SQ}'),$COORDINATE_VALIDATION_STATUS,NOW(),$TYPE_OF_ENVIRONMENT,$TYPE_OF_AREA,TRIM('${STATION_TIMEZONE//$SQ/$SQ$SQ}'),\
+        '${ADDITIONAL_METADATA//$SQ/$SQ$SQ}',$COORDINATE_VALIDATOR_ID) RETURNING id;"`
+  # log new id
+  NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
+  # without a valid new id neither the log entry nor the insert into stationmeta_global makes sense
+  if ! [[ "$NEW_ID" =~ ^[0-9]+$ ]]
+  then
+      echo "station $STATION: insert into stationmeta_core failed (station skipped)" >&2
+      continue
+  fi
+  echo "$NUMID $NEW_ID" >>stations_oldID_newID.txt
+
+  # 2. stationmeta_global
+  # =====================
+
+  psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_global\
+       (population_density_year2010,max_population_density_25km_year2010,climatic_zone,nightlight_1km_year2013,nightlight_5km_year2013,\
+        max_nightlight_25km_year2013,wheat_production_year2000,rice_production_year2000,edgar_htap_v2_nox_emissions_year2010,\
+        omi_no2_column_years2011to2015,htap_region_tier1,etopo_alt,etopo_min_alt_5km,etopo_relative_alt,dominant_landcover_year2012,\
+        toar1_category,station_id) VALUES \
+       ($STATION_POPULATION_DENSITY,$STATION_MAX_POPULATION_DENSITY_25KM,$STATION_CLIMATIC_ZONE,$STATION_NIGHTLIGHT_1KM,$STATION_NIGHTLIGHT_5KM,\
+        $STATION_MAX_NIGHTLIGHT_25KM,$STATION_WHEAT_PRODUCTION,$STATION_RICE_PRODUCTION,$STATION_NOX_EMISSIONS,\
+        $STATION_OMI_NO2_COLUMN,$STATION_HTAP_REGION,$STATION_ETOPO_ALT,$STATION_ETOPO_MIN_ALT_5KM,$STATION_ETOPO_RELATIVE_ALT,\
+        $STATION_DOMINANT_LANDCOVER,$STATION_TOAR_CATEGORY,$NEW_ID);"
+
+done
-- 
GitLab