#!/bin/bash # example script for transfering all UBA stations # (country='Germany' and network_name='UBA') # numid (now: id) will change --> log mapping: old_id <-> new_id # # Since UBA stations are the first to be inserted, they are unique. # Therefore, no check, if station exists already in database is done. # --> next version has to check this # --> station_id (and -- if different -- station_local_id) # can also be inserted into codes with no further checks # # TBD: list of allowed station_countries # (same holds for timezones!) # TBD: rename nightlight --> avg_nightlight # (does this also hold for population density?) # TBD: coordinate_validator_id --> 1 (sschroeder) --> for now OK (we do not have authentication method in place) # TBD: for UBA data: Where to put "UBA"? # # matching fields: # old | new | new table # ====================================|======================================|==========================| # network_name | pointing to programmes | timeseries_programmes |??? # station_id | codes | stationmeta_core | # station_local_id | codes | stationmeta_core | # station_type | type_of_environment | stationmeta_core | # station_type_of_area | type_of_area | stationmeta_core | # station_category | additional_metadata | stationmeta_core | # station_name | name | stationmeta_core | # station_country | country | stationmeta_core | # station_state | state | stationmeta_core | # station_lon | coordinates | stationmeta_core | # station_lat | coordinates | stationmeta_core | # station_alt | coordinates | stationmeta_core | # station_reported_alt | additional_metadata | stationmeta_core |??? # station_alt_flag | additional_metadata | stationmeta_core |??? # station_google_alt | additional_metadata | stationmeta_core |??? # google_resolution | additional_metadata | stationmeta_core |??? # station_coordinate_status | coordinate_validation_status | stationmeta_core | # station_timezone | timezone | stationmeta_core | # station_landcover_description | additional_metadata | stationmeta_core |??? # station_max_population_density_5km | additional_metadata | stationmeta_core |??? # station_nightlight_5km | nightlight_5km_year2013 | stationmeta_global | # station_nightlight_1km | nightlight_1km_year2013 | stationmeta_global | # station_max_nightlight_25km | max_nightlight_25km_year2013 | stationmeta_global | # station_climatic_zone | climatic_zone | stationmeta_global | # station_wheat_production | wheat_production_year2000 | stationmeta_global | # station_rice_production | rice_production_year2000 | stationmeta_global | # station_nox_emissions | edgar_htap_v2_nox_emissions_year2010 | stationmeta_global | # station_omi_no2_column | omi_no2_column_years2011to2015 | stationmeta_global | # station_toar_category | toar1_category | stationmeta_global | # station_htap_region | htap_region_tier1 | stationmeta_global | # station_etopo_alt | etopo_alt | stationmeta_global | # station_etopo_min_alt_5km | etopo_min_alt_5km | stationmeta_global | # station_etopo_relative_alt | etopo_relative_alt | stationmeta_global | # station_dominant_landcover | dominant_landcover_year2012 | stationmeta_global | # station_population_density | population_density_year2010 | stationmeta_global | # station_max_population_density_25km | max_population_density_25km_year2010 | stationmeta_global | # station_comments | pointing to annotations | stationmeta_annotations | # # converting old station_coordinates to PostGIS POINT structure # converting type_of_environment to controlled vocabulary (ST_vocabulary) # converting type_of_area to controlled vocabulary (TA_vocabulary) # converting coordinate_validation_status to controlled vocabulary (CV_vocabulary) # # The following fields from old database already use the same values as the controlled vocabulary: # - climatic_zone: CZ_vocabulary # - toar1_category: TC_vocabulary # - htap_region_tier1: TR_vocabulary # - dominant_landcover_year2012: DL_vocabulary # UBA data: station_climatic_zone (old DB) is either -1, 1, 2, 3, or 4 # UBA data: station_toar_category (old DB) is either -1, 0, 1, 2, or 3 # UBA data: station_htap_region (old DB) is either -1, 2, or 4 # UBA data: station_dominant_landcover (old DB) is either -1, 0, 1, 5, 10, 11, 12, 13, 14 # # new (not available in old DB): # - coordinate_validation_date (now set to NOW()) # - coordinate_validator_id (now set to 1 (superuser: sschroeder)) # # author: s.schroeder@fz-juelich.de # date: 2020-07-05 # Hash tables (associative arrays) # Be careful to declare the hash table as associative array (strings as indices allowed)! # otherwise (tested from https://sysware.computer/linux/scripte_variablen_arrays.html): # Wenn der Index mit dem ein Arrayelement adressiert wird keine natürliche Zahl ist, sondern ein negativer Wert (z.B.-1) # oder ein Bruch (z.B. 0.5) dann führt das zu einer Fehlermeldung. Ist der Index ein String, dann wird er mit dem Wert 0 gleich gesetzt, # so dass auf des Arrayelement mit dem Index 0 zugeriffen wird. # Wird kein Index angegeben, also nur der Name des Arrays benutzt, so erfolgt der Zugriff ebenfalls auf das Arrayelement mit dem Index 0. # UBA data: station_coordinate_status (old DB) is either -1 or 0: declare -A station_coordinate_status # station_coordinate_status=-1 (not checked) --> coordinate_validation_status=0 ('not checked') # station_coordinate_status=0 (verified by google earth or other means) --> coordinate_validation_status=1 ('verified') # OTHER VALUES TO BE DISCUSSED!!! station_coordinate_status=(["-1"]="0" ["0"]="1") # UBA data: station_type (old DB) is either background, unknown, industrial or traffic declare -A station_type station_type=(["unknown"]="0" ["background"]="1" ["traffic"]="2" ["industrial"]="3" ["other"]="4") # UBA data: station_type_of_area (old DB) is either rural, urban, suburban, or unknown declare -A station_type_of_area station_type_of_area=(["unknown"]="0" ["urban"]="1" ["suburban"]="2" ["rural"]="3" ["remote"]="4") ORDERED_COLUMNS="numid,network_name,station_id,station_local_id,station_type,station_type_of_area,station_category,\ station_name,station_country,station_state,station_lon,station_lat,station_alt,station_timezone,\ station_nightlight_5km,station_climatic_zone,station_wheat_production,station_rice_production,\ station_nox_emissions,station_omi_no2_column,station_toar_category,station_htap_region,station_reported_alt,\ station_alt_flag,station_coordinate_status,station_google_alt,station_etopo_alt,station_etopo_min_alt_5km,\ station_etopo_relative_alt,station_dominant_landcover,station_landcover_description,station_max_nightlight_25km,\ station_max_population_density_25km,station_nightlight_1km,station_population_density,google_resolution,\ station_comments,station_max_population_density_5km" #just for now (see above) COORDINATE_VALIDATOR_ID=1 #psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select numid from stations WHERE network_name='UBA' ORDER BY numid) TO 'UBA_stations.txt'" for STATION in `cat UBA_stations.txt` do COUNTER=0 while IFS='|' read -a row do if [ $COUNTER -eq 2 ] then NUMID=${row[0]} # echo "NUMID: $NUMID" NETWORK_NAME=${row[1]} # echo "NETWORK_NAME: $NETWORK_NAME" STATION_ID=${row[2]} # echo "STATION_ID: $STATION_ID" STATION_LOCAL_ID=${row[3]} # echo "STATION_LOCAL_ID: $STATION_LOCAL_ID" STATION_TYPE=${row[4]} # echo "STATION_TYPE: $STATION_TYPE" STATION_TYPE_OF_AREA=${row[5]} # echo "STATION_TYPE_OF_AREA: $STATION_TYPE_OF_AREA" STATION_CATEGORY=${row[6]} # echo "STATION_CATEGORY: $STATION_CATEGORY" STATION_NAME=${row[7]} # echo "STATION_NAME: $STATION_NAME" STATION_COUNTRY=${row[8]} # echo "STATION_COUNTRY: $STATION_COUNTRY" STATION_STATE=${row[9]} # echo "STATION_STATE: $STATION_STATE" STATION_LON=${row[10]} # echo "STATION_LON: $STATION_LON" STATION_LAT=${row[11]} # echo "STATION_LAT: $STATION_LAT" STATION_ALT=${row[12]} # echo "STATION_ALT: $STATION_ALT" STATION_TIMEZONE=${row[13]} # echo "STATION_TIMEZONE: $STATION_TIMEZONE" STATION_NIGHTLIGHT_5KM=${row[14]} # echo "STATION_NIGHTLIGHT_5KM: $STATION_NIGHTLIGHT_5KM" STATION_CLIMATIC_ZONE=${row[15]} # echo "STATION_CLIMATIC_ZONE: $STATION_CLIMATIC_ZONE" STATION_WHEAT_PRODUCTION=${row[16]} # echo "STATION_WHEAT_PRODUCTION: $STATION_WHEAT_PRODUCTION" STATION_RICE_PRODUCTION=${row[17]} # echo "STATION_RICE_PRODUCTION: $STATION_RICE_PRODUCTION" STATION_NOX_EMISSIONS=${row[18]} # echo "STATION_NOX_EMISSIONS: $STATION_NOX_EMISSIONS" STATION_OMI_NO2_COLUMN=${row[19]} # echo "STATION_OMI_NO2_COLUMN: $STATION_OMI_NO2_COLUMN" STATION_TOAR_CATEGORY=${row[20]} # echo "STATION_TOAR_CATEGORY: $STATION_TOAR_CATEGORY" STATION_HTAP_REGION=${row[21]} # echo "STATION_HTAP_REGION: $STATION_HTAP_REGION" STATION_REPORTED_ALT=${row[22]} # echo "STATION_REPORTED_ALT: $STATION_REPORTED_ALT" STATION_ALT_FLAG=${row[23]} # echo "STATION_ALT_FLAG: $STATION_ALT_FLAG" STATION_COORDINATE_STATUS=${row[24]} # echo "STATION_COORDINATE_STATUS: $STATION_COORDINATE_STATUS" STATION_GOOGLE_ALT=${row[25]} # echo "STATION_GOOGLE_ALT: $STATION_GOOGLE_ALT" STATION_ETOPO_ALT=${row[26]} # echo "STATION_ETOPO_ALT: $STATION_ETOPO_ALT" STATION_ETOPO_MIN_ALT_5KM=${row[27]} # echo "STATION_ETOPO_MIN_ALT_5KM: $STATION_ETOPO_MIN_ALT_5KM" STATION_ETOPO_RELATIVE_ALT=${row[28]} # echo "STATION_ETOPO_RELATIVE_ALT: $STATION_ETOPO_RELATIVE_ALT" STATION_DOMINANT_LANDCOVER=${row[29]} # echo "STATION_DOMINANT_LANDCOVER: $STATION_DOMINANT_LANDCOVER" STATION_LANDCOVER_DESCRIPTION=${row[30]} # echo "STATION_LANDCOVER_DESCRIPTION: $STATION_LANDCOVER_DESCRIPTION" STATION_MAX_NIGHTLIGHT_25KM=${row[31]} # echo "STATION_MAX_NIGHTLIGHT_25KM: $STATION_MAX_NIGHTLIGHT_25KM" STATION_MAX_POPULATION_DENSITY_25KM=${row[32]} # echo "STATION_MAX_POPULATION_DENSITY_25KM: $STATION_MAX_POPULATION_DENSITY_25KM" STATION_NIGHTLIGHT_1KM=${row[33]} # echo "STATION_NIGHTLIGHT_1KM: $STATION_NIGHTLIGHT_1KM" STATION_POPULATION_DENSITY=${row[34]} # echo "STATION_POPULATION_DENSITY: $STATION_POPULATION_DENSITY" GOOGLE_RESOLUTION=${row[35]} # echo "GOOGLE_RESOLUTION: $GOOGLE_RESOLUTION" STATION_COMMENTS=${row[36]} # echo "STATION_COMMENTS: $STATION_COMMENTS" STATION_MAX_POPULATION_DENSITY_5KM=${row[37]} # echo "STATION_MAX_POPULATION_DENSITY_5KM: $STATION_MAX_POPULATION_DENSITY_5KM" fi let COUNTER=COUNTER+1 done < <(echo "SELECT ${ORDERED_COLUMNS} FROM stations WHERE numid=${STATION}" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder) # 1. stationmeta_core # =================== TRIMMED_VAR=`echo -n "${STATION_COORDINATE_STATUS//[[:space:]]/}"` COORDINATE_VALIDATION_STATUS=${station_coordinate_status[$TRIMMED_VAR]} TRIMMED_VAR=`echo -n "${STATION_TYPE//[[:space:]]/}"` TYPE_OF_ENVIRONMENT=${station_type[$TRIMMED_VAR]} TRIMMED_VAR=`echo -n "${STATION_TYPE_OF_AREA//[[:space:]]/}"` TYPE_OF_AREA=${station_type_of_area[$TRIMMED_VAR]} # trim codes TRIMMED_ID=`echo -n "${STATION_ID//[[:space:]]/}"` TRIMMED_LID=`echo -n "${STATION_LOCAL_ID//[[:space:]]/}"` if [ "$TRIMMED_ID" = "$TRIMMED_LID" ] then CODES="$TRIMMED_ID" else CODES="$TRIMMED_ID, $TRIMMED_LID" fi # fill additional_metadata # accessing special fields within PostgreSQL is then possible via (f. ex.) # select additional_metadata->>'station_reported_alt' from stationmeta_core; TRIMMED_STATION_REPORTED_ALT=`echo -n "${STATION_REPORTED_ALT//[[:space:]]/}"` TRIMMED_STATION_ALT_FLAG=`echo -n "${STATION_ALT_FLAG//[[:space:]]/}"` TRIMMED_STATION_GOOGLE_ALT=`echo -n "${STATION_GOOGLE_ALT//[[:space:]]/}"` TRIMMED_GOOGLE_RESOLUTION=`echo -n "${GOOGLE_RESOLUTION//[[:space:]]/}"` TRIMMED_STATION_LANDCOVER_DESCRIPTION=`echo -n "${STATION_LANDCOVER_DESCRIPTION//[[:space:]]/}"` TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM=`echo -n "${STATION_MAX_POPULATION_DENSITY_5KM//[[:space:]]/}"` ADDITIONAL_METADATA="{\"station_reported_alt\":\"$TRIMMED_STATION_REPORTED_ALT\",\ \"station_alt_flag\":\"$TRIMMED_STATION_ALT_FLAG\",\ \"station_google_alt\":\"$TRIMMED_STATION_GOOGLE_ALT\",\ \"google_resolution\":\"$TRIMMED_GOOGLE_RESOLUTION\",\ \"station_landcover_description\":\"$TRIMMED_STATION_LANDCOVER_DESCRIPTION\",\ \"station_max_population_density_5km\":\"$TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM\"}" res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_core\ (codes,name,coordinates,country,state,coordinate_validation_status,coordinate_validation_date,type_of_environment,\ type_of_area,timezone,additional_metadata,coordinate_validator_id) VALUES \ ('{$CODES}',TRIM('$STATION_NAME'),ST_SetSRID(ST_MakePoint($STATION_LON,$STATION_LAT,$STATION_ALT),4326),TRIM('$STATION_COUNTRY'),\ TRIM('$STATION_STATE'),$COORDINATE_VALIDATION_STATUS,NOW(),$TYPE_OF_ENVIRONMENT,$TYPE_OF_AREA,TRIM('$STATION_TIMEZONE'),\ '$ADDITIONAL_METADATA',$COORDINATE_VALIDATOR_ID) RETURNING id;"` # log new id NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ') echo "$NUMID $NEW_ID" >>stations_oldID_newID.txt # 2. stationmeta_global # ===================== psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_global\ (population_density_year2010,max_population_density_25km_year2010,climatic_zone,nightlight_1km_year2013,nightlight_5km_year2013,\ max_nightlight_25km_year2013,wheat_production_year2000,rice_production_year2000,edgar_htap_v2_nox_emissions_year2010,\ omi_no2_column_years2011to2015,htap_region_tier1,etopo_alt,etopo_min_alt_5km,etopo_relative_alt,dominant_landcover_year2012,\ toar1_category,station_id) VALUES \ ($STATION_POPULATION_DENSITY,$STATION_MAX_POPULATION_DENSITY_25KM,$STATION_CLIMATIC_ZONE,$STATION_NIGHTLIGHT_1KM,$STATION_NIGHTLIGHT_5KM,\ $STATION_MAX_NIGHTLIGHT_25KM,$STATION_WHEAT_PRODUCTION,$STATION_RICE_PRODUCTION,$STATION_NOX_EMISSIONS,\ $STATION_OMI_NO2_COLUMN,$STATION_HTAP_REGION,$STATION_ETOPO_ALT,$STATION_ETOPO_MIN_ALT_5KM,$STATION_ETOPO_RELATIVE_ALT,\ $STATION_DOMINANT_LANDCOVER,$STATION_TOAR_CATEGORY,$NEW_ID);" # 3. stationmeta_annotations # ========================== echo "no station_comments available for UBA! TBD for other networks!" done