Skip to content
Snippets Groups Projects
get_stations_meta.sh 16.6 KiB
Newer Older
#!/bin/bash
# example script for transfering all UBA stations
# (country='Germany' and network_name='UBA')
# numid (now: id) will change --> log mapping: old_id <-> new_id
#
# Since UBA stations are the first to be inserted, they are unique.
# Therefore, no check, if station exists already in database is done.
# --> next version has to check this
# --> station_id (and -- if different -- station_local_id)
#     can also be inserted into codes with no further checks
#
# TBD: list of allowed station_countries
#      (same holds for timezones!)
# TBD: rename nightlight --> avg_nightlight
#      (does this also hold for population density?)
# TBD: coordinate_validator_id --> 1 (sschroeder) --> for now OK (we do not have authentication method in place)
# TBD: for UBA data: Where to put "UBA"?
#
# matching fields:
# old                                 | new                                  | new table
# ====================================|======================================|==========================|
# network_name                        | pointing to programmes               | timeseries_programmes    |???
# station_id                          | codes                                | stationmeta_core         |
# station_local_id                    | codes                                | stationmeta_core         |
# station_type                        | type_of_environment                  | stationmeta_core         |
# station_type_of_area                | type_of_area                         | stationmeta_core         |
# station_category                    | additional_metadata                  | stationmeta_core         |
# station_name                        | name                                 | stationmeta_core         |
# station_country                     | country                              | stationmeta_core         |
# station_state                       | state                                | stationmeta_core         |
# station_lon                         | coordinates                          | stationmeta_core         |
# station_lat                         | coordinates                          | stationmeta_core         |
# station_alt                         | coordinates                          | stationmeta_core         |
# station_reported_alt                | additional_metadata                  | stationmeta_core         |???
# station_alt_flag                    | additional_metadata                  | stationmeta_core         |???
# station_google_alt                  | additional_metadata                  | stationmeta_core         |???
# google_resolution                   | additional_metadata                  | stationmeta_core         |???
# station_coordinate_status           | coordinate_validation_status         | stationmeta_core         |
# station_timezone                    | timezone                             | stationmeta_core         |
# station_landcover_description       | additional_metadata                  | stationmeta_core         |???
# station_max_population_density_5km  | additional_metadata                  | stationmeta_core         |???
# station_nightlight_5km              | nightlight_5km_year2013              | stationmeta_global       |
# station_nightlight_1km              | nightlight_1km_year2013              | stationmeta_global       |
# station_max_nightlight_25km         | max_nightlight_25km_year2013         | stationmeta_global       |
# station_climatic_zone               | climatic_zone                        | stationmeta_global       |
# station_wheat_production            | wheat_production_year2000            | stationmeta_global       |
# station_rice_production             | rice_production_year2000             | stationmeta_global       |
# station_nox_emissions               | edgar_htap_v2_nox_emissions_year2010 | stationmeta_global       |
# station_omi_no2_column              | omi_no2_column_years2011to2015       | stationmeta_global       |
# station_toar_category               | toar1_category                       | stationmeta_global       |
# station_htap_region                 | htap_region_tier1                    | stationmeta_global       |
# station_etopo_alt                   | etopo_alt                            | stationmeta_global       |
# station_etopo_min_alt_5km           | etopo_min_alt_5km                    | stationmeta_global       |
# station_etopo_relative_alt          | etopo_relative_alt                   | stationmeta_global       |
# station_dominant_landcover          | dominant_landcover_year2012          | stationmeta_global       |
# station_population_density          | population_density_year2010          | stationmeta_global       |
# station_max_population_density_25km | max_population_density_25km_year2010 | stationmeta_global       |
# station_comments                    | pointing to annotations              | stationmeta_annotations  |
#
# converting old station_coordinates to PostGIS POINT structure
# converting type_of_environment to controlled vocabulary (ST_vocabulary)
# converting type_of_area to controlled vocabulary (TA_vocabulary)
# converting coordinate_validation_status to controlled vocabulary (CV_vocabulary)
#
# The following fields from old database already use the same values as the controlled vocabulary:
# - climatic_zone: CZ_vocabulary
# - toar1_category: TC_vocabulary
# - htap_region_tier1: TR_vocabulary
# - dominant_landcover_year2012: DL_vocabulary 
# UBA data: station_climatic_zone (old DB) is either -1, 1, 2, 3, or 4
# UBA data: station_toar_category (old DB) is either -1, 0, 1, 2, or 3
# UBA data: station_htap_region (old DB) is either -1, 2, or 4
# UBA data: station_dominant_landcover (old DB) is either -1, 0, 1, 5, 10, 11, 12, 13, 14
#
# new (not available in old DB):
# - coordinate_validation_date (now set to NOW())
# - coordinate_validator_id (now set to 1 (superuser: sschroeder))
#
# author: s.schroeder@fz-juelich.de
# date:   2020-07-05

# Hash tables (associative arrays)

# Be careful to declare the hash table as associative array (strings as indices allowed)!
# otherwise (tested from https://sysware.computer/linux/scripte_variablen_arrays.html):
# Wenn der Index mit dem ein Arrayelement adressiert wird keine natürliche Zahl ist, sondern ein negativer Wert (z.B.-1)
# oder ein Bruch (z.B. 0.5) dann führt das zu einer Fehlermeldung. Ist der Index ein String, dann wird er mit dem Wert 0 gleich gesetzt,
# so dass auf des Arrayelement mit dem Index 0 zugeriffen wird.
# Wird kein Index angegeben, also nur der Name des Arrays benutzt, so erfolgt der Zugriff ebenfalls auf das Arrayelement mit dem Index 0.

# UBA data: station_coordinate_status (old DB) is either -1 or 0:
declare -A station_coordinate_status
# station_coordinate_status=-1 (not checked) --> coordinate_validation_status=0 ('not checked')
# station_coordinate_status=0 (verified by google earth or other means) --> coordinate_validation_status=1 ('verified')
# OTHER VALUES TO BE DISCUSSED!!!
station_coordinate_status=(["-1"]="0" ["0"]="1")

# UBA data: station_type (old DB) is either background, unknown, industrial or traffic
declare -A station_type
station_type=(["unknown"]="0" ["background"]="1" ["traffic"]="2" ["industrial"]="3" ["other"]="4")

# UBA data: station_type_of_area (old DB) is either rural, urban, suburban, or unknown
declare -A station_type_of_area
station_type_of_area=(["unknown"]="0" ["urban"]="1" ["suburban"]="2" ["rural"]="3" ["remote"]="4")

ORDERED_COLUMNS="numid,network_name,station_id,station_local_id,station_type,station_type_of_area,station_category,\
                 station_name,station_country,station_state,station_lon,station_lat,station_alt,station_timezone,\
                 station_nightlight_5km,station_climatic_zone,station_wheat_production,station_rice_production,\
                 station_nox_emissions,station_omi_no2_column,station_toar_category,station_htap_region,station_reported_alt,\
                 station_alt_flag,station_coordinate_status,station_google_alt,station_etopo_alt,station_etopo_min_alt_5km,\
                 station_etopo_relative_alt,station_dominant_landcover,station_landcover_description,station_max_nightlight_25km,\
                 station_max_population_density_25km,station_nightlight_1km,station_population_density,google_resolution,\
                 station_comments,station_max_population_density_5km"
#psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select numid from stations WHERE network_name='UBA' ORDER BY numid) TO 'UBA_stations.txt'"
        if [  $COUNTER -eq 2 ]
        then
            NUMID=${row[0]}
#           echo "NUMID: $NUMID"
            NETWORK_NAME=${row[1]}
#           echo "NETWORK_NAME: $NETWORK_NAME"
            STATION_ID=${row[2]}
#           echo "STATION_ID: $STATION_ID"
            STATION_LOCAL_ID=${row[3]}
#           echo "STATION_LOCAL_ID: $STATION_LOCAL_ID"
            STATION_TYPE=${row[4]}
#           echo "STATION_TYPE: $STATION_TYPE"
            STATION_TYPE_OF_AREA=${row[5]}
#           echo "STATION_TYPE_OF_AREA: $STATION_TYPE_OF_AREA"
            STATION_CATEGORY=${row[6]}
#           echo "STATION_CATEGORY: $STATION_CATEGORY"
            STATION_NAME=${row[7]}
#           echo "STATION_NAME: $STATION_NAME"
            STATION_COUNTRY=${row[8]}
#           echo "STATION_COUNTRY: $STATION_COUNTRY"
            STATION_STATE=${row[9]}
#           echo "STATION_STATE: $STATION_STATE"
            STATION_LON=${row[10]}
#           echo "STATION_LON: $STATION_LON"
            STATION_LAT=${row[11]}
#           echo "STATION_LAT: $STATION_LAT"
            STATION_ALT=${row[12]}
#           echo "STATION_ALT: $STATION_ALT"
            STATION_TIMEZONE=${row[13]}
#           echo "STATION_TIMEZONE: $STATION_TIMEZONE"
            STATION_NIGHTLIGHT_5KM=${row[14]}
#           echo "STATION_NIGHTLIGHT_5KM: $STATION_NIGHTLIGHT_5KM"
            STATION_CLIMATIC_ZONE=${row[15]}
#           echo "STATION_CLIMATIC_ZONE: $STATION_CLIMATIC_ZONE"
            STATION_WHEAT_PRODUCTION=${row[16]}
#           echo "STATION_WHEAT_PRODUCTION: $STATION_WHEAT_PRODUCTION"
            STATION_RICE_PRODUCTION=${row[17]}
#           echo "STATION_RICE_PRODUCTION: $STATION_RICE_PRODUCTION"
            STATION_NOX_EMISSIONS=${row[18]}
#           echo "STATION_NOX_EMISSIONS: $STATION_NOX_EMISSIONS"
            STATION_OMI_NO2_COLUMN=${row[19]}
#           echo "STATION_OMI_NO2_COLUMN: $STATION_OMI_NO2_COLUMN"
            STATION_TOAR_CATEGORY=${row[20]}
#           echo "STATION_TOAR_CATEGORY: $STATION_TOAR_CATEGORY"
            STATION_HTAP_REGION=${row[21]}
#           echo "STATION_HTAP_REGION: $STATION_HTAP_REGION"
            STATION_REPORTED_ALT=${row[22]}
#           echo "STATION_REPORTED_ALT: $STATION_REPORTED_ALT"
            STATION_ALT_FLAG=${row[23]}
#           echo "STATION_ALT_FLAG: $STATION_ALT_FLAG"
            STATION_COORDINATE_STATUS=${row[24]}
#           echo "STATION_COORDINATE_STATUS: $STATION_COORDINATE_STATUS"
            STATION_GOOGLE_ALT=${row[25]}
#           echo "STATION_GOOGLE_ALT: $STATION_GOOGLE_ALT"
            STATION_ETOPO_ALT=${row[26]}
#           echo "STATION_ETOPO_ALT: $STATION_ETOPO_ALT"
            STATION_ETOPO_MIN_ALT_5KM=${row[27]}
#           echo "STATION_ETOPO_MIN_ALT_5KM: $STATION_ETOPO_MIN_ALT_5KM"
            STATION_ETOPO_RELATIVE_ALT=${row[28]}
#           echo "STATION_ETOPO_RELATIVE_ALT: $STATION_ETOPO_RELATIVE_ALT"
            STATION_DOMINANT_LANDCOVER=${row[29]}
#           echo "STATION_DOMINANT_LANDCOVER: $STATION_DOMINANT_LANDCOVER"
            STATION_LANDCOVER_DESCRIPTION=${row[30]}
#           echo "STATION_LANDCOVER_DESCRIPTION: $STATION_LANDCOVER_DESCRIPTION"
            STATION_MAX_NIGHTLIGHT_25KM=${row[31]}
#           echo "STATION_MAX_NIGHTLIGHT_25KM: $STATION_MAX_NIGHTLIGHT_25KM"
            STATION_MAX_POPULATION_DENSITY_25KM=${row[32]}
#           echo "STATION_MAX_POPULATION_DENSITY_25KM: $STATION_MAX_POPULATION_DENSITY_25KM"
            STATION_NIGHTLIGHT_1KM=${row[33]}
#           echo "STATION_NIGHTLIGHT_1KM: $STATION_NIGHTLIGHT_1KM"
            STATION_POPULATION_DENSITY=${row[34]}
#           echo "STATION_POPULATION_DENSITY: $STATION_POPULATION_DENSITY"
            GOOGLE_RESOLUTION=${row[35]}
#           echo "GOOGLE_RESOLUTION: $GOOGLE_RESOLUTION"
            STATION_COMMENTS=${row[36]}
#           echo "STATION_COMMENTS: $STATION_COMMENTS"
            STATION_MAX_POPULATION_DENSITY_5KM=${row[37]}
#           echo "STATION_MAX_POPULATION_DENSITY_5KM: $STATION_MAX_POPULATION_DENSITY_5KM"
        fi
        let COUNTER=COUNTER+1
    done < <(echo "SELECT ${ORDERED_COLUMNS} FROM stations WHERE numid=${STATION}" | psql -h zam10131.zam.kfa-juelich.de -d surface_observations_toar -U s.schroeder)

    # 1. stationmeta_core
    # ===================

    TRIMMED_VAR=`echo -n "${STATION_COORDINATE_STATUS//[[:space:]]/}"`
    COORDINATE_VALIDATION_STATUS=${station_coordinate_status[$TRIMMED_VAR]}

    TRIMMED_VAR=`echo -n "${STATION_TYPE//[[:space:]]/}"`
    TYPE_OF_ENVIRONMENT=${station_type[$TRIMMED_VAR]}

    TRIMMED_VAR=`echo -n "${STATION_TYPE_OF_AREA//[[:space:]]/}"`
    TYPE_OF_AREA=${station_type_of_area[$TRIMMED_VAR]}

    # trim codes
    TRIMMED_ID=`echo -n "${STATION_ID//[[:space:]]/}"`
    TRIMMED_LID=`echo -n "${STATION_LOCAL_ID//[[:space:]]/}"`
    if [ "$TRIMMED_ID" = "$TRIMMED_LID" ]
        CODES="$TRIMMED_ID"
    else
        CODES="$TRIMMED_ID, $TRIMMED_LID"
    # fill additional_metadata
    # accessing special fields within PostgreSQL is then possible via (f. ex.)
    # select additional_metadata->>'station_reported_alt' from stationmeta_core;
    TRIMMED_STATION_REPORTED_ALT=`echo -n "${STATION_REPORTED_ALT//[[:space:]]/}"`
    TRIMMED_STATION_ALT_FLAG=`echo -n "${STATION_ALT_FLAG//[[:space:]]/}"`
    TRIMMED_STATION_GOOGLE_ALT=`echo -n "${STATION_GOOGLE_ALT//[[:space:]]/}"`
    TRIMMED_GOOGLE_RESOLUTION=`echo -n "${GOOGLE_RESOLUTION//[[:space:]]/}"`
    TRIMMED_STATION_LANDCOVER_DESCRIPTION=`echo -n "${STATION_LANDCOVER_DESCRIPTION//[[:space:]]/}"`
    TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM=`echo -n "${STATION_MAX_POPULATION_DENSITY_5KM//[[:space:]]/}"`
    ADDITIONAL_METADATA="{\"station_reported_alt\":\"$TRIMMED_STATION_REPORTED_ALT\",\
                          \"station_alt_flag\":\"$TRIMMED_STATION_ALT_FLAG\",\
                          \"station_google_alt\":\"$TRIMMED_STATION_GOOGLE_ALT\",\
                          \"google_resolution\":\"$TRIMMED_GOOGLE_RESOLUTION\",\
                          \"station_landcover_description\":\"$TRIMMED_STATION_LANDCOVER_DESCRIPTION\",\
                          \"station_max_population_density_5km\":\"$TRIMMED_STATION_MAX_POPULATION_DENSITY_5KM\"}"
    res=`psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_core\
	    (codes,name,coordinates,country,state,coordinate_validation_status,coordinate_validation_date,type_of_environment,\
             type_of_area,timezone,additional_metadata,coordinate_validator_id) VALUES \
	     ('{$CODES}',TRIM('$STATION_NAME'),ST_SetSRID(ST_MakePoint($STATION_LON,$STATION_LAT,$STATION_ALT),4326),TRIM('$STATION_COUNTRY'),\
             TRIM('$STATION_STATE'),$COORDINATE_VALIDATION_STATUS,NOW(),$TYPE_OF_ENVIRONMENT,$TYPE_OF_AREA,TRIM('$STATION_TIMEZONE'),\
	     '$ADDITIONAL_METADATA',$COORDINATE_VALIDATOR_ID) RETURNING id;"`
    # log new id
    NEW_ID=$(echo $res | cut -f2 | cut -f3 -d' ')
    echo "$NUMID $NEW_ID" >>stations_oldID_newID.txt

    # 2. stationmeta_global
    # =====================

    psql -h zam10116.zam.kfa-juelich.de -d toardb_v2 -U s.schroeder -c "INSERT INTO stationmeta_global\
	    (population_density_year2010,max_population_density_25km_year2010,climatic_zone,nightlight_1km_year2013,nightlight_5km_year2013,\
             max_nightlight_25km_year2013,wheat_production_year2000,rice_production_year2000,edgar_htap_v2_nox_emissions_year2010,\
             omi_no2_column_years2011to2015,htap_region_tier1,etopo_alt,etopo_min_alt_5km,etopo_relative_alt,dominant_landcover_year2012,\
	     toar1_category,station_id) VALUES \
	    ($STATION_POPULATION_DENSITY,$STATION_MAX_POPULATION_DENSITY_25KM,$STATION_CLIMATIC_ZONE,$STATION_NIGHTLIGHT_1KM,$STATION_NIGHTLIGHT_5KM,\
             $STATION_MAX_NIGHTLIGHT_25KM,$STATION_WHEAT_PRODUCTION,$STATION_RICE_PRODUCTION,$STATION_NOX_EMISSIONS,\
             $STATION_OMI_NO2_COLUMN,$STATION_HTAP_REGION,$STATION_ETOPO_ALT,$STATION_ETOPO_MIN_ALT_5KM,$STATION_ETOPO_RELATIVE_ALT,\
	     $STATION_DOMINANT_LANDCOVER,$STATION_TOAR_CATEGORY,$NEW_ID);"

    # 3. stationmeta_annotations
    # ==========================

    echo "no station_comments available for UBA! TBD for other networks!"