From 08fe79e8f08a21c946ec279daee562fd0dadef93 Mon Sep 17 00:00:00 2001
From: schroeder5 <s.schroeder@fz-juelich.de>
Date: Fri, 5 Jun 2020 14:44:45 +0200
Subject: [PATCH] #6: starting point for transfering data from old DB to new DB
 (DB approach (because only done once) -- no workflow)

---
 imports_from_TOAR1/get_data.sh               | 51 ++++++++++++++++++++
 imports_from_TOAR1/get_parameters_meta.sh    | 29 +++++++++++
 imports_from_TOAR1/get_stations_core_meta.sh | 41 ++++++++++++++++
 imports_from_TOAR1/get_timeseries_meta.sh    | 46 ++++++++++++++++++
 imports_from_TOAR1/test_date_added.sh        |  9 ++++
 5 files changed, 176 insertions(+)
 create mode 100755 imports_from_TOAR1/get_data.sh
 create mode 100755 imports_from_TOAR1/get_parameters_meta.sh
 create mode 100755 imports_from_TOAR1/get_stations_core_meta.sh
 create mode 100755 imports_from_TOAR1/get_timeseries_meta.sh
 create mode 100755 imports_from_TOAR1/test_date_added.sh

diff --git a/imports_from_TOAR1/get_data.sh b/imports_from_TOAR1/get_data.sh
new file mode 100755
index 0000000..f008424
--- /dev/null
+++ b/imports_from_TOAR1/get_data.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# script for transferring data (old DB: spec_samplingInterval) to table data (new DB)
+#
+# new design of data:
+#                        Table "public.data"
+#    Column    |           Type           | Collation | Nullable | Default
+#--------------+--------------------------+-----------+----------+---------
+#datetime      | timestamp with time zone |           | not null |
+#value         | double precision         |           | not null |
+#flags         | bigint                   |           | not null |
+#timeseries_id | integer                  |           | not null |
+#
+# new (not available in old DB):
+# - flags -- this is a combination of former flag, preliminary flag, ... (at the moment hardwired in this script!)
+# - datetime -- with time zone included ==> This means copy within UTC!
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+HOURLY_SPEC="co no pm1 o3 no2 so2 ox aswdir pm10 rn ch4 wdir pm2p5 nox temp wspeed press cloudcover pblheight relhum totprecip u v albedo aswdifu humidity irradiance"
+EVENT_SPEC="benzene toluene ethane propane mpxylene oxylene"
+MONTHLY_SPEC="ch4 co"
+
+for sampling in hourly event monthly
+do
+  if [ "$sampling" = "hourly" ]
+  then
+    speclist=$HOURLY_SPEC
+  elif [ "$sampling" = "event" ]
+  then
+    speclist=$EVENT_SPEC
+  else
+    speclist=$MONTHLY_SPEC
+  fi
+  for SPEC in $speclist
+  do
+    # id-list (SELECT all!): every id stored in the old per-species data table
+    # NOTE(review): assumes column "id" of ${SPEC}_${sampling} is the timeseries id -- confirm
+    ID_LIST=$(psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -A -t -c "SELECT DISTINCT id FROM ${SPEC}_${sampling}") || exit 1
+    for ID in $ID_LIST
+    do
+      echo "extracting data for timeseries ${ID}"
+      psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(datetime, '+00'), value, flag from ${SPEC}_${sampling} WHERE id=${ID}) TO 'data_export.txt'" || exit 1
+
+      #do some adaptations to the data (especially flags=flag+preliminary+... (see above))
+      #...
+
+      psql -h localhost -U django toar2 -W -c "\COPY data (timeseries_id, datetime, value, flags) FROM 'data_export.txt'"
+    done
+  done
+done
diff --git a/imports_from_TOAR1/get_parameters_meta.sh b/imports_from_TOAR1/get_parameters_meta.sh
new file mode 100755
index 0000000..d131858
--- /dev/null
+++ b/imports_from_TOAR1/get_parameters_meta.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# script for transferring table parameters (old DB) to table variable (new DB)
+#
+# new design of variable:
+#                                   Table "public.variable"
+#        Column        |          Type          | Collation | Nullable |               Default
+#----------------------+------------------------+-----------+----------+--------------------------------------
+#id                    | integer                |           | not null | nextval('variable_id_seq'::regclass)
+#variable_name         | character varying(32)  |           | not null |
+#variable_longname     | character varying(128) |           | not null |
+#variable_displayname  | character varying(128) |           | not null |
+#variable_standardname | character varying(128) |           | not null |
+#variable_units        | character varying(64)  |           | not null |
+#variable_formula      | character varying(128) |           | not null |
+#
+# new (not available in old DB):
+# id
+# attention!!! variable_formula is defined as NOT NULL (but some variables do not have a formula!)
+# attention!!! The above remark also holds for variable_standardname!
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select * from parameters) TO 'parameters_20200103_export.txt'"
+
+echo "null values of variable_formula have been manually exchanged!"
+echo "null values of variable_standardname have been manually exchanged!"
+
+psql -h localhost -U django toar2 -W -c "\COPY variable (variable_name, variable_longname, variable_displayname, variable_standardname, variable_units, variable_formula) FROM 'parameters_20200103_export.txt'"
diff --git a/imports_from_TOAR1/get_stations_core_meta.sh b/imports_from_TOAR1/get_stations_core_meta.sh
new file mode 100755
index 0000000..a878552
--- /dev/null
+++ b/imports_from_TOAR1/get_stations_core_meta.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# example script (for country='China' and network_name='OTHER')
+# converting old station_coordinates to PostGIS POINT structure
+#
+# new design of stationmeta_core:
+#                                        Table "public.stationmeta_core"
+#            Column            |           Type           | Collation | Nullable |                   Default
+#------------------------------+--------------------------+-----------+----------+----------------------------------------------
+# id                           | integer                  |           | not null | nextval('stationmeta_core_id_seq'::regclass)
+# codes                        | character varying(16)[]  |           |          |
+# name                         | character varying(128)   |           | not null |
+# coordinates                  | geometry(PointZ,4326)    |           | not null |
+# country                      | character varying(128)   |           | not null |
+# state                        | character varying(128)   |           | not null |
+# coordinate_validation_status | integer                  |           | not null |
+# coordinate_validation_date   | timestamp with time zone |           | not null |
+# type_of_environment          | integer                  |           | not null |
+# type_of_area                 | integer                  |           | not null |
+# category                     | character varying(128)   |           | not null |
+# timezone                     | character varying(64)    |           | not null |
+# additional_metadata          | jsonb                    |           | not null |
+# coordinate_validator_id      | integer                  |           | not null |
+#
+# new (not available in old DB):
+# - coordinate_validation_date (now set to NOW())
+# - station_coordinate_validator_id (now set to 1 (superuser: sschroeder))
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select numid,station_id,station_name,station_country,station_state,station_coordinate_status,station_lon,station_lat,station_alt from stations WHERE station_country='China' and network_name='OTHER') TO 'stations_China_20200605_export.txt'"
+
+# \COPY text format is tab-delimited: the new columns are appended with \t (GNU sed), while the
+# tabs between lon/lat/alt become blanks inside POINT(...) -- NOTE(review): confirm these separators
+cut -c1-427 stations_China_20200605_export.txt | sed -e "s/-/0/g" | sed -e "s/$/\tNOW()\t1\tSRID=4326;POINT(/g" > tmp_head.txt
+cut -c430- stations_China_20200605_export.txt | sed -e "s/\t/ /g" | sed -e "s/$/)/g" > tmp_tail.txt
+paste -d'\0' tmp_head.txt tmp_tail.txt > stations_China_20200605_import.txt
+rm tmp_*.txt
+
+psql -h localhost -U django toar2 -W -c "\COPY stationmeta_core FROM 'stations_China_20200605_import.txt'"
+psql -h localhost -U django toar2 -W -c "UPDATE stationmeta_core SET country=TRIM(country),codes=TRIM(codes),name=TRIM(name),state=TRIM(state);"
diff --git a/imports_from_TOAR1/get_timeseries_meta.sh b/imports_from_TOAR1/get_timeseries_meta.sh
new file mode 100755
index 0000000..1570d55
--- /dev/null
+++ b/imports_from_TOAR1/get_timeseries_meta.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# example script (for country='China' and network_name='OTHER')
+#
+# new design of timeseries:
+#                                   Table "public.timeseries"
+#       Column       |           Type           | Collation | Nullable |                Default
+#--------------------+--------------------------+-----------+----------+----------------------------------------
+#id                  | integer                  |           | not null | nextval('timeseries_id_seq'::regclass)
+#label               | character varying(128)   |           | not null |
+#order               | integer                  |           | not null |
+#access_rights       | integer                  |           | not null |
+#sampling_frequency  | integer                  |           | not null |
+#aggregation         | integer                  |           | not null |
+#data_start_date     | timestamp with time zone |           | not null |
+#data_end_date       | timestamp with time zone |           | not null |
+#measurement_method  | character varying(128)   |           | not null |
+#sampling_height     | double precision         |           | not null |
+#additional_metadata | jsonb                    |           | not null |
+#date_added          | timestamp with time zone |           | not null |
+#date_modified       | timestamp with time zone |           | not null |
+#station_id          | integer                  |           |          |
+#variable_id         | integer                  |           |          |
+#
+# new (not available in old DB):
+# - sampling_height (just for now: put 10 (m) -- just invented!)
+# - sampling_frequency (just for now: put "hourly" (because I know!!!) -- this is choice 0)
+# - aggregation (just for now: put "mean: 1h" (because I know!!!) -- this is choice 0)
+#!!!!!!!!!!!!! did we lose this information: # - calibration_report (just for now: put ' ' (because nothing is known))
+# - evaluation_report (just for now: put ' ' (because nothing is known))
+# - access_rights (was: parameter_status (now set embargoed data (formerly flagged by 1) to flag 2! -- script is hard-wired at the moment!)
+# - change_record (just for now: 'test insertion' -- should at least be expanded by NOW() -- script is hard-wired at the moment!)
+# - variable_id (was: parameter_name and no link to id of separate table) (just for now: ozone (9) -- because I know))
+# attention! should label really be completely UNIQUE (this has not been before! -- for now: add id to it!)
+# label: there has been discussion about what to put here: last discussion was about parameter_contributor_shortname
+#
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(id,'_',parameter_contributor_shortname), 10, 0, 0, parameter_measurement_method, parameter_original_units, '', '' , 2, comments, 'test insertion', 1, station_numid, 9 from parameter_series p, stations s WHERE p.station_numid=s.numid AND s.station_country='China' AND s.network_name='OTHER' AND p.parameter_name='o3') TO 'timeseries_China_20200605_export.txt'"
+
+#now translate old DB to new design (at the moment: just fake data!)
+cp timeseries_China_20200605_export.txt timeseries_China_20200605_import.txt
+
+psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'timeseries_China_20200605_import.txt'"
+psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"
diff --git a/imports_from_TOAR1/test_date_added.sh b/imports_from_TOAR1/test_date_added.sh
new file mode 100755
index 0000000..a2973ba
--- /dev/null
+++ b/imports_from_TOAR1/test_date_added.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# just to test right (automatic) values for date_added, date_modified
+#
+# author: s.schroeder@fz-juelich.de
+# date: 2020-06-05
+
+psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'test_date_added.txt'"
+sleep 2m
+psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"
-- 
GitLab