From 08fe79e8f08a21c946ec279daee562fd0dadef93 Mon Sep 17 00:00:00 2001
From: schroeder5 <s.schroeder@fz-juelich.de>
Date: Fri, 5 Jun 2020 14:44:45 +0200
Subject: [PATCH] #6: starting point for transferring data from old DB to new DB
 (DB approach (because only done once) -- no workflow)

---
 imports_from_TOAR1/get_data.sh               | 46 ++++++++++++++++++++
 imports_from_TOAR1/get_parameters_meta.sh    | 29 ++++++++++++
 imports_from_TOAR1/get_stations_core_meta.sh | 39 +++++++++++++++++
 imports_from_TOAR1/get_timeseries_meta.sh    | 46 ++++++++++++++++++++
 imports_from_TOAR1/test_date_added.sh        |  9 ++++
 5 files changed, 169 insertions(+)
 create mode 100755 imports_from_TOAR1/get_data.sh
 create mode 100755 imports_from_TOAR1/get_parameters_meta.sh
 create mode 100755 imports_from_TOAR1/get_stations_core_meta.sh
 create mode 100755 imports_from_TOAR1/get_timeseries_meta.sh
 create mode 100755 imports_from_TOAR1/test_date_added.sh

diff --git a/imports_from_TOAR1/get_data.sh b/imports_from_TOAR1/get_data.sh
new file mode 100755
index 0000000..f008424
--- /dev/null
+++ b/imports_from_TOAR1/get_data.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# script for transferring data (old DB: spec_samplingInterval) to table data (new DB)
+#
+# new design of data:
+#                                       Table "public.data"
+#   Column     |           Type           | Collation | Nullable |             Default
+#--------------+--------------------------+-----------+----------+----------------------------------
+#datetime      | timestamp with time zone |           | not null |
+#value         | double precision         |           | not null |
+#flags         | bigint                   |           | not null |
+#timeseries_id | integer                  |           | not null |
+#
+# new (not available in old DB):
+# - flags -- this is a combination of former flag, preliminary flag, ... (at the moment hardwired in this script!)
+# - datetime -- with time zone included ==> This means copy within UTC!
+#
+# author: s.schroeder@fz-juelich.de
+# date:   2020-06-05
+
+HOURLY_SPEC="co no pm1 o3 no2 so2 ox aswdir pm10 rn ch4 wdir pm2p5 nox temp wspeed press cloudcover pblheight relhum totprecip u v albedo aswdifu humidity irradiance"
+EVENT_SPEC="benzene toluene ethane propane mpxylene oxylene"
+MONTHLY_SPEC="ch4 co"
+# for every sampling interval and species: export table ${SPEC}_${sampling} (old DB) and import it into table data (new DB)
+for sampling in hourly event monthly
+do
+  if [ "$sampling" = "hourly" ]
+  then
+    speclist=$HOURLY_SPEC
+  elif [ "$sampling" = "event" ]
+  then
+    speclist=$EVENT_SPEC
+  else
+    speclist=$MONTHLY_SPEC
+  fi
+  for SPEC in $speclist  # intentionally unquoted: the list is split into the single species names
+  do
+    for ID in ... # here id-list (SELECT all!) -- TODO: '...' is still a placeholder for the list of ids
+    do
+      echo "extracting data for timeseries ${ID}"
+      psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(datetime, '+00'), value, flag from ${SPEC}_${sampling} WHERE id=${ID}) TO 'data_export.txt'"
+      #do some adaptations to the data (especially flags=flag+preliminary+... (see above))
+      #...
+      psql -h localhost -U django toar2 -W -c "\COPY data (timeseries_id, datetime, value, flags) FROM 'data_export.txt'"  # column names as in the new design of data
+    done
+  done
+done
diff --git a/imports_from_TOAR1/get_parameters_meta.sh b/imports_from_TOAR1/get_parameters_meta.sh
new file mode 100755
index 0000000..d131858
--- /dev/null
+++ b/imports_from_TOAR1/get_parameters_meta.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# script for transferring table parameters (old DB) to table variable (new DB)
+#
+# new design of variable:
+#                                          Table "public.variable"
+#       Column         |          Type          | Collation | Nullable |               Default
+#----------------------+------------------------+-----------+----------+--------------------------------------
+#id                    | integer                |           | not null | nextval('variable_id_seq'::regclass)
+#variable_name         | character varying(32)  |           | not null |
+#variable_longname     | character varying(128) |           | not null |
+#variable_displayname  | character varying(128) |           | not null |
+#variable_standardname | character varying(128) |           | not null |
+#variable_units        | character varying(64)  |           | not null |
+#variable_formula      | character varying(128) |           | not null |
+#
+# new (not available in old DB):
+# id
+# attention!!! variable_formula is defined as NOT NULL (but some variables do not have a formula!)
+# attention!!! The above remark also holds for variable_standardname!
+#
+# author: s.schroeder@fz-juelich.de
+# date:   2020-06-05
+
+psql --host=zam10131.zam.kfa-juelich.de --username=s.schroeder surface_observations_toar --password --command="\COPY (select * from parameters) TO 'parameters_20200103_export.txt'"
+# the two messages below only report a manual step; this script itself does not modify the export file
+printf '%s\n' "null values of variable_formula have been manually exchanged!"
+printf '%s\n' "null values of variable_standardname have been manually exchanged!"
+# load the export file into table variable of the new DB (id is not part of the column list)
+psql --host=localhost --username=django toar2 --password --command="\COPY variable (variable_name, variable_longname, variable_displayname, variable_standardname, variable_units, variable_formula) FROM 'parameters_20200103_export.txt'"
diff --git a/imports_from_TOAR1/get_stations_core_meta.sh b/imports_from_TOAR1/get_stations_core_meta.sh
new file mode 100755
index 0000000..a878552
--- /dev/null
+++ b/imports_from_TOAR1/get_stations_core_meta.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# example script (for country='China' and network_name='OTHER')
+# converting old station_coordinates to PostGIS POINT structure
+#
+# new design of stationmeta_core:
+#                                                  Table "public.stationmeta_core"
+#              Column               |           Type           | Collation | Nullable |                   Default                                                                                         
+#-----------------------------------+--------------------------+-----------+----------+----------------------------------------------                                                                     
+# id                           | integer                  |           | not null | nextval('stationmeta_core_id_seq'::regclass)
+# codes                        | character varying(16)[]  |           |          |
+# name                         | character varying(128)   |           | not null |
+# coordinates                  | geometry(PointZ,4326)    |           | not null |
+# country                      | character varying(128)   |           | not null |
+# state                        | character varying(128)   |           | not null |
+# coordinate_validation_status | integer                  |           | not null |
+# coordinate_validation_date   | timestamp with time zone |           | not null |
+# type_of_environment          | integer                  |           | not null |
+# type_of_area                 | integer                  |           | not null |
+# category                     | character varying(128)   |           | not null |
+# timezone                     | character varying(64)    |           | not null |
+# additional_metadata          | jsonb                    |           | not null |
+# coordinate_validator_id      | integer                  |           | not null |
+#
+# new (not available in old DB):
+# - coordinate_validation_date (now set to NOW())
+# - coordinate_validator_id (now set to 1 (superuser: sschroeder))
+#
+# author: s.schroeder@fz-juelich.de
+# date:   2020-06-05
+
+psql -h zam10131 -U s.schroeder surface_observations_toar -W -c "\COPY (select numid,station_id,station_name,station_country,station_state,station_coordinate_status,station_lon,station_lat,station_alt from stations WHERE station_country='China' and network_name='OTHER') TO 'stations_China_20200605_export.txt'"
+# split every exported line at a fixed column: head gets the new columns appended, tail (presumably lon/lat/alt) becomes the POINT(...) body
+cut -c1-427 stations_China_20200605_export.txt | sed -e "s/-/0/g" | sed -e $'s/$/\tNOW()\t1\tSRID=4326;POINT(/g' > tmp_head.txt
+cut -c430- stations_China_20200605_export.txt | sed -e $'s/\t/ /g' | sed -e "s/$/)/g" > tmp_tail.txt
+paste -d'\0' tmp_head.txt tmp_tail.txt > stations_China_20200605_import.txt
+rm tmp_head.txt tmp_tail.txt
+# import the converted file into the new DB, then strip surrounding blanks from the text columns
+psql -h localhost -U django toar2 -W -c "\COPY stationmeta_core FROM 'stations_China_20200605_import.txt'"
+psql -h localhost -U django toar2 -W -c "UPDATE stationmeta_core SET country=TRIM(country),codes=TRIM(codes),name=TRIM(name),state=TRIM(state);"
diff --git a/imports_from_TOAR1/get_timeseries_meta.sh b/imports_from_TOAR1/get_timeseries_meta.sh
new file mode 100755
index 0000000..1570d55
--- /dev/null
+++ b/imports_from_TOAR1/get_timeseries_meta.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# example script (for country='China' and network_name='OTHER')
+#
+# new design of timeseries:
+#                                              Table "public.timeseries"
+# Column             |          Type            | Collation | Nullable |                Default
+#------------------------------+------------------------+-----------+----------+----------------------------------------
+#id                  | integer                  |           | not null | nextval('timeseries_id_seq'::regclass)
+#label               | character varying(128)   |           | not null | 
+#order               | integer                  |           | not null | 
+#access_rights       | integer                  |           | not null | 
+#sampling_frequency  | integer                  |           | not null | 
+#aggregation         | integer                  |           | not null | 
+#data_start_date     | timestamp with time zone |           | not null | 
+#data_end_date       | timestamp with time zone |           | not null | 
+#measurement_method  | character varying(128)   |           | not null | 
+#sampling_height     | double precision         |           | not null | 
+#additional_metadata | jsonb                    |           | not null | 
+#date_added          | timestamp with time zone |           | not null | 
+#date_modified       | timestamp with time zone |           | not null | 
+#station_id          | integer                  |           |          | 
+#variable_id         | integer                  |           |          |
+#
+# new (not available in old DB):
+# - sampling_height (just for now: put 10 (m) -- just invented!)
+# - sampling_frequency (just for now: put "hourly" (because I know!!!) -- this is choice 0)
+# - aggregation (just for now: put "mean: 1h" (because I know!!!) -- this is choice 0)
+#!!!!!!!!!!!!! did we lose this information: # - calibration_report (just for now: put ' ' (because nothing is known))
+# - evaluation_report (just for now: put ' ' (because nothing is known))
+# - access_rights (was: parameter_status; embargoed data (formerly flagged by 1) is now set to flag 2 -- script is hard-wired at the moment!)
+# - change_record (just for now: 'test insertion' -- should at least be expanded by NOW() -- script is hard-wired at the moment!)
+# - variable_id (was: parameter_name and no link to id of separate table) (just for now: ozone (9) -- because I know))
+# attention! should label really be completely UNIQUE? (It was not unique before! -- for now: add id to it!)
+# label: there has been a discussion about what to put here; the last proposal was parameter_contributor_shortname
+# 
+#
+# author: s.schroeder@fz-juelich.de
+# date:   2020-06-05
+
+psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(id,'_',parameter_contributor_shortname), 10, 0, 0, parameter_measurement_method, parameter_original_units, '', '' , 2, comments, 'test insertion', 1, station_numid, 9  from parameter_series p, stations s WHERE p.station_numid=s.numid AND s.station_country='China' AND s.network_name='OTHER' AND p.parameter_name='o3') TO 'timeseries_China_20200605_export.txt'"
+# NOTE(review): the exported column order does not obviously match the new design of timeseries listed above -- confirm before real use
+#now translate old DB to new design (at the moment: just fake data!)
+cp timeseries_China_20200605_export.txt timeseries_China_20200605_import.txt
+# the UPDATE uses the column names of the new design listed above (label, measurement_method); original_units is not part of it
+psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'timeseries_China_20200605_import.txt'"
+psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET label=TRIM(label),measurement_method=TRIM(measurement_method);"
diff --git a/imports_from_TOAR1/test_date_added.sh b/imports_from_TOAR1/test_date_added.sh
new file mode 100755
index 0000000..a2973ba
--- /dev/null
+++ b/imports_from_TOAR1/test_date_added.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# just to test right (automatic) values for date_added, date_modified
+#
+# author: s.schroeder@fz-juelich.de
+# date:   2020-06-05
+
+psql -h localhost -U django toar2 -W -c "\COPY timeseries FROM 'test_date_added.txt'"  # insert the test rows
+sleep 2m  # presumably so that the time of the UPDATE below can be told apart from the time of the insert
+psql -h localhost -U django toar2 -W -c "UPDATE timeseries SET label=TRIM(label),measurement_method=TRIM(measurement_method);"  # column names as in the current design of timeseries
-- 
GitLab