Skip to content
Snippets Groups Projects
Commit 08fe79e8 authored by Sabine Schröder's avatar Sabine Schröder
Browse files

#6: starting point for transfering data from old DB to new DB (DB approach...

#6: starting point for transfering data from old DB to new DB (DB approach (because only done once) -- no workflow)
parent b5b51424
No related branches found
No related tags found
No related merge requests found
Pipeline #38352 passed
#!/bin/bash
# script for transferring data (old DB: spec_samplingInterval) to table data (new DB)
#
# new design of data:
# Table "public.data"
# Column | Type | Collation | Nullable | Default
#--------------+--------------------------+-----------+----------+----------------------------------
#datetime | timestamp with time zone | | not null |
#value | double precision | | not null |
#flags | bigint | | not null |
#timeseries_id | integer | | not null |
#
# new (not available in old DB):
# - flags -- this is a combination of former flag, preliminary flag, ... (at the moment hardwired in this script!)
# - datetime -- with time zone included ==> This means copy within UTC!
#
# author: s.schroeder@fz-juelich.de
# date: 2020-06-05
# Species (old-DB table name prefixes) that exist for each sampling type.
# The old DB keeps one table per species and sampling: <species>_<sampling>.
HOURLY_SPEC="co no pm1 o3 no2 so2 ox aswdir pm10 rn ch4 wdir pm2p5 nox temp wspeed press cloudcover pblheight relhum totprecip u v albedo aswdifu humidity irradiance"
EVENT_SPEC="benzene toluene ethane propane mpxylene oxylene"
MONTHLY_SPEC="ch4 co"

for sampling in hourly event monthly
do
  # Pick the species list that belongs to the current sampling type.
  case "$sampling" in
    hourly) speclist=$HOURLY_SPEC ;;
    event)  speclist=$EVENT_SPEC ;;
    *)      speclist=$MONTHLY_SPEC ;;
  esac

  # $speclist is deliberately unquoted: it is split on blanks into species.
  for SPEC in $speclist
  do
    # Collect all timeseries ids that have rows in this old-DB table.
    # -A -t makes psql print bare values, one id per line.
    # NOTE(review): the original only had a placeholder here
    # ("here id-list (SELECT all!)"); selecting every distinct id of the
    # table is an assumption -- confirm that this is the intended id list.
    id_list=$(psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -A -t -c "SELECT DISTINCT id FROM ${SPEC}_${sampling} ORDER BY id") || {
      echo "could not read id list from ${SPEC}_${sampling}" >&2
      exit 1
    }

    # $id_list is deliberately unquoted: one word per id.
    for ID in $id_list
    do
      echo "extracting data for timeseries ${ID}"
      # Export one timeseries; '+00' is appended so that the timestamp is
      # read as UTC by the new (timezone-aware) column.
      # Stop on failure: otherwise the stale data_export.txt of the previous
      # timeseries would be imported a second time below.
      psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W -c "\COPY (select id, CONCAT(datetime, '+00'), value, flag from ${SPEC}_${sampling} WHERE id=${ID}) TO 'data_export.txt'" || {
        echo "export failed for timeseries ${ID} (${SPEC}_${sampling})" >&2
        exit 1
      }
      #do some adaptions to the data (especially flags=flag+preliminary+... (see above))
      #...
      # Import into the new table; the timestamp column is called "datetime"
      # in the table layout documented in the header of this script.
      psql -h localhost -U django toar2 -W -c "\COPY data (timeseries_id, datetime, value, flags) FROM 'data_export.txt'"
    done
  done
done
#!/bin/bash
# script for transferring table parameters (old DB) to table variable (new DB)
#
# new design of variable:
# Table "public.variable"
# Column | Type | Collation | Nullable | Default
#----------------------+------------------------+-----------+----------+--------------------------------------
#id | integer | | not null | nextval('variable_id_seq'::regclass)
#variable_name | character varying(32) | | not null |
#variable_longname | character varying(128) | | not null |
#variable_displayname | character varying(128) | | not null |
#variable_standardname | character varying(128) | | not null |
#variable_units | character varying(64) | | not null |
#variable_formula | character varying(128) | | not null |
#
# new (not available in old DB):
# id
# attention!!! variable_formula is defined as NOT NULL (but some variables do not have a formula!)
# attention!!! The above remark also holds for variable_standardname!
#
# author: s.schroeder@fz-juelich.de
# date: 2020-06-05
# Copy the old-DB table "parameters" into the new-DB table "variable",
# using a text dump in the current directory as the hand-over file.
dump_file='parameters_20200103_export.txt'

# Connection command lines; -W makes psql ask for the password on every call.
old_db=(psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W)
new_db=(psql -h localhost -U django toar2 -W)

# Step 1: dump every column of the old table.
"${old_db[@]}" -c "\COPY (select * from parameters) TO '${dump_file}'"

# Step 2: remind the operator about the two NOT NULL columns.
# These lines only print a notice; nothing in this script edits the dump.
for column in variable_formula variable_standardname
do
  echo "null values of ${column} have been manually exchanged!"
done

# Step 3: load the dump into the new table (id is filled by its sequence).
"${new_db[@]}" -c "\COPY variable (variable_name, variable_longname, variable_displayname, variable_standardname, variable_units, variable_formula) FROM '${dump_file}'"
#!/bin/bash
# example script (for country='China' and network_name='OTHER')
# converting old station_coordinates to PostGIS POINT structure
#
# new design of stationmeta_core:
# Table "public.stationmeta_core"
# Column | Type | Collation | Nullable | Default
#-----------------------------------+--------------------------+-----------+----------+----------------------------------------------
# id | integer | | not null | nextval('stationmeta_core_id_seq'::regclass)
# codes | character varying(16)[] | | |
# name | character varying(128) | | not null |
# coordinates | geometry(PointZ,4326) | | not null |
# country | character varying(128) | | not null |
# state | character varying(128) | | not null |
# coordinate_validation_status | integer | | not null |
# coordinate_validation_date | timestamp with time zone | | not null |
# type_of_environment | integer | | not null |
# type_of_area | integer | | not null |
# category | character varying(128) | | not null |
# timezone | character varying(64) | | not null |
# additional_metadata | jsonb | | not null |
# coordinate_validator_id | integer | | not null |
#
# new (not available in old DB):
# - coordinate_validation_date (now set to NOW())
# - station_coordinate_validator_id (now set to 1 (superuser: sschroeder))
#
# author: s.schroeder@fz-juelich.de
# date: 2020-06-05
# Export the Chinese 'OTHER' stations from the old DB, rewrite the dump so
# that the coordinates become a PostGIS POINT, and load the result into
# stationmeta_core of the new DB.
#
# The export and import file names are defined once so that the conversion
# always reads the file that was just exported (the original read a
# differently dated file, stations_China_20200103_export.txt).
export_file='stations_China_20200605_export.txt'
import_file='stations_China_20200605_import.txt'

# Step 1: dump the selected stations from the old DB.
psql -h zam10131 -U s.schroeder surface_observations_toar -W -c "\COPY (select numid,station_id,station_name,station_country,station_state,station_coordinate_status,station_lon,station_lat,station_alt from stations WHERE station_country='China' and network_name='OTHER') TO '${export_file}'"

# Step 2: head part = characters 1-427 of every line; every '-' becomes '0'
# and the validation date/validator plus the opening of the POINT literal
# are appended.
# NOTE(review): the fixed character positions assume fixed-width columns in
# the dump -- confirm against the actual export.
cut -c1-427 "${export_file}" \
  | sed -e "s/-/0/g" -e "s/$/ NOW() 1 SRID=4326;POINT(/g" > tmp_head.txt

# Step 3: tail part = characters 430 to end of line (the coordinates),
# closed with ')'.
# NOTE(review): as written, "s/ / /g" replaces a blank with a blank;
# presumably one side was a tab originally -- confirm.
cut -c430- "${export_file}" \
  | sed -e "s/ / /g" -e "s/$/)/g" > tmp_tail.txt

# Step 4: glue head and tail together line by line without a separator.
paste -d'\0' tmp_head.txt tmp_tail.txt > "${import_file}"

# Remove only the two temporary files created above (a tmp_*.txt glob could
# also delete unrelated files in the working directory).
rm -- tmp_head.txt tmp_tail.txt

# Step 5: load into the new DB and strip padding blanks from text columns.
psql -h localhost -U django toar2 -W -c "\COPY stationmeta_core FROM '${import_file}'"
psql -h localhost -U django toar2 -W -c "UPDATE stationmeta_core SET country=TRIM(country),codes=TRIM(codes),name=TRIM(name),state=TRIM(state);"
#!/bin/bash
# example script (for country='China' and network_name='OTHER')
#
# new design of timeseries:
# Table "public.timeseries"
# Column | Type | Collation | Nullable | Default
#------------------------------+------------------------+-----------+----------+----------------------------------------
#id | integer | | not null | nextval('timeseries_id_seq'::regclass)
#label | character varying(128) | | not null |
#order | integer | | not null |
#access_rights | integer | | not null |
#sampling_frequency | integer | | not null |
#aggregation | integer | | not null |
#data_start_date | timestamp with time zone | | not null |
#data_end_date | timestamp with time zone | | not null |
#measurement_method | character varying(128) | | not null |
#sampling_height | double precision | | not null |
#additional_metadata | jsonb | | not null |
#date_added | timestamp with time zone | | not null |
#date_modified | timestamp with time zone | | not null |
#station_id | integer | | |
#variable_id | integer | | |
#
# new (not available in old DB):
# - sampling_height (just for now: put 10 (m) -- just invented!)
# - sampling_frequency (just for now: put "hourly" (because I know!!!) -- this is choice 0)
# - aggregation (just for now: put "mean: 1h" (because I know!!!) -- this is choice 0)
#!!!!!!!!!!!!! did we lose this information: # - calibration_report (just for now: put ' ' (because nothing is known))
# - evaluation_report (just for now: put ' ' (because nothing is known))
# - access_rights (was: parameter_status (now set embargoed data (formerly flagged by 1) to flag 2! -- script is hard-wired at the moment!)
# - change_record (just for now: 'test insertion' -- should at least be expanded by NOW() -- script is hard-wired at the moment!)
# - variable_id (was: parameter_name and no link to id of separate table) (just for now: ozone (9) -- because I know))
# attention! should label really be completely UNIQUE (this has not been before! -- for now: add id to it!)
# label: there has been discussion about what to put here; the most recent discussion was about parameter_contributor_shortname
#
#
# author: s.schroeder@fz-juelich.de
# date: 2020-06-05
# Export the ozone timeseries metadata of the Chinese 'OTHER' stations from
# the old DB and load it into the table timeseries of the new DB.
export_file='timeseries_China_20200605_export.txt'
import_file='timeseries_China_20200605_import.txt'

# Connection command lines; -W makes psql ask for the password on every call.
old_db=(psql -h zam10131.zam.kfa-juelich.de -U s.schroeder surface_observations_toar -W)
new_db=(psql -h localhost -U django toar2 -W)

# Step 1: dump the joined parameter_series/stations rows; most of the
# selected values are hard-wired constants (see the header of this script).
"${old_db[@]}" -c "\COPY (select id, CONCAT(id,'_',parameter_contributor_shortname), 10, 0, 0, parameter_measurement_method, parameter_original_units, '', '' , 2, comments, 'test insertion', 1, station_numid, 9 from parameter_series p, stations s WHERE p.station_numid=s.numid AND s.station_country='China' AND s.network_name='OTHER' AND p.parameter_name='o3') TO '${export_file}'"

# Step 2: translate the old DB to the new design.
# At the moment this is a plain copy (just fake data!).
cp "${export_file}" "${import_file}"

# Step 3: load the file into the new table.
"${new_db[@]}" -c "\COPY timeseries FROM '${import_file}'"

# Step 4: strip padding blanks from the text columns.
# NOTE(review): the column names used here (timeseries_label, ...) differ
# from the table layout documented in the header of this script (label,
# measurement_method) -- confirm against the live schema.
"${new_db[@]}" -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"
#!/bin/bash
# just to test right (automatic) values for date_added, date_modified
#
# author: s.schroeder@fz-juelich.de
# date: 2020-06-05
# Insert test rows, wait, then update them, so that the automatically
# maintained date_added / date_modified values can be compared afterwards.
# -W makes psql ask for the password on every call.
new_db=(psql -h localhost -U django toar2 -W)

# Step 1: insert the test rows.
"${new_db[@]}" -c "\COPY timeseries FROM 'test_date_added.txt'"

# Step 2: wait two minutes between the insert and the update.
sleep 2m

# Step 3: touch the rows with an UPDATE.
"${new_db[@]}" -c "UPDATE timeseries SET timeseries_label=TRIM(timeseries_label),timeseries_measurement_method=TRIM(timeseries_measurement_method),timeseries_original_units=TRIM(timeseries_original_units);"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment