Skip to content
Snippets Groups Projects
Commit 8c568e4d authored by Sabine Schröder's avatar Sabine Schröder
Browse files

script and dry run changes for merging stations

parent 66fe62ee
No related branches found
No related tags found
No related merge requests found
Pipeline #53696 passed
# -*- coding: utf-8 -*-
"""populate TOAR2 database via REST API
This should be done from pure text files (taken -- for example -- from old database)
Advantage: small database can be constructed by everyone
call:
python populate_database_via_REST.py TOKEN
"""
import sys
import getopt
import pandas as pd
from io import StringIO
import json
import requests
# Base URL of the TOAR REST service the station metadata is posted to.
TOAR_SERVICE_URL = 'http://127.0.0.1:8000/'

# Old database, station_coordinate_status:
#   -1 : not checked (default value)
#    0 : verified by google earth or other means
#    1 : verification not possible, but no reason to doubt that the
#        measurement location should be accurate to within 100 metres or so
#    2 : unspecified potential issue with the station coordinates
#    3 : obvious error in station coordinate information
#    4 : severe mismatch between reported station altitude and google
#        elevation at station location (> 100 m) indicating wrong station
#        coordinates
#    5 : no coordinates available -- given coordinates are completely invented!
#
# New database, coordinate_validation_status:
#    0 : 'NotChecked'   ('not checked') -- default value
#    1 : 'Verified'     ('verified')
#    2 : 'Plausible'    ('plausible')
#    3 : 'Doubtful'     ('doubtful')
#    4 : 'Unverifyable' ('not verifyable')
#
# Resulting translation (old -> new):
#   -1 -> 0,  0 -> 1,  1 -> 2,  2 -> 3,  3 -> 4,  4 -> 4,  5 -> 3
tr_coord_status = {
    -1: 'NotChecked',
    0: 'Verified',
    1: 'Plausible',
    2: 'Doubtful',
    3: 'Unverifyable',
    4: 'Unverifyable',
    5: 'Doubtful',
}

# Climatic zone names listed in the order of their old numeric codes;
# the first entry belongs to code -1, the last one to code 12.
tr_climatic_zone = dict(enumerate(
    (
        'Undefined',
        'Unclassified',
        'WarmTemperateMoist',
        'WarmTemperateDry',
        'CoolTemperateMoist',
        'CoolTemperateDry',
        'PolarMoist',
        'PolarDry',
        'BorealMoist',
        'BorealDry',
        'TropicalMontane',
        'TropicalWet',
        'TropicalMoist',
        'TropicalDry',
    ),
    start=-1,
))

# The new vocabulary names are simply the capitalised old (lower-case) names.
tr_type_of_environment = {
    old_name: old_name.capitalize()
    for old_name in ('unknown', 'background', 'traffic', 'industrial', 'other')
}

tr_type_of_area = {
    old_name: old_name.capitalize()
    for old_name in ('unknown', 'urban', 'suburban', 'rural', 'remote')
}
# Tab-separated, header-less export of the old database's station table.
STATIONS_FILE = "stations_Germany_AIRBASE_20201029_export.txt"

# Column layout of STATIONS_FILE, in file order.
COLUMN_NAMES = [
    'numid',
    'network_name',
    'station_id',
    'station_local_id',
    'station_type',
    'station_type_of_area',
    'station_category',
    'station_name',
    'station_country',
    'station_state',
    'station_lon',
    'station_lat',
    'station_alt',
    'station_timezone',
    'station_nightlight_5km',
    'station_climatic_zone',
    'station_wheat_production',
    'station_rice_production',
    'station_nox_emissions',
    'station_omi_no2_column',
    'station_toar_category',
    'station_htap_region',
    'station_reported_alt',
    'station_alt_flag',
    'station_coordinate_status',
    'station_google_alt',
    'station_etopo_alt',
    'station_etopo_min_alt_5km',
    'station_etopo_relative_alt',
    'station_dominant_landcover',
    'station_landcover_description',
    'station_max_nightlight_25km',
    'station_max_population_density_25km',
    'station_nightlight_1km',
    'station_population_density',
    'google_resolution',
    'station_comments',
    'station_max_population_density_5km',
]

# Upper bound (seconds) for one POST so that a hanging server cannot block
# the whole import forever.
REQUEST_TIMEOUT = 60


def _build_stationmeta(row):
    """Translate one row of the old station table into the POST payload.

    ``row`` is indexed by the names in COLUMN_NAMES.  Old numeric codes and
    lower-case vocabulary names are mapped through the tr_* tables; an
    unknown code raises KeyError.
    """
    return {
        "codes": [row['station_id'].strip()],
        "name": row['station_name'].strip(),
        # Cast to built-in float so the values stay JSON-serialisable even
        # if pandas parsed a column as integers.
        "coordinates": {"lat": float(row['station_lat']),
                        "lng": float(row['station_lon']),
                        "alt": float(row['station_alt'])},
        "coordinate_validation_status": tr_coord_status[row['station_coordinate_status']],
        "country": row['station_country'].strip(),
        "state": row['station_state'].strip(),
        "type_of_environment": tr_type_of_environment[row['station_type'].strip()],
        "type_of_area": tr_type_of_area[row['station_type_of_area'].strip()],
        "timezone": row['station_timezone'].strip(),
        # to be done --> collect all data from old database!
        "additional_metadata": "{}",
        # to be done!
        "roles": [],
        "globalmeta": {"climatic_zone": tr_climatic_zone[row['station_climatic_zone']]},
    }


def main(argv):
    """POST every station of STATIONS_FILE to the TOAR service.

    ``argv`` is the full command line; ``argv[1]`` is the access token.
    Each request and the server's answer are printed to stdout.  Returns the
    process exit code (0 on completion, 2 when the token is missing).
    """
    if len(argv) < 2:
        print(f"usage: python {argv[0] if argv else 'populate_database_via_REST.py'} TOKEN",
              file=sys.stderr)
        return 2
    access_token = argv[1]

    headers = {'Content-Type': 'application/json',
               'accept': 'application/json',
               'Authorization': 'Token ' + access_token}
    # Handed to requests as ``verify=``: True means TLS certificates ARE checked.
    verify_ssl = True
    url = TOAR_SERVICE_URL + 'stationmeta/'

    df = pd.read_csv(STATIONS_FILE, header=None, index_col=None, sep='\t',
                     names=COLUMN_NAMES)
    for _, row in df.iterrows():
        # create json structure for POST request
        payload = json.dumps({"stationmeta": _build_stationmeta(row)})
        print(url, payload)
        print("==>")
        r = requests.post(url, data=payload, headers=headers,
                          verify=verify_ssl, timeout=REQUEST_TIMEOUT)
        # to do (German station are not critical!):
        # if code already exists, is it really the same station?
        print(f"{r.text}\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
......@@ -95,7 +95,45 @@ def get_unique_stationmeta_annotation(db: Session, text: str, contributor_id: in
return db_object
# this is just to fake what would be done! This is a dry-run!
def create_stationmeta(db: Session, engine: Engine, stationmeta: StationmetaCreate):
    """Dry run of station creation: report what would be done, write nothing.

    The submitted station is converted to a StationmetaCore object and the
    table stationmeta_core is searched for stations within ``radius`` of the
    submitted coordinates.  Nothing is added to ``db`` (the session is
    accepted only to keep the signature of the real create function).

    Returns a JSONResponse with status code 200 whose content is a message
    telling whether the station is new, matches exactly one existing record,
    or matches several records (the matching records are appended).
    """
    stationmeta_dict = stationmeta.dict()
    # Strip everything that is not a column of the core table; the dry run
    # does not look at these related records.
    for related in ('roles', 'annotations', 'aux_images', 'aux_docs',
                    'aux_urls', 'globalmeta', 'globalservice'):
        stationmeta_dict.pop(related, None)
    db_stationmeta = models.StationmetaCore(**stationmeta_dict)
    # there's a mismatch with coordinates --> how to automatically switch back and forth?!
    db_stationmeta.coordinates = get_geom_from_coordinates(Coordinates(**db_stationmeta.coordinates))
    # there's also a mismatch with additional_metadata --> BUT: this should not be switched back!
    # in upload command, we have now: "additional_metadata": "{}"
    # but return from this method gives: "additional_metadata": {}
    # ==> there is a mismatch between model(JSONB) and schema(JSON)
    db_stationmeta.additional_metadata = str(db_stationmeta.additional_metadata).replace("'", '"')

    # Search radius compared against ST_DistanceSphere (PostGIS documents its
    # result as metres).
    radius = 100
    # Values are bound by the driver instead of being pasted into the SQL text.
    # NOTE(review): '%s' placeholders assume a format/pyformat DBAPI driver
    # (e.g. psycopg2) behind the engine -- confirm.
    db_cmd = ("select * from stationmeta_core "
              "where ST_DistanceSphere(stationmeta_core.coordinates, %s) < %s")
    fake_conn = engine.raw_connection()
    try:
        fake_cur = fake_conn.cursor()
        try:
            fake_cur.execute(db_cmd, (str(db_stationmeta.coordinates), radius))
            records = fake_cur.fetchall()
        finally:
            fake_cur.close()
    finally:
        # Always release the raw connection, even if the query fails.
        fake_conn.close()

    if records:
        if len(records) == 1:
            message = 'already in database --> patch record (add station code)'
        else:
            message = 'more than one station falls within the given radius!\n' + \
                      'choose which station record to patch (add station code)'
        # List every station found within the radius.
        for record in records:
            message += str(record)
    else:
        message = f'new station: {db_stationmeta.codes},{db_stationmeta.name},{db_stationmeta.coordinates}'
    status_code = 200
    return JSONResponse(status_code=status_code, content=message)
def create_stationmeta_theOriginal(db: Session, engine: Engine, stationmeta: StationmetaCreate):
stationmeta_dict = stationmeta.dict()
roles_data = stationmeta_dict.pop('roles', None)
annotations_data = stationmeta_dict.pop('annotations', None)
......
......@@ -4,7 +4,7 @@ class StationmetaCore (Base)
============================
"""
from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, \
Text, CheckConstraint, Sequence
Text, text, CheckConstraint, Sequence
from geoalchemy2.types import Geometry
from sqlalchemy.orm import relationship
from sqlalchemy.dialects.postgresql import JSONB, ARRAY
......@@ -55,6 +55,7 @@ class StationmetaCore_WithoutCoords(Base):
Check constraints:
"stationmeta_core_type_of_area_check" CHECK (type_of_area >= 0)
"stationmeta_core_type_of_environment_check" CHECK (type_of_environment >= 0)
"stationmeta_core_coordinate_validation_status_check" CHECK (coordinate_validation_status >= 0)
Foreign-key constraints:
"stationmeta_core_type_of_area_fk_ta_vocabulary_enum_val" FOREIGN KEY (type_of_area) REFERENCES ta_vocabulary(enum_val)
"stationmeta_core_type_of_environment_fk_st_vocabulary_enum_val" FOREIGN KEY (type_of_environment) REFERENCES st_vocabulary(enum_val)
......@@ -71,7 +72,8 @@ class StationmetaCore_WithoutCoords(Base):
__tablename__ = 'stationmeta_core'
__table_args__ = (
CheckConstraint('type_of_area >= 0'),
CheckConstraint('type_of_environment >= 0')
CheckConstraint('type_of_environment >= 0'),
CheckConstraint('coordinate_validation_status >= 0')
)
id = Column(Integer, STATIONMETA_CORE_ID_SEQ, primary_key=True, server_default=STATIONMETA_CORE_ID_SEQ.next_value())
......@@ -100,3 +102,4 @@ class StationmetaCore_WithoutCoords(Base):
class StationmetaCore(StationmetaCore_WithoutCoords):
    """StationmetaCore_WithoutCoords extended by the coordinate columns."""
    # geometry column declared with geometry type 'POINTZ' and 4326 as second
    # Geometry argument (the SRID in geoalchemy2's signature)
    coordinates = Column(Geometry('POINTZ', 4326))
    # mandatory reference to cv_vocabulary.enum_val; the database fills in 0
    # when no value is supplied (server_default)
    coordinate_validation_status = Column(ForeignKey('cv_vocabulary.enum_val'), nullable=False, server_default=text("'0'::integer"))
......@@ -422,7 +422,12 @@ class StationmetaRoleBase(BaseModel):
id: int = None
role: str
status: str
contact: Contact
# contact: Contact
# At the moment, contact_id has to be given.
# ==> In the future: give the unique contact_email instead.
# Patching stationmeta should not result in creating new contacts!
# ==> Still to do: check whether the contact already exists (otherwise patching cannot be done).
contact_id: int
@validator('role')
def check_role(cls, v):
......
......@@ -53,7 +53,10 @@ def get_stationmeta_changelog(station_id: int, db: Session = Depends(get_db)):
# - get stationmeta_aux
# - ...
@router.post('/stationmeta/', response_model=schemas.StationmetaCreate)
# the original post command!
#@router.post('/stationmeta/', response_model=schemas.StationmetaCreate)
# now the dry run!
@router.post('/stationmeta/')
# The following command was not working as long as the upload via Body was defined.
# See bug report: https://github.com/tiangolo/fastapi/issues/300
# (Although this seems to be fixed in the meantime, it is not working in my FastAPI version.)
......@@ -63,7 +66,12 @@ def create_stationmeta_core(stationmeta: schemas.StationmetaCreate = Body(..., e
db_stationmeta_core= crud.get_stationmeta_core(db, station_code=station_code)
if db_stationmeta_core:
raise HTTPException(status_code=400, detail="Station already registered.")
return crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta)
# the original post command!
# return crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta)
# now the dry run!
response=crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta)
msg = response.body.decode('utf-8')
raise HTTPException(status_code=200, detail=str(msg))
@router.patch('/stationmeta/{station_code}', response_model=schemas.StationmetaPatch)
def patch_stationmeta_core(station_code: str, description: str, stationmeta: schemas.StationmetaPatch = Body(..., embed = True), db: Session = Depends(get_db)):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment