diff --git a/imports_from_TOAR1/populate_database_via_REST.py b/imports_from_TOAR1/populate_database_via_REST.py
new file mode 100644
index 0000000000000000000000000000000000000000..b774acd9a2ebf00898802339c95247638e732f31
--- /dev/null
+++ b/imports_from_TOAR1/populate_database_via_REST.py
@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+"""populate TOAR2 database via REST API
+   This should be done from pure text files (taken -- for example -- from old database)
+   Advantage: small database can be constructed by everyone
+
+   call:
+   python populate_database_via_REST.py TOKEN
+"""
+
+import sys
+import getopt
+import pandas as pd
+from io import StringIO
+
+import json
+import requests
+
+TOAR_SERVICE_URL = 'http://127.0.0.1:8000/'
+
+# tab-separated station table exported from the old database (no header line)
+STATIONS_EXPORT_FILE = "stations_Germany_AIRBASE_20201029_export.txt"
+
+# seconds to wait for the REST service (requests would wait forever by default)
+REQUEST_TIMEOUT = 60
+
+# from old database:
+# station_coordinate_status:
+# -1 : not checked (default value)
+#  0 : verified by google earth or other means
+#  1 : verification not possible, but no reason to doubt that the measurement location should be accurate to within 100 metres or so
+#  2 : unspecified potential issue with the station coordinates
+#  3 : obvious error in station coordinate information
+#  4 : severe mismatch between reported station altitude and google elevation at station location (> 100 m) indicating wrong station coordinates
+#  5 : no coordinates available – given coordinates are completely invented!
+
+# from new database: coordinate_validation_status:
+#  0: 'NotChecked', 'not checked'), -- default value
+#  1: 'Verified', 'verified'),
+#  2: 'Plausible', 'plausible'),
+#  3: 'Doubtful', 'doubtful'),
+#  4: 'Unverifyable', 'not verifyable');
+
+# ==> old new
+#     -1   0
+#      0   1
+#      1   2
+#      2   3
+#      3   4
+#      4   4
+#      5   3
+tr_coord_status = { -1 : 'NotChecked',
+                     0 : 'Verified',
+                     1 : 'Plausible',
+                     2 : 'Doubtful',
+                     3 : 'Unverifyable',
+                     4 : 'Unverifyable',
+                     5 : 'Doubtful' }
+
+tr_climatic_zone = {
+    -1 : 'Undefined',
+     0 : 'Unclassified',
+     1 : 'WarmTemperateMoist',
+     2 : 'WarmTemperateDry',
+     3 : 'CoolTemperateMoist',
+     4 : 'CoolTemperateDry',
+     5 : 'PolarMoist',
+     6 : 'PolarDry',
+     7 : 'BorealMoist',
+     8 : 'BorealDry',
+     9 : 'TropicalMontane',
+    10 : 'TropicalWet',
+    11 : 'TropicalMoist',
+    12 : 'TropicalDry' }
+
+tr_type_of_environment = {
+    'unknown' : 'Unknown',
+    'background' : 'Background',
+    'traffic' : 'Traffic',
+    'industrial' : 'Industrial',
+    'other' : 'Other' }
+
+tr_type_of_area = {
+    'unknown' : 'Unknown',
+    'urban' : 'Urban',
+    'suburban' : 'Suburban',
+    'rural' : 'Rural',
+    'remote' : 'Remote' }
+
+# names of the columns of the export file, in the order of the file
+COLUMN_NAMES = [
+    'numid',
+    'network_name',
+    'station_id',
+    'station_local_id',
+    'station_type',
+    'station_type_of_area',
+    'station_category',
+    'station_name',
+    'station_country',
+    'station_state',
+    'station_lon',
+    'station_lat',
+    'station_alt',
+    'station_timezone',
+    'station_nightlight_5km',
+    'station_climatic_zone',
+    'station_wheat_production',
+    'station_rice_production',
+    'station_nox_emissions',
+    'station_omi_no2_column',
+    'station_toar_category',
+    'station_htap_region',
+    'station_reported_alt',
+    'station_alt_flag',
+    'station_coordinate_status',
+    'station_google_alt',
+    'station_etopo_alt',
+    'station_etopo_min_alt_5km',
+    'station_etopo_relative_alt',
+    'station_dominant_landcover',
+    'station_landcover_description',
+    'station_max_nightlight_25km',
+    'station_max_population_density_25km',
+    'station_nightlight_1km',
+    'station_population_density',
+    'google_resolution',
+    'station_comments',
+    'station_max_population_density_5km' ]
+
+
+def clean_text(value):
+    """return value as stripped string ('' for missing values like NaN or None)"""
+    return '' if pd.isna(value) else str(value).strip()
+
+
+def build_stationmeta(row):
+    """create json structure for POST request from one row of the export file"""
+    stationmeta = {"codes": [clean_text(row['station_id'])],
+                   "name": clean_text(row['station_name']),
+                   "coordinates": {"lat": row['station_lat'],"lng": row['station_lon'],"alt": row['station_alt']},
+                   "coordinate_validation_status": tr_coord_status[row['station_coordinate_status']],
+                   "country": clean_text(row['station_country']),
+                   "state": clean_text(row['station_state']),
+                   "type_of_environment": tr_type_of_environment[clean_text(row['station_type'])],
+                   "type_of_area": tr_type_of_area[clean_text(row['station_type_of_area'])],
+                   "timezone": clean_text(row['station_timezone']),
+                   #to be done --> collect all data from old database!
+                   "additional_metadata": "{}",
+                   #to be done!
+                   "roles": [],
+                   "globalmeta": {"climatic_zone": tr_climatic_zone[row['station_climatic_zone']]}
+                  }
+    return {"stationmeta": stationmeta}
+
+
+def main(argv):
+    """POST every station of the export file to the REST service
+
+    argv: command line arguments; argv[1] is the access token of the service
+    """
+    if len(argv) != 2:
+        # without the token nothing can be posted: show how to call the script
+        sys.exit(__doc__)
+
+    headers = { 'Content-Type': 'application/json',
+                'accept': 'application/json',
+                'Authorization': 'Token ' + argv[1] }
+    # handed to requests as 'verify': True means that certificates are checked
+    verify_ssl = True
+    url = TOAR_SERVICE_URL + 'stationmeta/'
+
+    df = pd.read_csv(STATIONS_EXPORT_FILE, header=None, index_col=None, sep='\t', names=COLUMN_NAMES)
+    for _, row in df.iterrows():
+        data = json.dumps(build_stationmeta(row))
+
+        print(url, data)
+        print("==>")
+        try:
+            r = requests.post(url, data=data, headers=headers,
+                              verify=verify_ssl, timeout=REQUEST_TIMEOUT)
+        except requests.RequestException as err:
+            # report the problem and go on with the next station
+            print(f"request failed: {err}\n")
+            continue
+        # to do (German station are not critical!):
+        # if code already exists, is it really the same station?
+        msg = r.text
+        print(f"{msg}\n")
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/toardb/stationmeta/crud.py b/toardb/stationmeta/crud.py
index 472ee5246c3ef5d9135bdb99aecece2823bb2221..1de1623c2b55d196a6ec13e420f0549ad9f71369 100644
--- a/toardb/stationmeta/crud.py
+++ b/toardb/stationmeta/crud.py
@@ -95,7 +95,57 @@ def get_unique_stationmeta_annotation(db: Session, text: str, contributor_id: in
     return db_object
 
 
+# this is just to fake what would be done! This is a dry-run!
 def create_stationmeta(db: Session, engine: Engine, stationmeta: StationmetaCreate):
+    """dry run of the station upload: nothing is written to the database
+
+    The database is searched for stations within radius (100, the unit is that
+    of ST_DistanceSphere) of the given coordinates.
+    returns: JSONResponse (status code 200); its content is a message telling
+             whether the station is new or matches stored record(s)
+    """
+    stationmeta_dict = stationmeta.dict()
+    # these entries are not handed over to models.StationmetaCore
+    for key in ('roles', 'annotations', 'aux_images', 'aux_docs', 'aux_urls',
+                'globalmeta', 'globalservice'):
+        stationmeta_dict.pop(key, None)
+    db_stationmeta = models.StationmetaCore(**stationmeta_dict)
+    # there's a mismatch with coordinates --> how to automatically switch back and forth?!
+    db_stationmeta.coordinates = get_geom_from_coordinates(Coordinates(**db_stationmeta.coordinates))
+    # there's also a mismatch with additional_metadata --> BUT: this should not be switched back!
+    # in upload command, we have now: "additional_metadata": "{}"
+    # but return from this method gives: "additional_metadata": {}
+    # ==> there is a mismatch between model(JSONB) and schema(JSON)
+    db_stationmeta.additional_metadata = str(db_stationmeta.additional_metadata).replace("'",'"')
+    radius = 100
+    # the values are passed as query parameters (not pasted into the SQL string);
+    # NOTE(review): '%s' placeholders assume a psycopg2 connection -- confirm!
+    db_cmd = "select * from stationmeta_core where ST_DistanceSphere(stationmeta_core.coordinates, %s) < %s"
+    fake_conn = engine.raw_connection()
+    try:
+        fake_cur = fake_conn.cursor()
+        try:
+            fake_cur.execute(db_cmd, (str(db_stationmeta.coordinates), radius))
+            records = fake_cur.fetchall()
+        finally:
+            fake_cur.close()
+    finally:
+        # close() gives the connection back to the pool of the engine
+        fake_conn.close()
+    if records:
+        if len(records) == 1:
+            message = 'already in database --> patch record (add station code)'
+        else:
+            message = 'more than one station falls within the given radius!\n' + \
+                      'choose which station record to patch (add station code)'
+            for record in records:
+                message += str(record)
+    else:
+        message = f'new station: {db_stationmeta.codes},{db_stationmeta.name},{db_stationmeta.coordinates}'
+    status_code = 200
+    return JSONResponse(status_code=status_code, content=message)
+
+def create_stationmeta_theOriginal(db: Session, engine: Engine, stationmeta: StationmetaCreate):
     stationmeta_dict = stationmeta.dict()
     roles_data = stationmeta_dict.pop('roles', None)
     annotations_data = stationmeta_dict.pop('annotations', None)
diff --git a/toardb/stationmeta/models_core.py b/toardb/stationmeta/models_core.py
index e4099fc74370b831c6d5eb1e21ff88818655751d..13468009188aad1d8460be0a04c84f6d1928b7ce 100644
--- a/toardb/stationmeta/models_core.py
+++ b/toardb/stationmeta/models_core.py
@@ -4,7 +4,7 @@ class StationmetaCore (Base)
 ============================
 """
 from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, \
-                       Text, CheckConstraint, Sequence
+                       Text, text, CheckConstraint, Sequence
 from geoalchemy2.types import Geometry
 from sqlalchemy.orm import relationship
 from sqlalchemy.dialects.postgresql import JSONB, ARRAY
@@ -55,6 +55,7 @@ class StationmetaCore_WithoutCoords(Base):
Check constraints: "stationmeta_core_type_of_area_check" CHECK (type_of_area >= 0) "stationmeta_core_type_of_environment_check" CHECK (type_of_environment >= 0) + "stationmeta_core_coordinate_validation_status_check" CHECK (coordinate_validation_status >= 0) Foreign-key constraints: "stationmeta_core_type_of_area_fk_ta_vocabulary_enum_val" FOREIGN KEY (type_of_area) REFERENCES ta_vocabulary(enum_val) "stationmeta_core_type_of_environment_fk_st_vocabulary_enum_val" FOREIGN KEY (type_of_environment) REFERENCES st_vocabulary(enum_val) @@ -71,7 +72,8 @@ class StationmetaCore_WithoutCoords(Base): __tablename__ = 'stationmeta_core' __table_args__ = ( CheckConstraint('type_of_area >= 0'), - CheckConstraint('type_of_environment >= 0') + CheckConstraint('type_of_environment >= 0'), + CheckConstraint('coordinate_validation_status >= 0') ) id = Column(Integer, STATIONMETA_CORE_ID_SEQ, primary_key=True, server_default=STATIONMETA_CORE_ID_SEQ.next_value()) @@ -100,3 +102,4 @@ class StationmetaCore_WithoutCoords(Base): class StationmetaCore(StationmetaCore_WithoutCoords): coordinates = Column(Geometry('POINTZ', 4326)) + coordinate_validation_status = Column(ForeignKey('cv_vocabulary.enum_val'), nullable=False, server_default=text("'0'::integer")) diff --git a/toardb/stationmeta/schemas.py b/toardb/stationmeta/schemas.py index 9b818f0afe48ca92c4132d0fd9e35b6b88914427..b7bb7ad776ba3a49fa7838f56967bba77db4d973 100644 --- a/toardb/stationmeta/schemas.py +++ b/toardb/stationmeta/schemas.py @@ -422,7 +422,12 @@ class StationmetaRoleBase(BaseModel): id: int = None role: str status: str - contact: Contact +# contact: Contact +# at the moment contact_id has to be given... +# ==> in the future: give unique contact_email +# patching stationmeta should not result in creating new contacts! 
+# ==> still to do: check, whether contact already exists (otherwise patching cannot be done) + contact_id: int @validator('role') def check_role(cls, v): diff --git a/toardb/stationmeta/stationmeta.py b/toardb/stationmeta/stationmeta.py index 8efd0a447c2ab84bdb53e977d03f2aa2bcddb9d7..6fe2110f0477494144356379dc02bf738b5df5a2 100644 --- a/toardb/stationmeta/stationmeta.py +++ b/toardb/stationmeta/stationmeta.py @@ -53,7 +53,10 @@ def get_stationmeta_changelog(station_id: int, db: Session = Depends(get_db)): # - get stationmeta_aux # - ... -@router.post('/stationmeta/', response_model=schemas.StationmetaCreate) +# the original post command! +#@router.post('/stationmeta/', response_model=schemas.StationmetaCreate) +# now the dry run! +@router.post('/stationmeta/') # The following command was not working as long as the upload via Body was defined. # See bug report: https://github.com/tiangolo/fastapi/issues/300 # (Although this seems to be fixed in the meantime, it is not working in my FastAPI version.) @@ -63,7 +66,12 @@ def create_stationmeta_core(stationmeta: schemas.StationmetaCreate = Body(..., e db_stationmeta_core= crud.get_stationmeta_core(db, station_code=station_code) if db_stationmeta_core: raise HTTPException(status_code=400, detail="Station already registered.") - return crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta) +# the original post command! +# return crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta) +# now the dry run! + response=crud.create_stationmeta(db=db, engine=engine, stationmeta=stationmeta) + msg = response.body.decode('utf-8') + raise HTTPException(status_code=200, detail=str(msg)) @router.patch('/stationmeta/{station_code}', response_model=schemas.StationmetaPatch) def patch_stationmeta_core(station_code: str, description: str, stationmeta: schemas.StationmetaPatch = Body(..., embed = True), db: Session = Depends(get_db)):