diff --git a/toardb/data/crud.py b/toardb/data/crud.py
index be3c2a4430d24f7a3c4f0852c4cbc77fc5840129..b7a2d689d8b8b2e1473e7618f48e130cf2d36243 100644
--- a/toardb/data/crud.py
+++ b/toardb/data/crud.py
@@ -17,7 +17,7 @@ from toardb.timeseries.crud import get_timeseries_by_unique_constraints
 import datetime as dt
 import pandas as pd
 from io import StringIO
-from toardb.utils.database import engine
+from sqlalchemy.engine import Engine

 def get_data(db: Session, timeseries_id: int):
     return db.query(models.Data).filter(models.Data.timeseries_id == timeseries_id).all()
@@ -31,7 +31,7 @@ def get_all_data(db: Session, skip : int = 0, limit: int = None):
     return db.query(models.Data).offset(skip).limit(limit).all()


-def create_data(db: Session, input_handle: UploadFile = File(...)):
+def create_data(db: Session, engine: Engine, input_handle: UploadFile = File(...)):
     # a timeseries is defined by the unique_constraint of (station_id, variable_id, label)
     # station_id: from header
     # variable_id: from database (with variable_name -- from filename)
@@ -40,7 +40,7 @@ def create_data(db: Session, input_handle: UploadFile = File(...)):
     variable_name = input_handle.filename.split('_')[0]
     variable = db.query(variables_models.Variable).filter(variables_models.Variable.name == variable_name).first()
     variable_id = variable.id
-    # get header information (station_id, contributor_shortname)
+    # get header information (station_id, contributor_shortname, timeshift_from_utc)
     line = '#bla'
     f = input_handle.file
     prev = pos = 0
@@ -51,6 +51,8 @@ def create_data(db: Session, input_handle: UploadFile = File(...)):
             station_id = line.split(':')[1]
         if key == "#contributor_shortname":
             label = line.split(':')[1]
+        if key == "#timeshift_from_utc":
+            timeoffset = dt.timedelta(hours=float(line.split(':')[1]))
         prev, pos = pos, f.tell()
     f.seek(prev)
     stationmeta_core = get_stationmeta_core(db=db,station_code=station_id)
@@ -59,6 +61,8 @@ def create_data(db: Session, input_handle: UploadFile = File(...)):
     timeseries_id = timeseries.id
     # open SpooledTemporaryFile, skip header (and also try to insert timeseries_id!)
     df = pd.read_csv(input_handle.file, comment='#', header=None, sep=';',names=["time","value","flags"],parse_dates=["time"],index_col="time")
+    # subtract timeshift to convert data to UTC
+    df.index = df.index - timeoffset
     # now insert the timeseries_id to the end of the data frame
     df.insert(2, 'timeseries_id', timeseries_id)
     # datetime needs timezone information
@@ -69,13 +73,14 @@ def create_data(db: Session, input_handle: UploadFile = File(...)):
     buf.seek(0)
     fake_conn = engine.raw_connection()
     fake_cur = fake_conn.cursor()
-    # I really want to return whether the command worked or not! (this issue is now still open)
     try:
         fake_cur.copy_from(buf, 'data', sep=',', columns=('datetime','value','flags','timeseries_id'))
         fake_conn.commit()
         message = "Data successfully inserted."
+        status_code = 200
     except:
         e = sys.exc_info()[0]
         message = "An error occurred in data insertion: %s" % (e,)
-    return JSONResponse(message)
+        status_code = 400
+    return JSONResponse(status_code=status_code, content=message)
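[Review note, not part of the patch] `create_data()` now applies `timeoffset` unconditionally, but the name is only bound when the header actually contains a `#timeshift_from_utc` line; a file without that line would raise a NameError. A minimal, self-contained sketch of a defensive fallback (the zero-offset default and the inline `header` sample are assumptions for illustration only):

    import datetime as dt
    import pandas as pd

    # Hypothetical header sample; the real code reads these lines from the upload.
    header = ["#station_id: SDZ54421", "#timeshift_from_utc: 8.0"]

    timeoffset = dt.timedelta(0)  # assumed default: treat data as already UTC
    for line in header:
        key = line.split(':')[0].strip()
        if key == "#timeshift_from_utc":
            timeoffset = dt.timedelta(hours=float(line.split(':')[1]))

    # Subtracting the offset converts local timestamps to UTC,
    # e.g. 2013-12-15 10:00 at UTC+8 becomes 2013-12-15 02:00 UTC.
    index = pd.to_datetime(["2013-12-15 10:00"]) - timeoffset
    print(index)
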
diff --git a/toardb/data/data.py b/toardb/data/data.py
index d3ffc8b89a9e9511f7b5a94f6975ea709c3b5947..55064f6006c1f8120f85eddba5bf7b803235ffd1 100644
--- a/toardb/data/data.py
+++ b/toardb/data/data.py
@@ -2,15 +2,22 @@
 Simple test API for data management
 """

+import json
 from typing import List
 from fastapi import APIRouter, Depends, HTTPException, \
                     File, UploadFile
 from sqlalchemy.orm import Session
+from sqlalchemy.engine import Engine
 from . import crud, schemas
-from toardb.utils.database import ToarDbSession
+from toardb.utils.database import ToarDbSession, engine

 router = APIRouter()

+# Dependency
+def get_engine():
+    assert engine is not None
+    return engine
+
 # Dependency
 def get_db():
     try:
@@ -20,7 +27,6 @@ def get_db():
         db.close()


-
 # plain views to post and get data

 #get all data of table data
@@ -43,7 +49,7 @@ def get_data(timeseries_id: int, db: Session = Depends(get_db)):
 # - ...

 @router.post('/data/', response_model=schemas.DataCreate)
-async def create_data(file: UploadFile = File(...), db: Session = Depends(get_db)):
+async def create_data(file: UploadFile = File(...), db: Session = Depends(get_db), engine: Engine = Depends(get_engine)):
 #   # the next three lines are automatically done by database management,
 #   # but we do want helpful error messages!
 #   db_data = crud.get_data_by_datetime_and_timeseriesid(db, datetime=data.datetime, timeseries_id=data.timeseries_id)
@@ -53,5 +59,10 @@ async def create_data(file: UploadFile = File(...), db: Session = Depends(get_db
 #   BUT:
 #   we want to upload a whole file!
 #
-    return crud.create_data(db, input_handle=file)
+    response = crud.create_data(db, engine, input_handle=file)
+    msg = response.body.decode('utf-8')
+    msg2 = '"An error occurred in data insertion: <class \'psycopg2.errors.UniqueViolation\'>"'
+    if (msg == msg2):
+        raise HTTPException(status_code=400, detail="Data for timeseries already registered.")
+    return response
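[Review note, not part of the patch] Matching the exact serialized error string ties the route to one psycopg2 version's message wording. Since `crud.create_data()` now also sets the status code, a looser check is possible; a hypothetical sketch (the helper name `raise_for_duplicate` is not part of the codebase):

    import json
    from fastapi import HTTPException
    from fastapi.responses import JSONResponse

    def raise_for_duplicate(response: JSONResponse) -> JSONResponse:
        # Branch on the 400 status set in crud.create_data() plus a substring
        # test, instead of comparing the full quoted message verbatim.
        msg = json.loads(response.body.decode('utf-8'))
        if response.status_code == 400 and 'UniqueViolation' in msg:
            raise HTTPException(status_code=400,
                                detail="Data for timeseries already registered.")
        return response
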
diff --git a/toardb/data/fixtures/toluene_SDZ54421_2013_2013_v1-0.dat b/toardb/data/fixtures/toluene_SDZ54421_2013_2013_v1-0.dat
new file mode 100644
index 0000000000000000000000000000000000000000..dc8d05c301579c5a513f50e57a7ff3c7b99b6423
--- /dev/null
+++ b/toardb/data/fixtures/toluene_SDZ54421_2013_2013_v1-0.dat
@@ -0,0 +1,60 @@
+#individual_harmonize.py: automatically created (datetimeformat (input): %d/%m/%Y %H:%M)
+#individual_harmonize.py: date: 2020-04-07 21:26
+#individual_harmonize.py: version of data: 1.0
+#station_id: SDZ54421
+#station_name: Shangdianzi
+#station_country: China
+#station_lat: 40.65
+#station_lon: 117.17
+#station_alt: 293.9
+#timeshift_from_utc: 8.0
+#time_reporting: begin_of_interval
+#original_units: ppb
+#station_local_id: SDZ54421
+#station_state: Beijing Shi
+#station_timezone: Asia/Shanghai
+#station_type_of_area: rural
+#contributor: China Meteorological Administration
+#contributor_shortname: CMA
+#contributor_country: China
+#dataset_pi: Néstor Xu Xiaobin
+#dataset_pi_email: xuxb@camscma.cn
+#Time; Value; Flag
+2013-12-15 10:00; 33.353; 0
+2013-12-15 11:00; 58.859; 0
+2013-12-15 12:00; 66.706; 0
+2013-12-15 13:00; 64.744; 0
+2013-12-15 14:00; 72.592; 0
+2013-12-15 15:00; 56.897; 0
+2013-12-15 16:00; 41.201; 0
+2013-12-15 17:00; 35.315; 0
+2013-12-15 18:00; 37.277; 0
+2013-12-15 19:00; 31.391; 0
+2013-12-15 20:00; 23.543; 0
+2013-12-15 21:00; 7.848; 0
+2013-12-15 22:00; 1.962; 0
+2013-12-16 08:00; 3.924; 0
+2013-12-16 09:00; 9.810; 0
+2013-12-16 10:00; 31.391; 0
+2013-12-16 11:00; 52.973; 0
+2013-12-16 12:00; 66.706; 0
+2013-12-16 13:00; 74.554; 0
+2013-12-16 14:00; 74.554; 0
+2013-12-16 15:00; 66.706; 0
+2013-12-16 16:00; 51.011; 0
+2013-12-16 17:00; 41.201; 0
+2013-12-16 18:00; 37.277; 0
+2013-12-16 19:00; 31.391; 0
+2013-12-16 20:00; 21.581; 0
+2013-12-16 21:00; 13.734; 0
+2013-12-16 22:00; 13.734; 0
+2013-12-16 23:00; 7.848; 0
+2013-12-17 00:00; 15.696; 0
+2013-12-17 01:00; 11.772; 0
+2013-12-17 02:00; 13.734; 0
+2013-12-17 03:00; 19.620; 0
+2013-12-17 04:00; 15.696; 0
+2013-12-17 05:00; 5.886; 0
+2013-12-17 06:00; 3.924; 0
+2013-12-17 08:00; 1.962; 0
+2013-12-17 09:00; 23.543; 0
diff --git a/toardb/data/test_base.py b/toardb/data/test_base.py
index ad1ef701aff826fdba0d25316e8e3de227e20744..d85318992098d439453904db993fdffe77981668 100644
--- a/toardb/data/test_base.py
+++ b/toardb/data/test_base.py
@@ -3,8 +3,9 @@ import pytest
 from starlette.testclient import TestClient
 from typing import Optional, AsyncIterable
 from sqlalchemy import create_engine
-from sqlalchemy.engine import Engine as Database
+from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session
+from sqlalchemy.orm import sessionmaker
 from sqlalchemy_utils import database_exists, create_database, drop_database

 from toardb.utils.database import DATABASE_URL
@@ -15,19 +16,18 @@ from toardb.variables.models import Base as VariableBase
 from toardb.stationmeta.models import Base as StationmetaBase
 from toardb.timeseries.models import Base as TimeseriesBase
 from toardb.toardb import app
-from toardb.data.data import get_db
+from toardb.data.data import get_db, get_engine

 url = str(DATABASE_URL+ "_test")
 _db_conn = create_engine(url)

-def get_test_db_conn() -> Database:
+def get_test_engine() -> Engine:
     assert _db_conn is not None
     return _db_conn

-def get_test_db() -> AsyncIterable[Session]:
-    sess = Session(bind=_db_conn)
-
+def get_test_db():
+    sess = sessionmaker(bind=_db_conn,autoflush=False,autocommit=False)()
     try:
         yield sess
     finally:
@@ -91,6 +91,7 @@ def create_test_database():
     Base.metadata.create_all(_db_conn)  # Create the tables.
     #try with the basics
     app.dependency_overrides[get_db] = get_test_db  # Mock the Database Dependency
+    app.dependency_overrides[get_engine] = get_test_engine  # Mock the Database Dependency
     yield  # Run the tests.
     drop_database(url)  # Drop the test database.
@@ -99,8 +100,7 @@ def test_db_session():
     """Returns an sqlalchemy session, and after the test tears down everything properly."""
-    session = Session(bind=_db_conn)
-
+    session = sessionmaker(bind=_db_conn,autoflush=False,autocommit=False)()
     yield session
     # Drop all data after each test
     for tbl in reversed(AuthUserBase.metadata.sorted_tables +
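[Review note, not part of the patch] The new `get_engine` override follows the same pattern as `get_db`: FastAPI resolves `Depends(get_engine)` at request time, so replacing the entry in `app.dependency_overrides` is enough to route every request at the test engine. A minimal, self-contained illustration (in-memory SQLite stands in for the real PostgreSQL engines):

    from fastapi import Depends, FastAPI
    from sqlalchemy import create_engine
    from sqlalchemy.engine import Engine

    app = FastAPI()
    engine = create_engine("sqlite://")        # stand-in for the production engine
    _test_engine = create_engine("sqlite://")  # stand-in for the "_test" database engine

    def get_engine() -> Engine:
        return engine

    @app.get("/ping")
    def ping(eng: Engine = Depends(get_engine)):
        return {"dialect": eng.dialect.name}

    # The test suite swaps the dependency without touching the route:
    app.dependency_overrides[get_engine] = lambda: _test_engine
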
diff --git a/toardb/data/test_data.py b/toardb/data/test_data.py
index c2d4ac664c8717510f183e0defffe01f89b9788d..ef26e109144f6042d4282a53b0c4fe89171c3347 100644
--- a/toardb/data/test_data.py
+++ b/toardb/data/test_data.py
@@ -1,7 +1,5 @@
 import pytest
 import json
-import io
-from fastapi import File, UploadFile
 from .models import Data
 from toardb.timeseries.models import Timeseries
 from toardb.stationmeta.models import StationmetaCore
@@ -101,7 +99,6 @@ class TestApps:
         for entry in metajson:
             new_stationmeta_core = StationmetaCore(**entry)
             # there's a mismatch with coordinates --> how to automatically switch back and forth?!
-            tmp_coordinates = new_stationmeta_core.coordinates
             new_stationmeta_core.coordinates = get_geom_from_coordinates(Coordinates(**new_stationmeta_core.coordinates))
             # there's also a mismatch with additional_metadata --> BUT: this should not be switched back!
             # in upload command, we have now: "additional_metadata": "{}"
@@ -175,19 +172,16 @@ class TestApps:

     def test_insert_new(self, client, db):
-        response = client.post("/data/", files={"file": open("toardb/data/fixtures/toluene_SDZ54421_2012_2012_v1-0.dat", "rb")})
-        print(response.json())
+        response = client.post("/data/", files={"file": open("toardb/data/fixtures/toluene_SDZ54421_2013_2013_v1-0.dat", "rb")})
         expected_status_code = 200
         assert response.status_code == expected_status_code
-        expected_resp = response.json()
+        expected_resp = 'Data successfully inserted.'
         assert response.json() == expected_resp

-#   def test_insert_duplicate(self, client, db):
-#       response = client.post("/data/",
-#           ...
-#       )
-#       expected_status_code = 400
-#       assert response.status_code == expected_status_code
-#       expected_resp = {'detail': 'Data for timeseries already registered.'}
-#       assert response.json() == expected_resp
+    def test_insert_duplicate(self, client, db):
+        response = client.post("/data/", files={"file": open("toardb/data/fixtures/toluene_SDZ54421_2012_2012_v1-0.dat", "rb")})
+        expected_status_code = 400
+        assert response.status_code == expected_status_code
+        expected_resp = {'detail': 'Data for timeseries already registered.'}
+        assert response.json() == expected_resp
diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py
index f7042912f79459e2af80bdfaa8dda0a767ad74a8..7abde3481ef1108e29f592650953c15dcc1d93e2 100644
--- a/toardb/timeseries/crud.py
+++ b/toardb/timeseries/crud.py
@@ -27,15 +27,13 @@ def get_all_timeseries(db: Session, skip : int = 0, limit: int = None):

 def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_id: int, label: str):
-    with db.no_autoflush:
-        db_object = db.query(models.Timeseries).filter(models.Timeseries.station_id == station_id) \
-                                               .filter(models.Timeseries.variable_id == variable_id) \
-                                               .filter(models.Timeseries.label == label.strip()) \
-                                               .first()
-        print(db_object)
-        # there is a mismatch with additional_metadata
-        if db_object:
-            db_object.additional_metadata = str(db_object.additional_metadata)
+    db_object = db.query(models.Timeseries).filter(models.Timeseries.station_id == station_id) \
+                                           .filter(models.Timeseries.variable_id == variable_id) \
+                                           .filter(models.Timeseries.label == label.strip()) \
+                                           .first()
+    # there is a mismatch with additional_metadata
+    if db_object:
+        db_object.additional_metadata = str(db_object.additional_metadata)
     return db_object
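[Review note, not part of the patch] Dropping the `with db.no_autoflush:` guard in `get_timeseries_by_unique_constraints()` goes hand in hand with the sessions now being built via `sessionmaker(..., autoflush=False, autocommit=False)`: with an autoflushing session, the in-place mutation of `db_object.additional_metadata` could be flushed back to the database by a later query in the same session. A sketch of the session configuration this relies on (the SQLite URL is a stand-in for the real database):

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker

    engine = create_engine("sqlite://")
    SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

    session = SessionLocal()
    assert session.autoflush is False  # queries will not flush pending changes
    session.close()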