diff --git a/tests/test_data.py b/tests/test_data.py index 565f073feff9d2ba5e7d5c706e7dac99dc63eb52..33dc800c6cd9fc8ce4782f9167fa32e90a8eaec1 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -33,7 +33,6 @@ from toardb.utils.database import get_db from datetime import datetime from unittest.mock import patch - # only datetime.now needs to be overridden because otherwise daterange-arguments would be provided as MagicMock-objects! class FixedDatetime(datetime): @classmethod @@ -262,10 +261,7 @@ class TestApps: # the data/map-endpoint is a special need of the analysis service def test_get_map_data(self, client, db): - fixed_time = datetime(2023, 7, 28, 12, 0, 0) - with patch('toardb.timeseries.crud.dt.datetime') as mock_datetime: - mock_datetime.now.return_value = fixed_time - response = client.get("/data/map/?variable_id=7&daterange=2012-12-16T21:00,2012-12-17T06:00") + response = client.get("/data/map/?variable_id=7&daterange=2012-12-16T21:00,2012-12-17T06:00") expected_status_code = 200 assert response.status_code == expected_status_code expected_resp = [{'timeseries_id': 1, 'value': 21.581}, @@ -281,6 +277,291 @@ class TestApps: assert response.json() == expected_resp + def test_get_data_with_fields(self, client, db): + fixed_time = datetime(2023, 7, 28, 12, 0, 0) + with patch('toardb.timeseries.crud.dt.datetime') as mock_datetime: + mock_datetime.now.return_value = fixed_time + response = client.get("/data/timeseries/1?fields=datetime,value") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = {'metadata': {'id': 1, + 'label': 'CMA', + 'order': 1, + 'sampling_frequency': 'hourly', + 'aggregation': 'mean', + 'data_start_date': '2003-09-07T15:30:00+00:00', + 'data_end_date': '2016-12-31T14:30:00+00:00', + 'data_origin': 'instrument', + 'data_origin_type': 'measurement', + 'provider_version': 'N/A', + 'sampling_height': 7.0, + 'additional_metadata': {}, + 'doi': '', + 'coverage': -1.0, + 'station': {'id': 2, + 'codes': ['SDZ54421'], + 'name': 'Shangdianzi', + 'coordinates': {'lat': 40.65, + 'lng': 117.106, + 'alt': 293.9}, + 'coordinate_validation_status': 'not checked', + 'country': 'China', + 'state': 'Beijing Shi', + 'type': 'unknown', + 'type_of_area': 'unknown', + 'timezone': 'Asia/Shanghai', + 'additional_metadata': {'dummy_info': 'Here is some ' + 'more information about the station'}, + 'roles': [], + 'annotations': [], + 'aux_images': [], + 'aux_docs': [], + 'aux_urls': [], + 'globalmeta': None, + 'changelog': []}, + 'variable': {'name': 'toluene', + 'longname': 'toluene', + 'displayname': 'Toluene', + 'cf_standardname': 'mole_fraction_of_toluene_in_air', + 'units': 'nmol mol-1', + 'chemical_formula': 'C7H8', + 'id': 7}, + 'programme': {'id': 0, + 'name': '', + 'longname': '', + 'homepage': '', + 'description': ''}, + 'roles': [{'id': 2, + 'role': 'resource provider', + 'status': 'active', + 'contact': {'id': 4, + 'person': None, + 'organisation': {'id': 1, + 'name': 'UBA', + 'longname': 'Umweltbundesamt', + 'kind': 'government', + 'city': 'Dessau-Roßlau', + 'postcode': '06844', + 'street_address': 'Wörlitzer Platz 1', + 'country': 'Germany', + 'homepage': 'https://www.umweltbundesamt.de', + 'contact_url': 'mailto:immission@uba.de'}}}], + 'changelog': None, + 'citation': 'Umweltbundesamt: time series of toluene at ' + 'Shangdianzi, accessed from the TOAR database on ' + '2023-07-28 12:00:00', + 'license': 'This data is published under a Creative Commons ' + 'Attribution 4.0 International (CC BY 4.0). ' + 'https://creativecommons.org/licenses/by/4.0/'}, + 'data': [{'datetime': '2012-12-16T21:00:00+00:00', 'value': 21.581}, + {'datetime': '2012-12-16T22:00:00+00:00', 'value': 13.734}, + {'datetime': '2012-12-16T23:00:00+00:00', 'value': 13.734}, + {'datetime': '2012-12-17T00:00:00+00:00', 'value': 7.848}, + {'datetime': '2012-12-17T01:00:00+00:00', 'value': 15.696}, + {'datetime': '2012-12-17T02:00:00+00:00', 'value': 11.772}, + {'datetime': '2012-12-17T03:00:00+00:00', 'value': 13.734}, + {'datetime': '2012-12-17T04:00:00+00:00', 'value': 19.62}, + {'datetime': '2012-12-17T05:00:00+00:00', 'value': 15.696}, + {'datetime': '2012-12-17T06:00:00+00:00', 'value': 5.886}] + } + assert response.json() == expected_resp + + + def test_get_data_with_fields_limited(self, client, db): + fixed_time = datetime(2023, 7, 28, 12, 0, 0) + with patch('toardb.timeseries.crud.dt.datetime') as mock_datetime: + mock_datetime.now.return_value = fixed_time + response = client.get("/data/timeseries/1?fields=datetime,value&limit=4") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = {'metadata': {'id': 1, + 'label': 'CMA', + 'order': 1, + 'sampling_frequency': 'hourly', + 'aggregation': 'mean', + 'data_start_date': '2003-09-07T15:30:00+00:00', + 'data_end_date': '2016-12-31T14:30:00+00:00', + 'data_origin': 'instrument', + 'data_origin_type': 'measurement', + 'provider_version': 'N/A', + 'sampling_height': 7.0, + 'additional_metadata': {}, + 'doi': '', + 'coverage': -1.0, + 'station': {'id': 2, + 'codes': ['SDZ54421'], + 'name': 'Shangdianzi', + 'coordinates': {'lat': 40.65, + 'lng': 117.106, + 'alt': 293.9}, + 'coordinate_validation_status': 'not checked', + 'country': 'China', + 'state': 'Beijing Shi', + 'type': 'unknown', + 'type_of_area': 'unknown', + 'timezone': 'Asia/Shanghai', + 'additional_metadata': {'dummy_info': 'Here is some ' + 'more information about the station'}, + 'roles': [], + 'annotations': [], + 'aux_images': [], + 'aux_docs': [], + 'aux_urls': [], + 'globalmeta': None, + 'changelog': []}, + 'variable': {'name': 'toluene', + 'longname': 'toluene', + 'displayname': 'Toluene', + 'cf_standardname': 'mole_fraction_of_toluene_in_air', + 'units': 'nmol mol-1', + 'chemical_formula': 'C7H8', + 'id': 7}, + 'programme': {'id': 0, + 'name': '', + 'longname': '', + 'homepage': '', + 'description': ''}, + 'roles': [{'id': 2, + 'role': 'resource provider', + 'status': 'active', + 'contact': {'id': 4, + 'person': None, + 'organisation': {'id': 1, + 'name': 'UBA', + 'longname': 'Umweltbundesamt', + 'kind': 'government', + 'city': 'Dessau-Roßlau', + 'postcode': '06844', + 'street_address': 'Wörlitzer Platz 1', + 'country': 'Germany', + 'homepage': 'https://www.umweltbundesamt.de', + 'contact_url': 'mailto:immission@uba.de'}}}], + 'changelog': None, + 'citation': 'Umweltbundesamt: time series of toluene at ' + 'Shangdianzi, accessed from the TOAR database on ' + '2023-07-28 12:00:00', + 'license': 'This data is published under a Creative Commons ' + 'Attribution 4.0 International (CC BY 4.0). ' + 'https://creativecommons.org/licenses/by/4.0/'}, + 'data': [{'datetime': '2012-12-16T21:00:00+00:00', 'value': 21.581}, + {'datetime': '2012-12-16T22:00:00+00:00', 'value': 13.734}, + {'datetime': '2012-12-16T23:00:00+00:00', 'value': 13.734}, + {'datetime': '2012-12-17T00:00:00+00:00', 'value': 7.848}] + } + assert response.json() == expected_resp + + + def test_get_data_as_csv_with_fields(self, client, db): + fixed_time = datetime(2023, 7, 28, 12, 0, 0) + with patch('toardb.timeseries.crud.dt.datetime') as mock_datetime: + mock_datetime.now.return_value = fixed_time + response = client.get("/data/timeseries/1?format=csv&fields=datetime,value") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = ''.join(['#{\n', + '# "id": 1,\n', + '# "label": "CMA",\n', + '# "order": 1,\n', + '# "sampling_frequency": "hourly",\n', + '# "aggregation": "mean",\n', + '# "data_start_date": "2003-09-07T15:30:00+00:00",\n', + '# "data_end_date": "2016-12-31T14:30:00+00:00",\n', + '# "data_origin": "instrument",\n', + '# "data_origin_type": "measurement",\n', + '# "provider_version": "N/A",\n', + '# "sampling_height": 7.0,\n', + '# "additional_metadata": {},\n', + '# "data_license_accepted": null,\n', + '# "dataset_approved_by_provider": null,\n', + '# "doi": "",\n', + '# "coverage": -1.0,\n', + '# "station": {\n', + '# "id": 2,\n', + '# "codes": [\n', + '# "SDZ54421"\n', + '# ],\n', + '# "name": "Shangdianzi",\n', + '# "coordinates": {\n', + '# "lat": 40.65,\n', + '# "lng": 117.106,\n', + '# "alt": 293.9\n', + '# },\n', + '# "coordinate_validation_status": "not checked",\n', + '# "country": "China",\n', + '# "state": "Beijing Shi",\n', + '# "type": "unknown",\n', + '# "type_of_area": "unknown",\n', + '# "timezone": "Asia/Shanghai",\n', + '# "additional_metadata": {\n', + '# "dummy_info": "Here is some more information about the station"\n', + '# },\n', + '# "roles": [],\n', + '# "annotations": [],\n', + '# "aux_images": [],\n', + '# "aux_docs": [],\n', + '# "aux_urls": [],\n', + '# "globalmeta": null,\n', + '# "changelog": []\n', + '# },\n', + '# "variable": {\n', + '# "name": "toluene",\n', + '# "longname": "toluene",\n', + '# "displayname": "Toluene",\n', + '# "cf_standardname": "mole_fraction_of_toluene_in_air",\n', + '# "units": "nmol mol-1",\n', + '# "chemical_formula": "C7H8",\n', + '# "id": 7\n', + '# },\n', + '# "programme": {\n', + '# "id": 0,\n', + '# "name": "",\n', + '# "longname": "",\n', + '# "homepage": "",\n', + '# "description": ""\n', + '# },\n', + '# "roles": [\n', + '# {\n', + '# "id": 2,\n', + '# "role": "resource provider",\n', + '# "status": "active",\n', + '# "contact": {\n', + '# "id": 4,\n', + '# "person": null,\n', + '# "organisation": {\n', + '# "id": 1,\n', + '# "name": "UBA",\n', + '# "longname": "Umweltbundesamt",\n', + '# "kind": "government",\n', + '# "city": "Dessau-Roßlau",\n', + '# "postcode": "06844",\n', + '# "street_address": "Wörlitzer Platz 1",\n', + '# "country": "Germany",\n', + '# "homepage": "https://www.umweltbundesamt.de",\n', + '# "contact_url": "mailto:immission@uba.de"\n', + '# }\n', + '# }\n', + '# }\n', + '# ],\n', + '# "annotations": null,\n', + '# "changelog": null,\n', + '# "citation": "Umweltbundesamt: time series of toluene at Shangdianzi, accessed from the TOAR database on 2023-07-28 12:00:00",\n', + '# "attribution": null,\n', + '# "license": "This data is published under a Creative Commons Attribution 4.0 International (CC BY 4.0). https://creativecommons.org/licenses/by/4.0/"\n', + '#}\n', + 'datetime,value\n', + '2012-12-16 21:00:00+00:00,21.581\n', + '2012-12-16 22:00:00+00:00,13.734\n', + '2012-12-16 23:00:00+00:00,13.734\n', + '2012-12-17 00:00:00+00:00,7.848\n', + '2012-12-17 01:00:00+00:00,15.696\n', + '2012-12-17 02:00:00+00:00,11.772\n', + '2012-12-17 03:00:00+00:00,13.734\n', + '2012-12-17 04:00:00+00:00,19.62\n', + '2012-12-17 05:00:00+00:00,15.696\n', + '2012-12-17 06:00:00+00:00,5.886']) + assert response.text == expected_resp + + def test_get_no_data_with_variable_and_timerage(self, client, db): # see: https://gitlab.jsc.fz-juelich.de/esde/toar-data/toardb_fastapi/-/issues/171 response = client.get("/data/map/?variable_id=25&daterange=2012-12-16T21:00,2012-12-17T06:00") diff --git a/toardb/data/crud.py b/toardb/data/crud.py index d3d3aab7f681c4410420cce2357a59b7bc99440b..1d36bb534583f7c54798414d5d12275e8fe46e42 100644 --- a/toardb/data/crud.py +++ b/toardb/data/crud.py @@ -127,19 +127,25 @@ def get_data(db: Session, timeseries_id: int, path_params, query_params): flags = None limit, offset, fields, format, filters = create_filter(query_params, "data") d_filter = filters["d_filter"] + fields_list = [] + if fields: + fields_list = fields.split(',') + columns = ( [getattr(models.Data, field) for field in fields_list] + if fields_list + else list(models.Data.__table__.columns) ) except KeyError as e: status_code=400 return JSONResponse(status_code=status_code, content=str(e)) if flags: filter_string = create_filter_from_flags(flags) - data = db.query(models.Data).filter(models.Data.timeseries_id == timeseries_id). \ + data = db.query(*columns).filter(models.Data.timeseries_id == timeseries_id). \ filter(text(filter_string)). \ filter(text(d_filter)). \ - order_by(models.Data.datetime).all() + order_by(models.Data.datetime).limit(limit).all() else: - data = db.query(models.Data).filter(models.Data.timeseries_id == timeseries_id). \ + data = db.query(*columns).filter(models.Data.timeseries_id == timeseries_id). \ filter(text(d_filter)). \ - order_by(models.Data.datetime).all() + order_by(models.Data.datetime).limit(limit).all() # get advantages from pydantic, but without having another call of the REST API # (especially needed for testing with pytest!) metadata = get_timeseries_meta(timeseries_id) @@ -154,9 +160,9 @@ def get_data(db: Session, timeseries_id: int, path_params, query_params): # start with metadata content = '#' + metadata.json(indent=4, ensure_ascii=False).replace('\n', '\n#') + '\n' # add header - content += ','.join(column.name for column in models.Data.__mapper__.columns) + '\n' + content += ','.join(column.name for column in columns) + '\n' # now the data - content += '\n'.join(','.join(f"{getattr(curr, column.name)}" for column in models.Data.__mapper__.columns) for curr in data) + content += '\n'.join(','.join(f"{getattr(curr, column.name)}" for column in columns) for curr in data) return Response(content=content, media_type="text/csv") else: status_code=400 diff --git a/toardb/data/schemas.py b/toardb/data/schemas.py index d4b410ea021bd7af0af0f95b493465cb17a590a4..7ee000807231dbbceeecebee8b86e2a283ab8518 100644 --- a/toardb/data/schemas.py +++ b/toardb/data/schemas.py @@ -66,6 +66,11 @@ class DataCreate(DataBase): class Data(DataBase): + datetime: dt.datetime = None + value: float = None + flags: str = None + timeseries_id: int = None + version: str = None class Config: orm_mode = True diff --git a/toardb/utils/utils.py b/toardb/utils/utils.py index 71316d848ede4686ebbfbb353da5e4d80881a9df..5bdc4578b7a2ea8dd6af676a2f919477fdee08de 100644 --- a/toardb/utils/utils.py +++ b/toardb/utils/utils.py @@ -165,8 +165,6 @@ def create_filter(query_params, endpoint): # fields and format are no filter options fields = query_params.get("fields", None) - if fields and endpoint == "data": - raise KeyError(f"An unknown argument was received: fields.") format = query_params.get("format", 'json') allowed_params = allrel_params.copy() @@ -178,7 +176,7 @@ def create_filter(query_params, endpoint): elif endpoint in {'search'}: allowed_params |= gis_params | core_params | global_params | timeseries_params | roles_params | ambig_params elif endpoint in {'data'}: - allowed_params = {"limit", "offset" } | data_params | profiling_params + allowed_params |= data_params | profiling_params elif endpoint in {'variables'}: allowed_params |= variable_params elif endpoint in {'persons'}: