diff --git a/README.md b/README.md index 866ad3b08b795f1bfb87ff82f56371a8f0927b33..c0f56257775c3d9c2ed8f7ecb1503693144ef430 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ http://esde.pages.jsc.fz-juelich.de/toar-data/toardb_fastapi/docs/ After cloning the repository (`git clone https://gitlab.jsc.fz-juelich.de/esde/toar-data/toardb_fastapi.git`) it is recommended to create a virtual environment and install the requirements therein: ``` -python3 –m venv venv +python3 -m venv venv source venv/bin/activate -pip install –r requirements.txt +pip install -r requirements.txt ``` **If needed: installation and setup of database:** diff --git a/tests/test_search_aggregations.py b/tests/test_search_aggregations.py new file mode 100644 index 0000000000000000000000000000000000000000..9ead2beba10a5b7dbc7eebec3cec142b69a57af5 --- /dev/null +++ b/tests/test_search_aggregations.py @@ -0,0 +1,995 @@ +# SPDX-FileCopyrightText: 2021 Forschungszentrum Jülich GmbH +# SPDX-License-Identifier: MIT + +import pytest +import json +from sqlalchemy import insert +from toardb.timeseries.models import Timeseries, timeseries_timeseries_roles_table +from toardb.timeseries.models_programme import TimeseriesProgramme +from toardb.timeseries.models_role import TimeseriesRole +from toardb.stationmeta.models import StationmetaCore, StationmetaGlobal, StationmetaChangelog +from toardb.stationmeta.schemas import get_geom_from_coordinates, Coordinates +from toardb.variables.models import Variable +from toardb.contacts.models import Person, Organisation, Contact +from toardb.auth_user.models import AuthUser + +# Required imports 'create_test_database' +from toardb.test_base import ( + client, + get_test_db, + create_test_database, + url, + get_test_engine, + test_db_session as db, +) + + +class TestApps: + def setup(self): + self.application_url = "/timeseries/" + + """Set up all the data before each test + If you want the setup only once (per test module), + the scope argument is not working in the expected way, as discussed here: + https://stackoverflow.com/questions/45817153/py-test-fixture-use-function-fixture-in-scope-fixture + """ + + @pytest.fixture(autouse=True) + def setup_db_data(self, db): + _db_conn = get_test_engine() + # id_seq will not be reset automatically between tests! 
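+        # (presumably because the fixture JSON files below and the expected responses reference
+        #  fixed ids such as station ids 2/3 and auth_user id 1, every relevant id sequence is
+        #  restarted explicitly so the generated primary keys stay deterministic between test runs)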
+ fake_conn = _db_conn.raw_connection() + fake_cur = fake_conn.cursor() + fake_cur.execute("ALTER SEQUENCE auth_user_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE variables_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_core_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_global_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_roles_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_annotations_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_aux_doc_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_aux_image_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE stationmeta_aux_url_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE persons_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE organisations_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE contacts_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE timeseries_annotations_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE timeseries_roles_id_seq RESTART WITH 3") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE timeseries_programmes_id_seq RESTART WITH 1") + fake_conn.commit() + fake_cur.execute("ALTER SEQUENCE timeseries_id_seq RESTART WITH 1") + fake_conn.commit() + infilename = "tests/fixtures/auth_user/auth.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_auth_user = AuthUser(**entry) + db.add(new_auth_user) + db.commit() + db.refresh(new_auth_user) + infilename = "tests/fixtures/contacts/persons.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_person = Person(**entry) + db.add(new_person) + db.commit() + db.refresh(new_person) + infilename = "tests/fixtures/contacts/organisations.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_organisation = Organisation(**entry) + db.add(new_organisation) + db.commit() + db.refresh(new_organisation) + infilename = "tests/fixtures/contacts/contacts.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_contact = Contact(**entry) + db.add(new_contact) + db.commit() + db.refresh(new_contact) + infilename = "tests/fixtures/variables/variables.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_variable = Variable(**entry) + db.add(new_variable) + db.commit() + db.refresh(new_variable) + infilename = "tests/fixtures/stationmeta/stationmeta_core.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_core = StationmetaCore(**entry) + # there's a mismatch with coordinates --> how to automatically switch back and forth?! 
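+                # (note: the fixture stores coordinates as a plain dict {lat, lng, alt}, while the
+                #  database column is a PostGIS geometry, so the value is converted with
+                #  get_geom_from_coordinates before the insert)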
+ tmp_coordinates = new_stationmeta_core.coordinates + new_stationmeta_core.coordinates = get_geom_from_coordinates( + Coordinates(**new_stationmeta_core.coordinates) + ) + db.add(new_stationmeta_core) + db.commit() + db.refresh(new_stationmeta_core) + infilename = "tests/fixtures/stationmeta/stationmeta_changelog.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_changelog = StationmetaChangelog(**entry) + db.add(new_stationmeta_changelog) + db.commit() + db.refresh(new_stationmeta_changelog) + infilename = "tests/fixtures/stationmeta/stationmeta_global.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_global = StationmetaGlobal(**entry) + db.add(new_stationmeta_global) + db.commit() + db.refresh(new_stationmeta_global) + infilename = "tests/fixtures/timeseries/timeseries_programmes.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries_programme = TimeseriesProgramme(**entry) + db.add(new_timeseries_programme) + db.commit() + db.refresh(new_timeseries_programme) + infilename = "tests/fixtures/timeseries/timeseries.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries = Timeseries(**entry) + db.add(new_timeseries) + db.commit() + db.refresh(new_timeseries) + infilename = "tests/fixtures/timeseries/timeseries_roles.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries_role = TimeseriesRole(**entry) + db.add(new_timeseries_role) + db.commit() + db.refresh(new_timeseries_role) + infilename = "tests/fixtures/timeseries/timeseries_timeseries_roles.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + db.execute( + insert(timeseries_timeseries_roles_table).values( + timeseries_id=entry["timeseries_id"], role_id=entry["role_id"] + ) + ) + db.execute("COMMIT") + + def test_search_base(self, client, db): + response = client.get("/search/a?") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. 
Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, + }, + } + ], + }, + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": { + "original_units": {"since_19740101000000": "nmol/mol"}, + "measurement_method": "uv_abs", + "absorption_cross_section": "Hearn 1961", + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "doi": "", + "coverage": -1.0, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"lat": 36.256, "lng": 117.106, "alt": 1534.0}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "10 
(SAF Sub Saharan/sub Sahel Africa)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + }, + ] + assert response.json() == expected_resp + + def test_search_single(self, client, db): + response = client.get("/search/a?id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_origin_type": "measurement", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "coverage": -1.0, + "data_origin": "instrument", + "sampling_height": 7.0, + "provider_version": "N/A", + "doi": "", + "additional_metadata": { + "absorption_cross_section": "Hearn 1961", + "measurement_method": "uv_abs", + "original_units": {"since_19740101000000": "nmol/mol"}, + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + 
"chemical_formula": "O3", + "id": 5, + }, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "climatic_zone_year2016": "6 (warm temperate dry)", + "distance_to_major_road_year2020": -999.0, + "dominant_ecoregion_year2017": "-1 (undefined)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "ecoregion_description_25km_year2017": "", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "landcover_description_25km_year2012": "", + "max_stable_nightlights_25km_year1992": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_population_density_25km_year1990": -1.0, + "max_population_density_25km_year2015": -1.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year1990": -1.0, + "mean_population_density_5km_year2015": -1.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "mean_topography_srtm_alt_90m_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + } + ] + assert response.json() == expected_resp + + def test_search_plus(self, client, db): + response = client.get("/search/a?id=1+id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + 
"climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, + }, + } + ], + }, + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": { + "original_units": {"since_19740101000000": "nmol/mol"}, + "measurement_method": "uv_abs", + "absorption_cross_section": "Hearn 1961", + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "doi": "", + "coverage": -1.0, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"lat": 36.256, "lng": 117.106, "alt": 1534.0}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + 
"stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + }, + ] + assert response.json() == expected_resp + + def test_search_minus(self, client, db): + response = client.get("/search/a?id=2-id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [] + assert response.json() == expected_resp + + def test_search_distinct(self, client, db): + response = client.get("/search/a?id=1+id=1+id=1+id=1") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + 
"max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, + }, + } + ], + }] + assert response.json() == expected_resp + + def test_search_complex(self, client, db): + response = client.get("/search/a?id=1+id=2-id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": 
-999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, + }, + } + ], + }] + assert response.json() == expected_resp + + def test_inconsistent_fields(self, client, db): + response = client.get("/search/a?fields=id+fields=role") + expected_status_code = 400 + assert response.status_code == expected_status_code + + def test_consistent_fields(self, client, db): + response = client.get("/search/a?fields=id+fields=id") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [{"id": 1,}, {"id": 2}] + assert response.json() == expected_resp diff --git a/toardb/test_base.py b/toardb/test_base.py index 17125e880e85ae93890933a18c3f867e1ef64dd0..50084babfd8acdebcfb56dd5bf74cb982ce7050b 100644 --- a/toardb/test_base.py +++ b/toardb/test_base.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2021 Forschungszentrum Jülich GmbH # SPDX-License-Identifier: MIT -#taken from: https://github.com/tiangolo/fastapi/issues/831 +# taken from: https://github.com/tiangolo/fastapi/issues/831 import pytest from starlette.testclient import TestClient from sqlalchemy import create_engine @@ -15,15 +15,16 @@ from toardb.toardb import app from toardb.auth_user.models import AuthUser from toardb.utils.database import DATABASE_URL, get_db, get_engine from toardb.utils.utils import ( - get_admin_access_rights, - get_station_md_change_access_rights, - get_timeseries_md_change_access_rights, - get_data_change_access_rights + get_admin_access_rights, + get_station_md_change_access_rights, + 
get_timeseries_md_change_access_rights, + get_data_change_access_rights, ) -url = "postgresql://postgres:postgres@postgres:5432/postgres" -_db_conn = create_engine(url,pool_pre_ping=True, pool_size=32, max_overflow=128) -sess = sessionmaker(bind=_db_conn,autoflush=False,autocommit=False) +url = f"postgresql://swt_user:...@zam10131.zam.kfa-juelich.de:5433/toardb_test" +_db_conn = create_engine(url, pool_pre_ping=True, pool_size=32, max_overflow=128) +sess = sessionmaker(bind=_db_conn, autoflush=False, autocommit=False) + def get_test_engine() -> Engine: assert _db_conn is not None @@ -40,25 +41,29 @@ def get_test_db(): async def override_dependency(request: Request): db = next(get_test_db()) - email = request.headers.get('email') + email = request.headers.get("email") db_user = db.query(AuthUser).filter(AuthUser.email == email).first() # status_code will be taken from the AAI (here: faked) status_code = 401 if db_user: # status_code will be taken from the AAI (here: faked) status_code = 200 - access_dict = { "status_code": status_code, - "user_name": "Sabine Schröder", - "user_email": email, - "auth_user_id": db_user.id } + access_dict = { + "status_code": status_code, + "user_name": "Sabine Schröder", + "user_email": email, + "auth_user_id": db_user.id, + } else: # the user needs to be added to the database! # (maybe users already have the credentials (in the AAI), # but they also need a permanent auth_user_id related to the TOAR database) - access_dict = { "status_code": status_code, - "user_name": "Something from AAI", - "user_email": email, - "auth_user_id": -1 } + access_dict = { + "status_code": status_code, + "user_name": "Something from AAI", + "user_email": email, + "auth_user_id": -1, + } return access_dict @@ -75,24 +80,26 @@ def create_test_database(): We use the `sqlalchemy_utils` package here for a few helpers in consistently creating and dropping the database. """ -# if database_exists(url): -# drop_database(url) -# create_database(url) # Create the test database. + # if database_exists(url): + # drop_database(url) + # create_database(url) # Create the test database. #'create_all' does not work (because of undefined 'Geometry')! - #declare PostGIS extension! (and toar_controlled_vocabulary) -# fake_conn = _db_conn.raw_connection() -# fake_cur = fake_conn.cursor() -# fake_cur.execute("CREATE EXTENSION IF NOT EXISTS postgis") -# fake_conn.commit() -# fake_cur.execute("CREATE EXTENSION IF NOT EXISTS toar_controlled_vocabulary") -# fake_conn.commit() -# fake_cur.execute("SET TIMEZONE='UTC'") -# fake_conn.commit() + # declare PostGIS extension! (and toar_controlled_vocabulary) + # fake_conn = _db_conn.raw_connection() + # fake_cur = fake_conn.cursor() + # fake_cur.execute("CREATE EXTENSION IF NOT EXISTS postgis") + # fake_conn.commit() + # fake_cur.execute("CREATE EXTENSION IF NOT EXISTS toar_controlled_vocabulary") + # fake_conn.commit() + # fake_cur.execute("SET TIMEZONE='UTC'") + # fake_conn.commit() Base.metadata.create_all(_db_conn) # Create the tables. - #try with the basics + # try with the basics app.dependency_overrides[get_db] = get_test_db # Mock the Database Dependency - app.dependency_overrides[get_engine] = get_test_engine # Mock the Database Dependency + app.dependency_overrides[get_engine] = get_test_engine # Mock the Database Dependency yield # Run the tests. + + # drop_database(url) # Drop the test database. 
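+    # (the create/drop calls above stay commented out because these tests run against a persistent
+    #  test database in which the PostGIS and toar_controlled_vocabulary extensions are assumed to
+    #  exist already; test_db_session below only deletes the table contents after each test)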
@@ -100,11 +107,11 @@ def test_db_session(): """Returns an sqlalchemy session, and after the test tears down everything properly.""" - session = sessionmaker(bind=_db_conn,autoflush=False,autocommit=False)() + session = sessionmaker(bind=_db_conn, autoflush=False, autocommit=False)() yield session # Drop all data after each test for tbl in reversed(Base.metadata.sorted_tables): - # otherwiese all tables from "toar_controlled_vocabulary" will get lost! + # otherwise all tables from "toar_controlled_vocabulary" will get lost! if not tbl.name.endswith("_vocabulary"): _db_conn.execute(tbl.delete()) _db_conn.execute("DELETE FROM staging.data;") diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py index cb732cd8aefa3d6bdca5e1847d47c2732ebd51b0..3449b2d2526915266c0bab0b43bcb88fb46e42cb 100644 --- a/toardb/timeseries/crud.py +++ b/toardb/timeseries/crud.py @@ -34,8 +34,7 @@ import toardb def clean_additional_metadata(ad_met_dict): # all changes are permanent! if not isinstance(ad_met_dict,dict): - tmp = ad_met_dict.replace('\\"','"') - tmp = tmp.replace('"','\\"') + tmp = ad_met_dict.replace('"','\\"') return tmp.replace("'",'"') # there is a mismatch with additional_metadata additional_metadata = ad_met_dict @@ -44,9 +43,8 @@ def clean_additional_metadata(ad_met_dict): for key2, value2 in value.items(): if isinstance(value2,str): additional_metadata[key][key2] = value2.replace("'","$apostroph$") - else: - if isinstance(value,str): - additional_metadata[key] = value.replace("'","$apostroph$") + elif isinstance(value,str): + additional_metadata[key] = value.replace("'","$apostroph$") additional_metadata = str(additional_metadata).replace('"','\\"') additional_metadata = str(additional_metadata).replace("'",'"') additional_metadata = str(additional_metadata).replace("$apostroph$","'") @@ -140,183 +138,181 @@ def get_citation(db: Session, timeseries_id: int, datetime: dt.datetime = None): license_txt = "This data is published under a Creative Commons Attribution 4.0 International (CC BY 4.0). 
https://creativecommons.org/licenses/by/4.0/" return {"attribution": attribution, "citation": citation, "license": license_txt} -def adapt_db_object(fields, fields1, db_object_immut): - db_object = dict(zip(fields1.split(','),db_object_immut)) - # there is a mismatch with coordinates and additional_metadata - try: - db_object['coordinates'] = get_coordinates_from_string(db_object['coordinates']) - except: - pass - try: - db_object['additional_metadata'] = clean_additional_metadata(db_object['additional_metadata']) - except: - pass - try: - station_id = { "id": db_object['station_id'] } - db_object['station'] = station_id - del db_object['station_id'] - except: - pass - try: - variable_id = { "id": db_object['variable_id'] } - db_object['variable'] = variable_id - del db_object['variable_id'] - except: - pass - if "changelog" in fields: - db_object['changelog'] = get_timeseries_changelog(db, db_object['id']) - if "station_changelog" in fields: - try: - db_object['station_changelog'] = get_stationmeta_changelog(db, db_object['station_id']) - except: +def adapt_db_object(db_object, db, fields=False, lconstr_roles=False): + if fields: + db_object = dict(zip((field for field in fields if field not in {"station_changelog", "changelog"}), db_object)) + + # there is a mismatch with coordinates and additional_metadata + if "coordinates" in db_object: + db_object["coordinates"] = get_coordinates_from_string(db_object["coordinates"]) + + if "additional_metadata" in db_object: + db_object["additional_metadata"] = clean_additional_metadata(db_object["additional_metadata"]) + + if "station_id" in db_object: + station_id = {"id": db_object["station_id"]} + db_object["station"] = station_id + del db_object["station_id"] + + if "variable_id" in db_object: + variable_id = {"id": db_object["variable_id"]} + db_object["variable"] = variable_id + del db_object["variable_id"] + + if "changelog" in db_object: + db_object["changelog"] = get_timeseries_changelog(db, db_object["id"]) + + if "station_changelog" in db_object: + try: + db_object["station_changelog"] = get_stationmeta_changelog(db, db_object["station_id"]) + except Exception: + pass + + if lconstr_roles: + # example, how to put the roles explicitly (not needed at the moment) + # organisation = get_contact(db, contact_id=39) + # roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} + roles_atts = {key: value for key, value in db_object.items() if key in roles_params} + db_object = {key: value for key, value in db_object.items() if key not in roles_params} + db_object["roles"] = TimeseriesRoleFields(**roles_atts) + else: + if isinstance(db_object.station.coordinates, (WKBElement, WKTElement)): + db_object.station.coordinates = get_coordinates_from_geom(db_object.station.coordinates) + # there is a mismatch with additional_metadata + if isinstance(db_object.station.additional_metadata, dict): + db_object.station.additional_metadata = json.dumps(db_object.station.additional_metadata) + db_object.additional_metadata = clean_additional_metadata(db_object.additional_metadata) + + + #Internall use + try: + del db_object.data_license_accepted + except AttributeError: pass - return db_object + try: + del db_object.dataset_approved_by_provider + except AttributeError: + pass -def search_all(db, path_params, query_params, lts=False): - endpoint = "timeseries" if lts else "search" - try: + return db_object + +class TimeseriesQuery: + def __init__(self, sign, query, fields, lconstr_roles): + self.sign = sign + self.query = query + self.fields = 
fields + self.lconstr_roles = lconstr_roles + + @staticmethod + def aggregate(querys): + aggregated_query = next(querys) + for query in querys: + if aggregated_query.fields != query.fields: + raise ValueError("Fields of subquerys are diffrent") + + aggregated_query = TimeseriesQuery( + True, + aggregated_query.query.union(query.query) + if query.sign + else aggregated_query.query.except_(query.query), + aggregated_query.fields, + aggregated_query.lconstr_roles or query.lconstr_roles, + ) + return aggregated_query + + @staticmethod + def from_query_params(query_params, db, endpoint="timeseries", sign=True): limit, offset, fields, format, filters = create_filter(query_params, endpoint) t_filter = filters["t_filter"] t_r_filter = filters["t_r_filter"] s_c_filter = filters["s_c_filter"] s_g_filter = filters["s_g_filter"] - except (KeyError, ValueError) as e: - status_code=400 - return JSONResponse(status_code=status_code, content=str(e)) - lnot_role = (t_r_filter.find("NOT") > 0) - if fields: - # sort input fields to be sure to replace station_changelog before changelog - fields = ",".join(sorted(fields.split(','),reverse=True)) - lconstr_roles = any(field in roles_params for field in fields.split(',')) - if fields.find("role") >= 0: - fields = fields.replace("role,","") - fields = fields.replace(",role","") - fields = fields.replace("role","") - if fields: - fields += ',' - fields += ','.join(roles_params) - lconstr_glob = True - fields1 = fields - fields2 = [] - for field in fields.split(','): - if field == "id": - fields2.append("timeseries.id") - elif field == "order": - fields2.append("timeseries.order") - elif field == "additional_metadata": - fields2.append("timeseries.additional_metadata") - elif field == "station_additional_metadata": - fields2.append("stationmeta_core.additional_metadata") - elif field == "station_id": - fields2.append("stationmeta_core.id") - elif field == "variable_id": - fields2.append("variables.id") - elif field in ("station_changelog", "changelog"): - fields1 = fields1.replace(field,"") - fields1 = fields1.replace(",,",",") - if fields1[-1] == ',': - fields1 = fields1[:-1] - elif field == "name": - fields2.append("stationmeta_core.name") - elif field == "coordinates": - fields2.append("ST_AsText(coordinates)") - elif field == "station_country": - fields2.append("stationmeta_core.country") - else: - fields2.append(field) - # ordering is needed (because of limit/offset-option) - # --> Timeseries.id is added to columns while doing distinct! (see: https://docs.sqlalchemy.org/en/14/changelog/migration_20.html#migration-20-query-distinct) - # (duplicates are being created, which means that limit/offset in the query itself is useless!) - # hot fix to speed up the search - if limit: - total_cnt = offset + limit - db_objects_l = db.query(*map(text,fields2)).select_from(models.Timeseries).filter(text(t_filter)).distinct(). \ - join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \ - join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \ - filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \ - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \ - (models.TimeseriesRole.contact_id == Contact.id), \ - (Contact.organisation_id == Organisation.id), \ - (Contact.person_id == Person.id), \ - (Variable.id == models.Timeseries.variable_id), \ - text(t_r_filter))). 
\ - order_by(models.Timeseries.id).all() + + if fields: + # If only certain fields are selected the return type is not a orm object anymore but a dict + # sort input fields to be sure to replace station_changelog before changelog + fields = sorted(fields.split(","), reverse=True) + if "role" in fields: + fields.remove("role") + fields.extend(roles_params) + + field_map = { + "id": models.Timeseries.id, + "order": models.Timeseries.order, + "additional_metadata": models.Timeseries.additional_metadata, + "station_id": StationmetaCore.id, + "variable_id": Variable.id, + "name": StationmetaCore.name, + "coordinates": func.ST_AsText(StationmetaCore.coordinates), + "station_country": StationmetaCore.country, + } + + query_select = [field_map.get(field, text(field)) for field in fields] + else: - db_objects_l = db.query(*map(text,fields2)).select_from(models.Timeseries).filter(text(t_filter)).distinct(). \ - join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \ - join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \ - filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \ - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \ - (models.TimeseriesRole.contact_id == Contact.id), \ - (Contact.organisation_id == Organisation.id), \ - (Contact.person_id == Person.id), \ - (Variable.id == models.Timeseries.variable_id), \ - text(t_r_filter))). \ - limit(limit).offset(offset).all() - total_cnt = len(db_objects_l) - db_objects = [] - count = 0 - for db_object_immut in db_objects_l: - db_object = adapt_db_object(fields, fields1, db_object_immut) - if lconstr_roles: - roles_atts = {} - db_object_cp = db_object.copy() - for key in db_object: - if key in roles_params: - roles_atts[key] = db_object[key] - del db_object_cp[key] - db_object = db_object_cp -# example, how to put the roles explicitly (not needed at the moment) -# organisation = get_contact(db, contact_id=39) -# roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} - db_object["roles"] = TimeseriesRoleFields(**roles_atts) - # since Timeseries.id was added, there might be duplicates in the shrinked record (only requested fields) - # unfortunately, ordering and manually removing duplicates slows down the request very much! - if (not limit) or (count <= total_cnt and db_object not in db_objects): - db_objects.append(db_object) - count += 1 + query_select = [models.Timeseries] + + query = ( + db.query(*query_select) + .select_from(models.Timeseries) + .distinct() + .filter(text(t_filter), text(s_c_filter), text(s_g_filter), text(t_r_filter)) + .join(StationmetaCore) + .join(StationmetaGlobal) + .join(timeseries_timeseries_roles_table) + .join(models.TimeseriesRole) + .join(Contact) + .join(Organisation) + .join(Person) + .join(Variable) + .execution_options(stream_results=True) + .order_by(models.Timeseries.id) + ) + + # Apply NOT filter with role logic + if "NOT" in t_r_filter: + role_ids = get_role_id_from_string(db, query_params.get("has_role")[1:]) + query = query.filter( + ~models.Timeseries.id.in_( + select(timeseries_timeseries_roles_table.c.timeseries_id).where( + timeseries_timeseries_roles_table.c.role_id.in_(role_ids) + ) + ) + ) + if limit: - db_objects = db_objects[offset:offset+limit] - else: - db_objects = db.query(models.Timeseries).filter(text(t_filter)).distinct(). 
\ - join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \ - join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \ - filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \ - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \ - (models.TimeseriesRole.contact_id == Contact.id), \ - (Contact.organisation_id == Organisation.id), \ - (Contact.person_id == Person.id), \ - text(t_r_filter))). \ - order_by(models.Timeseries.id). \ - limit(limit).offset(offset).all() - if lnot_role: - role_string = query_params.get("has_role")[1:] - role_ids = get_role_id_from_string(db, role_string) - db_objects_tmp = [] - for db_object in db_objects: - lappend = True - roles = get_timeseries_roles(db,db_object.id) - for role in roles: - if role[1] in role_ids: - lappend = False - if lappend: - db_objects_tmp.append(db_object) - db_objects = db_objects_tmp - - for db_object in db_objects: - # there is also a mismatch with coordinates and additional_metadata from station object - if isinstance(db_object.station.coordinates, (WKBElement, WKTElement)): - db_object.station.coordinates = get_coordinates_from_geom(db_object.station.coordinates) - # there is a mismatch with additional_metadata - if isinstance(db_object.station.additional_metadata, dict): - db_object.station.additional_metadata = json.dumps(db_object.station.additional_metadata) - db_object.additional_metadata = clean_additional_metadata(db_object.additional_metadata) - # only for internal use! - del db_object.data_license_accepted - del db_object.dataset_approved_by_provider - return db_objects + query = query.limit(limit).offset(offset) + + return TimeseriesQuery(sign, query, fields, lconstr_roles=any(field in roles_params for field in fields)) + def adapt_objects(self, db): + return [adapt_db_object(db_object, db, self.fields, self.lconstr_roles) for db_object in self.query] + + +def search_all(db, path_params, query_params, lts=False): + endpoint = "timeseries" if lts else "search" + + try: + return TimeseriesQuery.from_query_params(query_params, db, endpoint).adapt_objects(db) + + except (KeyError, ValueError) as e: + status_code = 400 + return JSONResponse(status_code=status_code, content=str(e)) + + +def search_all_aggreagtion(db, path_params, signs, query_params_list, lts=False): + endpoint = "timeseries" if lts else "search" + + try: + return TimeseriesQuery.aggregate( + TimeseriesQuery.from_query_params(query_params, db, endpoint, sign) + for sign, query_params in zip(signs, query_params_list) + ).adapt_objects(db) + + except (KeyError, ValueError) as e: + status_code = 400 + return JSONResponse(status_code=status_code, content=str(e)) def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_id: int, resource_provider: str = None, @@ -345,7 +341,7 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ .filter(models.Timeseries.variable_id == variable_id).all() # if already not found: return None # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None @@ -357,35 +353,31 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ iter_obj = ret_db_object.copy() counter=0 for db_object in iter_obj: - found = False for role in db_object.roles: # resource provider is always an organisation! 
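+                    # (role.contact_id only references a contact record; get_contact resolves it here
+                    #  so the organisation's longname can be compared with the requested resource_provider)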
organisation = get_contact(db, contact_id=role.contact_id) if ((role_num == role.role) and (organisation.longname == resource_provider)): - found = True - if not found: - ret_db_object.pop(counter) - else: - counter = counter + 1 + ret_db_object.pop(counter) + counter -= 1 + break + counter += 1 else: # time series that do not have a resource_provider are not identical to those who do not! role_num = get_value_from_str(toardb.toardb.RC_vocabulary,'ResourceProvider') iter_obj = ret_db_object.copy() counter=0 for db_object in iter_obj: - found = False for role in db_object.roles: if (role_num == role.role): - found = True - if found: - ret_db_object.pop(counter) - else: - counter = counter + 1 + counter -= 1 + ret_db_object.pop(counter) + break + counter = counter + 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.4 @@ -396,11 +388,11 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.sampling_frequency == sampling_frequency): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.5 @@ -411,11 +403,11 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.provider_version == provider_version): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.6 @@ -427,11 +419,11 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.data_origin_type == data_origin_type_num): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.7 @@ -443,11 +435,11 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.data_origin == data_origin_num): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.8 @@ -458,11 +450,11 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.sampling_height == sampling_height): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # if already only none object --> return # if only one single object is found, it has to be checked whether all criterions are fullfilled - if len(ret_db_object) == 0: + if not ret_db_object: return None # filter for criterion 14.9 @@ -473,27 +465,26 @@ def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_ if not (db_object.label == label): ret_db_object.pop(counter) else: - counter = counter + 1 + counter += 1 # check that 
only one object is left!!! # adapt mismatches for return value - if len(ret_db_object) == 0: + if not ret_db_object: ret_db_object = None + elif len(ret_db_object) == 1: + ret_db_object = ret_db_object[0] + # there is a mismatch with additional_metadata + ret_db_object.additional_metadata = clean_additional_metadata(ret_db_object.additional_metadata) + # there is also a mismatch with coordinates and additional_metadata from station object + if isinstance(ret_db_object.station.coordinates, (WKBElement, WKTElement)): + ret_db_object.station.coordinates = get_coordinates_from_geom(ret_db_object.station.coordinates) + # there is a mismatch with additional_metadata + if isinstance(ret_db_object.station.additional_metadata, dict): + ret_db_object.station.additional_metadata = json.dumps(ret_db_object.station.additional_metadata) else: - if len(ret_db_object) == 1: - ret_db_object = ret_db_object[0] - # there is a mismatch with additional_metadata - ret_db_object.additional_metadata = clean_additional_metadata(ret_db_object.additional_metadata) - # there is also a mismatch with coordinates and additional_metadata from station object - if isinstance(ret_db_object.station.coordinates, (WKBElement, WKTElement)): - ret_db_object.station.coordinates = get_coordinates_from_geom(ret_db_object.station.coordinates) - # there is a mismatch with additional_metadata - if isinstance(ret_db_object.station.additional_metadata, dict): - ret_db_object.station.additional_metadata = json.dumps(ret_db_object.station.additional_metadata) - else: - status_code=405 - message=f"Timeseries not unique, more criteria need to be defined." - return JSONResponse(status_code=status_code, content=message) + status_code=405 + message=f"Timeseries not unique, more criteria need to be defined." + return JSONResponse(status_code=status_code, content=message) return ret_db_object diff --git a/toardb/timeseries/timeseries.py b/toardb/timeseries/timeseries.py index 1a40821059b568e1734c3082cbc36497882e79c8..84630ee4079b426283f1585131489f2b9ff4b442 100644 --- a/toardb/timeseries/timeseries.py +++ b/toardb/timeseries/timeseries.py @@ -5,6 +5,7 @@ Simple test API for timeseries management """ +import re import datetime as dt from typing import List, Union from fastapi import APIRouter, Depends, HTTPException, Body, Request, File, UploadFile @@ -31,19 +32,34 @@ from toardb.utils.utils import ( router = APIRouter() -# CRUD: create, retrieve, update, delete -# 1. retrieve +def get_query_params(raw_query_string): + updated_query_string = raw_query_string.replace('+', '%2B') + updated_params = dict(urllib.parse.parse_qsl(updated_query_string)) + return QueryParams(updated_params) +# CRUD: create, retrieve, update, delete +# 1. 
 #get all entries of table timeseries
 @router.get('/search/', response_model=List[schemas.Timeseries] | List[schemas.TimeseriesFields], response_model_exclude_none=True, response_model_exclude_unset=True)
 def search_all_timeseries(request: Request, db: Session = Depends(get_db)):
-    raw_query_string = request.url.query
-    updated_query_string = raw_query_string.replace('+', '%2B')
-    updated_params = dict(urllib.parse.parse_qsl(updated_query_string))
-    updated_query_params = QueryParams(updated_params)
+    updated_query_params = get_query_params(request.url.query)
     return crud.search_all(db, path_params=request.path_params, query_params=updated_query_params)

+# Does not make sense when different sub-requests select only certain (different) fields; that would need a different syntax.
+# The order in which the concatenated statements are applied is also unclear.
+@router.get('/search/a', response_model=List[schemas.Timeseries] | List[schemas.TimeseriesFields], response_model_exclude_none=True, response_model_exclude_unset=True)
+def search_all_timeseries_aggregations(request: Request, db: Session = Depends(get_db)):
+    urls = re.split(r"(?=[+-]\D)", "+" + request.url.query)[1:]
+    if urls:
+        signs = [url.startswith("+") for url in urls]
+        query_params = [get_query_params(url[1:]) for url in urls]
+        return crud.search_all_aggreagtion(
+            db, path_params=request.path_params, signs=signs, query_params_list=query_params
+        )
+    else:
+        return search_all_timeseries(request, db)
+
 #get all entries of table timeseries
 @router.get('/timeseries/', response_model=List[schemas.Timeseries], response_model_exclude_none=True)
diff --git a/toardb/utils/database.py b/toardb/utils/database.py
index 043780b0d10f89feb88b6ca1b719dd6b9f2b6530..2459c094e73f7f3b12d55ab4eede594a4aac952d 100644
--- a/toardb/utils/database.py
+++ b/toardb/utils/database.py
@@ -8,18 +8,20 @@ Database connection
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker

-DATABASE_URL = "postgresql://postgres:postgres@postgres:5432/postgres"
+DATABASE_URL = f"postgresql://swt_user:...@zam10131.zam.kfa-juelich.de:5433/toardb_v2_dev"

 engine = create_engine(DATABASE_URL)
 ToarDbSession = sessionmaker(autocommit=False, autoflush=False, bind=engine)

+
 # Dependency
-def get_engine(): # pragma: no cover
+def get_engine():  # pragma: no cover
     assert engine is not None
     return engine

+
 # Dependency
-def get_db(): # pragma: no cover
+def get_db():  # pragma: no cover
     try:
         db = ToarDbSession()
         yield db
diff --git a/toardb/utils/utils.py b/toardb/utils/utils.py
index b41ab05628822d61102b012bf8e15218d1aae5a6..3b557d86adbccdd159b2271af8a58f1e9d068350 100644
--- a/toardb/utils/utils.py
+++ b/toardb/utils/utils.py
@@ -171,25 +171,24 @@ def create_filter(query_params, endpoint):
         raise ValueError(f"Wrong value for limit given: {limit}")

     # fields and format are no filter options
-    fields = query_params.get("fields", None)
+    fields = query_params.get("fields", "")
     format = query_params.get("format", 'json')

-    allowed_params = allrel_params.copy()
-    allowed_params |= profiling_params
-    if endpoint in {'stationmeta'}:
-        allowed_params |= gis_params | core_params | global_params
-    elif endpoint in {'timeseries'}:
-        allowed_params |= timeseries_params | roles_params
-    elif endpoint in {'search'}:
-        allowed_params |= gis_params | core_params | global_params | timeseries_params | roles_params | ambig_params
-    elif endpoint in {'data'}:
-        allowed_params |= data_params | profiling_params
-    elif endpoint in {'variables'}:
-        allowed_params |= variable_params
-    elif endpoint in {'persons'}:
-        allowed_params |= person_params
-    else:
+    allowed_params = allrel_params | profiling_params
+    endpoint_params_map = {
+        'stationmeta': gis_params | core_params | global_params,
+        'timeseries': timeseries_params | roles_params,
+        'search': gis_params | core_params | global_params | timeseries_params | roles_params | ambig_params,
+        'data': data_params | profiling_params,
+        'variables': variable_params,
+        'persons': person_params,
+    }
+    try:
+        allowed_params |= endpoint_params_map[endpoint]
+    except KeyError:
         raise ValueError(f"Wrong endpoint given: {endpoint}")
+
+
     if fields:
         for field in fields.split(','):
             if field not in allowed_params:
@@ -347,14 +346,15 @@ def create_filter(query_params, endpoint):
     d_filter = " AND ".join(d_filter).replace('[','(').replace(']',')')
     v_filter = " AND ".join(v_filter).replace('[','(').replace(']',')')
     p_filter = " AND ".join(p_filter).replace('[','(').replace(']',')')

-    filters = {}
-    filters["t_filter"] = t_filter
-    filters["t_r_filter"] = t_r_filter
-    filters["s_c_filter"] = s_c_filter
-    filters["s_g_filter"] = s_g_filter
-    filters["d_filter"] = d_filter
-    filters["v_filter"] = v_filter
-    filters["p_filter"] = p_filter
+    filters = {
+        "t_filter": t_filter,
+        "t_r_filter": t_r_filter,
+        "s_c_filter": s_c_filter,
+        "s_g_filter": s_g_filter,
+        "d_filter": d_filter,
+        "v_filter": v_filter,
+        "p_filter": p_filter,
+    }

     return limit, offset, fields, format, filters
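For illustration, a minimal standalone sketch of the counter/pop bookkeeping that the criterion filters in `get_timeseries_by_unique_constraints` rely on. The list contents and the "matches the criterion" set below are made up; only the pattern (iterate over a copy, pop non-matching items from the original list, compensate the unconditional increment after a pop) corresponds to the code in the patch.

```
# Illustration only (not part of the patch): filtering a list in place while
# iterating over a copy of it. `counter` always points at the current
# candidate inside the shrinking list.
candidates = ["ts1", "ts2", "ts3", "ts4"]   # stand-ins for timeseries objects
matches_criterion = {"ts2", "ts4"}          # stand-in for e.g. "sampling_height matches"

counter = 0
for item in candidates.copy():
    if item not in matches_criterion:
        candidates.pop(counter)   # drop the current candidate ...
        counter -= 1              # ... and undo the unconditional increment below
    counter += 1

print(candidates)  # ['ts2', 'ts4']
```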
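And a minimal sketch of how the new `/search/a` endpoint splits a combined query string into signed sub-queries. The query values are invented for illustration, and the `QueryParams` construction simply mirrors the `get_query_params()` helper added in `toardb/timeseries/timeseries.py`; the imports are only what this standalone example needs.

```
# Illustration only (not part of the patch): splitting "+..." / "-..." query
# parts the same way search_all_timeseries_aggregations does.
import re
import urllib.parse
from starlette.datastructures import QueryParams

raw = "variable_id=5&label=foo-variable_id=7"        # hypothetical combined query string
parts = re.split(r"(?=[+-]\D)", "+" + raw)[1:]        # keep the leading sign with each part
signs = [part.startswith("+") for part in parts]      # True for '+' parts, False for '-' parts
query_params = [
    QueryParams(dict(urllib.parse.parse_qsl(part[1:].replace("+", "%2B"))))
    for part in parts
]

print(signs)                           # [True, False]
print([str(q) for q in query_params])  # ['variable_id=5&label=foo', 'variable_id=7']
```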