diff --git a/tests/test_search_aggregations.py b/tests/test_search_aggregations.py
new file mode 100644
index 0000000000000000000000000000000000000000..152ea9e965d95d05eaa1cbf088b5ea9038b7dbbb
--- /dev/null
+++ b/tests/test_search_aggregations.py
@@ -0,0 +1,484 @@
+# SPDX-FileCopyrightText: 2021 Forschungszentrum Jülich GmbH
+# SPDX-License-Identifier: MIT
+
+import pytest
+import json
+from sqlalchemy import insert
+from toardb.timeseries.models import Timeseries, timeseries_timeseries_roles_table
+from toardb.timeseries.models_programme import TimeseriesProgramme
+from toardb.timeseries.models_role import TimeseriesRole
+from toardb.stationmeta.models import StationmetaCore, StationmetaGlobal, StationmetaChangelog
+from toardb.stationmeta.schemas import get_geom_from_coordinates, Coordinates
+from toardb.variables.models import Variable
+from toardb.contacts.models import Person, Organisation, Contact
+from toardb.auth_user.models import AuthUser
+
+# required imports: these pytest fixtures (e.g. 'create_test_database') must be in scope
+from toardb.test_base import (
+    client,
+    get_test_db,
+    create_test_database,
+    url,
+    get_test_engine,
+    test_db_session as db,
+)
+
+
+class TestApps:
+    def setup(self):
+        self.application_url = "/timeseries/"
+
+    @pytest.fixture(autouse=True)
+    def setup_db_data(self, db):
+        """Set up all the data before each test.
+
+        If you want the setup only once (per test module), note that the
+        scope argument does not work in the expected way, as discussed here:
+        https://stackoverflow.com/questions/45817153/py-test-fixture-use-function-fixture-in-scope-fixture
+        """
+        _db_conn = get_test_engine()
+        # id sequences will not be reset automatically between tests!
+        fake_conn = _db_conn.raw_connection()
+        fake_cur = fake_conn.cursor()
+        sequences = [
+            "auth_user_id_seq",
+            "variables_id_seq",
+            "stationmeta_core_id_seq",
+            "stationmeta_global_id_seq",
+            "stationmeta_roles_id_seq",
+            "stationmeta_annotations_id_seq",
+            "stationmeta_aux_doc_id_seq",
+            "stationmeta_aux_image_id_seq",
+            "stationmeta_aux_url_id_seq",
+            "persons_id_seq",
+            "organisations_id_seq",
+            "contacts_id_seq",
+            "timeseries_annotations_id_seq",
+            "timeseries_programmes_id_seq",
+            "timeseries_id_seq",
+        ]
+        for sequence in sequences:
+            fake_cur.execute(f"ALTER SEQUENCE {sequence} RESTART WITH 1")
+            fake_conn.commit()
+        fake_cur.execute("ALTER SEQUENCE timeseries_roles_id_seq RESTART WITH 3")
+        fake_conn.commit()
"tests/fixtures/contacts/persons.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_person = Person(**entry) + db.add(new_person) + db.commit() + db.refresh(new_person) + infilename = "tests/fixtures/contacts/organisations.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_organisation = Organisation(**entry) + db.add(new_organisation) + db.commit() + db.refresh(new_organisation) + infilename = "tests/fixtures/contacts/contacts.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_contact = Contact(**entry) + db.add(new_contact) + db.commit() + db.refresh(new_contact) + infilename = "tests/fixtures/variables/variables.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_variable = Variable(**entry) + db.add(new_variable) + db.commit() + db.refresh(new_variable) + infilename = "tests/fixtures/stationmeta/stationmeta_core.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_core = StationmetaCore(**entry) + # there's a mismatch with coordinates --> how to automatically switch back and forth?! + tmp_coordinates = new_stationmeta_core.coordinates + new_stationmeta_core.coordinates = get_geom_from_coordinates( + Coordinates(**new_stationmeta_core.coordinates) + ) + db.add(new_stationmeta_core) + db.commit() + db.refresh(new_stationmeta_core) + infilename = "tests/fixtures/stationmeta/stationmeta_changelog.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_changelog = StationmetaChangelog(**entry) + db.add(new_stationmeta_changelog) + db.commit() + db.refresh(new_stationmeta_changelog) + infilename = "tests/fixtures/stationmeta/stationmeta_global.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_stationmeta_global = StationmetaGlobal(**entry) + db.add(new_stationmeta_global) + db.commit() + db.refresh(new_stationmeta_global) + infilename = "tests/fixtures/timeseries/timeseries_programmes.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries_programme = TimeseriesProgramme(**entry) + db.add(new_timeseries_programme) + db.commit() + db.refresh(new_timeseries_programme) + infilename = "tests/fixtures/timeseries/timeseries.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries = Timeseries(**entry) + db.add(new_timeseries) + db.commit() + db.refresh(new_timeseries) + infilename = "tests/fixtures/timeseries/timeseries_roles.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + new_timeseries_role = TimeseriesRole(**entry) + db.add(new_timeseries_role) + db.commit() + db.refresh(new_timeseries_role) + infilename = "tests/fixtures/timeseries/timeseries_timeseries_roles.json" + with open(infilename) as f: + metajson = json.load(f) + for entry in metajson: + db.execute( + insert(timeseries_timeseries_roles_table).values( + timeseries_id=entry["timeseries_id"], role_id=entry["role_id"] + ) + ) + db.execute("COMMIT") + + def test_search_with_global_attributes(self, client, db): + response = client.get("/search/a?climatic_zone_year2016=WarmTemperateDry&htap_region_tier1_year2010=HTAPTier1SAF") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [{'id': 2, 'label': 'CMA', 'order': 1, + 'sampling_frequency': 'hourly', 
'aggregation': 'mean', 'data_origin_type': 'measurement', + 'data_start_date': '2003-09-07T15:30:00+00:00', 'data_end_date': '2016-12-31T14:30:00+00:00', 'coverage': -1.0, + 'data_origin': 'instrument', 'sampling_height': 7.0, + 'provider_version': 'N/A', + 'doi': '', + 'additional_metadata': {'absorption_cross_section': 'Hearn 1961', + 'measurement_method': 'uv_abs', + 'original_units': {'since_19740101000000': 'nmol/mol'}, + 'ebas_metadata_19740101000000_29y': {'Submitter': 'Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA', + 'Data level': '2', + 'Frameworks': 'GAW-WDCRG NOAA-ESRL', + 'Station code': 'XXX', + 'Station name': 'Secret' } }, + 'roles': [{'id': 1, 'role': 'resource provider', 'status': 'active', + 'contact': {'id': 5, 'organisation': {'id': 2, 'name': 'FZJ', 'longname': 'Forschungszentrum Jülich', + 'kind': 'research', 'city': 'Jülich', 'postcode': '52425', 'street_address': 'Wilhelm-Johnen-Straße', + 'country': 'Germany', 'homepage': 'https://www.fz-juelich.de', 'contact_url': 'mailto:toar-data@fz-juelich.de'}}}], + 'variable': {'name': 'o3', 'longname': 'ozone', 'displayname': 'Ozone', + 'cf_standardname': 'mole_fraction_of_ozone_in_air', 'units': 'nmol mol-1', + 'chemical_formula': 'O3', 'id': 5}, + 'station': {'id': 3, 'codes': ['China_test8'], 'name': 'Test_China', + 'coordinates': {'alt': 1534.0, 'lat': 36.256, 'lng': 117.106}, + 'coordinate_validation_status': 'not checked', + 'country': 'China', 'state': 'Shandong Sheng', + 'type': 'unknown', 'type_of_area': 'unknown', + 'timezone': 'Asia/Shanghai', 'additional_metadata': {}, + 'roles': [], 'annotations': [], + 'aux_images': [], 'aux_docs': [], 'aux_urls': [], + 'globalmeta': {'climatic_zone_year2016': '6 (warm temperate dry)', + 'distance_to_major_road_year2020': -999.0, + 'dominant_ecoregion_year2017': '-1 (undefined)', + 'dominant_landcover_year2012': '10 (Cropland, rainfed)', + 'ecoregion_description_25km_year2017': '', + 'htap_region_tier1_year2010': '10 (SAF Sub Saharan/sub Sahel Africa)', + 'landcover_description_25km_year2012': '', + 'max_stable_nightlights_25km_year1992': -999.0, + 'max_stable_nightlights_25km_year2013': -999.0, + 'max_population_density_25km_year1990': -1.0, + 'max_population_density_25km_year2015': -1.0, + 'max_topography_srtm_relative_alt_5km_year1994': -999.0, + 'mean_stable_nightlights_1km_year2013': -999.0, + 'mean_stable_nightlights_5km_year2013': -999.0, + 'mean_nox_emissions_10km_year2000': -999.0, + 'mean_nox_emissions_10km_year2015': -999.0, + 'mean_population_density_250m_year1990': -1.0, + 'mean_population_density_250m_year2015': -1.0, + 'mean_population_density_5km_year1990': -1.0, + 'mean_population_density_5km_year2015': -1.0, + 'mean_topography_srtm_alt_1km_year1994': -999.0, + 'mean_topography_srtm_alt_90m_year1994': -999.0, + 'min_topography_srtm_relative_alt_5km_year1994': -999.0, + 'stddev_topography_srtm_relative_alt_5km_year1994': -999.0, + 'toar1_category': 'unclassified'}, + 'changelog': [{'datetime': '2023-08-15T21:16:20.596545+00:00', + 'description': 'station created', + 'old_value': '', + 'new_value': '', + 'station_id': 3, + 'author_id': 1, + 'type_of_change': 'created' + }]}, + 'programme': {'id': 0, 'name': '', 'longname': '', 'homepage': '', 'description': ''}}] + assert response.json() == expected_resp + + def test_search_with_global_attributes2(self, client, db): + response = 
client.get("/search/a?climatic_zone_year2016=WarmTemperateDry&htap_region_tier1_year2010=HTAPTier1SAF+dominant_landcover_year2012=CroplandRainfed&dominant_ecoregion_year2017=Undefined") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_origin_type": "measurement", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "coverage": -1.0, + "data_origin": "instrument", + "sampling_height": 7.0, + "provider_version": "N/A", + "doi": "", + "additional_metadata": { + "absorption_cross_section": "Hearn 1961", + "measurement_method": "uv_abs", + "original_units": {"since_19740101000000": "nmol/mol"}, + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "climatic_zone_year2016": "6 (warm temperate dry)", + "distance_to_major_road_year2020": -999.0, + "dominant_ecoregion_year2017": "-1 (undefined)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "ecoregion_description_25km_year2017": "", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "landcover_description_25km_year2012": "", + "max_stable_nightlights_25km_year1992": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_population_density_25km_year1990": -1.0, + "max_population_density_25km_year2015": -1.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year1990": -1.0, + "mean_population_density_5km_year2015": -1.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "mean_topography_srtm_alt_90m_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": 
"2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + }, + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_origin_type": "measurement", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "coverage": -1.0, + "data_origin": "instrument", + "sampling_height": 7.0, + "provider_version": "N/A", + "doi": "", + "additional_metadata": { + "absorption_cross_section": "Hearn 1961", + "measurement_method": "uv_abs", + "original_units": {"since_19740101000000": "nmol/mol"}, + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "climatic_zone_year2016": "6 (warm temperate dry)", + "distance_to_major_road_year2020": -999.0, + "dominant_ecoregion_year2017": "-1 (undefined)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "ecoregion_description_25km_year2017": "", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "landcover_description_25km_year2012": "", + "max_stable_nightlights_25km_year1992": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_population_density_25km_year1990": -1.0, + "max_population_density_25km_year2015": -1.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year1990": -1.0, + "mean_population_density_5km_year2015": -1.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "mean_topography_srtm_alt_90m_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": 
"2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + }, + ] + assert response.json() == expected_resp \ No newline at end of file diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py index e8cda0b04866e69a95db1761b87bff95c26f85f8..b5bc4e5db2edcddf14772570f07b976d57d5b069 100644 --- a/toardb/timeseries/crud.py +++ b/toardb/timeseries/crud.py @@ -174,133 +174,119 @@ def adapt_db_object(fields, fields1, db_object_immut): def search_all(db, path_params, query_params, lts=False): endpoint = "timeseries" if lts else "search" - try: + try: limit, offset, fields, format, filters = create_filter(query_params, endpoint) t_filter = filters["t_filter"] t_r_filter = filters["t_r_filter"] s_c_filter = filters["s_c_filter"] s_g_filter = filters["s_g_filter"] except (KeyError, ValueError) as e: - status_code=400 + status_code = 400 return JSONResponse(status_code=status_code, content=str(e)) - lnot_role = (t_r_filter.find("NOT") > 0) - + lnot_role = t_r_filter.find("NOT") > 0 if fields: # sort input fields to be sure to replace station_changelog before changelog - fields = ",".join(sorted(fields.split(','),reverse=True)) - lconstr_roles = any(field in roles_params for field in fields.split(',')) - if fields.find("role") >= 0: - fields = fields.replace("role,","") - fields = fields.replace(",role","") - fields = fields.replace("role","") - if fields: - fields += ',' - fields += ','.join(roles_params) - lconstr_glob = True - fields1 = fields - fields2 = [] - for field in fields.split(','): - if field == "id": - fields2.append("timeseries.id") - elif field == "order": - fields2.append("timeseries.order") - elif field == "additional_metadata": - fields2.append("timeseries.additional_metadata") - elif field == "station_id": - fields2.append("stationmeta_core.id") - elif field == "variable_id": - fields2.append("variables.id") - elif field in ("station_changelog", "changelog"): - fields1 = fields1.replace(field,"") - fields1 = fields1.replace(",,",",") - if fields1[-1] == ',': - fields1 = fields1[:-1] - elif field == "name": - fields2.append("stationmeta_core.name") - elif field == "coordinates": - fields2.append("ST_AsText(coordinates)") - elif field == "station_country": - fields2.append("stationmeta_core.country") - else: - fields2.append(field) + + fields = sorted(fields.split(","), reverse=True) + + lconstr_roles = any(field in roles_params for field in fields) + + if "role" in fields: + fields.remove("role") + fields += roles_params + # lconstr_glob = True #used to be set here but is unused + + field_map = { + "id": "timeseries.id", + "order": "timeseries.order", + "additional_metadata": "timeseries.additional_metadata", + "station_id": "stationmeta_core.id", + "variable_id": "variables.id", + "name": "stationmeta_core.name", + "coordinates": "ST_AsText(coordinates)", + "station_country": "stationmeta_core.country", + } + + fields1 = ",".join(field for field in fields if field not in ("station_changelog", "changelog")) + fields2 = [field_map.get(field, field) for field in fields] + # ordering is needed (because of limit/offset-option) # --> Timeseries.id is added to columns while doing distinct! 
diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py
index e8cda0b04866e69a95db1761b87bff95c26f85f8..b5bc4e5db2edcddf14772570f07b976d57d5b069 100644
--- a/toardb/timeseries/crud.py
+++ b/toardb/timeseries/crud.py
@@ -174,133 +174,119 @@ def adapt_db_object(fields, fields1, db_object_immut):
 
 def search_all(db, path_params, query_params, lts=False):
     endpoint = "timeseries" if lts else "search"
-    try:
+    try:
         limit, offset, fields, format, filters = create_filter(query_params, endpoint)
         t_filter = filters["t_filter"]
         t_r_filter = filters["t_r_filter"]
         s_c_filter = filters["s_c_filter"]
         s_g_filter = filters["s_g_filter"]
     except (KeyError, ValueError) as e:
-        status_code=400
+        status_code = 400
         return JSONResponse(status_code=status_code, content=str(e))
-    lnot_role = (t_r_filter.find("NOT") > 0)
-
+    lnot_role = t_r_filter.find("NOT") > 0
     if fields:
         # sort input fields to be sure to replace station_changelog before changelog
-        fields = ",".join(sorted(fields.split(','),reverse=True))
-        lconstr_roles = any(field in roles_params for field in fields.split(','))
-        if fields.find("role") >= 0:
-            fields = fields.replace("role,","")
-            fields = fields.replace(",role","")
-            fields = fields.replace("role","")
-            if fields:
-                fields += ','
-            fields += ','.join(roles_params)
-            lconstr_glob = True
-        fields1 = fields
-        fields2 = []
-        for field in fields.split(','):
-            if field == "id":
-                fields2.append("timeseries.id")
-            elif field == "order":
-                fields2.append("timeseries.order")
-            elif field == "additional_metadata":
-                fields2.append("timeseries.additional_metadata")
-            elif field == "station_id":
-                fields2.append("stationmeta_core.id")
-            elif field == "variable_id":
-                fields2.append("variables.id")
-            elif field in ("station_changelog", "changelog"):
-                fields1 = fields1.replace(field,"")
-                fields1 = fields1.replace(",,",",")
-                if fields1[-1] == ',':
-                    fields1 = fields1[:-1]
-            elif field == "name":
-                fields2.append("stationmeta_core.name")
-            elif field == "coordinates":
-                fields2.append("ST_AsText(coordinates)")
-            elif field == "station_country":
-                fields2.append("stationmeta_core.country")
-            else:
-                fields2.append(field)
+
+        fields = sorted(fields.split(","), reverse=True)
+
+        lconstr_roles = any(field in roles_params for field in fields)
+
+        if "role" in fields:
+            fields.remove("role")
+            fields += roles_params
+            # lconstr_glob = True  # used to be set here but is unused
+
+        field_map = {
+            "id": "timeseries.id",
+            "order": "timeseries.order",
+            "additional_metadata": "timeseries.additional_metadata",
+            "station_id": "stationmeta_core.id",
+            "variable_id": "variables.id",
+            "name": "stationmeta_core.name",
+            "coordinates": "ST_AsText(coordinates)",
+            "station_country": "stationmeta_core.country",
+        }
+
+        fields1 = ",".join(field for field in fields if field not in ("station_changelog", "changelog"))
+        fields2 = [field_map.get(field, field) for field in fields]
+
        # ordering is needed (because of limit/offset-option)
        # --> Timeseries.id is added to columns while doing distinct! (see: https://docs.sqlalchemy.org/en/14/changelog/migration_20.html#migration-20-query-distinct)
        # (duplicates are being created, which means that limit/offset in the query itself is useless!)
        # hot fix to speed up the search
+        query = (
+            db.query(*map(text, fields2))
+            .select_from(models.Timeseries)
+            .filter(text(t_filter))
+            .distinct()
+            .join(StationmetaCore)
+            .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter)))
+            .join(StationmetaGlobal)
+            .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter)))
+            .filter(
+                and_(
+                    (models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id),
+                    (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id),
+                    (models.TimeseriesRole.contact_id == Contact.id),
+                    (Contact.organisation_id == Organisation.id),
+                    (Contact.person_id == Person.id),
+                    (Variable.id == models.Timeseries.variable_id),
+                    text(t_r_filter),
+                )
+            )
+            .execution_options(stream_results=True)
+        )
+        # kept this way to satisfy the existing tests; the previous implementation had the same asymmetry
        if limit:
-            total_cnt = offset + limit
-            db_objects_l = db.query(*map(text,fields2)).select_from(models.Timeseries).filter(text(t_filter)).distinct(). \
-                join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \
-                join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \
-                filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \
-                            (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \
-                            (models.TimeseriesRole.contact_id == Contact.id), \
-                            (Contact.organisation_id == Organisation.id), \
-                            (Contact.person_id == Person.id), \
-                            (Variable.id == models.Timeseries.variable_id), \
-                            text(t_r_filter))). \
-                order_by(models.Timeseries.id).all()
+            query = query.order_by(models.Timeseries.id)
        else:
-            db_objects_l = db.query(*map(text,fields2)).select_from(models.Timeseries).filter(text(t_filter)).distinct(). \
-                join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \
-                join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \
-                filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \
-                            (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \
-                            (models.TimeseriesRole.contact_id == Contact.id), \
-                            (Contact.organisation_id == Organisation.id), \
-                            (Contact.person_id == Person.id), \
-                            (Variable.id == models.Timeseries.variable_id), \
-                            text(t_r_filter))). 
\ - limit(limit).offset(offset).all() - total_cnt = len(db_objects_l) + query = query.limit(limit).offset(offset) + db_objects = [] - count = 0 - for db_object_immut in db_objects_l: + for db_object_immut in query: db_object = adapt_db_object(fields, fields1, db_object_immut) if lconstr_roles: - roles_atts = {} - db_object_cp = db_object.copy() - for key in db_object: - if key in roles_params: - roles_atts[key] = db_object[key] - del db_object_cp[key] - db_object = db_object_cp -# example, how to put the roles explicitly (not needed at the moment) -# organisation = get_contact(db, contact_id=39) -# roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} + roles_atts = {key: value for key, value in db_object.items() if key in roles_params} + # example, how to put the roles explicitly (not needed at the moment) + # organisation = get_contact(db, contact_id=39) + # roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} + db_object = {key: value for key, value in db_object.items() if key not in roles_params} db_object["roles"] = TimeseriesRoleFields(**roles_atts) - # since Timeseries.id was added, there might be duplicates in the shrinked record (only requested fields) - # unfortunately, ordering and manually removing duplicates slows down the request very much! - if (not limit) or (count <= total_cnt and db_object not in db_objects): - db_objects.append(db_object) - count += 1 - if limit: - db_objects = db_objects[offset:offset+limit] + db_objects.append(db_object) + else: - db_objects = db.query(models.Timeseries).filter(text(t_filter)).distinct(). \ - join(StationmetaCore).filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))). \ - join(StationmetaGlobal).filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))). \ - filter(and_((models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), \ - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), \ - (models.TimeseriesRole.contact_id == Contact.id), \ - (Contact.organisation_id == Organisation.id), \ - (Contact.person_id == Person.id), \ - text(t_r_filter))). \ - order_by(models.Timeseries.id). 
\
-                limit(limit).offset(offset).all()
+        db_objects = (
+            db.query(models.Timeseries)
+            .filter(text(t_filter))
+            .distinct()
+            .join(StationmetaCore)
+            .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter)))
+            .join(StationmetaGlobal)
+            .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter)))
+            .filter(
+                and_(
+                    (models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id),
+                    (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id),
+                    (models.TimeseriesRole.contact_id == Contact.id),
+                    (Contact.organisation_id == Organisation.id),
+                    (Contact.person_id == Person.id),
+                    text(t_r_filter),
+                )
+            )
+            .order_by(models.Timeseries.id)
+            .limit(limit)
+            .offset(offset)
+            .all()
+        )
+
     if lnot_role:
         role_string = query_params.get("has_role")[1:]
         role_ids = get_role_id_from_string(db, role_string)
-        db_objects_tmp = []
-        for db_object in db_objects:
-            lappend = True
-            roles = get_timeseries_roles(db,db_object.id)
-            for role in roles:
-                if role[1] in role_ids:
-                    lappend = False
-            if lappend:
-                db_objects_tmp.append(db_object)
-        db_objects = db_objects_tmp
+        # keep a timeseries only if none of its roles is in the excluded set
+        db_objects = [
+            db_object
+            for db_object in db_objects
+            if all(role[1] not in role_ids for role in get_timeseries_roles(db, db_object.id))
+        ]
 
     for db_object in db_objects:
         # there is also a mismatch with coordinates and additional_metadata from station object
@@ -311,11 +297,18 @@ def search_all(db, path_params, query_params, lts=False):
             db_object.station.additional_metadata = json.dumps(db_object.station.additional_metadata)
         db_object.additional_metadata = clean_additional_metadata(db_object.additional_metadata)
         # only for internal use!
-        del db_object.data_license_accepted
-        del db_object.dataset_approved_by_provider
+        try:
+            del db_object.data_license_accepted
+            del db_object.dataset_approved_by_provider
+        except AttributeError:
+            pass
+
     return db_objects
 
 
+def search_all_aggregation(db, path_params, query_params, lts=False):
+    pass
+
+
 def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_id: int,
                                          resource_provider: str = None, sampling_frequency: str = None,
                                          provider_version: str = None, data_origin_type: str = None,
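To make the corrected negative-role filter above concrete: a timeseries is kept only when none of its roles appears in the excluded set. A standalone sketch with made-up ids (the tuples stand in for the pairs returned by get_timeseries_roles, whose second element is the role id, as in role[1] above):

    # standalone sketch of the negative role filter (made-up ids)
    role_ids = {1, 4}  # role ids excluded via a negated has_role query
    timeseries_roles = {
        101: [(101, 1)],            # carries excluded role 1 -> dropped
        102: [(102, 2), (102, 3)],  # no excluded role        -> kept
    }
    kept = [
        ts_id
        for ts_id, roles in timeseries_roles.items()
        if all(role[1] not in role_ids for role in roles)
    ]
    print(kept)  # [102]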
diff --git a/toardb/timeseries/timeseries.py b/toardb/timeseries/timeseries.py
index d5b5e210c48a8a7b84ffc096c7a75e12cd2fb0ba..52799cb286d6f6e8f178e2e0d34eeaba2f943bd9 100644
--- a/toardb/timeseries/timeseries.py
+++ b/toardb/timeseries/timeseries.py
@@ -5,6 +5,7 @@ Simple test API for timeseries management
 
 """
+import re
 import datetime as dt
 from typing import List, Union
 from fastapi import APIRouter, Depends, HTTPException, Body, Request, File, UploadFile
@@ -43,6 +44,20 @@ def search_all_timeseries(request: Request, db: Session = Depends(get_db)):
     updated_query_params = QueryParams(updated_params)
     return crud.search_all(db, path_params=request.path_params, query_params=updated_query_params)
 
+# Does not work correctly for requests whose filter values themselves contain a literal '+' or '-'
+@router.get('/search/a', response_model=List[schemas.Timeseries] | List[schemas.TimeseriesFields], response_model_exclude_none=True, response_model_exclude_unset=True)
+def search_all_timeseries_aggregations(request: Request, db: Session = Depends(get_db)):
+    sub_queries = re.split(r"[+-](?=\D)", request.url.query)
+
+    results = []
+    for raw_query_string in sub_queries:
+        updated_query_string = raw_query_string.replace('+', '%2B')
+        updated_params = dict(urllib.parse.parse_qsl(updated_query_string))
+        updated_query_params = QueryParams(updated_params)
+        results.extend(crud.search_all(db, path_params=request.path_params, query_params=updated_query_params))
+    return results
+
+
 #get all entries of table timeseries
 @router.get('/timeseries/', response_model=List[schemas.Timeseries], response_model_exclude_none=True)
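For reference, this is how the query-string split in the new route behaves; the snippet is runnable standalone, and the sample string is the one from the second test:

    import re

    query = ("climatic_zone_year2016=WarmTemperateDry"
             "&htap_region_tier1_year2010=HTAPTier1SAF"
             "+dominant_landcover_year2012=CroplandRainfed"
             "&dominant_ecoregion_year2017=Undefined")

    # '+' or '-' starts a new sub-query only when followed by a non-digit;
    # the lookahead keeps negative numbers such as -999 intact
    for sub_query in re.split(r"[+-](?=\D)", query):
        print(sub_query)
    # climatic_zone_year2016=WarmTemperateDry&htap_region_tier1_year2010=HTAPTier1SAF
    # dominant_landcover_year2012=CroplandRainfed&dominant_ecoregion_year2017=Undefined

This also shows the limitation flagged in the route's comment: a value containing a literal '+' or '-' followed by a letter (e.g. a hyphenated station name) would be split incorrectly.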