diff --git a/tests/test_search_aggregations.py b/tests/test_search_aggregations.py index 152ea9e965d95d05eaa1cbf088b5ea9038b7dbbb..6eb328795a8b48aee5c9d19641621bd656b0c576 100644 --- a/tests/test_search_aggregations.py +++ b/tests/test_search_aggregations.py @@ -176,309 +176,581 @@ class TestApps: ) db.execute("COMMIT") - def test_search_with_global_attributes(self, client, db): - response = client.get("/search/a?climatic_zone_year2016=WarmTemperateDry&htap_region_tier1_year2010=HTAPTier1SAF") + def test_search_single(self, client, db): + response = client.get("/search/a?id=2") expected_status_code = 200 assert response.status_code == expected_status_code - expected_resp = [{'id': 2, 'label': 'CMA', 'order': 1, - 'sampling_frequency': 'hourly', 'aggregation': 'mean', 'data_origin_type': 'measurement', - 'data_start_date': '2003-09-07T15:30:00+00:00', 'data_end_date': '2016-12-31T14:30:00+00:00', 'coverage': -1.0, - 'data_origin': 'instrument', 'sampling_height': 7.0, - 'provider_version': 'N/A', - 'doi': '', - 'additional_metadata': {'absorption_cross_section': 'Hearn 1961', - 'measurement_method': 'uv_abs', - 'original_units': {'since_19740101000000': 'nmol/mol'}, - 'ebas_metadata_19740101000000_29y': {'Submitter': 'Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA', - 'Data level': '2', - 'Frameworks': 'GAW-WDCRG NOAA-ESRL', - 'Station code': 'XXX', - 'Station name': 'Secret' } }, - 'roles': [{'id': 1, 'role': 'resource provider', 'status': 'active', - 'contact': {'id': 5, 'organisation': {'id': 2, 'name': 'FZJ', 'longname': 'Forschungszentrum Jülich', - 'kind': 'research', 'city': 'Jülich', 'postcode': '52425', 'street_address': 'Wilhelm-Johnen-Straße', - 'country': 'Germany', 'homepage': 'https://www.fz-juelich.de', 'contact_url': 'mailto:toar-data@fz-juelich.de'}}}], - 'variable': {'name': 'o3', 'longname': 'ozone', 'displayname': 'Ozone', - 'cf_standardname': 'mole_fraction_of_ozone_in_air', 
'units': 'nmol mol-1', - 'chemical_formula': 'O3', 'id': 5}, - 'station': {'id': 3, 'codes': ['China_test8'], 'name': 'Test_China', - 'coordinates': {'alt': 1534.0, 'lat': 36.256, 'lng': 117.106}, - 'coordinate_validation_status': 'not checked', - 'country': 'China', 'state': 'Shandong Sheng', - 'type': 'unknown', 'type_of_area': 'unknown', - 'timezone': 'Asia/Shanghai', 'additional_metadata': {}, - 'roles': [], 'annotations': [], - 'aux_images': [], 'aux_docs': [], 'aux_urls': [], - 'globalmeta': {'climatic_zone_year2016': '6 (warm temperate dry)', - 'distance_to_major_road_year2020': -999.0, - 'dominant_ecoregion_year2017': '-1 (undefined)', - 'dominant_landcover_year2012': '10 (Cropland, rainfed)', - 'ecoregion_description_25km_year2017': '', - 'htap_region_tier1_year2010': '10 (SAF Sub Saharan/sub Sahel Africa)', - 'landcover_description_25km_year2012': '', - 'max_stable_nightlights_25km_year1992': -999.0, - 'max_stable_nightlights_25km_year2013': -999.0, - 'max_population_density_25km_year1990': -1.0, - 'max_population_density_25km_year2015': -1.0, - 'max_topography_srtm_relative_alt_5km_year1994': -999.0, - 'mean_stable_nightlights_1km_year2013': -999.0, - 'mean_stable_nightlights_5km_year2013': -999.0, - 'mean_nox_emissions_10km_year2000': -999.0, - 'mean_nox_emissions_10km_year2015': -999.0, - 'mean_population_density_250m_year1990': -1.0, - 'mean_population_density_250m_year2015': -1.0, - 'mean_population_density_5km_year1990': -1.0, - 'mean_population_density_5km_year2015': -1.0, - 'mean_topography_srtm_alt_1km_year1994': -999.0, - 'mean_topography_srtm_alt_90m_year1994': -999.0, - 'min_topography_srtm_relative_alt_5km_year1994': -999.0, - 'stddev_topography_srtm_relative_alt_5km_year1994': -999.0, - 'toar1_category': 'unclassified'}, - 'changelog': [{'datetime': '2023-08-15T21:16:20.596545+00:00', - 'description': 'station created', - 'old_value': '', - 'new_value': '', - 'station_id': 3, - 'author_id': 1, - 'type_of_change': 'created' - }]}, - 
'programme': {'id': 0, 'name': '', 'longname': '', 'homepage': '', 'description': ''}}] + expected_resp = [ + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_origin_type": "measurement", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "coverage": -1.0, + "data_origin": "instrument", + "sampling_height": 7.0, + "provider_version": "N/A", + "doi": "", + "additional_metadata": { + "absorption_cross_section": "Hearn 1961", + "measurement_method": "uv_abs", + "original_units": {"since_19740101000000": "nmol/mol"}, + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, + }, + "roles": [ + { + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + "organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, + } + ], + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + 
"aux_docs": [], + "aux_urls": [], + "globalmeta": { + "climatic_zone_year2016": "6 (warm temperate dry)", + "distance_to_major_road_year2020": -999.0, + "dominant_ecoregion_year2017": "-1 (undefined)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "ecoregion_description_25km_year2017": "", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "landcover_description_25km_year2012": "", + "max_stable_nightlights_25km_year1992": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_population_density_25km_year1990": -1.0, + "max_population_density_25km_year2015": -1.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year1990": -1.0, + "mean_population_density_5km_year2015": -1.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "mean_topography_srtm_alt_90m_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + } + ] assert response.json() == expected_resp - def test_search_with_global_attributes2(self, client, db): - response = client.get("/search/a?climatic_zone_year2016=WarmTemperateDry&htap_region_tier1_year2010=HTAPTier1SAF+dominant_landcover_year2012=CroplandRainfed&dominant_ecoregion_year2017=Undefined") + def test_search_plus(self, client, db): + 
response = client.get("/search/a?id=1+id=2") expected_status_code = 200 assert response.status_code == expected_status_code expected_resp = [ - { - "id": 2, - "label": "CMA", - "order": 1, - "sampling_frequency": "hourly", - "aggregation": "mean", - "data_origin_type": "measurement", - "data_start_date": "2003-09-07T15:30:00+00:00", - "data_end_date": "2016-12-31T14:30:00+00:00", - "coverage": -1.0, - "data_origin": "instrument", - "sampling_height": 7.0, - "provider_version": "N/A", - "doi": "", - "additional_metadata": { - "absorption_cross_section": "Hearn 1961", - "measurement_method": "uv_abs", - "original_units": {"since_19740101000000": "nmol/mol"}, - "ebas_metadata_19740101000000_29y": { - "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", - "Data level": "2", - "Frameworks": "GAW-WDCRG NOAA-ESRL", - "Station code": "XXX", - "Station name": "Secret", + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + 
"max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], }, - }, - "roles": [ - { - "id": 1, - "role": "resource provider", - "status": "active", - "contact": { - "id": 5, - "organisation": { - "id": 2, - "name": "FZJ", - "longname": "Forschungszentrum Jülich", - "kind": "research", - "city": "Jülich", - "postcode": "52425", - "street_address": "Wilhelm-Johnen-Straße", - "country": "Germany", - "homepage": "https://www.fz-juelich.de", - "contact_url": "mailto:toar-data@fz-juelich.de", + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": 
"mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, }, - }, - } - ], - "variable": { - "name": "o3", - "longname": "ozone", - "displayname": "Ozone", - "cf_standardname": "mole_fraction_of_ozone_in_air", - "units": "nmol mol-1", - "chemical_formula": "O3", - "id": 5, + } + ], }, - "station": { - "id": 3, - "codes": ["China_test8"], - "name": "Test_China", - "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, - "coordinate_validation_status": "not checked", - "country": "China", - "state": "Shandong Sheng", - "type": "unknown", - "type_of_area": "unknown", - "timezone": "Asia/Shanghai", - "additional_metadata": {}, - "roles": [], - "annotations": [], - "aux_images": [], - "aux_docs": [], - "aux_urls": [], - "globalmeta": { - "climatic_zone_year2016": "6 (warm temperate dry)", - "distance_to_major_road_year2020": -999.0, - "dominant_ecoregion_year2017": "-1 (undefined)", - "dominant_landcover_year2012": "10 (Cropland, rainfed)", - "ecoregion_description_25km_year2017": "", - "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", - "landcover_description_25km_year2012": "", - "max_stable_nightlights_25km_year1992": -999.0, - "max_stable_nightlights_25km_year2013": -999.0, - "max_population_density_25km_year1990": -1.0, - "max_population_density_25km_year2015": -1.0, - "max_topography_srtm_relative_alt_5km_year1994": -999.0, - "mean_stable_nightlights_1km_year2013": -999.0, - 
"mean_stable_nightlights_5km_year2013": -999.0, - "mean_nox_emissions_10km_year2000": -999.0, - "mean_nox_emissions_10km_year2015": -999.0, - "mean_population_density_250m_year1990": -1.0, - "mean_population_density_250m_year2015": -1.0, - "mean_population_density_5km_year1990": -1.0, - "mean_population_density_5km_year2015": -1.0, - "mean_topography_srtm_alt_1km_year1994": -999.0, - "mean_topography_srtm_alt_90m_year1994": -999.0, - "min_topography_srtm_relative_alt_5km_year1994": -999.0, - "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, - "toar1_category": "unclassified", + { + "id": 2, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": { + "original_units": {"since_19740101000000": "nmol/mol"}, + "measurement_method": "uv_abs", + "absorption_cross_section": "Hearn 1961", + "ebas_metadata_19740101000000_29y": { + "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", + "Data level": "2", + "Frameworks": "GAW-WDCRG NOAA-ESRL", + "Station code": "XXX", + "Station name": "Secret", + }, }, - "changelog": [ + "doi": "", + "coverage": -1.0, + "station": { + "id": 3, + "codes": ["China_test8"], + "name": "Test_China", + "coordinates": {"lat": 36.256, "lng": 117.106, "alt": 1534.0}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Shandong Sheng", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + 
"max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", + "dominant_landcover_year2012": "10 (Cropland, rainfed)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-08-15T21:16:20.596545+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 3, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "o3", + "longname": "ozone", + "displayname": "Ozone", + "cf_standardname": "mole_fraction_of_ozone_in_air", + "units": "nmol mol-1", + "chemical_formula": "O3", + "id": 5, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ { - "datetime": "2023-08-15T21:16:20.596545+00:00", - "description": "station created", - "old_value": "", - "new_value": "", - "station_id": 3, - "author_id": 1, - "type_of_change": "created", + "id": 1, + "role": "resource provider", + "status": "active", + "contact": { + "id": 5, + 
"organisation": { + "id": 2, + "name": "FZJ", + "longname": "Forschungszentrum Jülich", + "kind": "research", + "city": "Jülich", + "postcode": "52425", + "street_address": "Wilhelm-Johnen-Straße", + "country": "Germany", + "homepage": "https://www.fz-juelich.de", + "contact_url": "mailto:toar-data@fz-juelich.de", + }, + }, } ], }, - "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, - }, - { - "id": 2, - "label": "CMA", - "order": 1, - "sampling_frequency": "hourly", - "aggregation": "mean", - "data_origin_type": "measurement", - "data_start_date": "2003-09-07T15:30:00+00:00", - "data_end_date": "2016-12-31T14:30:00+00:00", - "coverage": -1.0, - "data_origin": "instrument", - "sampling_height": 7.0, - "provider_version": "N/A", - "doi": "", - "additional_metadata": { - "absorption_cross_section": "Hearn 1961", - "measurement_method": "uv_abs", - "original_units": {"since_19740101000000": "nmol/mol"}, - "ebas_metadata_19740101000000_29y": { - "Submitter": "Unknown, Lady, lady.unknown@unknown.com, some long division name, SHORT, , 111 Streetname, , zipcode, Boulder, CO, USA", - "Data level": "2", - "Frameworks": "GAW-WDCRG NOAA-ESRL", - "Station code": "XXX", - "Station name": "Secret", + ] + assert response.json() == expected_resp + + def test_search_minus(self, client, db): + response = client.get("/search/a?id=2-id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [] + assert response.json() == expected_resp + + def test_search_distinct(self, client, db): + response = client.get("/search/a?id=1+id=1+id=1+id=1") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": 
"measurement", + "provider_version": "N/A", + "sampling_height": 7.0, + "additional_metadata": {}, + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. 
Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], }, - }, - "roles": [ - { - "id": 1, - "role": "resource provider", - "status": "active", - "contact": { - "id": 5, - "organisation": { - "id": 2, - "name": "FZJ", - "longname": "Forschungszentrum Jülich", - "kind": "research", - "city": "Jülich", - "postcode": "52425", - "street_address": "Wilhelm-Johnen-Straße", - "country": "Germany", - "homepage": "https://www.fz-juelich.de", - "contact_url": "mailto:toar-data@fz-juelich.de", + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, + }, + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ + { + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + 
"organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": "https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, }, - }, - } - ], - "variable": { - "name": "o3", - "longname": "ozone", - "displayname": "Ozone", - "cf_standardname": "mole_fraction_of_ozone_in_air", - "units": "nmol mol-1", - "chemical_formula": "O3", - "id": 5, - }, - "station": { - "id": 3, - "codes": ["China_test8"], - "name": "Test_China", - "coordinates": {"alt": 1534.0, "lat": 36.256, "lng": 117.106}, - "coordinate_validation_status": "not checked", - "country": "China", - "state": "Shandong Sheng", - "type": "unknown", - "type_of_area": "unknown", - "timezone": "Asia/Shanghai", + } + ], + }] + assert response.json() == expected_resp + + def test_search_complex(self, client, db): + response = client.get("/search/a?id=1+id=2-id=2") + expected_status_code = 200 + assert response.status_code == expected_status_code + expected_resp = [ + { + "id": 1, + "label": "CMA", + "order": 1, + "sampling_frequency": "hourly", + "aggregation": "mean", + "data_start_date": "2003-09-07T15:30:00+00:00", + "data_end_date": "2016-12-31T14:30:00+00:00", + "data_origin": "instrument", + "data_origin_type": "measurement", + "provider_version": "N/A", + "sampling_height": 7.0, "additional_metadata": {}, - "roles": [], - "annotations": [], - "aux_images": [], - "aux_docs": [], - "aux_urls": [], - "globalmeta": { - "climatic_zone_year2016": "6 (warm temperate dry)", - "distance_to_major_road_year2020": -999.0, - "dominant_ecoregion_year2017": "-1 (undefined)", - "dominant_landcover_year2012": "10 (Cropland, rainfed)", - "ecoregion_description_25km_year2017": "", - "htap_region_tier1_year2010": "10 (SAF Sub Saharan/sub Sahel Africa)", - "landcover_description_25km_year2012": "", - "max_stable_nightlights_25km_year1992": 
-999.0, - "max_stable_nightlights_25km_year2013": -999.0, - "max_population_density_25km_year1990": -1.0, - "max_population_density_25km_year2015": -1.0, - "max_topography_srtm_relative_alt_5km_year1994": -999.0, - "mean_stable_nightlights_1km_year2013": -999.0, - "mean_stable_nightlights_5km_year2013": -999.0, - "mean_nox_emissions_10km_year2000": -999.0, - "mean_nox_emissions_10km_year2015": -999.0, - "mean_population_density_250m_year1990": -1.0, - "mean_population_density_250m_year2015": -1.0, - "mean_population_density_5km_year1990": -1.0, - "mean_population_density_5km_year2015": -1.0, - "mean_topography_srtm_alt_1km_year1994": -999.0, - "mean_topography_srtm_alt_90m_year1994": -999.0, - "min_topography_srtm_relative_alt_5km_year1994": -999.0, - "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, - "toar1_category": "unclassified", + "doi": "", + "coverage": -1.0, + "station": { + "id": 2, + "codes": ["SDZ54421"], + "name": "Shangdianzi", + "coordinates": {"lat": 40.65, "lng": 117.106, "alt": 293.9}, + "coordinate_validation_status": "not checked", + "country": "China", + "state": "Beijing Shi", + "type": "unknown", + "type_of_area": "unknown", + "timezone": "Asia/Shanghai", + "additional_metadata": {"dummy_info": "Here is some more information about the station"}, + "roles": [], + "annotations": [], + "aux_images": [], + "aux_docs": [], + "aux_urls": [], + "globalmeta": { + "mean_topography_srtm_alt_90m_year1994": -999.0, + "mean_topography_srtm_alt_1km_year1994": -999.0, + "max_topography_srtm_relative_alt_5km_year1994": -999.0, + "min_topography_srtm_relative_alt_5km_year1994": -999.0, + "stddev_topography_srtm_relative_alt_5km_year1994": -999.0, + "climatic_zone_year2016": "6 (warm temperate dry)", + "htap_region_tier1_year2010": "11 (MDE Middle East: S. 
Arabia, Oman, etc, Iran, Iraq)", + "dominant_landcover_year2012": "11 (Cropland, rainfed, herbaceous cover)", + "landcover_description_25km_year2012": "", + "dominant_ecoregion_year2017": "-1 (undefined)", + "ecoregion_description_25km_year2017": "", + "distance_to_major_road_year2020": -999.0, + "mean_stable_nightlights_1km_year2013": -999.0, + "mean_stable_nightlights_5km_year2013": -999.0, + "max_stable_nightlights_25km_year2013": -999.0, + "max_stable_nightlights_25km_year1992": -999.0, + "mean_population_density_250m_year2015": -1.0, + "mean_population_density_5km_year2015": -1.0, + "max_population_density_25km_year2015": -1.0, + "mean_population_density_250m_year1990": -1.0, + "mean_population_density_5km_year1990": -1.0, + "max_population_density_25km_year1990": -1.0, + "mean_nox_emissions_10km_year2015": -999.0, + "mean_nox_emissions_10km_year2000": -999.0, + "toar1_category": "unclassified", + }, + "changelog": [ + { + "datetime": "2023-07-15T19:27:09.463245+00:00", + "description": "station created", + "old_value": "", + "new_value": "", + "station_id": 2, + "author_id": 1, + "type_of_change": "created", + } + ], + }, + "variable": { + "name": "toluene", + "longname": "toluene", + "displayname": "Toluene", + "cf_standardname": "mole_fraction_of_toluene_in_air", + "units": "nmol mol-1", + "chemical_formula": "C7H8", + "id": 7, }, - "changelog": [ + "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, + "roles": [ { - "datetime": "2023-08-15T21:16:20.596545+00:00", - "description": "station created", - "old_value": "", - "new_value": "", - "station_id": 3, - "author_id": 1, - "type_of_change": "created", + "id": 2, + "role": "resource provider", + "status": "active", + "contact": { + "id": 4, + "organisation": { + "id": 1, + "name": "UBA", + "longname": "Umweltbundesamt", + "kind": "government", + "city": "Dessau-Roßlau", + "postcode": "06844", + "street_address": "Wörlitzer Platz 1", + "country": "Germany", + "homepage": 
"https://www.umweltbundesamt.de", + "contact_url": "mailto:immission@uba.de", + }, + }, } ], - }, - "programme": {"id": 0, "name": "", "longname": "", "homepage": "", "description": ""}, - }, - ] + }] assert response.json() == expected_resp \ No newline at end of file diff --git a/toardb/timeseries/crud.py b/toardb/timeseries/crud.py index b5bc4e5db2edcddf14772570f07b976d57d5b069..906259861ebf56ebd1f28c166f849080fac3a697 100644 --- a/toardb/timeseries/crud.py +++ b/toardb/timeseries/crud.py @@ -139,37 +139,109 @@ def get_citation(db: Session, timeseries_id: int, datetime: dt.datetime = None): license_txt = "This data is published under a Creative Commons Attribution 4.0 International (CC BY 4.0). https://creativecommons.org/licenses/by/4.0/" return {"attribution": attribution, "citation": citation, "license": license_txt} -def adapt_db_object(fields, fields1, db_object_immut): - db_object = dict(zip(fields1.split(','),db_object_immut)) - # there is a mismatch with coordinates and additional_metadata - try: - db_object['coordinates'] = get_coordinates_from_string(db_object['coordinates']) - except: - pass - try: - db_object['additional_metadata'] = clean_additional_metadata(db_object['additional_metadata']) - except: - pass - try: - station_id = { "id": db_object['station_id'] } - db_object['station'] = station_id - del db_object['station_id'] - except: - pass - try: - variable_id = { "id": db_object['variable_id'] } - db_object['variable'] = variable_id - del db_object['variable_id'] - except: - pass - if "changelog" in fields: - db_object['changelog'] = get_timeseries_changelog(db, db_object['id']) - if "station_changelog" in fields: - try: - db_object['station_changelog'] = get_stationmeta_changelog(db, db_object['station_id']) - except: +def adapt_db_object(db_object_immut, fields_str = "", lconstr_roles = False): + if fields_str: + db_object = dict(zip(fields_str.split(","), db_object_immut)) + # there is a mismatch with coordinates and additional_metadata + if 
"coordinates" in db_object: + db_object["coordinates"] = get_coordinates_from_string(db_object["coordinates"]) + + if "additional_metadata" in db_object: + db_object["additional_metadata"] = clean_additional_metadata(db_object["additional_metadata"]) + + if "station" in db_object: + station_id = {"id": db_object["station_id"]} + db_object["station"] = station_id + del db_object["station_id"] + + if "variable_id" in db_object: + variable_id = {"id": db_object["variable_id"]} + db_object["variable"] = variable_id + del db_object["variable_id"] + + if "changelog" in db_object: + db_object["changelog"] = get_timeseries_changelog(db, db_object["id"]) + + if "station_changelog" in db_object: + db_object["station_changelog"] = get_stationmeta_changelog(db, db_object["station_id"]) + + if lconstr_roles: + # example, how to put the roles explicitly (not needed at the moment) + # organisation = get_contact(db, contact_id=39) + # roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} + roles_atts = {key: value for key, value in db_object.items() if key in roles_params} + db_object = {key: value for key, value in db_object.items() if key not in roles_params} + db_object["roles"] = TimeseriesRoleFields(**roles_atts) + + return db_object + + else: + if isinstance(db_object_immut.station.coordinates, (WKBElement, WKTElement)): + db_object_immut.station.coordinates = get_coordinates_from_geom(db_object_immut.station.coordinates) + # there is a mismatch with additional_metadata + if isinstance(db_object_immut.station.additional_metadata, dict): + db_object_immut.station.additional_metadata = json.dumps(db_object_immut.station.additional_metadata) + db_object_immut.additional_metadata = clean_additional_metadata(db_object_immut.additional_metadata) + + + #Internall use + try: + del db_object_immut.data_license_accepted + except AttributeError: pass - return db_object + + try: + del db_object_immut.dataset_approved_by_provider + except AttributeError: + pass + + 
return db_object_immut + +class TimeseriesQuery: + def __init__(self, sign, query): + self.sign = sign + self.query = query + + def __add__(self, other): + return TimeseriesQuery(True, self.query.union_all(other.query) if other.sign else self.query.except_all(other.query)) + + def from_query_params(query_params, db, endpoint = False, sign = True): + try: + limit, offset, fields, format, filters = create_filter(query_params, endpoint) + t_filter = filters["t_filter"] + t_r_filter = filters["t_r_filter"] + s_c_filter = filters["s_c_filter"] + s_g_filter = filters["s_g_filter"] + except (KeyError, ValueError) as e: + status_code = 400 + return JSONResponse(status_code=status_code, content=str(e)) + + query = ( + db.query(models.Timeseries) + .filter(text(t_filter)) + .distinct() + .join(StationmetaCore) + .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))) + .join(StationmetaGlobal) + .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))) + .filter( + and_( + (models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), + (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), + (models.TimeseriesRole.contact_id == Contact.id), + (Contact.organisation_id == Organisation.id), + (Contact.person_id == Person.id), + (Variable.id == models.Timeseries.variable_id), + text(t_r_filter), + ) + ) + .execution_options(stream_results=True) + ) + + if limit: + query = query.limit(limit).offset(offset) + + return TimeseriesQuery(sign, query) def search_all(db, path_params, query_params, lts=False): @@ -183,7 +255,7 @@ def search_all(db, path_params, query_params, lts=False): except (KeyError, ValueError) as e: status_code = 400 return JSONResponse(status_code=status_code, content=str(e)) - lnot_role = t_r_filter.find("NOT") > 0 + if fields: # sort input fields to be sure to replace station_changelog before changelog @@ -207,107 +279,70 @@ def search_all(db, path_params, 
query_params, lts=False): "station_country": "stationmeta_core.country", } - fields1 = ",".join(field for field in fields if field not in ("station_changelog", "changelog")) - fields2 = [field_map.get(field, field) for field in fields] + fields_str = ",".join(field for field in fields if field not in ("station_changelog", "changelog")) + fields_lst = [text(field_map.get(field, field)) for field in fields] + + query = db.query(*fields_lst) # ordering is needed (because of limit/offset-option) # --> Timeseries.id is added to columns while doing distinct! (see: https://docs.sqlalchemy.org/en/14/changelog/migration_20.html#migration-20-query-distinct) # (duplicates are being created, which means that limit/offset in the query itself is useless!) # hot fix to speed up the search - query = ( - db.query(*map(text, fields2)) - .select_from(models.Timeseries) - .filter(text(t_filter)) - .distinct() - .join(StationmetaCore) - .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))) - .join(StationmetaGlobal) - .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))) - .filter( - and_( - (models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), - (models.TimeseriesRole.contact_id == Contact.id), - (Contact.organisation_id == Organisation.id), - (Contact.person_id == Person.id), - (Variable.id == models.Timeseries.variable_id), - text(t_r_filter), - ) + else: + lconstr_roles = False + fields_str = "" + query = db.query(models.Timeseries) + + query = ( + query.select_from(models.Timeseries) + .filter(text(t_filter)) + .distinct() + .join(StationmetaCore) + .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))) + .join(StationmetaGlobal) + .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))) + .filter( + and_( + (models.Timeseries.id == 
timeseries_timeseries_roles_table.c.timeseries_id), + (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), + (models.TimeseriesRole.contact_id == Contact.id), + (Contact.organisation_id == Organisation.id), + (Contact.person_id == Person.id), + (Variable.id == models.Timeseries.variable_id), + text(t_r_filter), ) - .execution_options(stream_results=True) ) - # is required like this due to test and previous implementation seems weird - if limit: - query = query.order_by(models.Timeseries.id) - else: - query = query.limit(limit).offset(offset) + .execution_options(stream_results=True) + .order_by(models.Timeseries.id) + ) - db_objects = [] - for db_object_immut in query: - db_object = adapt_db_object(fields, fields1, db_object_immut) - if lconstr_roles: - roles_atts = {key: value for key, value in db_object.items() if key in roles_params} - # example, how to put the roles explicitly (not needed at the moment) - # organisation = get_contact(db, contact_id=39) - # roles_atts["contact"] = {"id": 39, "organisation": organisation.__dict__} - db_object = {key: value for key, value in db_object.items() if key not in roles_params} - db_object["roles"] = TimeseriesRoleFields(**roles_atts) - db_objects.append(db_object) + # is required like this due to test and previous implementation seems weird + # order by defeats purpose of limit limited data retrieval + if limit: + query = query.limit(limit).offset(offset) - else: - db_objects = ( - db.query(models.Timeseries) - .filter(text(t_filter)) - .distinct() - .join(StationmetaCore) - .filter(and_(models.Timeseries.station_id == StationmetaCore.id, text(s_c_filter))) - .join(StationmetaGlobal) - .filter(and_(StationmetaCore.id == StationmetaGlobal.station_id, text(s_g_filter))) - .filter( - and_( - (models.Timeseries.id == timeseries_timeseries_roles_table.c.timeseries_id), - (models.timeseries_timeseries_roles_table.c.role_id == models.TimeseriesRole.id), - (models.TimeseriesRole.contact_id == 
Contact.id), - (Contact.organisation_id == Organisation.id), - (Contact.person_id == Person.id), - text(t_r_filter), - ) - ) - .order_by(models.Timeseries.id) - .limit(limit) - .offset(offset) - .all() - ) - if lnot_role: - role_string = query_params.get("has_role")[1:] - role_ids = get_role_id_from_string(db, role_string) - db_objects = [ - db_object - for db_object in db_objects - if all(role[1] in role_ids for role in get_timeseries_roles(db, db_object.id)) - ] + not_in_tr = "NOT" in t_r_filter + if not_in_tr: + role_string = query_params.get("has_role")[1:] + role_ids = get_role_id_from_string(db, role_string) - for db_object in db_objects: - # there is also a mismatch with coordinates and additional_metadata from station object - if isinstance(db_object.station.coordinates, (WKBElement, WKTElement)): - db_object.station.coordinates = get_coordinates_from_geom(db_object.station.coordinates) - # there is a mismatch with additional_metadata - if isinstance(db_object.station.additional_metadata, dict): - db_object.station.additional_metadata = json.dumps(db_object.station.additional_metadata) - db_object.additional_metadata = clean_additional_metadata(db_object.additional_metadata) - # only for internal use! 
- try: - del db_object.data_license_accepted - del db_object.dataset_approved_by_provider - except AttributeError: - pass + return [ + adapt_db_object(db_object_immut, fields_str, lconstr_roles) + for db_object_immut in query.all() + if not not_in_tr or not all(role[1] in role_ids for role in get_timeseries_roles(db, db_object_immut.id)) + ] - return db_objects +def search_all_aggreagtion(db, path_params, signs, query_params_list, lts=False): + endpoint = "timeseries" if lts else "search" + + queries = [ + TimeseriesQuery.from_query_params(query_params, db, endpoint, sign) + for sign, query_params in zip(signs, query_params_list) + ] -def seearch_all_aggreagtion(db, path_params, query_params, lts=False): - pass + return [adapt_db_object(db_object_immut) for db_object_immut in sum(queries[1:], queries[0]).query] def get_timeseries_by_unique_constraints(db: Session, station_id: int, variable_id: int, resource_provider: str = None, diff --git a/toardb/timeseries/timeseries.py b/toardb/timeseries/timeseries.py index 52799cb286d6f6e8f178e2e0d34eeaba2f943bd9..445cefd0ba24427f28169ec0ca7339d278a2cd99 100644 --- a/toardb/timeseries/timeseries.py +++ b/toardb/timeseries/timeseries.py @@ -31,31 +31,29 @@ from toardb.utils.utils import ( router = APIRouter() -# CRUD: create, retrieve, update, delete -# 1. retrieve +def get_query_params(raw_query_string): + updated_query_string = raw_query_string.replace('+', '%2B') + updated_params = dict(urllib.parse.parse_qsl(updated_query_string)) + return QueryParams(updated_params) +# CRUD: create, retrieve, update, delete +# 1. 
retrieve #get all entries of table timeseries @router.get('/search/', response_model=List[schemas.Timeseries] | List[schemas.TimeseriesFields], response_model_exclude_none=True, response_model_exclude_unset=True) def search_all_timeseries(request: Request, db: Session = Depends(get_db)): - raw_query_string = request.url.query - updated_query_string = raw_query_string.replace('+', '%2B') - updated_params = dict(urllib.parse.parse_qsl(updated_query_string)) - updated_query_params = QueryParams(updated_params) + updated_query_params = get_query_params(request.url.query) return crud.search_all(db, path_params=request.path_params, query_params=updated_query_params) -# Does not work correctly for request connected with a + or - +# Does not make sense when only certain fields are selected in different requests; that needs a different syntax +# order of concatenated statements is also unclear @router.get('/search/a', response_model=List[schemas.Timeseries] | List[schemas.TimeseriesFields], response_model_exclude_none=True, response_model_exclude_unset=True) def search_all_timeseries_aggregations(request: Request, db: Session = Depends(get_db)): - urls = re.split(r"[+-](?=\D)",request.url.query) - - results = [] - for raw_query_string in urls: - updated_query_string = raw_query_string.replace('+', '%2B') - updated_params = dict(urllib.parse.parse_qsl(updated_query_string)) - updated_query_params = QueryParams(updated_params) - results.extend(crud.search_all(db, path_params=request.path_params, query_params=updated_query_params)) - return results + urls = re.split(r"(?=[+-]\D)", "+" + request.url.query)[1:] + + signs = [ url.startswith("+") for url in urls] + query_params = [get_query_params(url[1:]) for url in urls] + return crud.search_all_aggreagtion(db, path_params=request.path_params, signs=signs, query_params_list=query_params) diff --git a/toardb/utils/utils.py b/toardb/utils/utils.py index 5bdc4578b7a2ea8dd6af676a2f919477fdee08de..02a330309a855671399096621cc3b606f4bb05a4 100644 
--- a/toardb/utils/utils.py +++ b/toardb/utils/utils.py @@ -164,25 +164,24 @@ def create_filter(query_params, endpoint): raise ValueError(f"Wrong value for limit given: {limit}") # fields and format are no filter options - fields = query_params.get("fields", None) + fields = query_params.get("fields", "") format = query_params.get("format", 'json') - allowed_params = allrel_params.copy() - allowed_params |= profiling_params - if endpoint in {'stationmeta'}: - allowed_params |= gis_params | core_params | global_params - elif endpoint in {'timeseries'}: - allowed_params |= timeseries_params | roles_params - elif endpoint in {'search'}: - allowed_params |= gis_params | core_params | global_params | timeseries_params | roles_params | ambig_params - elif endpoint in {'data'}: - allowed_params |= data_params | profiling_params - elif endpoint in {'variables'}: - allowed_params |= variable_params - elif endpoint in {'persons'}: - allowed_params |= person_params - else: + allowed_params = allrel_params | profiling_params + endpoint_params_map = { + 'stationmeta': gis_params | core_params | global_params, + 'timeseries': timeseries_params | roles_params, + 'search': gis_params | core_params | global_params | timeseries_params | roles_params | ambig_params, + 'data': data_params | profiling_params, + 'variables': variable_params, + 'persons': person_params, + } + try: + allowed_params |= endpoint_params_map[endpoint] + except KeyError: raise ValueError(f"Wrong endpoint given: {endpoint}") + + if fields: for field in fields.split(','): if field not in allowed_params: @@ -313,14 +312,15 @@ def create_filter(query_params, endpoint): d_filter = " AND ".join(d_filter).replace('[','(').replace(']',')') v_filter = " AND ".join(v_filter).replace('[','(').replace(']',')') p_filter = " AND ".join(p_filter).replace('[','(').replace(']',')') - filters = {} - filters["t_filter"] = t_filter - filters["t_r_filter"] = t_r_filter - filters["s_c_filter"] = s_c_filter - filters["s_g_filter"] = 
s_g_filter - filters["d_filter"] = d_filter - filters["v_filter"] = v_filter - filters["p_filter"] = p_filter + filters = { + "t_filter": t_filter, + "t_r_filter":t_r_filter, + "s_c_filter": s_c_filter, + "s_g_filter": s_g_filter, + "d_filter": d_filter, + "v_filter": v_filter, + "p_filter": p_filter, + } return limit, offset, fields, format, filters