diff --git a/mlair/reference_data_handler/__init__.py b/mlair/reference_data_handler/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/mlair/reference_data_handler/abstract_reference_data_handler.py b/mlair/reference_data_handler/abstract_reference_data_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..19c102946f4958992c82c7cf31d1281baa7418b9
--- /dev/null
+++ b/mlair/reference_data_handler/abstract_reference_data_handler.py
@@ -0,0 +1,90 @@
+__author__ = "Felix Kleinert"
+__date__ = "2021-01-29"
+
+import os
+import sys
+from abc import ABC
+
+import wget
+
+from mlair.configuration import check_path_and_create
+
+
+class AbstractReferenceModel(ABC):
+    """
+    Abstract reference model.
+
+    All classes providing some reference or competitor models must inherit from this class.
+    """
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def make_reference_available_locally(self):
+        """Make the reference available locally, to be implemented by subclasses."""
+        raise NotImplementedError
+
+    @staticmethod
+    def is_reference_available_locally(reference_path) -> bool:
+        """
+        Checks if reference is available locally
+
+        :param reference_path: directory that should contain the reference
+        :return: True if the directory has at least one entry, False if it is empty or does not exist
+        """
+        try:
+            return bool(os.listdir(reference_path))
+        except FileNotFoundError:
+            return False
+
+
+class AbstractReferenceb2share(AbstractReferenceModel):
+    """
+    Abstract class for reference models located on b2share (eudat or fz-juelich)
+
+    See also https://github.com/EUDAT-Training/B2SHARE-Training/blob/master/api/01_Retrieve_existing_record.md
+    """
+
+    def __init__(self, b2share_hosturl: str, b2share_bucket: str, b2share_key: str):
+        """
+        :param b2share_hosturl: base url of the b2share host
+        :param b2share_bucket: bucket part of the file url
+        :param b2share_key: name of the file inside the bucket
+        """
+        super().__init__()
+        self.b2share_hosturl = b2share_hosturl
+        self.b2share_bucket = b2share_bucket
+        self.b2share_key = b2share_key
+
+    @property
+    def b2share_url(self):
+        """Url of the file bucket, built from host url and bucket."""
+        return f"{self.b2share_hosturl}/api/files/{self.b2share_bucket}"
+
+    def bar_custom(self, current, total, width=80):
+        """
+        Write the download progress to stdout, used as progress bar callback of wget.download.
+
+        :param current: number of bytes downloaded so far
+        :param total: total number of bytes, non-positive values are treated as unknown size
+        :param width: not used, only part of the callback signature
+        """
+        if total > 0:
+            progress = f"{round(current / total * 100)}% [{current} / {total}] bytes"
+        else:
+            # no percentage without a usable total, this also avoids a ZeroDivisionError
+            progress = f"[{current} / unknown] bytes"
+        sys.stdout.write(f"\rDownloading {self.b2share_key}: {progress}")
+        sys.stdout.flush()
+
+    def download_from_b2share(self, tmp_download_path: str):
+        """
+        Download the file b2share_key from b2share_url and store it in tmp_download_path.
+
+        :param tmp_download_path: directory to store the downloaded file in
+        """
+        check_path_and_create(tmp_download_path)
+        wget.download(f"{self.b2share_url}/{self.b2share_key}",
+                      out=os.path.join(tmp_download_path, self.b2share_key),
+                      bar=self.bar_custom
+                      )
diff --git a/mlair/reference_data_handler/intellio3_v1_reference.py b/mlair/reference_data_handler/intellio3_v1_reference.py
new file mode 100644
index 0000000000000000000000000000000000000000..aec05a3bc66a7451088824893abae72af00621fa
--- /dev/null
+++ b/mlair/reference_data_handler/intellio3_v1_reference.py
@@ -0,0 +1,112 @@
+"""
+Extract forecasts from intelliO3 and store them for MLAir
+
+"""
+
+__author__ = "Felix Kleinert"
+__date__ = "2021-01-29"
+
+import os
+import shutil
+import subprocess
+
+import xarray as xr
+
+from mlair.configuration.path_config import check_path_and_create
+from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share
+
+
+class IntelliO3Reference(AbstractReferenceb2share):
+    """
+    Reference handler that extracts IntelliO3-ts v1.0 forecasts (Kleinert, 2021).
+
+    IntelliO3 forecasts can be used as a competitive model within MLAir. Downloads the IntelliO3 tar-ball and extracts
+    the forecasts.
+
+    Kleinert, F., Leufen, L. H., and Schultz, M. G.: IntelliO3-ts v1.0: a neural network approach to predict
+    near-surface ozone concentrations in Germany, Geosci. Model Dev., 14, 1–25,
+    https://doi.org/10.5194/gmd-14-1-2021, 2021.
+    """
+
+    def __init__(self, ref_name: str, ref_store_path: str = None):
+        """
+        :param ref_name: Desired Name of reference forecast
+        :type ref_name: str
+        :param ref_store_path: Path to store reference forecasts
+        :type ref_store_path: str
+        """
+        super().__init__(b2share_hosturl="https://b2share.eudat.eu",
+                         b2share_bucket="0cae9db2-f388-4136-8d28-9d9c5665d641",
+                         b2share_key="IntelliO3-ts.tar.gz",
+                         )
+        self.ref_name = ref_name
+        if ref_store_path is None:
+            ref_store_path = f"{self.ref_name}/"
+        self.ref_store_path = ref_store_path
+        self.tmp_extract_path = "tmp_downloads/"
+        self.orig_forecast_path = "IntelliO3-ts/IntelliO3-ts_network/forecasts/"
+        self.file_pattern = "forecasts_DE?????_test.nc"
+
+    def untar_forecasts(self):
+        """
+        Extracts IntelliO3 forecasts from tar-ball.
+
+        :raises subprocess.CalledProcessError: if tar exits with a non-zero status
+        """
+        # argument list instead of a shell string: no quoting issues and the exit status is checked
+        cmd = ["tar", "-xf", os.path.join(self.tmp_extract_path, self.b2share_key),
+               "--directory", self.tmp_extract_path,
+               "--wildcards", "--no-anchored", f"{self.orig_forecast_path}{self.file_pattern}"]
+        subprocess.run(cmd, check=True)
+
+    def file_list(self):
+        """
+        :return: base dir of tmp path and list of forecast files
+        :rtype: tuple(str, list(str))
+        :raises FileNotFoundError: if the directory with the extracted forecasts does not exist
+        """
+        forecast_dir = os.path.join(self.tmp_extract_path, self.orig_forecast_path)
+        try:
+            # the first item of os.walk describes the top directory itself
+            root, _, file_names = next(os.walk(forecast_dir))
+        except StopIteration:
+            raise FileNotFoundError(f"No extracted forecasts found in {forecast_dir}") from None
+        return root, file_names
+
+    def read_and_drop(self, sel_coords: dict = None):
+        """
+        Reads original forecast files, renames coord type and store forecasts as NetCdf4 files
+
+        :param sel_coords: coordinates to select from each forecast, defaults to {'type': 'CNN'}
+        :type sel_coords: dict
+        """
+        if sel_coords is None:
+            sel_coords = {'type': 'CNN'}
+        in_path, files = self.file_list()
+        check_path_and_create(self.ref_store_path)
+        for infile in files:
+            # context manager closes the original file once the converted copy is written
+            with xr.open_dataarray(os.path.join(in_path, infile)) as forecast:
+                data = forecast.sel(**sel_coords)
+                data.coords['type'] = self.ref_name
+                data.to_netcdf(os.path.join(self.ref_store_path, infile))
+
+    def make_reference_available_locally(self, remove_tmp_dir: bool = True):
+        """
+        Download, extract and store the forecasts in ref_store_path if they are not available there yet.
+
+        :param remove_tmp_dir: remove the temporary download directory after the forecasts are stored
+        :type remove_tmp_dir: bool
+        """
+        if not self.is_reference_available_locally(self.ref_store_path):
+            if not os.path.exists(os.path.join(self.tmp_extract_path, self.b2share_key)):
+                self.download_from_b2share(tmp_download_path=self.tmp_extract_path)
+            self.untar_forecasts()
+            self.read_and_drop()
+            if remove_tmp_dir:
+                shutil.rmtree(self.tmp_extract_path)
+
+
+if __name__ == '__main__':
+    io3 = IntelliO3Reference('IntelliO3-ts')
+    io3.make_reference_available_locally()
diff --git a/requirements.txt b/requirements.txt
index 371bb776e581925e507bf06c60bd866061c52791..77f300df6a8b97395fd5c6771310f7b7b9537a59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -63,7 +63,7 @@ wcwidth==0.1.8
 Werkzeug==1.0.0
 xarray==0.16.1
 zipp==3.1.0
-
+wget~=3.2
 setuptools~=49.6.0
 Cartopy==0.18.0 --no-binary shapely
 Shapely==1.7.0
\ No newline at end of file
diff --git a/test/test_reference_data_handler/test_abstract_reference_data_handler.py b/test/test_reference_data_handler/test_abstract_reference_data_handler.py
new file mode 100644
index 0000000000000000000000000000000000000000..98267fc913df299647c0fe8a1d95074e5d22bf04
--- /dev/null
+++ b/test/test_reference_data_handler/test_abstract_reference_data_handler.py
@@ -0,0 +1,58 @@
+import pytest
+import mock
+
+from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceModel
+from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share
+
+
+class TestAbstractReferenceDataHandler:
+
+    def test_init(self):
+        assert isinstance(AbstractReferenceModel(), AbstractReferenceModel)
+
+    def test_make_reference_available_locally(self):
+        arm = AbstractReferenceModel()
+        with pytest.raises(NotImplementedError):
+            arm.make_reference_available_locally()
+
+    @mock.patch("os.listdir", side_effect=[True, False, FileNotFoundError, ValueError])
+    def test_is_reference_available_locally(self, mock_file):
+        arm = AbstractReferenceModel()
+        assert arm.is_reference_available_locally("TestFile/") is True
+        assert arm.is_reference_available_locally("TestFile/") is False
+        assert arm.is_reference_available_locally("TestFile/") is False
+        with pytest.raises(ValueError):
+            arm.is_reference_available_locally("TestFile/")
+
+
+class TestAbstractReferenceb2share:
+
+    @pytest.fixture
+    def ar(self):
+        b2share_hosturl = "b2share.url"
+        b2share_bucket = "1111-2222-3333"
+        b2share_key = "b2share_key.tar.gz"
+        return AbstractReferenceb2share(b2share_hosturl, b2share_bucket, b2share_key)
+
+    def test_inheritance(self):
+        assert issubclass(AbstractReferenceb2share, AbstractReferenceModel)
+
+    def test_init(self, ar):
+        assert isinstance(ar, AbstractReferenceb2share)
+        assert ar.b2share_hosturl == "b2share.url"
+        assert ar.b2share_bucket == "1111-2222-3333"
+        assert ar.b2share_key == "b2share_key.tar.gz"
+
+    def test_b2share_url(self, ar):
+        assert isinstance(ar.b2share_url, str)
+        assert ar.b2share_url == 'b2share.url/api/files/1111-2222-3333'
+
+    def test_bar_custom(self, ar, capsys):
+        ar.bar_custom(50, 100)
+        assert capsys.readouterr().out == "\rDownloading b2share_key.tar.gz: 50% [50 / 100] bytes"
+        # a total of zero must not raise a ZeroDivisionError
+        ar.bar_custom(50, 0)
+        assert capsys.readouterr().out == "\rDownloading b2share_key.tar.gz: [50 / unknown] bytes"
+
+    def test_download_from_b2share(self):
+        pass
diff --git a/test/test_reference_data_handler/test_intellio3_v1_reference.py b/test/test_reference_data_handler/test_intellio3_v1_reference.py
new file mode 100644
index 0000000000000000000000000000000000000000..6582b99ec197f282a561bcdecd4d331cfc964327
--- /dev/null
+++ b/test/test_reference_data_handler/test_intellio3_v1_reference.py
@@ -0,0 +1,47 @@
+import pytest
+
+
+from mlair.reference_data_handler.intellio3_v1_reference import IntelliO3Reference
+from mlair.reference_data_handler.abstract_reference_data_handler import AbstractReferenceb2share
+
+
+class TestIntelliO3Reference:
+
+    @pytest.fixture
+    def io3(self):
+        return IntelliO3Reference("IntelliO3-test")
+
+    def test_init_none_path(self):
+        io3 = IntelliO3Reference("IntelliO3-test")
+        assert isinstance(io3, IntelliO3Reference)
+        assert io3.ref_name == "IntelliO3-test"
+        assert io3.ref_store_path == "IntelliO3-test/"
+        assert io3.tmp_extract_path == "tmp_downloads/"
+        assert io3.orig_forecast_path == "IntelliO3-ts/IntelliO3-ts_network/forecasts/"
+        assert io3.file_pattern == "forecasts_DE?????_test.nc"
+        assert io3.b2share_key == "IntelliO3-ts.tar.gz"
+        assert io3.b2share_bucket == "0cae9db2-f388-4136-8d28-9d9c5665d641"
+        assert io3.b2share_hosturl == "https://b2share.eudat.eu"
+
+    def test_init_extra_path(self):
+        io3 = IntelliO3Reference("IntelliO3-test", "DummyExtraPath/")
+        assert isinstance(io3, IntelliO3Reference)
+        assert io3.ref_store_path == "DummyExtraPath/"
+
+    def test_inheritance(self):
+        assert issubclass(IntelliO3Reference, AbstractReferenceb2share)
+
+    def test_untar_forecasts(self, io3):
+        pass
+
+    def test_file_list(self, io3):
+        # nothing has been extracted to this location
+        io3.tmp_extract_path = "NotExistingTmpDir/"
+        with pytest.raises(FileNotFoundError):
+            io3.file_list()
+
+    def test_read_and_drop(self):
+        pass
+
+    def test_make_reference_available_locally(self):
+        io3 = IntelliO3Reference("IntelliO3-test", "DummyExtraPath/")
diff --git a/test/test_reference_data_handler/tmp_downloads/IntelliO3-ts.tar.gz b/test/test_reference_data_handler/tmp_downloads/IntelliO3-ts.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5d98fd1b0c7758489aafeb9da4901fd1c4c7736d
Binary files /dev/null and b/test/test_reference_data_handler/tmp_downloads/IntelliO3-ts.tar.gz differ