diff --git a/mlair/helpers/geofunctions.py b/mlair/helpers/geofunctions.py new file mode 100644 index 0000000000000000000000000000000000000000..cf6313584f898304d1a646892accf7507ef75ed2 --- /dev/null +++ b/mlair/helpers/geofunctions.py @@ -0,0 +1,82 @@ +"""Functions related to geo locations etc.""" +__author__ = 'Felix Kleinert' +__date__ = '2021-02-16' + +import dask.array as da +import numpy as np +import xarray as xr + +from mlair.helpers.helpers import convert2xrda +from typing import Union, Tuple + +xr_int_float = Union[xr.DataArray, xr.Dataset, np.ndarray, int, float] +tuple_of_2xr_int_float = Tuple[xr_int_float, xr_int_float] + + +def deg2rad_all_points(lat1: xr_int_float, lon1: xr_int_float, + lat2: xr_int_float, lon2: xr_int_float) -> Tuple[tuple_of_2xr_int_float, tuple_of_2xr_int_float]: + """ + Converts coordinates provided in lat1, lon1, lat2, and lon2 from deg to rad. In fact this method just calls + dasks deg2rad method on all inputs and returns a tuple of tuples. + + :param lat1: Latitude(-s) of first location + :type lat1: + :param lon1: Longitude(-s) of first location + :type lon1: + :param lat2: Latitude(-s) of second location + :type lat2: + :param lon2: Longitude(-s) of second location + :type lon2: + :return: Lats and lons in radians ((lat1, lon1), (lat2, lon2)) + :rtype: + """ + lat1, lon1, lat2, lon2 = da.deg2rad(lat1), da.deg2rad(lon1), da.deg2rad(lat2), da.deg2rad(lon2) + return (lat1, lon1), (lat2, lon2) + + +def haversine_dist(lat1: xr_int_float, lon1: xr_int_float, + lat2: xr_int_float, lon2: xr_int_float, + to_radians: bool = True, earth_radius: float = 6371.,) -> xr.DataArray: + """ + Calculate the great circle distance between two points + on the Earth (specified in decimal degrees or in radians) + + Reference: ToBeAdded + (First implementation provided by M. Langguth) + + :param lat1: Latitude(-s) of first location + :param lon1: Longitude(-s) of first location + :param lat2: Latitude(-s) of second location + :param lon2: Longitude(-s) of second location + :param to_radians: Flag if conversion from degree to radiant is required + :param earth_radius: Earth radius in kilometers + :return: Distance between locations in kilometers + """ + + if to_radians: + (lat1, lon1), (lat2, lon2) = deg2rad_all_points(lat1, lon1, lat2, lon2) + + lat1 = convert2xrda(lat1, use_1d_default=True) + lon1 = convert2xrda(lon1, use_1d_default=True) + lat2 = convert2xrda(lat2, use_1d_default=True) + lon2 = convert2xrda(lon2, use_1d_default=True) + + assert lat1.shape == lon1.shape + assert lat2.shape == lon2.shape + assert isinstance(lat1, xr.DataArray) + assert isinstance(lon1, xr.DataArray) + assert isinstance(lat2, xr.DataArray) + assert isinstance(lon2, xr.DataArray) + assert len(lat1.shape) >= len(lat2.shape) + + # broadcast lats and lons to calculate distances in a vectorized manner. + lat1, lat2 = xr.broadcast(lat1, lat2) + lon1, lon2 = xr.broadcast(lon1, lon2) + + a = da.sin((lat2 - lat1) / 2.0) ** 2 + \ + da.cos(lat1) * da.cos(lat2) * da.sin((lon2 - lon1) / 2.0) ** 2 + + dist = earth_radius * 2. * da.arcsin(da.sqrt(a)) + + return dist + diff --git a/mlair/helpers/helpers.py b/mlair/helpers/helpers.py index 42b66dcb68b184112a321473e3aae250d697c452..ee727ef59ff35334be0a52a4d78dbae814d6c205 100644 --- a/mlair/helpers/helpers.py +++ b/mlair/helpers/helpers.py @@ -5,7 +5,9 @@ __date__ = '2019-10-21' import inspect import math +import numpy as np import xarray as xr +import dask.array as da from typing import Dict, Callable, Union, List, Any @@ -125,3 +127,54 @@ def extract_value(encapsulated_value): "entry is not supported by this function.") except TypeError: return encapsulated_value + + +def is_xarray(arr) -> bool: + """ + Returns True if arr is xarray.DataArray or xarray.Dataset. + :param arr: variable in question + :type arr: Any + :return: + :rtype: bool + """ + return isinstance(arr, xr.DataArray) or isinstance(arr, xr.Dataset) + + +def convert2xrda(arr: Union[xr.DataArray, xr.Dataset, np.ndarray, int, float], + use_1d_default: bool = False, **kwargs) -> Union[xr.DataArray, xr.Dataset]: + """ + Converts np.array, int or float object to xarray.DataArray. + + If a xarray.DataArray or xarray.Dataset is passed, returns that unchanged. + :param arr: + :type arr: xr.DataArray, xr.Dataset, np.ndarray, int, float + :param use_1d_default: + :type use_1d_default: bool + :param kwargs: Any additional kwargs which are accepted by xr.DataArray() + :type kwargs: + :return: + :rtype: xr.DataArray, xr.DataSet + """ + if is_xarray(arr): + return arr + else: + if use_1d_default: + if isinstance(arr, da.core.Array): + raise TypeError(f"`use_1d_default=True' is used with `arr' of type da.array. For da.arrays please " + f"pass `use_1d_default=False' and specify keywords for xr.DataArray via kwargs.") + dims = kwargs.pop('dims', 'points') + coords = kwargs.pop('coords', None) + try: + if coords is None: + coords = {dims: range(arr.shape[0])} + except (AttributeError, IndexError): + if isinstance(arr, int) or isinstance(arr, float): + coords = kwargs.pop('coords', {dims: range(1)}) + dims = to_list(dims) + else: + raise TypeError(f"`arr' must be arry-like, int or float. But is of type {type(arr)}") + kwargs.update({'dims': dims, 'coords': coords}) + + return xr.DataArray(arr, **kwargs) + + diff --git a/test/test_helpers/test_geofunctions.py b/test/test_helpers/test_geofunctions.py new file mode 100644 index 0000000000000000000000000000000000000000..56ea6cba423f262b19e7294603f93268f9303548 --- /dev/null +++ b/test/test_helpers/test_geofunctions.py @@ -0,0 +1,89 @@ +import pytest +import dask.array as da +import numpy as np +import xarray as xr + +from mlair.helpers.geofunctions import deg2rad_all_points, haversine_dist + + +class TestDeg2RadAllPoints: + + @pytest.fixture + def custom_np_arrays(self): + return np.array([0., 0.]), np.array([30., 30.]), np.array([60., 60.]), np.array([90., 90.]) + + @pytest.fixture + def custom_da_arrays(self, custom_np_arrays): + return (da.array(i) for i in custom_np_arrays) + + @pytest.fixture + def custom_xr_arrays(self, custom_np_arrays): + return (xr.DataArray(i) for i in custom_np_arrays) + + @pytest.mark.parametrize("value", ((0., 30., 60., 90.), (0, 30, 60, 90), + pytest.lazy_fixture('custom_np_arrays'))) + def test_deg2rad_all_points_scalar_inputs(self, value): + (lat1, lon1), (lat2, lon2) = deg2rad_all_points(*value) + assert lat1 == pytest.approx(0.0) + assert lon1 == pytest.approx(np.pi/6.) + assert lat2 == pytest.approx(np.pi/3.) + assert lon2 == pytest.approx(np.pi/2) + + def test_deg2rad_all_points_xr_arr_inputs(self, custom_xr_arrays): + (lat1, lon1), (lat2, lon2) = deg2rad_all_points(*custom_xr_arrays) + assert (lat1 == pytest.approx(0.0)).all() + assert (lon1 == pytest.approx(np.pi / 6.)).all() + assert (lat2 == pytest.approx(np.pi / 3.)).all() + assert (lon2 == pytest.approx(np.pi / 2)).all() + + def test_deg2rad_all_points_xr_arr_inputs(self, custom_da_arrays): + (lat1, lon1), (lat2, lon2) = deg2rad_all_points(*custom_da_arrays) + assert lat1.compute() == pytest.approx(0.0) + assert lon1.compute() == pytest.approx(np.pi / 6.) + assert lat2.compute() == pytest.approx(np.pi / 3.) + assert lon2.compute() == pytest.approx(np.pi / 2) + + +class TestHaversineDist: + + @pytest.mark.parametrize("lat1,lon1,lat2,lon2,to_radians,expected_dist", + ((90., 0., -90., 0., True, np.pi), + (np.pi / 2., 0., np.pi / -2., 0., False, np.pi), + (0., 0., 0., 180., True, np.pi), + (0., 0., 0., np.pi, False, np.pi), + (0., 0., -45., 0, True, np.pi / 4.), + (0., 0., np.pi / -4., 0, False, np.pi / 4.), + )) + def test_haversine_dist_on_unit_sphere_scalars(self, lat1, lon1, lat2, lon2, to_radians, expected_dist): + dist = haversine_dist(lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2, to_radians=to_radians, earth_radius=1.) + assert dist == pytest.approx(expected_dist) + + @pytest.mark.parametrize("lat1,lon1,lat2,lon2,to_radians,expected_dist", + ( + (xr.DataArray(np.array([90., 0, 45.]), dims='first', coords={'first': range(3)}), + xr.DataArray(np.array([0., 0, 0]), dims='first', coords={'first': range(3)}), + -90., 0., True, + np.array([[np.pi], [np.pi / 2.], [3./4.*np.pi]])), + (xr.DataArray(np.array([90., 0, 45.]), dims='first', coords={'first': range(3)}), + xr.DataArray(np.array([0., 0, 0]), dims='first', coords={'first': range(3)}), + np.array([-90., 0., 45.]), + np.array([0., 0., 0.]), True, + np.array([[np.pi, np.pi/2., np.pi/4.], + [np.pi/2., 0., np.pi/4.], + [3./4.*np.pi, np.pi/4., 0.]])), + )) + def test_haversine_dist_on_unit_sphere_fields_and_scalars(self, lat1, lon1, lat2, lon2, to_radians, expected_dist): + dist = haversine_dist(lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2, to_radians=to_radians, earth_radius=1.) + assert (dist == pytest.approx(expected_dist)).all() + + @pytest.mark.parametrize("lat1,lon1,lat2,lon2,to_radians", + ( + (np.array([0., 0.]), 0., 0., 0., True), + (0., np.array([0., 0.]), 0., 0., True), + (0., 0., np.array([0., 0.]), 0., True), + (0., 0., 0., np.array([0., 0.]), True), + + )) + def test_haversine_dist_on_unit_sphere_missmatch_dimensions(self, lat1, lon1, lat2, lon2, to_radians): + with pytest.raises(AssertionError) as e: + dist = haversine_dist(lat1=lat1, lon1=lon1, lat2=lat2, lon2=lon2, to_radians=to_radians, earth_radius=1.) diff --git a/test/test_helpers/test_helpers.py b/test/test_helpers/test_helpers.py index a5aaa707c83a65c3e10f76fdbfcd8142d3615e2e..f2e2b341afa424ce351c0253f41c75e362b77eba 100644 --- a/test/test_helpers/test_helpers.py +++ b/test/test_helpers/test_helpers.py @@ -1,5 +1,6 @@ import numpy as np import xarray as xr +import dask.array as da import datetime as dt import logging @@ -9,10 +10,12 @@ import os import mock import pytest +import string from mlair.helpers import to_list, dict_to_xarray, float_round, remove_items, extract_value, select_from_dict from mlair.helpers import PyTestRegex from mlair.helpers import Logger, TimeTracking +from mlair.helpers.helpers import is_xarray, convert2xrda class TestToList: @@ -312,3 +315,116 @@ class TestExtractValue: extract_value([1, 2, 3]) assert "Trying to extract an encapsulated value from objects with more than a single entry is not supported " \ "by this function." in e.value.args[0] + + +class TestIsXarray: + + @pytest.fixture + def custom_xr_data(self): + return xr.DataArray(np.array(range(5))) + + def test_is_xarray_xr_input(self, custom_xr_data): + # data_array = xr.DataArray(np.array(range(5))) + assert is_xarray(custom_xr_data) is True + assert is_xarray(xr.Dataset({'test': custom_xr_data})) is True + + def test_is_xarray_other_input(self, custom_xr_data): + assert is_xarray(1) is False + assert is_xarray(1.) is False + assert is_xarray([1, 2.]) is False + assert is_xarray([custom_xr_data]) is False + + +class TestConvert2xrDa: + + @pytest.fixture + def custom_1d_npdata(self): + return np.array(range(9)) + + @pytest.fixture() + def custom_2d_npdata(self, custom_1d_npdata): + return np.stack([custom_1d_npdata, 2 * custom_1d_npdata]) + + @pytest.fixture + def custom_xr_dataarray(self, custom_1d_npdata): + return xr.DataArray(custom_1d_npdata) + + @pytest.fixture + def custom_xr_dataset(self, custom_xr_dataarray): + return xr.Dataset({'test_1': custom_xr_dataarray}) + + @pytest.fixture + def custom_1d_daarray(self, custom_1d_npdata): + return da.array(custom_1d_npdata) + + def test_convert2xrda_xrdata_in(self, custom_xr_dataarray, custom_xr_dataset): + assert (convert2xrda(custom_xr_dataarray) == custom_xr_dataarray).all() + assert (convert2xrda(custom_xr_dataset) == custom_xr_dataset).all() + + def test_convert2xrda_npdata_in_nokwargs(self, custom_1d_npdata, custom_2d_npdata): + converted_data = convert2xrda(custom_1d_npdata) + assert isinstance(converted_data, xr.DataArray) + assert (converted_data.values == custom_1d_npdata).all() + assert converted_data.dims == ('dim_0',) + assert converted_data.dim_0.size == custom_1d_npdata.shape[0] + + # Feed in a 2D-np.array without additional kwargs + converted_data = convert2xrda(custom_2d_npdata) + assert isinstance(converted_data, xr.DataArray) + assert (converted_data.values == custom_2d_npdata).all() + assert converted_data.dims == ('dim_0', 'dim_1') + assert converted_data.dim_0.size == custom_2d_npdata.shape[0] + assert converted_data.dim_1.size == custom_2d_npdata.shape[1] + + def test_convert2xrda_npdata_in_nokwargs_default_true(self, custom_1d_npdata, custom_2d_npdata): + converted_data = convert2xrda(custom_1d_npdata, use_1d_default=True) + assert isinstance(converted_data, xr.DataArray) + assert (converted_data.values == custom_1d_npdata).all() + assert converted_data.dims == ('points',) + assert converted_data.points.size == custom_1d_npdata.shape[0] + + # Feed in a 2D-np.array without additional kwargs + with pytest.raises(ValueError) as e: + converted_data = convert2xrda(custom_2d_npdata, use_1d_default=True) + assert "different number of dimensions on data and dims: 2 vs 1" in e.value.args[0] + + @pytest.mark.parametrize("use_1d_default", (False, True)) + def test_convert2xrda_npdata_in_kwargs(self, custom_1d_npdata, custom_2d_npdata, use_1d_default): + converted_data = convert2xrda(custom_1d_npdata, use_1d_default=use_1d_default, dims='other_points') + assert isinstance(converted_data, xr.DataArray) + assert (converted_data.values == custom_1d_npdata).all() + assert converted_data.dims == ('other_points',) + assert converted_data.other_points.size == custom_1d_npdata.shape[0] + + # Feed in a 2D-np.array with correct additional kwargs + converted_data = convert2xrda(custom_2d_npdata, use_1d_default=use_1d_default, + dims=['test_dim_0', 'test_dim_1'], + coords={'test_dim_0': list(string.ascii_lowercase[:custom_2d_npdata.shape[0]]), + 'test_dim_1': list(string.ascii_lowercase[:custom_2d_npdata.shape[1]]), + }, + ) + assert isinstance(converted_data, xr.DataArray) + assert (converted_data.values == custom_2d_npdata).all() + assert converted_data.dims == ('test_dim_0', 'test_dim_1') + assert (converted_data.coords['test_dim_0'].values == np.array(['a', 'b'])).all() + + @pytest.mark.parametrize("scalar", (1, 2.)) + def test_convert2xrda_int_float_in_nokwargs_default_true(self, scalar): + converted_data = convert2xrda(scalar, use_1d_default=True) + assert isinstance(converted_data, xr.DataArray) + assert converted_data.values == np.array([scalar]) + assert converted_data.dims == ('points',) + + @pytest.mark.parametrize("wrong_input", ({1: 'b'}, [1], 'abc')) + def test_convert2xrda_wrong_type_in_default_true_nokwargs(self, wrong_input): + with pytest.raises(TypeError) as e: + converted_data = convert2xrda(wrong_input, use_1d_default=True) + assert f"`arr' must be arry-like, int or float. But is of type {type(wrong_input)}" in e.value.args[0] + + def test_convert2xrda_dask_in_default_true_nokwargs(self, custom_1d_daarray): + with pytest.raises(TypeError) as e: + convert2xrda(custom_1d_daarray, True) + assert "`use_1d_default=True' is used with `arr' of type da.array. For da.arrays please pass `use_1d_default=False' and specify keywords for xr.DataArray via kwargs." in \ + e.value.args[0] + assert "`use_1d_default=True' is used with `arr' of type da.array. For da.arrays please pass" + \ + " `use_1d_default=False' and specify keywords for xr.DataArray via kwargs." in e.value.args[0]