Skip to content
Snippets Groups Projects
Commit 6f25c999 authored by Lukas Leufen
Browse files

include data storage

See merge request toar/machinelearningtools!14
parents ea26f48b 4d4c3eb6
Branches
Tags
2 merge requests: !17 update to v0.4.0, !14 include data storage
Pipeline #26417 passed
......@@ -23,3 +23,6 @@ exclude_lines =
# Don't complain about import statements
import
# Don't complain about abstract class declarations and placeholders
pass
__author__ = 'Lukas Leufen'
__date__ = '2019-11-22'
from abc import ABC
from typing import Any, List, Tuple, Union
class NameNotFoundInDataStore(Exception):
    """
    Raised when the requested name cannot be found anywhere in the data store, i.e. in none of its scopes.
    """
class NameNotFoundInScope(Exception):
    """
    Raised when the requested name is not available in the provided scope, although it is stored under at least one
    other scope of the data store.
    """
class EmptyScope(Exception):
    """
    Raised when the provided scope is not part of the data store, i.e. no object is stored under it.
    """
class AbstractDataStore(ABC):
    """
    Common interface of the data stores that hold all settings of the experiment workflow.

    A data store saves experiment parameters for the following modules and predefined parameters loaded during the
    experiment setup phase. It is hierarchically structured, so that global settings can be overwritten by local
    adjustments.

    The methods of this base class are placeholders: they do nothing and return None. The annotations describe what
    an implementation is expected to return. The methods are intentionally not marked with `abstractmethod`, so that
    the base class itself stays instantiable.
    """

    def __init__(self):
        # empty initialise the data-store variables
        self._store = {}

    def put(self, name: str, obj: Any, scope: str) -> None:
        """
        Abstract method to add an object to the data store

        :param name: Name of object to store
        :param obj: The object itself to be stored
        :param scope: the scope / context of the object, under that the object is valid
        """
        pass

    def get(self, name: str, scope: str) -> Any:
        """
        Abstract method to get an object from the data store

        :param name: Name to look for
        :param scope: scope to search the name for
        :return: the stored object
        """
        pass

    def search_name(self, name: str) -> List[str]:
        """
        Abstract method to search for all occurrences of given `name` in the entire data store.

        :param name: Name to look for
        :return: search result
        """
        pass

    def search_scope(self, scope: str, current_scope_only: bool = True,
                     return_all: bool = False) -> List[Union[str, Tuple]]:
        """
        Abstract method to search for all object names that are stored for given scope

        :param scope: scope to look for
        :param current_scope_only: restrict the search to the given scope if True, else include superior scopes too
        :param return_all: return name, definition scope and value if True, else just the name
        :return: search result
        """
        pass

    def list_all_scopes(self) -> List[str]:
        """
        Abstract method to list all scopes in data store

        :return: all found scopes
        """
        pass

    def list_all_names(self) -> List[str]:
        """
        Abstract method to list all names in data store

        :return: all found names
        """
        pass
class DataStoreByVariable(AbstractDataStore):
    """
    Data store for all settings of the experiment workflow, organised by variable name.

    The store saves experiment parameters for the following modules and predefined parameters loaded during the
    experiment setup phase. It is hierarchically structured (scope levels are separated by "."), so that global
    settings can be overwritten by local adjustments.

    This implementation stores data as

        <variable1>
            <scope1>: value
            <scope2>: value
        <variable2>
            <scope1>: value
            <scope3>: value
    """

    def put(self, name: str, obj: Any, scope: str) -> None:
        """
        Store an object `obj` with given `name` under `scope`. In the current implementation, existing entries are
        overwritten.

        :param name: Name of object to store
        :param obj: The object itself to be stored
        :param scope: the scope / context of the object, under that the object is valid
        """
        # open new variable related store with `name` as key if not existing
        self._store.setdefault(name, {})[scope] = obj

    def get(self, name: str, scope: str) -> Any:
        """
        Retrieve an object with `name` from `scope`. If no object can be found in the exact scope, take an iterative
        look on the levels above. Raises a NameNotFoundInDataStore error, if no object with given name can be found in
        the entire data store. Raises a NameNotFoundInScope error, if the object is in the data store but not in the
        given scope and its levels above (could be either included in another scope or a more detailed sub-scope).

        :param name: Name to look for
        :param scope: scope to search the name for
        :return: the stored object
        """
        return self._stride_through_scopes(name, scope)[2]

    def _stride_through_scopes(self, name, scope, depth=0):
        """
        Look up `name` in `scope` and, if it is missing there, in each superior scope from the innermost outwards.

        :param name: Name to look for
        :param scope: scope to start the search in
        :param depth: number of trailing scope levels to strip before the first lookup
        :return: tuple of name, the scope the object was found in, and the object itself
        :raises NameNotFoundInDataStore: if `name` is not stored under any scope
        :raises NameNotFoundInScope: if `name` is stored, but neither in `scope` nor in one of its superior scopes
        """
        scopes_of_name = self._store.get(name, {})
        # iterate instead of recursing inside an `except KeyError` handler, so that the final error is raised
        # without a stack of chained KeyError contexts
        for level in range(depth, scope.count(".") + 1):
            local_scope = scope.rsplit(".", maxsplit=level)[0]
            if local_scope in scopes_of_name:
                return name, local_scope, scopes_of_name[local_scope]
        occurrences = self.search_name(name)
        if not occurrences:
            raise NameNotFoundInDataStore(f"Couldn't find {name} in data store")
        raise NameNotFoundInScope(f"Couldn't find {name} in scope {scope} . {name} is only defined in "
                                  f"{occurrences}")

    def search_name(self, name: str) -> List[str]:
        """
        Search for all occurrences of given `name` in the entire data store.

        :param name: Name to look for
        :return: sorted list with all scopes and sub-scopes containing an object stored as `name` (empty if unknown)
        """
        return sorted(self._store.get(name, {}))

    def search_scope(self, scope: str, current_scope_only: bool = True,
                     return_all: bool = False) -> List[Union[str, Tuple]]:
        """
        Search for given `scope` and list all object names stored under this scope. To look also for all superior scopes
        set `current_scope_only=False`. To return the scope and the object's value too, set `return_all=True`.

        :param scope: scope to look for
        :param current_scope_only: look only for all names for given scope if true, else search for names from superior
            scopes too.
        :param return_all: return name, definition scope and value if True, else just the name
        :return: list with all object names (if `return_all=False`) or list with tuple of object name, object scope and
            object value ordered by name (if `return_all=True`)
        :raises EmptyScope: if `current_scope_only=True` and nothing is stored under exactly this scope
        """
        if current_scope_only:
            names = sorted(name for name, scopes in self._store.items() if scope in scopes)
            if not names:
                raise EmptyScope(f"Given scope {scope} is not part of the data store. Available scopes are: "
                                 f"{self.list_all_scopes()}")
            if return_all:
                return [(name, scope, self._store[name][scope]) for name in names]
            return names

        results = []
        for name in self.list_all_names():
            try:
                res = self._stride_through_scopes(name, scope)
            except (NameNotFoundInDataStore, NameNotFoundInScope):
                # name is not visible from this scope, so it is not part of the result
                continue
            results.append(res if return_all else res[0])
        if return_all:
            return sorted(results, key=lambda tup: tup[0])
        return sorted(results)

    def list_all_scopes(self) -> List[str]:
        """
        List all available scopes in data store

        :return: sorted list of all scopes that hold at least one object
        """
        return sorted({scope for scopes in self._store.values() for scope in scopes})

    def list_all_names(self) -> List[str]:
        """
        List all names available in the data store.

        :return: all names
        """
        return sorted(self._store)
class DataStoreByScope(AbstractDataStore):
    """
    Data store for all settings of the experiment workflow, organised by scope.

    The store saves experiment parameters for the following modules and predefined parameters loaded during the
    experiment setup phase. It is hierarchically structured (scope levels are separated by "."), so that global
    settings can be overwritten by local adjustments.

    This implementation stores data as

        <scope1>
            <variable1>: value
            <variable2>: value
        <scope2>
            <variable1>: value
            <variable3>: value
    """

    def put(self, name: str, obj: Any, scope: str) -> None:
        """
        Store an object `obj` with given `name` under `scope`. In the current implementation, existing entries are
        overwritten.

        :param name: Name of object to store
        :param obj: The object itself to be stored
        :param scope: the scope / context of the object, under that the object is valid
        """
        # open new scope related store with `scope` as key if not existing
        self._store.setdefault(scope, {})[name] = obj

    def get(self, name: str, scope: str) -> Any:
        """
        Retrieve an object with `name` from `scope`. If no object can be found in the exact scope, take an iterative
        look on the levels above. Raises a NameNotFoundInDataStore error, if no object with given name can be found in
        the entire data store. Raises a NameNotFoundInScope error, if the object is in the data store but not in the
        given scope and its levels above (could be either included in another scope or a more detailed sub-scope).

        :param name: Name to look for
        :param scope: scope to search the name for
        :return: the stored object
        """
        return self._stride_through_scopes(name, scope)[2]

    def _stride_through_scopes(self, name, scope, depth=0):
        """
        Look up `name` in `scope` and, if it is missing there, in each superior scope from the innermost outwards.

        :param name: Name to look for
        :param scope: scope to start the search in
        :param depth: number of trailing scope levels to strip before the first lookup
        :return: tuple of name, the scope the object was found in, and the object itself
        :raises NameNotFoundInDataStore: if `name` is not stored under any scope
        :raises NameNotFoundInScope: if `name` is stored, but neither in `scope` nor in one of its superior scopes
        """
        # iterate instead of recursing inside an `except KeyError` handler, so that the final error is raised
        # without a stack of chained KeyError contexts
        for level in range(depth, scope.count(".") + 1):
            local_scope = scope.rsplit(".", maxsplit=level)[0]
            objects = self._store.get(local_scope, {})
            if name in objects:
                return name, local_scope, objects[name]
        occurrences = self.search_name(name)
        if not occurrences:
            raise NameNotFoundInDataStore(f"Couldn't find {name} in data store")
        raise NameNotFoundInScope(f"Couldn't find {name} in scope {scope} . {name} is only defined in "
                                  f"{occurrences}")

    def search_name(self, name: str) -> List[str]:
        """
        Search for all occurrences of given `name` in the entire data store.

        :param name: Name to look for
        :return: sorted list with all scopes and sub-scopes containing an object stored as `name` (empty if unknown)
        """
        return sorted(key for key, val in self._store.items() if name in val)

    def search_scope(self, scope: str, current_scope_only: bool = True,
                     return_all: bool = False) -> List[Union[str, Tuple]]:
        """
        Search for given `scope` and list all object names stored under this scope. To look also for all superior scopes
        set `current_scope_only=False`. To return the scope and the object's value too, set `return_all=True`.

        :param scope: scope to look for
        :param current_scope_only: look only for all names for given scope if true, else search for names from superior
            scopes too.
        :param return_all: return name, definition scope and value if True, else just the name
        :return: list with all object names (if `return_all=False`) or list with tuple of object name, object scope and
            object value ordered by name (if `return_all=True`)
        :raises EmptyScope: if `current_scope_only=True` and nothing is stored under exactly this scope
        """
        if current_scope_only:
            # check membership up front, so that EmptyScope is not raised from inside a KeyError handler
            if scope not in self._store:
                raise EmptyScope(f"Given scope {scope} is not part of the data store. Available scopes are: "
                                 f"{self.list_all_scopes()}")
            objects = self._store[scope]
            names = sorted(objects)
            if return_all:
                return [(name, scope, objects[name]) for name in names]
            return names

        results = []
        for name in self.list_all_names():
            try:
                res = self._stride_through_scopes(name, scope)
            except (NameNotFoundInDataStore, NameNotFoundInScope):
                # name is not visible from this scope, so it is not part of the result
                continue
            results.append(res if return_all else res[0])
        if return_all:
            return sorted(results, key=lambda tup: tup[0])
        return sorted(results)

    def list_all_scopes(self) -> List[str]:
        """
        List all available scopes in data store

        :return: sorted list of all scopes that hold at least one object
        """
        return sorted(self._store)

    def list_all_names(self) -> List[str]:
        """
        List all names available in the data store.

        :return: all names
        """
        return sorted({name for objects in self._store.values() for name in objects})
__author__ = 'Lukas Leufen'
__date__ = '2019-11-22'
from src.datastore import AbstractDataStore, DataStoreByVariable, DataStoreByScope
from src.datastore import NameNotFoundInDataStore, NameNotFoundInScope, EmptyScope
import pytest
class TestAbstractDataStore:
    """Checks for the bare `AbstractDataStore` base class."""

    @pytest.fixture
    def ds(self):
        """Provide a freshly created abstract data store."""
        return AbstractDataStore()

    def test_init(self, ds):
        """A new data store starts with an empty internal storage."""
        internal_store = ds._store
        assert internal_store == {}
class TestDataStoreByVariable:
    """Behavioural tests for the variable-first data store implementation."""

    # (name, obj, scope) entries shared by the scope related tests, listed in the order they are put into the store
    NESTED_ENTRIES = (
        ("number", 22, "general"),
        ("number", 11, "general.sub"),
        ("number1", 22, "general.sub"),
        ("number2", 3, "general.sub.sub"),
    )

    @staticmethod
    def _fill(store, entries):
        """Put every `(name, obj, scope)` entry into `store`, keeping the given order."""
        for name, obj, scope in entries:
            store.put(name, obj, scope)

    @pytest.fixture
    def ds(self):
        """Provide an empty data store for each test."""
        return DataStoreByVariable()

    def test_put(self, ds):
        """An object is stored under its name first and its scope second."""
        ds.put("number", 3, "general.subscope")
        assert ds._store["number"]["general.subscope"] == 3

    def test_get(self, ds):
        """An object can be read back from the exact scope it was stored in."""
        ds.put("number", 3, "general.subscope")
        assert ds.get("number", "general.subscope") == 3

    def test_get_with_sub_scope(self, ds):
        """A value in a sub-scope shadows the value of the superior scope."""
        self._fill(ds, [("number", 3, "general"), ("number", 10, "general.subscope")])
        assert ds.get("number", "general.subscope") == 10
        assert ds.get("number", "general") == 3

    def test_get_with_not_existing_sub_scope(self, ds):
        """A name missing in the sub-scope is taken from the superior scope."""
        self._fill(ds, [("number", 3, "general"),
                        ("number2", 10, "general.subscope"),
                        ("number2", 1, "general")])
        assert ds.get("number", "general.subscope") == 3

    def test_raise_not_in_data_store(self, ds):
        """Asking for a completely unknown name raises NameNotFoundInDataStore."""
        ds.put("number", 22, "general")
        with pytest.raises(NameNotFoundInDataStore) as exc_info:
            ds.get("number3", "general")
        message = exc_info.value.args[0]
        assert "Couldn't find number3 in data store" in message

    def test_search(self, ds):
        """search_name lists all scopes holding the name in sorted order."""
        self._fill(ds, [("number", 22, "general"),
                        ("number", 22, "general2"),
                        ("number", 22, "general.sub")])
        assert ds.search_name("number") == ["general", "general.sub", "general2"]

    def test_raise_not_in_scope(self, ds):
        """A name that only lives in a sibling scope raises NameNotFoundInScope."""
        ds.put("number", 11, "general.sub")
        with pytest.raises(NameNotFoundInScope) as exc_info:
            ds.get("number", "general.sub2")
        message = exc_info.value.args[0]
        assert "Couldn't find number in scope general.sub2 . number is only defined in ['general.sub']" in message

    def test_list_all_scopes(self, ds):
        """list_all_scopes returns every scope once, sorted."""
        self._fill(ds, [("number", 22, "general2"),
                        ("number", 11, "general.sub"),
                        ("number2", 2, "general.sub"),
                        ("number", 3, "general.sub3"),
                        ("number", 1, "general")])
        assert ds.list_all_scopes() == ['general', 'general.sub', 'general.sub3', 'general2']

    def test_search_scope(self, ds):
        """search_scope lists only the names stored in exactly the given scope."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.search_scope("general.sub") == ["number", "number1"]

    def test_search_empty_scope(self, ds):
        """Searching a scope without any entry raises EmptyScope and names the available scopes."""
        self._fill(ds, [("number", 22, "general2"), ("number", 11, "general.sub")])
        with pytest.raises(EmptyScope) as exc_info:
            ds.search_scope("general.sub2")
        message = exc_info.value.args[0]
        assert "Given scope general.sub2 is not part of the data store." in message
        assert "Available scopes are: ['general.sub', 'general2']" in message

    def test_list_all_names(self, ds):
        """list_all_names returns every name once, sorted."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.list_all_names() == ["number", "number1", "number2"]

    def test_search_scope_and_all_superiors(self, ds):
        """With current_scope_only=False names of superior scopes are included."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.search_scope("general.sub", current_scope_only=False) == ["number", "number1"]
        assert ds.search_scope("general.sub.sub", current_scope_only=False) == ["number", "number1", "number2"]

    def test_search_scope_return_all(self, ds):
        """With return_all=True name, scope and value are returned for each hit."""
        self._fill(ds, self.NESTED_ENTRIES)
        expected = [("number", "general.sub", 11), ("number1", "general.sub", 22)]
        assert ds.search_scope("general.sub", return_all=True) == expected

    def test_search_scope_and_all_superiors_return_all(self, ds):
        """Each name is reported with the closest scope that defines it and the value stored there."""
        self._fill(ds, self.NESTED_ENTRIES)
        ds.put("number", "ABC", "general.sub.sub")
        expected_sub = [("number", "general.sub", 11), ("number1", "general.sub", 22)]
        expected_sub_sub = [("number", "general.sub.sub", "ABC"),
                            ("number1", "general.sub", 22),
                            ("number2", "general.sub.sub", 3)]
        assert ds.search_scope("general.sub", current_scope_only=False, return_all=True) == expected_sub
        assert ds.search_scope("general.sub.sub", current_scope_only=False, return_all=True) == expected_sub_sub
class TestDataStoreByScope:
    """Behavioural tests for the scope-first data store implementation."""

    # (name, obj, scope) entries shared by the scope related tests, listed in the order they are put into the store
    NESTED_ENTRIES = (
        ("number", 22, "general"),
        ("number", 11, "general.sub"),
        ("number1", 22, "general.sub"),
        ("number2", 3, "general.sub.sub"),
    )

    @staticmethod
    def _fill(store, entries):
        """Put every `(name, obj, scope)` entry into `store`, keeping the given order."""
        for name, obj, scope in entries:
            store.put(name, obj, scope)

    @pytest.fixture
    def ds(self):
        """Provide an empty data store for each test."""
        return DataStoreByScope()

    def test_put_with_scope(self, ds):
        """An object is stored under its scope first and its name second."""
        ds.put("number", 3, "general.subscope")
        assert ds._store["general.subscope"]["number"] == 3

    def test_get(self, ds):
        """An object can be read back from the exact scope it was stored in."""
        ds.put("number", 3, "general.subscope")
        assert ds.get("number", "general.subscope") == 3

    def test_get_with_sub_scope(self, ds):
        """A value in a sub-scope shadows the value of the superior scope."""
        self._fill(ds, [("number", 3, "general"), ("number", 10, "general.subscope")])
        assert ds.get("number", "general.subscope") == 10
        assert ds.get("number", "general") == 3

    def test_get_with_not_existing_sub_scope(self, ds):
        """A name missing in the sub-scope is taken from the superior scope."""
        self._fill(ds, [("number", 3, "general"),
                        ("number2", 10, "general.subscope"),
                        ("number2", 1, "general")])
        assert ds.get("number", "general.subscope") == 3

    def test_raise_not_in_data_store(self, ds):
        """Asking for a completely unknown name raises NameNotFoundInDataStore."""
        ds.put("number", 22, "general")
        with pytest.raises(NameNotFoundInDataStore) as exc_info:
            ds.get("number3", "general")
        message = exc_info.value.args[0]
        assert "Couldn't find number3 in data store" in message

    def test_search(self, ds):
        """search_name lists all scopes holding the name in sorted order."""
        self._fill(ds, [("number", 22, "general"),
                        ("number", 22, "general2"),
                        ("number", 22, "general.sub")])
        assert ds.search_name("number") == ["general", "general.sub", "general2"]

    def test_raise_not_in_scope(self, ds):
        """A name that only lives in a sibling scope raises NameNotFoundInScope."""
        ds.put("number", 11, "general.sub")
        with pytest.raises(NameNotFoundInScope) as exc_info:
            ds.get("number", "general.sub2")
        message = exc_info.value.args[0]
        assert "Couldn't find number in scope general.sub2 . number is only defined in ['general.sub']" in message

    def test_list_all_scopes(self, ds):
        """list_all_scopes returns every scope once, sorted."""
        self._fill(ds, [("number", 22, "general2"),
                        ("number", 11, "general.sub"),
                        ("number2", 2, "general.sub"),
                        ("number", 3, "general.sub3"),
                        ("number", 1, "general")])
        assert ds.list_all_scopes() == ['general', 'general.sub', 'general.sub3', 'general2']

    def test_search_scope(self, ds):
        """search_scope lists only the names stored in exactly the given scope."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.search_scope("general.sub") == ["number", "number1"]

    def test_search_empty_scope(self, ds):
        """Searching a scope without any entry raises EmptyScope and names the available scopes."""
        self._fill(ds, [("number", 22, "general2"), ("number", 11, "general.sub")])
        with pytest.raises(EmptyScope) as exc_info:
            ds.search_scope("general.sub2")
        message = exc_info.value.args[0]
        assert "Given scope general.sub2 is not part of the data store." in message
        assert "Available scopes are: ['general.sub', 'general2']" in message

    def test_list_all_names(self, ds):
        """list_all_names returns every name once, sorted."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.list_all_names() == ["number", "number1", "number2"]

    def test_search_scope_and_all_superiors(self, ds):
        """With current_scope_only=False names of superior scopes are included."""
        self._fill(ds, self.NESTED_ENTRIES)
        assert ds.search_scope("general.sub", current_scope_only=False) == ["number", "number1"]
        assert ds.search_scope("general.sub.sub", current_scope_only=False) == ["number", "number1", "number2"]

    def test_search_scope_return_all(self, ds):
        """With return_all=True name, scope and value are returned for each hit."""
        self._fill(ds, self.NESTED_ENTRIES)
        expected = [("number", "general.sub", 11), ("number1", "general.sub", 22)]
        assert ds.search_scope("general.sub", return_all=True) == expected

    def test_search_scope_and_all_superiors_return_all(self, ds):
        """Each name is reported with the closest scope that defines it and the value stored there."""
        self._fill(ds, self.NESTED_ENTRIES)
        ds.put("number", "ABC", "general.sub.sub")
        expected_sub = [("number", "general.sub", 11), ("number1", "general.sub", 22)]
        expected_sub_sub = [("number", "general.sub.sub", "ABC"),
                            ("number1", "general.sub", 22),
                            ("number2", "general.sub.sub", 3)]
        assert ds.search_scope("general.sub", current_scope_only=False, return_all=True) == expected_sub
        assert ds.search_scope("general.sub.sub", current_scope_only=False, return_all=True) == expected_sub_sub
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment