__author__ = "Lukas Leufen"
__date__ = '2019-11-14'


import logging
from src.helpers import TimeTracking
from src import helpers
import argparse
import time


formatter = "%(asctime)s - %(levelname)s: %(message)s [%(filename)s:%(funcName)s:%(lineno)s]"
logging.basicConfig(level=logging.INFO, format=formatter)


class run(object):
    """
    Basic run class to measure execution time.

    The timer starts on construction. The duration is logged exactly once: either when the instance is used as a
    context manager and the ``with`` block is left, or when the instance is deleted / garbage collected.
    """

    def __init__(self):
        self.time = TimeTracking()
        # Guards against logging (and stopping the timer) twice, e.g. on __exit__ and again on __del__.
        self._finished = False
        logging.info(f"{self.__class__.__name__} started")

    def __del__(self):
        self._finish()

    def __enter__(self):
        # Return the instance so that ``with run() as r:`` binds a usable object instead of None.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None on purpose: exceptions raised inside the ``with`` block are not suppressed.
        self._finish()

    def _finish(self):
        """Stop the timer and log the duration; does nothing if this already happened."""
        # Default True covers a partially constructed instance (e.g. __init__ raised before the flag was set).
        if getattr(self, "_finished", True):
            return
        self._finished = True
        self.time.stop()
        logging.info(f"{self.__class__.__name__} finished after {self.time}")

    def do_stuff(self):
        time.sleep(2)


class ExperimentSetup:
    """
    Collect all settings of an experiment as attributes.

    Every keyword argument listed below overrides the corresponding default set in setup_experiment.

    params:
        experiment_date: date string used to build the experiment name; if missing, the default experiment name of
            helpers.set_experiment_name is used
        trainable: Train new model if true, otherwise try to load existing model
        fraction_of_train, use_all_stations_on_all_data_sets, network, var_all_dict, all_stations, variables,
        dimensions, dim, target_dim, target_var: stored as attributes of the same name
    """

    def __init__(self, **kwargs):
        self.data_path = None
        self.experiment_path = None
        self.experiment_name = None
        self.trainable = None
        self.fraction_of_train = None
        self.use_all_stations_on_all_data_sets = None
        self.network = None
        self.var_all_dict = None
        self.all_stations = None
        self.variables = None
        self.dimensions = None
        self.dim = None
        self.target_dim = None
        self.target_var = None
        self.setup_experiment(**kwargs)

    def _set_param(self, param, value, default=None):
        """
        Set attribute `param` and log the assignment.

        If `default` is given (not None), `value` is treated as a mapping (the kwargs dict) and the attribute is set
        to `value.get(param, default)`. Otherwise `value` itself is stored.
        """
        if default is not None:
            value = value.get(param, default)
        setattr(self, param, value)
        logging.info(f"set experiment attribute: {param}={value}")

    def setup_experiment(self, **kwargs):
        """Fill all experiment attributes from `kwargs`, falling back to the built-in defaults."""

        # set data path of this experiment
        self._set_param("data_path", helpers.prepare_host())

        # set experiment name; the date is taken from the keyword arguments instead of a module-level `args`
        # object, which only exists when this file is executed as a script
        exp_date = kwargs.get("experiment_date")
        exp_name, exp_path = helpers.set_experiment_name(experiment_date=exp_date)
        self._set_param("experiment_name", exp_name)
        self._set_param("experiment_path", exp_path)
        helpers.check_path_and_create(self.experiment_path)

        # set if model is trainable
        self._set_param("trainable", kwargs, default=True)

        # set fraction of train
        self._set_param("fraction_of_train", kwargs, default=0.8)

        # use all stations on all data sets (train, val, test)
        self._set_param("use_all_stations_on_all_data_sets", kwargs, default=True)
        self._set_param("network", kwargs, default="AIRBASE")
        self._set_param("var_all_dict", kwargs, default={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum',
                                                         'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu',
                                                         'no2': 'dma8eu', 'cloudcover': 'average_values',
                                                         'pblheight': 'maximum'})
        self._set_param("all_stations", kwargs, default=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087',
                                                         'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', 'DEBY020',
                                                         'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073',
                                                         'DEBY039', 'DEBW038', 'DEBW081', 'DEBY075', 'DEBW040',
                                                         'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042',
                                                         'DEBW039', 'DEBY001', 'DEBY113', 'DEBY089', 'DEBW024',
                                                         'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068',
                                                         'DEBW010', 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084',
                                                         'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
                                                         'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034',
                                                         'DEBY088', ])
        # default for variables depends on var_all_dict, which therefore has to be set first
        self._set_param("variables", kwargs, default=list(self.var_all_dict.keys()))
        self._set_param("dimensions", kwargs, default={'new_index': ['datetime', 'Stations']})
        self._set_param("dim", kwargs, default='datetime')
        self._set_param("target_dim", kwargs, default='variables')
        self._set_param("target_var", kwargs, default="o3")


class PreProcessing(run):
    """Timed pre-processing stage; currently only stores the experiment setup."""

    def __init__(self, setup):
        super().__init__()
        self.setup = setup


class Training(run):
    """Timed training stage; currently only stores the experiment setup."""

    def __init__(self, setup):
        super().__init__()
        self.setup = setup


class PostProcessing(run):
    """Timed post-processing stage; currently only stores the experiment setup."""

    def __init__(self, setup):
        super().__init__()
        self.setup = setup


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    # No nargs=1 here: that would wrap the value in a list and end up as "['<date>']_network/" in the experiment name.
    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
                        help="set experiment date as string")
    args = parser.parse_args()

    with run():
        exp_setup = ExperimentSetup(trainable=True, experiment_date=args.experiment_date)

        PreProcessing(exp_setup)

        Training(exp_setup)

        PostProcessing(exp_setup)
class TimeTracking(object):
    """
    Track time to measure execution time. Time tracking automatically starts on initialisation and ends by calling stop
    method. Duration can always be shown by printing the time tracking object or calling get_current_duration.

    All durations are differences of time.time() values, i.e. seconds as float. Asking for a duration before the
    tracker was ever started fails with a TypeError, because the start time is still None.
    """

    def __init__(self, start=True):
        """
        :param start: start tracking immediately if True, otherwise wait for an explicit call of run()
        """
        self.start = None
        self.end = None
        if start:
            self._start()

    def _start(self):
        """(Re-)start the tracking: set start time to now and clear a previous end time."""
        self.start = time.time()
        self.end = None

    def _end(self):
        """Set end time to now."""
        self.end = time.time()

    def _duration(self):
        """Return the elapsed seconds: start to end if stopped, otherwise start to now."""
        if self.end is not None:
            return self.end - self.start
        return time.time() - self.start

    def __repr__(self):
        return f"{round(self._duration(), 2)}s"

    def run(self):
        """Start (or restart) the time tracking."""
        self._start()

    def stop(self, get_duration=False):
        """
        Stop the time tracking.

        :param get_duration: return the measured duration if True
        :return: duration in seconds if get_duration is set, otherwise None
        :raises AssertionError: if the tracking was already stopped
        """
        if self.end is not None:
            msg = f"Time was already stopped {time.time() - self.end}s ago."
            logging.error(msg)
            raise AssertionError(msg)
        self._end()
        if get_duration:
            return self.duration()
        return None

    def duration(self):
        """Return the elapsed seconds (see _duration)."""
        return self._duration()


def _get_user_name():
    """
    Return the name of the current user.

    os.getlogin() needs a controlling terminal and raises OSError without one (e.g. in batch jobs or cron). In that
    case fall back to getpass.getuser(), which looks at the environment and the password database.

    :raises OSError: if the user name cannot be determined at all
    """
    try:
        return os.getlogin()
    except OSError:
        import getpass  # local import: only needed for the fallback
        try:
            return getpass.getuser()
        except (ImportError, KeyError, OSError) as err:
            # older Python versions raise KeyError / ImportError here; callers only have to deal with OSError
            raise OSError("could not determine the current user name") from err


def prepare_host():
    """
    Return the data path belonging to the host this code is running on.

    :return: host specific data path containing the current user name
    :raises OSError: if the host name is not known
    :raises NotADirectoryError: if the path for this host does not exist
    """
    hostname = socket.gethostname()
    user = _get_user_name()
    if hostname in ('ZAM144', 'zam347'):
        path = f'/home/{user}/Data/toar_daily/'
    elif hostname == 'linux-gzsx':
        path = f'/home/{user}/machinelearningtools'
    elif (len(hostname) > 2) and hostname.startswith('jr'):
        path = f'/p/project/cjjsc42/{user}/DATA/toar_daily/'
    elif (len(hostname) > 2) and hostname.startswith('jw'):
        path = f'/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_daily/'
    else:
        logging.error(f"unknown host '{hostname}'")
        raise OSError(f"unknown host '{hostname}'")
    if not os.path.exists(path):
        logging.error(f"path '{path}' does not exist for host '{hostname}'.")
        raise NotADirectoryError(f"path '{path}' does not exist for host '{hostname}'.")
    logging.info(f"set path to: {path}")
    return path


def set_experiment_name(experiment_date=None, experiment_path=None):
    """
    Build name and absolute path of an experiment.

    :param experiment_date: if given, the name is "<experiment_date>_network/", otherwise "TestExperiment"
    :param experiment_path: if given, its absolute path is returned; otherwise the path is the experiment name
        placed in the parent directory of the directory containing this file
    :return: tuple of experiment name and absolute experiment path
    """
    if experiment_date is None:
        experiment_name = "TestExperiment"
    else:
        experiment_name = f"{experiment_date}_network/"
    if experiment_path is None:
        experiment_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", experiment_name))
    else:
        experiment_path = os.path.abspath(experiment_path)
    return experiment_name, experiment_path
class TestTimeTracking:
    """Tests for TimeTracking. Comparisons of time stamps are non-strict, because two consecutive time.time() calls
    may return the same value on clocks with coarse resolution."""

    def test_init(self):
        t = TimeTracking()
        assert t.start is not None
        assert t.start <= time.time()
        assert t.end is None
        t2 = TimeTracking(start=False)
        assert t2.start is None

    def test__start(self):
        t = TimeTracking(start=False)
        t._start()
        assert t.start <= time.time()

    def test__end(self):
        t = TimeTracking()
        t._end()
        assert t.end >= t.start

    def test__duration(self):
        t = TimeTracking()
        d1 = t._duration()
        assert d1 >= 0
        d2 = t._duration()
        assert d2 >= d1
        t._end()
        d3 = t._duration()
        assert d3 >= d2
        # once stopped, the duration must not change anymore
        assert d3 == t._duration()

    def test_repr(self):
        t = TimeTracking()
        t._end()
        duration = t._duration()
        assert t.__repr__().rstrip() == f"{round(duration, 2)}s".rstrip()

    def test_run(self):
        t = TimeTracking(start=False)
        assert t.start is None
        t.run()
        assert t.start is not None

    def test_stop(self):
        t = TimeTracking()
        assert t.end is None
        duration = t.stop(get_duration=True)
        assert duration == t._duration()
        with pytest.raises(AssertionError) as e:
            t.stop()
        assert "Time was already stopped" in e.value.args[0]
        t.run()
        assert t.end is None
        assert t.stop() is None
        assert t.end is not None

    def test_duration(self):
        t = TimeTracking()
        # query the running duration (not the tracker object itself)
        duration = t.duration()
        assert duration is not None
        assert duration >= 0
        duration = t.stop(get_duration=True)
        assert duration == t.duration()


class TestPrepareHost:

    # Stacked patch decorators are applied bottom-up, so the mock of the LAST decorator is the first argument.
    @mock.patch("socket.gethostname", side_effect=["linux-gzsx", "ZAM144", "zam347", "jrtest", "jwtest"])
    @mock.patch("os.getlogin", return_value="testUser")
    @mock.patch("os.path.exists", return_value=True)
    def test_prepare_host(self, mock_path, mock_user, mock_host):
        path = prepare_host()
        assert path == "/home/testUser/machinelearningtools"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/p/project/cjjsc42/testUser/DATA/toar_daily/"
        path = prepare_host()
        assert path == "/p/home/jusers/testUser/juwels/intelliaq/DATA/toar_daily/"

    @mock.patch("socket.gethostname", return_value="NotExistingHostName")
    @mock.patch("os.getlogin", return_value="zombie21")
    def test_error_handling(self, mock_user, mock_host):
        with pytest.raises(OSError) as e:
            prepare_host()
        assert "unknown host 'NotExistingHostName'" in e.value.args[0]
        mock_host.return_value = "linux-gzsx"
        # force the "path is missing" branch instead of relying on the file system of the test machine
        with mock.patch("os.path.exists", return_value=False):
            with pytest.raises(NotADirectoryError) as e:
                prepare_host()
        assert "path '/home/zombie21/machinelearningtools' does not exist for host 'linux-gzsx'" in e.value.args[0]


class TestSetExperimentName:

    def test_set_experiment(self):
        # without a date, set_experiment_name falls back to the name "TestExperiment" next to the source directory
        exp_name, exp_path = set_experiment_name()
        assert exp_name == "TestExperiment"
        assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "TestExperiment"))
        exp_name, exp_path = set_experiment_name(experiment_date="2019-11-14", experiment_path="./test2")
        assert exp_name == "2019-11-14_network/"
        # a relative experiment path is resolved against the current working directory
        assert exp_path == os.path.abspath("./test2")

    def test_set_experiment_from_sys(self):
        exp_name, _ = set_experiment_name(experiment_date="2019-11-14")
        assert exp_name == "2019-11-14_network/"