Skip to content
Snippets Groups Projects
Commit 3b54d872 authored by lukas leufen
Browse files

Experiment Setup finished

Closes #11, #12, and #22

See merge request toar/machinelearningtools!12
parents 7e9cd7eb ea26f48b
No related branches found
No related tags found
1 merge request: !12 Experiment Setup finished
Pipeline #25981 passed
...@@ -56,3 +56,4 @@ htmlcov/ ...@@ -56,3 +56,4 @@ htmlcov/
.pytest_cache .pytest_cache
/test/data/ /test/data/
report.html report.html
/TestExperiment/
...@@ -9,3 +9,4 @@ pytest-lazy-fixture==0.6.1 ...@@ -9,3 +9,4 @@ pytest-lazy-fixture==0.6.1
pytest-cov pytest-cov
pytest-html pytest-html
pydot pydot
mock
run.py 0 → 100644
__author__ = "Lukas Leufen"
__date__ = '2019-11-14'
import logging
from src.helpers import TimeTracking
from src import helpers
import argparse
import time
# log record layout: timestamp, level and message, followed by the originating file, function and line number
formatter = "%(asctime)s - %(levelname)s: %(message)s [%(filename)s:%(funcName)s:%(lineno)s]"
# configure the root logger as soon as this module is loaded; records of level INFO and above are emitted
logging.basicConfig(level=logging.INFO, format=formatter)
class run(object):
    """
    Basic run class to measure execution time.

    Use an instance either as a context manager (``with run(): ...``) or create it and let it be deleted once the
    measurement is finished. In both cases the timer is stopped and the duration is logged exactly once.
    """

    def __init__(self):
        self.time = TimeTracking()
        # becomes True as soon as the duration was reported, so that __exit__ followed by __del__ reports only once
        self._reported = False
        logging.info(f"{self.__class__.__name__} started")

    def _finish(self):
        """Stop the time tracking and log the duration, unless this was already done."""
        # the default of True also covers instances whose __init__ never got as far as creating the timer
        if getattr(self, "_reported", True):
            return
        self._reported = True
        self.time.stop()
        logging.info(f"{self.__class__.__name__} finished after {self.time}")

    def __del__(self):
        self._finish()

    def __enter__(self):
        # return the instance so that ``with run() as r`` binds something usable
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # report when leaving the with-block; returning None lets exceptions raised inside the block propagate
        self._finish()

    def do_stuff(self):
        """Sleep for two seconds (placeholder workload)."""
        time.sleep(2)
class ExperimentSetup:
    """
    Collect all settings of an experiment as attributes of a single object.

    Every setting listed below can be overwritten by passing it as keyword argument; otherwise the default given in
    setup_experiment is used.

    params:
    trainable: Train new model if true, otherwise try to load existing model
    experiment_date: optional date string used to build the experiment name; if not given, the command line
        arguments parsed by this module (if any) are used
    fraction_of_train, use_all_stations_on_all_data_sets, network, var_all_dict, all_stations, variables,
    dimensions, dim, target_dim, target_var: stored as attributes of the same name
    """

    def __init__(self, **kwargs):
        self.data_path = None
        self.experiment_path = None
        self.experiment_name = None
        self.trainable = None
        self.fraction_of_train = None
        self.use_all_stations_on_all_data_sets = None
        self.network = None
        self.var_all_dict = None
        self.all_stations = None
        self.variables = None
        self.dimensions = None
        self.dim = None
        self.target_dim = None
        self.target_var = None
        self.setup_experiment(**kwargs)

    def _set_param(self, param, value, default=None):
        """
        Set attribute `param` and log the assignment.

        If `default` is not None, `value` is treated as a mapping and the attribute is set to
        value.get(param, default). Otherwise `value` itself is stored.
        """
        if default is not None:
            value = value.get(param, default)
        setattr(self, param, value)
        logging.info(f"set experiment attribute: {param}={value}")

    @staticmethod
    def _get_experiment_date(kwargs):
        """Return the experiment date from `kwargs` or, if missing, from the parsed command line arguments."""
        exp_date = kwargs.get("experiment_date")
        if exp_date is None:
            # `args` only exists as module global when this file is executed as a script; look it up defensively
            # so that the class can also be used after a plain import
            exp_date = getattr(globals().get("args"), "experiment_date", None)
        # argparse options declared with nargs=1 deliver a one-element list instead of the plain value
        if isinstance(exp_date, (list, tuple)) and len(exp_date) == 1:
            exp_date = exp_date[0]
        return exp_date

    def setup_experiment(self, **kwargs):
        """Set all experiment attributes from `kwargs` or their defaults and create the experiment directory."""

        # set data path of this experiment
        self._set_param("data_path", helpers.prepare_host())

        # set experiment name
        exp_date = self._get_experiment_date(kwargs)
        exp_name, exp_path = helpers.set_experiment_name(experiment_date=exp_date)
        self._set_param("experiment_name", exp_name)
        self._set_param("experiment_path", exp_path)
        helpers.check_path_and_create(self.experiment_path)

        # set if model is trainable
        self._set_param("trainable", kwargs, default=True)

        # set fraction of train
        self._set_param("fraction_of_train", kwargs, default=0.8)

        # use all stations on all data sets (train, val, test)
        self._set_param("use_all_stations_on_all_data_sets", kwargs, default=True)
        self._set_param("network", kwargs, default="AIRBASE")
        self._set_param("var_all_dict", kwargs, default={'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'maximum',
                                                         'u': 'average_values', 'v': 'average_values', 'no': 'dma8eu',
                                                         'no2': 'dma8eu', 'cloudcover': 'average_values',
                                                         'pblheight': 'maximum'})
        self._set_param("all_stations", kwargs, default=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087',
                                                         'DEBY052', 'DEBY032', 'DEBW022', 'DEBY004', 'DEBY020',
                                                         'DEBW030', 'DEBW037', 'DEBW031', 'DEBW015', 'DEBW073',
                                                         'DEBY039', 'DEBW038', 'DEBW081', 'DEBY075', 'DEBW040',
                                                         'DEBY053', 'DEBW059', 'DEBW027', 'DEBY072', 'DEBW042',
                                                         'DEBW039', 'DEBY001', 'DEBY113', 'DEBY089', 'DEBW024',
                                                         'DEBW004', 'DEBY037', 'DEBW056', 'DEBW029', 'DEBY068',
                                                         'DEBW010', 'DEBW026', 'DEBY002', 'DEBY079', 'DEBW084',
                                                         'DEBY049', 'DEBY031', 'DEBW019', 'DEBW001', 'DEBY063',
                                                         'DEBY005', 'DEBW046', 'DEBW103', 'DEBW052', 'DEBW034',
                                                         'DEBY088', ])
        # by default use every variable that has an entry in var_all_dict
        self._set_param("variables", kwargs, default=list(self.var_all_dict.keys()))
        self._set_param("dimensions", kwargs, default={'new_index': ['datetime', 'Stations']})
        self._set_param("dim", kwargs, default='datetime')
        self._set_param("target_dim", kwargs, default='variables')
        self._set_param("target_var", kwargs, default="o3")
class PreProcessing(run):
    """Pre-processing stage: initialises the base run class and keeps the given experiment setup."""

    def __init__(self, setup):
        super(PreProcessing, self).__init__()
        # experiment setup object handed in by the caller
        self.setup = setup
class Training(run):
    """Training stage: initialises the base run class and keeps the given experiment setup."""

    def __init__(self, setup):
        super(Training, self).__init__()
        # experiment setup object handed in by the caller
        self.setup = setup
class PostProcessing(run):
    """Post-processing stage: initialises the base run class and keeps the given experiment setup."""

    def __init__(self, setup):
        super(PostProcessing, self).__init__()
        # experiment setup object handed in by the caller
        self.setup = setup
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    # deliberately no ``nargs=1``: that would store the date as a one-element list and the list's string
    # representation would end up in the experiment name
    parser.add_argument('--experiment_date', metavar='--exp_date', type=str, default=None,
                        help="set experiment date as string")
    # `args` has to stay a module level name, ExperimentSetup.setup_experiment reads the experiment date from it
    args = parser.parse_args()

    # measure the run time of the complete workflow; each stage additionally logs its own duration
    with run():
        exp_setup = ExperimentSetup(trainable=True)

        PreProcessing(exp_setup)

        Training(exp_setup)

        PostProcessing(exp_setup)
...@@ -9,6 +9,9 @@ import math ...@@ -9,6 +9,9 @@ import math
from typing import Union from typing import Union
import numpy as np import numpy as np
import os import os
import time
import socket
import sys
def to_list(arg): def to_list(arg):
...@@ -84,3 +87,85 @@ class LearningRateDecay(keras.callbacks.History): ...@@ -84,3 +87,85 @@ class LearningRateDecay(keras.callbacks.History):
self.lr['lr'].append(current_lr) self.lr['lr'].append(current_lr)
logging.info(f"Set learning rate to {current_lr}") logging.info(f"Set learning rate to {current_lr}")
return K.get_value(self.model.optimizer.lr) return K.get_value(self.model.optimizer.lr)
class TimeTracking(object):
    """
    Track time to measure execution time. Time tracking automatically starts on initialisation (unless start=False is
    given) and ends by calling the stop method. The duration can always be shown by printing the time tracking object
    or by calling duration. A tracker that was never started reports a duration of 0.
    """

    def __init__(self, start=True):
        self.start = None
        self.end = None
        if start:
            self._start()

    def _start(self):
        """Set the start time to now and discard any previous end time."""
        self.start = time.time()
        self.end = None

    def _end(self):
        """Set the end time to now."""
        self.end = time.time()

    def _duration(self):
        """Return the seconds between start and end, or between start and now if tracking was not stopped yet."""
        if self.start is None:
            # never started (created with start=False): nothing was measured, avoid a TypeError on None arithmetic
            return 0.0
        # explicit None check, the end time must not be judged by its truthiness
        end = time.time() if self.end is None else self.end
        return end - self.start

    def __repr__(self):
        return f"{round(self._duration(), 2)}s"

    def run(self):
        """(Re-)start the time tracking."""
        self._start()

    def stop(self, get_duration=False):
        """
        Stop the time tracking.

        :param get_duration: return the measured duration if True
        :return: duration in seconds if get_duration is set, otherwise None
        :raises AssertionError: if the tracking was already stopped
        """
        if self.end is None:
            self._end()
        else:
            msg = f"Time was already stopped {time.time() - self.end}s ago."
            logging.error(msg)
            raise AssertionError(msg)
        if get_duration:
            return self.duration()

    def duration(self):
        """Return the measured duration in seconds."""
        return self._duration()
def prepare_host():
    """
    Return the data path that belongs to the host this code is running on.

    The path is selected by the host name and contains the name of the current user.

    :return: data path as string
    :raises OSError: if the host name is not known
    :raises NotADirectoryError: if the selected path does not exist on this host
    """
    hostname = socket.gethostname()
    if hostname in ('ZAM144', 'zam347'):
        template = '/home/{user}/Data/toar_daily/'
    elif hostname == 'linux-gzsx':
        template = '/home/{user}/machinelearningtools'
    elif (len(hostname) > 2) and hostname.startswith('jr'):
        template = '/p/project/cjjsc42/{user}/DATA/toar_daily/'
    elif (len(hostname) > 2) and hostname.startswith('jw'):
        template = '/p/home/jusers/{user}/juwels/intelliaq/DATA/toar_daily/'
    else:
        logging.error(f"unknown host '{hostname}'")
        raise OSError(f"unknown host '{hostname}'")

    try:
        user = os.getlogin()
    except OSError:
        # os.getlogin needs a controlling terminal and fails without one (e.g. in batch jobs or services);
        # getpass.getuser determines the user from the environment or the password database instead
        import getpass
        user = getpass.getuser()
    path = template.format(user=user)

    if not os.path.exists(path):
        logging.error(f"path '{path}' does not exist for host '{hostname}'.")
        raise NotADirectoryError(f"path '{path}' does not exist for host '{hostname}'.")
    logging.info(f"set path to: {path}")
    return path
def set_experiment_name(experiment_date=None, experiment_path=None):
    """
    Build the name and the absolute path of an experiment.

    :param experiment_date: if given, the name is "<experiment_date>_network/", otherwise "TestExperiment"
    :param experiment_path: if given, its absolute version is returned as path; otherwise the path is the experiment
        name located in the parent directory of this module's directory
    :return: tuple of experiment name and absolute experiment path
    """
    experiment_name = "TestExperiment" if experiment_date is None else f"{experiment_date}_network/"
    if experiment_path is not None:
        return experiment_name, os.path.abspath(experiment_path)
    module_dir = os.path.dirname(__file__)
    default_location = os.path.join(module_dir, "..", experiment_name)
    return experiment_name, os.path.abspath(default_location)
import pytest import pytest
from src.helpers import to_list, check_path_and_create, l_p_loss, LearningRateDecay from src.helpers import *
import logging import logging
import os import os
import keras import keras
import numpy as np import numpy as np
import mock
class TestToList: class TestToList:
...@@ -78,3 +79,111 @@ class TestLearningRateDecay: ...@@ -78,3 +79,111 @@ class TestLearningRateDecay:
model.compile(optimizer=keras.optimizers.Adam(), loss=l_p_loss(2)) model.compile(optimizer=keras.optimizers.Adam(), loss=l_p_loss(2))
model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=5, callbacks=[lr_decay]) model.fit(np.array([1, 0, 2, 0.5]), np.array([1, 1, 0, 0.5]), epochs=5, callbacks=[lr_decay])
assert lr_decay.lr['lr'] == [0.02, 0.02, 0.02*0.95, 0.02*0.95, 0.02*0.95*0.95] assert lr_decay.lr['lr'] == [0.02, 0.02, 0.02*0.95, 0.02*0.95, 0.02*0.95*0.95]
class TestTimeTracking:
    """
    Tests for TimeTracking.

    Comparisons between consecutive clock readings are non-strict on purpose: two readings of time.time() taken
    directly after each other may be identical on systems with a coarse clock resolution.
    """

    def test_init(self):
        t = TimeTracking()
        assert t.start is not None
        assert t.start <= time.time()
        assert t.end is None
        t2 = TimeTracking(start=False)
        assert t2.start is None

    def test__start(self):
        t = TimeTracking(start=False)
        t._start()
        assert t.start <= time.time()

    def test__end(self):
        t = TimeTracking()
        t._end()
        assert t.end >= t.start

    def test__duration(self):
        t = TimeTracking()
        d1 = t._duration()
        assert d1 >= 0
        d2 = t._duration()
        assert d2 >= d1
        t._end()
        d3 = t._duration()
        assert d3 >= d2
        # once stopped, the duration must not change anymore
        assert d3 == t._duration()

    def test_repr(self):
        t = TimeTracking()
        t._end()
        duration = t._duration()
        assert t.__repr__().rstrip() == f"{round(duration, 2)}s".rstrip()

    def test_run(self):
        t = TimeTracking(start=False)
        assert t.start is None
        t.run()
        assert t.start is not None

    def test_stop(self):
        t = TimeTracking()
        assert t.end is None
        duration = t.stop(get_duration=True)
        assert duration == t._duration()
        # stopping twice is an error
        with pytest.raises(AssertionError) as e:
            t.stop()
        assert "Time was already stopped" in e.value.args[0]
        # a restart resets the end time and allows stopping again
        t.run()
        assert t.end is None
        assert t.stop() is None
        assert t.end is not None

    def test_duration(self):
        t = TimeTracking()
        # query the duration of the running tracker (not the tracker object itself)
        duration = t.duration()
        assert duration is not None
        duration = t.stop(get_duration=True)
        assert duration == t.duration()
class TestPrepareHost:
    """Tests for prepare_host with patched host name, user name and path check."""

    # patch decorators are applied bottom-up: the mock of the lowest decorator is passed as first argument
    @mock.patch("socket.gethostname", side_effect=["linux-gzsx", "ZAM144", "zam347", "jrtest", "jwtest"])
    @mock.patch("os.getlogin", return_value="testUser")
    @mock.patch("os.path.exists", return_value=True)
    def test_prepare_host(self, mock_path, mock_user, mock_host):
        # each call of prepare_host consumes the next host name of the side_effect list
        path = prepare_host()
        assert path == "/home/testUser/machinelearningtools"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/home/testUser/Data/toar_daily/"
        path = prepare_host()
        assert path == "/p/project/cjjsc42/testUser/DATA/toar_daily/"
        path = prepare_host()
        assert path == "/p/home/jusers/testUser/juwels/intelliaq/DATA/toar_daily/"

    @mock.patch("socket.gethostname", return_value="NotExistingHostName")
    @mock.patch("os.getlogin", return_value="zombie21")
    def test_error_handling(self, mock_user, mock_host):
        with pytest.raises(OSError) as e:
            prepare_host()
        assert "unknown host 'NotExistingHostName'" in e.value.args[0]
        # known host, but os.path.exists is not patched here and the path is expected to be missing
        mock_host.return_value = "linux-gzsx"
        with pytest.raises(NotADirectoryError) as e:
            prepare_host()
        assert "path '/home/zombie21/machinelearningtools' does not exist for host 'linux-gzsx'" in e.value.args[0]
class TestSetExperimentName:
    """Tests for set_experiment_name."""

    def test_set_experiment(self):
        # without a date the name is "TestExperiment", located next to the package directory
        exp_name, exp_path = set_experiment_name()
        assert exp_name == "TestExperiment"
        assert exp_path == os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "TestExperiment"))
        # an explicitly given path is made absolute relative to the current working directory
        exp_name, exp_path = set_experiment_name(experiment_date="2019-11-14", experiment_path="./test2")
        assert exp_name == "2019-11-14_network/"
        assert exp_path == os.path.abspath("./test2")

    def test_set_experiment_from_sys(self):
        exp_name, _ = set_experiment_name(experiment_date="2019-11-14")
        assert exp_name == "2019-11-14_network/"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment