From 0081d30890d7ad702dddbd91778e624060ee1cfe Mon Sep 17 00:00:00 2001 From: Felix Kleinert <f.kleinert@fz-juelich.de> Date: Mon, 11 May 2020 08:52:38 +0200 Subject: [PATCH] update PartitionCheck to throw OS Error when executed on login nodes on HDFML --- src/run_modules/experiment_setup.py | 4 ++-- src/run_modules/partition_check.py | 6 +++++- test/test_modules/test_partition_check.py | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py index 267b6c66..b04c0e2a 100644 --- a/src/run_modules/experiment_setup.py +++ b/src/run_modules/experiment_setup.py @@ -25,8 +25,8 @@ DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "est DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries", "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "PlotConditionalQuantiles", "PlotAvailability"] -DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] -DEFAULT_HPC_HOST_LIST = ["jw", "jr", "hdfmlc"] # first part of node names for Juwels (jw[comp], ju[login]) and Jureca(jr). +DEFAULT_HPC_LOGIN_LIST = ["ju", "hdfmll"] # first part of login node names: ju[wels], hdfmll[ogin] +DEFAULT_HPC_HOST_LIST = ["jw", "hdfmlc"] # first part of compute node names: Juwels (jw[comp]), HDFML (hdfmlc[ompute]). class ExperimentSetup(RunEnvironment): diff --git a/src/run_modules/partition_check.py b/src/run_modules/partition_check.py index ee4edf05..c5f2375a 100644 --- a/src/run_modules/partition_check.py +++ b/src/run_modules/partition_check.py @@ -8,6 +8,9 @@ class PartitionCheck(RunEnvironment): """ Checking if running on a HPC login node. The only reason to run on login nodes is to download data. Training and validation should happen on compute nodes + + Note: This method is highly customised to the HPC systems in Juelich (FZJ, JSC). When using another HPC system, + make sure to double-check the indexing of `self.data_store.get('hostname')`. 
""" def __init__(self): @@ -17,7 +20,8 @@ class PartitionCheck(RunEnvironment): self._run() def _run(self): - if self.data_store.get('hostname')[:2] in self.data_store.get('login_nodes'): + if (self.data_store.get('hostname')[:2] in self.data_store.get('login_nodes')) or ( + self.data_store.get('hostname')[:6] in self.data_store.get('login_nodes')): raise OSError( 'You are on a login node to download data. Use compute nodes and run again if ' 'you want to train and validate a model.') diff --git a/test/test_modules/test_partition_check.py b/test/test_modules/test_partition_check.py index 404c8bcb..6966b2aa 100644 --- a/test/test_modules/test_partition_check.py +++ b/test/test_modules/test_partition_check.py @@ -33,7 +33,7 @@ class TestPartitionCheck: RunEnvironment().__del__() @pytest.fixture - @mock.patch("src.helpers.get_host", return_value="jwtest") + @mock.patch("src.helpers.get_host", return_value="hdfmlc01") @mock.patch("getpass.getuser", return_value="testUser") @mock.patch("os.path.exists", return_value=False) @mock.patch("os.makedirs", side_effect=None) @@ -67,7 +67,7 @@ class TestPartitionCheck: "validate a model." == \ e.value.args[0] - @mock.patch("src.helpers.get_host", return_value="jwtest") + @mock.patch("src.helpers.get_host", return_value="hdfmlc01") @mock.patch("getpass.getuser", return_value="testUser") @mock.patch("os.path.exists", return_value=False) @mock.patch("os.makedirs", side_effect=None) -- GitLab