From 03f191dc39c640d66772808101d8c1e90c656292 Mon Sep 17 00:00:00 2001
From: Felix Kleinert <f.kleinert@fz-juelich.de>
Date: Tue, 7 Apr 2020 16:56:27 +0200
Subject: [PATCH] introduce partition check to raise an OSError on login nodes

---
 run.py                              |  8 +++++---
 setup_venv.sh                       |  2 ++
 src/run_modules/experiment_setup.py | 23 ++++++++++++++++++++---
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/run.py b/run.py
index 98097128..0efb0e4d 100644
--- a/run.py
+++ b/run.py
@@ -4,7 +4,7 @@ __date__ = '2019-11-14'
 
 import argparse
 
-from src.run_modules.experiment_setup import ExperimentSetup
+from src.run_modules.experiment_setup import ExperimentSetup, PartitionCheck
 from src.run_modules.model_setup import ModelSetup
 from src.run_modules.post_processing import PostProcessing
 from src.run_modules.pre_processing import PreProcessing
@@ -16,10 +16,12 @@ def main(parser_args):
 
     with RunEnvironment():
         ExperimentSetup(parser_args, stations=['DEBW107', 'DEBY081', 'DEBW013', 'DEBW076', 'DEBW087', 'DEBW001'],
-                        station_type='background', trainable=False, create_new_model=False, window_history_size=6,
-                        create_new_bootstraps=True)
+                        station_type='background', trainable=False, create_new_model=True, window_history_size=6,
+                        create_new_bootstraps=False)
         PreProcessing()
 
+        PartitionCheck()
+        
         ModelSetup()
 
         Training()
diff --git a/setup_venv.sh b/setup_venv.sh
index 21733f38..960ee0e0 100755
--- a/setup_venv.sh
+++ b/setup_venv.sh
@@ -25,4 +25,6 @@ pip install --ignore-installed matplotlib==3.2.0
 # export PYTHONPATH=${PWD}/venv/lib/python3.6/site-packages:${PYTHONPATH}
 # srun python run.py
 
+# create batch run scripts
+source create_runscripts_HPC.sh
 
diff --git a/src/run_modules/experiment_setup.py b/src/run_modules/experiment_setup.py
index 6e3b69c0..45778074 100644
--- a/src/run_modules/experiment_setup.py
+++ b/src/run_modules/experiment_setup.py
@@ -24,7 +24,8 @@ DEFAULT_VAR_ALL_DICT = {'o3': 'dma8eu', 'relhum': 'average_values', 'temp': 'max
 DEFAULT_TRANSFORMATION = {"scope": "data", "method": "standardise", "mean": "estimate"}
 DEFAULT_PLOT_LIST = ["PlotMonthlySummary", "PlotStationMap", "PlotClimatologicalSkillScore", "PlotTimeSeries",
                      "PlotCompetitiveSkillScore", "PlotBootstrapSkillScore", "plot_conditional_quantiles"]
-DEFAULT_HPC_HOST_LIST = ["jw", "ju", "jr"] #first part of node names for Juwels (jw[comp], ju[login]) and Jureca(jr).
+DEFAULT_HPC_LOGIN_LIST = ["ju"] 
+DEFAULT_HPC_HOST_LIST = ["jw", "jr"] # first part of node names for Juwels compute nodes (jw) and Jureca (jr); Juwels login nodes (ju) are listed in DEFAULT_HPC_LOGIN_LIST.
 
 
 class ExperimentSetup(RunEnvironment):
@@ -42,7 +43,7 @@ class ExperimentSetup(RunEnvironment):
                  create_new_model=None, bootstrap_path=None, permute_data_on_training=False, transformation=None,
                  train_min_length=None, val_min_length=None, test_min_length=None, extreme_values=None,
                  extremes_on_right_tail_only=None, evaluate_bootstraps=True, plot_list=None, number_of_bootstraps=None,
-                 create_new_bootstraps=None, data_path=None):
+                 create_new_bootstraps=None, data_path=None, login_nodes=None, hpc_hosts=None):
 
         # create run framework
         super().__init__()
@@ -51,7 +52,8 @@ class ExperimentSetup(RunEnvironment):
         self._set_param("data_path", data_path, default=helpers.prepare_host(sampling=sampling))
         self._set_param("hostname", helpers.get_host())
         # self._set_param("hostname", "jwc0123")
-        self._set_param("hpc_hosts", DEFAULT_HPC_HOST_LIST)
+        self._set_param("hpc_hosts", hpc_hosts, default=DEFAULT_HPC_HOST_LIST + DEFAULT_HPC_LOGIN_LIST)
+        self._set_param("login_nodes", login_nodes, default=DEFAULT_HPC_LOGIN_LIST)
         self._set_param("create_new_model", create_new_model, default=True)
         if self.data_store.get("create_new_model"):
             trainable = True
@@ -185,6 +187,21 @@ class ExperimentSetup(RunEnvironment):
             self._set_param("statistics_per_var", stat_new)
 
 
+class PartitionCheck(RunEnvironment):
+
+    """Check whether the code is running on an HPC login node. The only reason to run on login nodes is to download data; training and validation should happen on compute nodes."""
+
+    def __init__(self):
+        # create run framework
+        super().__init__()
+
+        self._run()
+
+    def _run(self):
+        if self.data_store.get('hostname')[:2] in self.data_store.get('login_nodes'):
+            raise OSError('You are on a login node to download data. Use compute nodes and run again if you want to train and validate a model.')
+
+
 if __name__ == "__main__":
 
     formatter = '%(asctime)s - %(levelname)s: %(message)s  [%(filename)s:%(funcName)s:%(lineno)s]'
-- 
GitLab