From a6121ba2599cffeca4804151ad96d8cba91edfe9 Mon Sep 17 00:00:00 2001
From: Michael <m.langguth@fz-juelich.de>
Date: Wed, 3 Feb 2021 15:52:26 +0100
Subject: [PATCH] Add save_gpus_info to set-up of model.

---
 video_prediction_tools/main_scripts/main_train_models.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/video_prediction_tools/main_scripts/main_train_models.py b/video_prediction_tools/main_scripts/main_train_models.py
index 0b1abfa4..06a7efc9 100644
--- a/video_prediction_tools/main_scripts/main_train_models.py
+++ b/video_prediction_tools/main_scripts/main_train_models.py
@@ -73,6 +73,7 @@ class TrainModel(object):
         self.create_saver_and_writer()
         self.setup_gpu_config()
         self.calculate_samples_and_epochs()
+        self.save_gpus_info()
         print("setup done")
 
     def set_seed(self):
@@ -285,9 +286,9 @@ class TrainModel(object):
         #print("hvd_size:",hvd.size())
         #print("hvd_local_rank:",hvd.local_rank())
         # also track computing node
-        cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.local_rank())))
+        cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.rank())))
         with open(cnode_file, "w") as fjs:
-            json.dump({"worker{0}".format(str(hvd.local_rank())): host}, fjs)
+            json.dump({"worker{0}".format(str(hvd.rank())): host}, fjs)
 
     def save_timing_to_pkl(self, training_time, time_per_iteration):
         """
-- 
GitLab