From a6121ba2599cffeca4804151ad96d8cba91edfe9 Mon Sep 17 00:00:00 2001 From: Michael <m.langguth@fz-juelich.de> Date: Wed, 3 Feb 2021 15:52:26 +0100 Subject: [PATCH] Add save_gpus_info to set-up of model. --- video_prediction_tools/main_scripts/main_train_models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/video_prediction_tools/main_scripts/main_train_models.py b/video_prediction_tools/main_scripts/main_train_models.py index 0b1abfa4..06a7efc9 100644 --- a/video_prediction_tools/main_scripts/main_train_models.py +++ b/video_prediction_tools/main_scripts/main_train_models.py @@ -73,6 +73,7 @@ class TrainModel(object): self.create_saver_and_writer() self.setup_gpu_config() self.calculate_samples_and_epochs() + self.save_gpus_info() print("setup done") def set_seed(self): @@ -285,9 +286,9 @@ class TrainModel(object): #print("hvd_size:",hvd.size()) #print("hvd_local_rank:",hvd.local_rank()) # also track computing node - cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.local_rank()))) + cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.rank()))) with open(cnode_file, "w") as fjs: - json.dump({"worker{0}".format(str(hvd.local_rank())): host}, fjs) + json.dump({"worker{0}".format(str(hvd.rank())): host}, fjs) def save_timing_to_pkl(self, training_time, time_per_iteration): """ -- GitLab