Commit a6121ba2 authored by Michael Langguth
Browse files

Add save_gpus_info to set-up of model.

parent aa6d9dd6
Pipeline #58798 passed with stages
in 2 minutes and 6 seconds
......@@ -73,6 +73,7 @@ class TrainModel(object):
self.create_saver_and_writer()
self.setup_gpu_config()
self.calculate_samples_and_epochs()
self.save_gpus_info()
print("setup done")
def set_seed(self):
......@@ -285,9 +286,9 @@ class TrainModel(object):
#print("hvd_size:",hvd.size())
#print("hvd_local_rank:",hvd.local_rank())
# also track computing node
cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.local_rank())))
cnode_file = os.path.join(self.output_dir, "GPU_worker{0}.json".format(str(hvd.rank())))
with open(cnode_file, "w") as fjs:
json.dump({"worker{0}".format(str(hvd.local_rank())): host}, fjs)
json.dump({"worker{0}".format(str(hvd.rank())): host}, fjs)
def save_timing_to_pkl(self, training_time, time_per_iteration):
"""
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment