From 2eb7665960bb939fca63753ea796ec8cd5ae8b7c Mon Sep 17 00:00:00 2001 From: janEbert <janpublicebert@posteo.net> Date: Tue, 9 Jul 2024 10:04:50 +0200 Subject: [PATCH] Integrate script updates into README --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9205aef..2073c67 100644 --- a/README.md +++ b/README.md @@ -262,9 +262,10 @@ use Instead of ```shell -srun python -u -m torchrun_jsc \ - --nproc_per_node="$DEVICES_PER_NODE" \ +srun env -u CUDA_VISIBLE_DEVICES python -u -m torchrun_jsc \ + --nproc_per_node=gpu \ --nnodes="$SLURM_JOB_NUM_NODES" \ + --rdzv_id="$SLURM_JOB_ID" \ --rdzv_endpoint="$MASTER_ADDR":"$MASTER_PORT" \ --rdzv_backend=c10d \ "$curr_dir"/main.py "$@" @@ -273,7 +274,7 @@ srun python -u -m torchrun_jsc \ use ```shell -srun python -u "$curr_dir"/main.py "$@" +srun env -u CUDA_VISIBLE_DEVICES python -u "$curr_dir"/main.py "$@" ``` Additionally, if using PyTorch Lightning, you may encounter issues @@ -301,6 +302,7 @@ def patch_lightning_slurm_master_addr(): 'juwelsbooster', 'juwels', 'jurecadc', + 'jusuf', ]: return -- GitLab