From 78951613c39b06c62b35f5292d5ad2c22280ff14 Mon Sep 17 00:00:00 2001
From: janEbert <janpublicebert@posteo.net>
Date: Fri, 5 Jul 2024 12:05:27 +0200
Subject: [PATCH] Refactor `venv` activation

Should make it more obvious to users what's going on and also make it
easier for them to use their environment interactively.

Since `activate.sh` is meant to be sourced, it uses `return` instead
of `exit` when the `venv` is missing, so an interactive shell is not
closed on failure. `run.sbatch` aborts the job explicitly if the
activation fails.
---
Notes (not part of the commit message): the `index` lines of
`activate.sh` and `run.sbatch` below still carry the blob hashes of the
previous revision of this patch.

 pytorch-ddp-example/activate.sh  | 28 ++++++++++++++++++++++++++++
 pytorch-ddp-example/modules.sh   |  1 +
 pytorch-ddp-example/run.sbatch   | 14 +-------------
 pytorch-fsdp-example/activate.sh | 28 ++++++++++++++++++++++++++++
 pytorch-fsdp-example/modules.sh  |  1 +
 pytorch-fsdp-example/run.sbatch  | 14 +-------------
 6 files changed, 60 insertions(+), 26 deletions(-)
 create mode 100644 pytorch-ddp-example/activate.sh
 create mode 100644 pytorch-fsdp-example/activate.sh

diff --git a/pytorch-ddp-example/activate.sh b/pytorch-ddp-example/activate.sh
new file mode 100644
index 0000000..4083f0a
--- /dev/null
+++ b/pytorch-ddp-example/activate.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Load the modules from `modules.sh` and activate the `venv` in `env`.
+# Meant to be sourced, from `run.sbatch` or from an interactive shell.
+
+if [ -z "$curr_dir" ]; then
+    curr_file="${BASH_SOURCE[0]:-${(%):-%x}}"
+    curr_dir="$(dirname "$curr_file")"
+fi
+
+venv_dir="$curr_dir"/env
+
+# A `venv` defines `deactivate` as a shell function, not as a file, so
+# check that the name resolves instead of testing it with `-x`.
+command -v deactivate > /dev/null 2>&1 && deactivate
+
+module --force purge
+source "$curr_dir"/modules.sh
+
+if ! [ -d "$venv_dir" ]; then
+    echo "Cannot set up \`venv\` on JUWELS Booster compute node." \
+         "Please manually execute \`bash set_up.sh\` on a login node."
+    # `return` keeps an interactive shell that sourced this file open;
+    # fall back to `exit` in case the file is executed instead.
+    return 1 2> /dev/null || exit 1
+fi
+
+source "$venv_dir"/bin/activate
diff --git a/pytorch-ddp-example/modules.sh b/pytorch-ddp-example/modules.sh
index d690e3e..f99ea5f 100644
--- a/pytorch-ddp-example/modules.sh
+++ b/pytorch-ddp-example/modules.sh
@@ -1,3 +1,4 @@
 #!/usr/bin/env sh
 
+module load Stages
 module load GCC OpenMPI PyTorch torchvision
diff --git a/pytorch-ddp-example/run.sbatch b/pytorch-ddp-example/run.sbatch
index 68c3165..27c4e4c 100644
--- a/pytorch-ddp-example/run.sbatch
+++ b/pytorch-ddp-example/run.sbatch
@@ -18,19 +18,7 @@ curr_dir="$(dirname "$curr_file")"
 # Propagate the specified number of CPUs per task to each `srun`.
 export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK"
 
-[ -x "$(command -v deactivate)" ] && deactivate
-
-module --force purge
-module load Stages
-source "$curr_dir"/modules.sh
-
-if ! [ -d "$curr_dir"/env ]; then
-    echo "Cannot set up \`venv\` on JUWELS Booster compute node." \
-         "Please manually execute \`bash set_up.sh\` on a login node."
-    exit 1
-fi
-
-source "$curr_dir"/env/bin/activate
+source "$curr_dir"/activate.sh || exit 1
 
 export MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)"
 if [ "$SYSTEMNAME" = juwelsbooster ] \
diff --git a/pytorch-fsdp-example/activate.sh b/pytorch-fsdp-example/activate.sh
new file mode 100644
index 0000000..4083f0a
--- /dev/null
+++ b/pytorch-fsdp-example/activate.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Load the modules from `modules.sh` and activate the `venv` in `env`.
+# Meant to be sourced, from `run.sbatch` or from an interactive shell.
+
+if [ -z "$curr_dir" ]; then
+    curr_file="${BASH_SOURCE[0]:-${(%):-%x}}"
+    curr_dir="$(dirname "$curr_file")"
+fi
+
+venv_dir="$curr_dir"/env
+
+# A `venv` defines `deactivate` as a shell function, not as a file, so
+# check that the name resolves instead of testing it with `-x`.
+command -v deactivate > /dev/null 2>&1 && deactivate
+
+module --force purge
+source "$curr_dir"/modules.sh
+
+if ! [ -d "$venv_dir" ]; then
+    echo "Cannot set up \`venv\` on JUWELS Booster compute node." \
+         "Please manually execute \`bash set_up.sh\` on a login node."
+    # `return` keeps an interactive shell that sourced this file open;
+    # fall back to `exit` in case the file is executed instead.
+    return 1 2> /dev/null || exit 1
+fi
+
+source "$venv_dir"/bin/activate
diff --git a/pytorch-fsdp-example/modules.sh b/pytorch-fsdp-example/modules.sh
index d690e3e..f99ea5f 100644
--- a/pytorch-fsdp-example/modules.sh
+++ b/pytorch-fsdp-example/modules.sh
@@ -1,3 +1,4 @@
 #!/usr/bin/env sh
 
+module load Stages
 module load GCC OpenMPI PyTorch torchvision
diff --git a/pytorch-fsdp-example/run.sbatch b/pytorch-fsdp-example/run.sbatch
index 4af8053..4b404e4 100644
--- a/pytorch-fsdp-example/run.sbatch
+++ b/pytorch-fsdp-example/run.sbatch
@@ -18,19 +18,7 @@ curr_dir="$(dirname "$curr_file")"
 # Propagate the specified number of CPUs per task to each `srun`.
 export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK"
 
-[ -x "$(command -v deactivate)" ] && deactivate
-
-module --force purge
-module load Stages
-source "$curr_dir"/modules.sh
-
-if ! [ -d "$curr_dir"/env ]; then
-    echo "Cannot set up \`venv\` on JUWELS Booster compute node." \
-         "Please manually execute \`bash set_up.sh\` on a login node."
-    exit 1
-fi
-
-source "$curr_dir"/env/bin/activate
+source "$curr_dir"/activate.sh || exit 1
 
 export MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)"
 if [ "$SYSTEMNAME" = juwelsbooster ] \
-- 
GitLab