From 0d7d9fae9e1e4290daae217a6dc90f2bc89ae8ae Mon Sep 17 00:00:00 2001 From: janEbert <janpublicebert@posteo.net> Date: Fri, 5 Jul 2024 11:50:28 +0200 Subject: [PATCH] Fix environment with force-purged modules Also specify `torchvision` version. It's not perfect since it's still prone to breakage, but at least we forbid known non-working versions. Thanks to @khalfaoui1 for reporting it. --- pytorch-ddp-example/requirements.txt | 2 +- pytorch-ddp-example/run.sbatch | 3 ++- pytorch-fsdp-example/requirements.txt | 2 +- pytorch-fsdp-example/run.sbatch | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pytorch-ddp-example/requirements.txt b/pytorch-ddp-example/requirements.txt index fae5096..d506f2e 100644 --- a/pytorch-ddp-example/requirements.txt +++ b/pytorch-ddp-example/requirements.txt @@ -1,3 +1,3 @@ torch>=1.9,<3 torchrun_jsc>=0.0.4 -torchvision +torchvision>=0.13 diff --git a/pytorch-ddp-example/run.sbatch b/pytorch-ddp-example/run.sbatch index 6c8ef7a..68c3165 100644 --- a/pytorch-ddp-example/run.sbatch +++ b/pytorch-ddp-example/run.sbatch @@ -20,7 +20,8 @@ export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK" [ -x "$(command -v deactivate)" ] && deactivate -module purge +module --force purge +module load Stages source "$curr_dir"/modules.sh if ! [ -d "$curr_dir"/env ]; then diff --git a/pytorch-fsdp-example/requirements.txt b/pytorch-fsdp-example/requirements.txt index 5283f03..a680418 100644 --- a/pytorch-fsdp-example/requirements.txt +++ b/pytorch-fsdp-example/requirements.txt @@ -1,3 +1,3 @@ torch>=2,<3 torchrun_jsc>=0.0.4 -torchvision +torchvision>=0.13 diff --git a/pytorch-fsdp-example/run.sbatch b/pytorch-fsdp-example/run.sbatch index 233d8b7..4af8053 100644 --- a/pytorch-fsdp-example/run.sbatch +++ b/pytorch-fsdp-example/run.sbatch @@ -20,7 +20,8 @@ export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK" [ -x "$(command -v deactivate)" ] && deactivate -module purge +module --force purge +module load Stages source "$curr_dir"/modules.sh if !
[ -d "$curr_dir"/env ]; then -- GitLab