From 0d7d9fae9e1e4290daae217a6dc90f2bc89ae8ae Mon Sep 17 00:00:00 2001
From: janEbert <janpublicebert@posteo.net>
Date: Fri, 5 Jul 2024 11:50:28 +0200
Subject: [PATCH] Fix environment with force-purged modules

Also specify a minimum `torchvision` version. This is not perfect, since
it is still prone to breakage, but at least it forbids versions known
not to work.

Thanks to @khalfaoui1 for reporting it.
---
 pytorch-ddp-example/requirements.txt  | 2 +-
 pytorch-ddp-example/run.sbatch        | 3 ++-
 pytorch-fsdp-example/requirements.txt | 2 +-
 pytorch-fsdp-example/run.sbatch       | 3 ++-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pytorch-ddp-example/requirements.txt b/pytorch-ddp-example/requirements.txt
index fae5096..d506f2e 100644
--- a/pytorch-ddp-example/requirements.txt
+++ b/pytorch-ddp-example/requirements.txt
@@ -1,3 +1,3 @@
 torch>=1.9,<3
 torchrun_jsc>=0.0.4
-torchvision
+torchvision>=0.13
diff --git a/pytorch-ddp-example/run.sbatch b/pytorch-ddp-example/run.sbatch
index 6c8ef7a..68c3165 100644
--- a/pytorch-ddp-example/run.sbatch
+++ b/pytorch-ddp-example/run.sbatch
@@ -20,7 +20,8 @@ export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK"
 
 [ -x "$(command -v deactivate)" ] && deactivate
 
-module purge
+module --force purge
+module load Stages
 source "$curr_dir"/modules.sh
 
 if ! [ -d "$curr_dir"/env ]; then
diff --git a/pytorch-fsdp-example/requirements.txt b/pytorch-fsdp-example/requirements.txt
index 5283f03..a680418 100644
--- a/pytorch-fsdp-example/requirements.txt
+++ b/pytorch-fsdp-example/requirements.txt
@@ -1,3 +1,3 @@
 torch>=2,<3
 torchrun_jsc>=0.0.4
-torchvision
+torchvision>=0.13
diff --git a/pytorch-fsdp-example/run.sbatch b/pytorch-fsdp-example/run.sbatch
index 233d8b7..4af8053 100644
--- a/pytorch-fsdp-example/run.sbatch
+++ b/pytorch-fsdp-example/run.sbatch
@@ -20,7 +20,8 @@ export SRUN_CPUS_PER_TASK="$SLURM_CPUS_PER_TASK"
 
 [ -x "$(command -v deactivate)" ] && deactivate
 
-module purge
+module --force purge
+module load Stages
 source "$curr_dir"/modules.sh
 
 if ! [ -d "$curr_dir"/env ]; then
-- 
GitLab