Skip to content
Snippets Groups Projects
Commit d60bc6ed authored by Alexandre Strube's avatar Alexandre Strube
Browse files

trying stuff

parent 7de7e947
Branches
No related tags found
No related merge requests found
Pipeline #140694 passed
from fastai.vision.all import * from fastai.vision.all import *
from fastai.distributed import * from fastai.distributed import *
from fastai.vision.models.xresnet import * from fastai.vision.models.xresnet import *
from accelerate import Accelerator
# Print status information about the distributed environment
accelerator = Accelerator()
print(accelerator.state)
path = rank0_first(untar_data, URLs.IMAGEWOOF_320) path = rank0_first(untar_data, URLs.IMAGEWOOF_320)
dls = DataBlock( dls = DataBlock(
......
...@@ -10,6 +10,20 @@ ...@@ -10,6 +10,20 @@
#SBATCH --partition=develbooster #SBATCH --partition=develbooster
#SBATCH --gres=gpu:4 #SBATCH --gres=gpu:4
# srun does not inherit cpus-per-task from sbatch
export SRUN_CPUS_PER_TASK=${SLURM_CPUS_PER_TASK}
# so processes know who to talk to
MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)"
# Allow communication over InfiniBand cells.
MASTER_ADDR="${MASTER_ADDR}i"
# Get IP for hostname.
MASTER_ADDR="$(nslookup "$MASTER_ADDR" | grep -oP '(?<=Address: ).*')"
MASTER_PORT=6000
GPUS_PER_NODE=4
NNODES=$SLURM_JOB_NUM_NODES
# Make sure we are in the right directory # Make sure we are in the right directory
cd $HOME/2023-may-intro-to-supercompting-jsc/src cd $HOME/2023-may-intro-to-supercompting-jsc/src
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#SBATCH --output=output.%j #SBATCH --output=output.%j
#SBATCH --error=err.%j #SBATCH --error=err.%j
#SBATCH --time=00:40:00 #SBATCH --time=00:40:00
#SBATCH --partition=booster #SBATCH --partition=develbooster
#SBATCH --gres=gpu:1 #SBATCH --gres=gpu:1
# Make sure we are in the right directory # Make sure we are in the right directory
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment