Skip to content
Snippets Groups Projects
Commit 3d9d6890 authored by Alexandre Strube
Browse files

finally

parent d60bc6ed
No related branches found
No related tags found
No related merge requests found
Pipeline #140695 passed
from fastai.vision.all import * from fastai.vision.all import *
from fastai.distributed import * from fastai.distributed import *
from fastai.vision.models.xresnet import * from fastai.vision.models.xresnet import *
from accelerate import Accelerator
# Print status information about the distributed environment
accelerator = Accelerator()
print(accelerator.state)
path = rank0_first(untar_data, URLs.IMAGEWOOF_320) path = rank0_first(untar_data, URLs.IMAGEWOOF_320)
dls = DataBlock( dls = DataBlock(
......
...@@ -3,9 +3,9 @@ ...@@ -3,9 +3,9 @@
#SBATCH --nodes=1 #SBATCH --nodes=1
#SBATCH --job-name=ai-multi-gpu #SBATCH --job-name=ai-multi-gpu
#SBATCH --ntasks-per-node=1 #SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=4 #SBATCH --cpus-per-task=48
#SBATCH --output=output.%j #SBATCH --output=out-distrib.%j
#SBATCH --error=err.%j #SBATCH --error=err-distrib.%j
#SBATCH --time=00:20:00 #SBATCH --time=00:20:00
#SBATCH --partition=develbooster #SBATCH --partition=develbooster
#SBATCH --gres=gpu:4 #SBATCH --gres=gpu:4
...@@ -17,13 +17,10 @@ MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)" ...@@ -17,13 +17,10 @@ MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)"
# Allow communication over InfiniBand cells. # Allow communication over InfiniBand cells.
MASTER_ADDR="${MASTER_ADDR}i" MASTER_ADDR="${MASTER_ADDR}i"
# Get IP for hostname. # Get IP for hostname.
MASTER_ADDR="$(nslookup "$MASTER_ADDR" | grep -oP '(?<=Address: ).*')"
MASTER_PORT=6000 MASTER_PORT=6000
GPUS_PER_NODE=4 GPUS_PER_NODE=4
NNODES=$SLURM_JOB_NUM_NODES NNODES=$SLURM_JOB_NUM_NODES
# Make sure we are on the right directory # Make sure we are on the right directory
cd $HOME/2023-may-intro-to-supercompting-jsc/src cd $HOME/2023-may-intro-to-supercompting-jsc/src
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
#SBATCH --job-name=ai-serial #SBATCH --job-name=ai-serial
#SBATCH --ntasks-per-node=1 #SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1 #SBATCH --cpus-per-task=1
#SBATCH --output=output.%j #SBATCH --output=out-serial.%j
#SBATCH --error=err.%j #SBATCH --error=err-serial.%j
#SBATCH --time=00:40:00 #SBATCH --time=00:40:00
#SBATCH --partition=develbooster #SBATCH --partition=develbooster
#SBATCH --gres=gpu:1 #SBATCH --gres=gpu:1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment