Merge branch 'master' into 'main'

Master

See merge request !3

Merge branch 'master' into 'main'
a3a79111 · Chelsea Maria John · bb3ef5fe · 3a49d3e0 · a3a79111 · a3a79111
Commit a3a79111 authored 3 years ago by Chelsea Maria John
--- a/jobscript.sh
+++ b/jobscript.sh
 #!/bin/bash
 #SBATCH --account=opengptx-elm
 #SBATCH --partition=booster
-#SBATCH --job-name=opt175b
+#SBATCH --job-name=opt125m_test
-#SBATCH --nodes=48
+#SBATCH --nodes=4
 #SBATCH --hint=nomultithread         
 #SBATCH --ntasks-per-node=4          
 #SBATCH --cpus-per-task=12           # number of cores per tasks
 #SBATCH --gres=gpu:4                 # number of gpus
-#SBATCH --time=00:15:00              # maximum execution time (HH:MM:SS)
+#SBATCH --time=00:10:00              # maximum execution time (HH:MM:SS)
 #SBATCH --output=slurmLog/%x-%j.out  # output file name
 #SBATCH --error=slurmLog/%x-%j.err   # error file name
@@ -25,7 +25,7 @@ opt-baselines -n "$SLURM_NNODES" -g 4  \
    --account opengptx-elm \
    --partition booster \
    --prefix "$SLURM_JOB_NAME" \
-    --model-size 175b \
+    --model-size 125m \
    --juwelsbooster \
    --data "$DATA_PATH" \
    --ntasks-per-node 4 \
@@ -34,7 +34,7 @@ opt-baselines -n "$SLURM_NNODES" -g 4  \
    --tensorboard-logdir "$TENSORBOARD_PATH" \
    --no-save-dir \
    --snapshot-root "$ROOT_OUTPUT_DIR" \
-    --time 15  \
+    --time 10  \
    --no-wandb \
    --cpu-bind socket \
    --salloc

--- a/setup.bash
+++ b/setup.bash
@@ -32,6 +32,9 @@ source activate.bash
 python -m pip install --upgrade pip
+# Requires numpy 1.22.0
+python -m pip install numpy==1.22.0
 #Installing PyTorch 1.10.1 version with cuda 11.3 used by metaseq
 # python -m pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html