Commit fade019c authored by Fahad Khalid

Updated all scripts (except for Caffe) to use the 2019a modules.

parent e9d2c91a
Horovod Keras MNIST submission script:

@@ -3,22 +3,20 @@
 # Slurm job configuration
 #SBATCH --nodes=2
 #SBATCH --ntasks=4
-#SBATCH --ntasks-per-node=2
+#SBATCH --ntasks-per-node=4
 #SBATCH --output=output_%j.out
 #SBATCH --error=error_%j.er
 #SBATCH --time=00:10:00
 #SBATCH --job-name=HOROVOD_KERAS_MNIST
-#SBATCH --gres=gpu:2 --partition=develgpus
+#SBATCH --gres=gpu:4 --partition=develgpus
 #SBATCH --mail-type=ALL
 
 # Load the required modules
-module use /usr/local/software/jureca/OtherStages
-module load Stages/2018b
-module load GCC/7.3.0
-module load MVAPICH2/2.3-GDR
-module load TensorFlow/1.12.0-GPU-Python-3.6.6
-module load Keras/2.2.4-GPU-Python-3.6.6
-module load Horovod/0.15.2-GPU-Python-3.6.6
+module load GCC/8.3.0
+module load MVAPICH2/2.3.1-GDR
+module load TensorFlow/1.13.1-GPU-Python-3.6.8
+module load Keras/2.2.4-GPU-Python-3.6.8
+module load Horovod/0.16.2-GPU-Python-3.6.8
 
 # Run the program
 srun python -u mnist.py
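The updated script asks for four GPUs and four tasks per node instead of two. With Horovod, each rank launched by srun normally pins itself to the GPU matching its local rank; the lines below are only a minimal sketch of that standard TF 1.x / Keras pattern, not code taken from the repository's mnist.py.

# Minimal sketch: per-rank GPU pinning with Horovod 0.16 and TF 1.13 / Keras 2.2.
# Illustrative only; the repository's mnist.py may organise this differently.
import tensorflow as tf
import horovod.keras as hvd
from keras import backend as K

hvd.init()  # one Horovod rank per task launched by srun

# Make only the GPU matching this rank's local rank visible to the session
config = tf.ConfigProto()
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))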
Horovod PyTorch MNIST training script (mnist.py):

@@ -57,7 +57,7 @@ if args.cuda:
 dataset_file = os.path.join(data_dir, data_file)
 
 # [HPCNS] Dataset filename for this rank
-dataset_for_rank = 'MNIST-data-%d' % hvd.rank()
+dataset_for_rank = 'MNIST'
 
 # [HPCNS] If the path already exists, remove it
 if os.path.exists(dataset_for_rank):
@@ -68,7 +68,7 @@ shutil.copytree(dataset_file, dataset_for_rank)
 kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
 train_dataset = \
-    datasets.MNIST(dataset_for_rank, train=True, download=False,
+    datasets.MNIST('', train=True, download=False,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
@@ -80,7 +80,7 @@ train_loader = torch.utils.data.DataLoader(
     train_dataset, batch_size=args.batch_size, sampler=train_sampler, **kwargs)
 test_dataset = \
-    datasets.MNIST(dataset_for_rank, train=False, download=False, transform=transforms.Compose([
+    datasets.MNIST('', train=False, download=False, transform=transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))
     ]))
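The per-rank copies named 'MNIST-data-<rank>' are replaced by a single copy named 'MNIST', and the root passed to torchvision becomes ''. This matches the directory convention of the newer torchvision: with 0.3.0, datasets.MNIST(root, ...) reads from <root>/MNIST/processed, so staging the data as ./MNIST and passing root='' resolves to the same files. A short sketch of the resulting layout and call, assuming torchvision 0.3.0 semantics (illustrative, not repository code):

# Sketch only: how the staged copy and root='' fit together in torchvision 0.3.0.
# Expected layout:  ./MNIST/processed/training.pt  and  ./MNIST/processed/test.pt
import os
from torchvision import datasets, transforms

assert os.path.isdir(os.path.join('MNIST', 'processed')), \
    'expected the pre-staged MNIST copy in the current working directory'

train_dataset = datasets.MNIST(
    '', train=True, download=False,  # root='' resolves to <cwd>/MNIST/...
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]))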
Horovod PyTorch MNIST submission script:

@@ -3,20 +3,20 @@
 # Slurm job configuration
 #SBATCH --nodes=2
 #SBATCH --ntasks=4
-#SBATCH --ntasks-per-node=2
+#SBATCH --ntasks-per-node=4
 #SBATCH --output=output_%j.out
 #SBATCH --error=error_%j.er
 #SBATCH --time=00:10:00
 #SBATCH --job-name=HOROVOD_PYTORCH_MNIST
-#SBATCH --gres=gpu:2 --partition=develgpus
+#SBATCH --gres=gpu:4 --partition=develgpus
 #SBATCH --mail-type=ALL
 
 # Load the required modules
-module load GCC/7.3.0
-module load MVAPICH2/2.3-GDR
-module load PyTorch/1.0.0-GPU-Python-3.6.6
-module load torchvision/0.2.1-GPU-Python-3.6.6
-module load Horovod/0.15.2-GPU-Python-3.6.6
+module load GCC/8.3.0
+module load MVAPICH2/2.3.1-GDR
+module load PyTorch/1.1.0-GPU-Python-3.6.8
+module load torchvision/0.3.0-GPU-Python-3.6.8
+module load Horovod/0.16.2-GPU-Python-3.6.8
 
 # Run the program
 srun python -u mnist.py
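The PyTorch Horovod script moves to the same 2019a stack (PyTorch 1.1.0, torchvision 0.3.0, Horovod 0.16.2). For reference, the generic Horovod/PyTorch initialisation such a script relies on is sketched below; this is the standard pattern, not a copy of the repository's mnist.py.

# Generic Horovod + PyTorch setup (sketch, not repository code).
import torch
import horovod.torch as hvd

hvd.init()  # one rank per task launched by srun
if torch.cuda.is_available():
    # Pin this rank to the GPU matching its local rank on the node
    torch.cuda.set_device(hvd.local_rank())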
Horovod TensorFlow MNIST submission script:

@@ -3,21 +3,20 @@
 # Slurm job configuration
 #SBATCH --nodes=2
 #SBATCH --ntasks=4
-#SBATCH --ntasks-per-node=2
+#SBATCH --ntasks-per-node=4
 #SBATCH --output=output_%j.out
 #SBATCH --error=error_%j.er
 #SBATCH --time=00:10:00
 #SBATCH --job-name=HOROVOD_TFLOW_MNIST
-#SBATCH --gres=gpu:2 --partition=develgpus
+#SBATCH --gres=gpu:4 --partition=develgpus
 #SBATCH --mail-type=ALL
 
 # Load the required modules
-module use /usr/local/software/jureca/OtherStages
-module load Stages/2018b
-module load GCC/7.3.0
-module load MVAPICH2/2.3-GDR
-module load TensorFlow/1.12.0-GPU-Python-3.6.6
-module load Horovod/0.15.2-GPU-Python-3.6.6
+module load GCC/8.3.0
+module load MVAPICH2/2.3.1-GDR
+module load TensorFlow/1.13.1-GPU-Python-3.6.8
+module load Keras/2.2.4-GPU-Python-3.6.8
+module load Horovod/0.16.2-GPU-Python-3.6.8
 
 # Run the program
 srun python -u mnist.py
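Besides the module versions, the TensorFlow Horovod example depends on the usual Horovod TF 1.x wiring: wrap the optimizer so gradients are allreduced and broadcast the initial variables from rank 0. A minimal sketch of that pattern follows; the optimizer choice and learning rate are placeholders, not values from the repository.

# Sketch of the canonical Horovod TF 1.x pattern (illustrative only).
import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()

# Scale the learning rate with the number of workers and wrap the optimizer
opt = tf.train.AdamOptimizer(0.001 * hvd.size())
opt = hvd.DistributedOptimizer(opt)

# Broadcast initial variables from rank 0 so all workers start identically
hooks = [hvd.BroadcastGlobalVariablesHook(0)]
# The loss, train_op and MonitoredTrainingSession come from the model code.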
PyTorch MNIST training script (mnist.py):

@@ -108,7 +108,7 @@ def main():
     dataset_file = os.path.join(data_dir, data_file)
 
     # [HPCNS] A copy of the dataset in the current directory
-    dataset_copy = 'MNIST-data'
+    dataset_copy = 'MNIST'
 
     # [HPCNS] If the path already exists, remove it
     if os.path.exists(dataset_copy):
@@ -120,14 +120,14 @@ def main():
     kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
     train_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(dataset_copy, train=True, download=False,
+        datasets.MNIST('', train=True, download=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,))
                        ])),
         batch_size=args.batch_size, shuffle=True, **kwargs)
     test_loader = torch.utils.data.DataLoader(
-        datasets.MNIST(dataset_copy, train=False, download=False, transform=transforms.Compose([
+        datasets.MNIST('', train=False, download=False, transform=transforms.Compose([
             transforms.ToTensor(),
             transforms.Normalize((0.1307,), (0.3081,))
         ])),
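The single-process PyTorch script gets the matching rename: the copy is now 'MNIST' and the loaders use root=''. Because download=False, torchvision raises a RuntimeError if the copy is missing; a small defensive check such as the one below (illustrative, not in the repository) makes that failure easier to diagnose in a batch job.

# Illustrative guard before building the data loaders (not repository code).
import os

dataset_copy = 'MNIST'  # name the script copies the dataset to
if not os.path.isdir(os.path.join(dataset_copy, 'processed')):
    raise RuntimeError('Pre-staged MNIST copy not found at ./%s/processed'
                       % dataset_copy)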
PyTorch MNIST submission script:

@@ -12,12 +12,9 @@
 #SBATCH --mail-type=ALL
 
 # Load the required modules
-module use /usr/local/software/jureca/OtherStages
-module load Stages/2018b
-module load GCC/7.3.0
-module load MVAPICH2/2.3-GDR
-module load PyTorch/1.0.0-GPU-Python-3.6.6
-module load torchvision/0.2.1-GPU-Python-3.6.6
+module load GCC/8.3.0
+module load PyTorch/1.1.0-GPU-Python-3.6.8
+module load torchvision/0.3.0-GPU-Python-3.6.8
 
 # Run the program
 srun python -u mnist.py
TensorFlow MNIST submission script:

@@ -7,7 +7,7 @@
 #SBATCH --output=output_%j.out
 #SBATCH --error=error_%j.er
 #SBATCH --time=00:10:00
-#SBATCH --job-name=TENSORFLOW_MNIST
+#SBATCH --job-name=TFLOW_MNIST
 #SBATCH --gres=gpu:1 --partition=develgpus
 #SBATCH --mail-type=ALL