diff --git a/course_material/slides/fahad_DL_on_SCs_November_2019.pdf b/course_material/slides/fahad_DL_on_SCs_November_2019.pdf index b442b30fecd1bc023d919633103327f270c365da..b7f4ecd36f3ef1e0596fb2f6678f902a6bf11838 100644 Binary files a/course_material/slides/fahad_DL_on_SCs_November_2019.pdf and b/course_material/slides/fahad_DL_on_SCs_November_2019.pdf differ diff --git a/horovod_data_distributed/README.md b/horovod_data_distributed/README.md index 612d11a63175ab2ff704160603a5d5751baaadb5..3a13e2b83b5b749240563b152bf70598e62c6335 100644 --- a/horovod_data_distributed/README.md +++ b/horovod_data_distributed/README.md @@ -4,8 +4,8 @@ Please see the main docstring in each program for details. # Notes -On JURECA and JUWELS, the `mnist_data_distributed.py` program requires the [`hpc4ns.distribution`]( -https://gitlab.version.fz-juelich.de/hpc4ns/hpc4ns_utils#1-hpc4nsdistribution) +On JURECA and JUWELS, the `mnist_data_distributed.py` program requires the [`hpc4neuro.distribution`]( +https://gitlab.version.fz-juelich.de/hpc4ns/hpc4neuro#1-hpc4neurodistribution) module for distribution of training data filenames across multiple ranks. On JURON, multiple additional package are required. Please follow the steps below to setup the environment before submitting the training job. @@ -17,9 +17,9 @@ are eight training files. 1. Change to the source directory for this sample, i.e., to `dl_on_supercomputers/horovod_data_distributed` 2. Load the system-wide Python module: `module load Python/3.6.8` -3. Install the `hpc4ns` package: +3. Install the `hpc4neuro` package: - `pip install --user git+https://gitlab.version.fz-juelich.de/hpc4ns/hpc4ns_utils.git` + `pip install --user git+https://gitlab.version.fz-juelich.de/hpc4ns/hpc4neuro.git` 4. Submit the job diff --git a/horovod_data_distributed/mnist_data_distributed.py b/horovod_data_distributed/mnist_data_distributed.py index 2faa3c95d36a5954310cbce1679e1ee98954ad94..d4c68c19174058a41d0198b322a0f4035ef22419 100644 --- a/horovod_data_distributed/mnist_data_distributed.py +++ b/horovod_data_distributed/mnist_data_distributed.py @@ -22,8 +22,8 @@ import tensorflow as tf import horovod.tensorflow.keras as hvd from tensorflow.python.keras import backend as K -from hpc4ns.errors import MpiInitError -from hpc4ns.distribution import DataDistributor +from hpc4neuro.errors import MpiInitError +from hpc4neuro.distribution import DataDistributor sys.path.insert(0, '../utils') from data_utils import DataValidator @@ -93,7 +93,7 @@ def initialize_hvd_and_mpi(): are no conflicts between Horovod and mpi4py communicator initialization. - :exception: hpc4ns.errors.MpiInitError is raised in the case + :exception: hpc4neuro.errors.MpiInitError is raised in the case of initialization failure. """ diff --git a/horovod_data_distributed/setup_juron.sh b/horovod_data_distributed/setup_juron.sh index 13d0b8681e94dc7d2de40bc935a133858ef06316..7fa1a24a7361187b627c6a0d64dc57c113b843f4 100755 --- a/horovod_data_distributed/setup_juron.sh +++ b/horovod_data_distributed/setup_juron.sh @@ -4,10 +4,10 @@ module load python/3.6.1 # Create a virtual environment -python -m venv venv_dl_hpc4ns +python -m venv venv_dl_hpc4neuro # Activate the virtual environment -source venv_dl_hpc4ns/bin/activate +source venv_dl_hpc4neuro/bin/activate # Upgrade pip and setuptools pip install -U pip setuptools @@ -18,7 +18,7 @@ env MPICC=/gpfs/software/opt/openmpi/3.1.2-gcc_5.4.0-cuda_10.0.130/bin/mpicc pip # Install six pip install six -# Install hpc4ns -pip install git+https://gitlab.version.fz-juelich.de/hpc4ns/hpc4ns_utils.git +# Install hpc4neuro +pip install git+https://gitlab.version.fz-juelich.de/hpc4ns/hpc4neuro.git printf "%s\n\n" "Setup complete." diff --git a/horovod_data_distributed/submit_job_juron.sh b/horovod_data_distributed/submit_job_juron.sh index 9ad3e50a5e61cbc07d4c2ec7cf4b08add1135cde..a71bc471dc2f56c06096cd2eb5897e86dfded09f 100755 --- a/horovod_data_distributed/submit_job_juron.sh +++ b/horovod_data_distributed/submit_job_juron.sh @@ -15,7 +15,7 @@ module load tensorflow/1.12.0-gcc_5.4.0-cuda_10.0.130 module load horovod/0.15.2 # Activate the virtual environment -source venv_dl_hpc4ns/bin/activate +source venv_dl_hpc4neuro/bin/activate # Run the program mpirun -bind-to none \