diff --git a/course_material/examples/mnist_epoch_distributed.py b/course_material/examples/mnist_epoch_distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..7c9080e63af23eabeb6a6b47c8e89edf26e7190f --- /dev/null +++ b/course_material/examples/mnist_epoch_distributed.py @@ -0,0 +1,99 @@ +# Copyright (c) 2019 Forschungszentrum Juelich GmbH. +# This code is licensed under MIT license (see the LICENSE file for details). +# This code is derived from Horovod, which is licensed under the Apache License, +# Version 2.0 (see the NOTICE file for details). + +""" + This program is an adaptation of the following code sample: + https://github.com/horovod/horovod/blob/master/examples/keras_mnist.py. + The program creates and trains a shallow ANN for handwritten digit + classification using the MNIST dataset. + + The Horovod framework is used for seamless distributed training. In this + example, the epochs (not the data) are distributed across the Horovod ranks. + + To run this sample, use the following command on your + workstation/laptop equipped with a GPU: + + mpirun -np 1 python -u mnist_epoch_distributed.py + + If you have more than one GPU on your system, you can increase the + number of ranks accordingly. + + The code has been tested with Python 3.7.5, tensorflow-gpu 1.13.1, and + horovod 0.16.2. + + Note: This code will NOT work on the supercomputers. + +""" + +import math +import tensorflow as tf +import horovod.tensorflow.keras as hvd +from tensorflow.python.keras import backend as K + + +# Horovod: initialize Horovod. 
+hvd.init() + +# Horovod: pin GPU to be used to process local rank (one GPU per process) +config = tf.ConfigProto() +config.gpu_options.visible_device_list = str(hvd.local_rank()) +K.set_session(tf.Session(config=config)) + +# Reference to the MNIST dataset +mnist = tf.keras.datasets.mnist + +# Load the MNIST dataset, split between train and test sets +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +# Normalize input samples by dividing them by 255.0 +x_train, x_test = x_train / 255.0, x_test / 255.0 + +# Define the model, i.e., the network: Flatten -> Dense(512, ReLU) -> Dense(10, softmax) +model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(512, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.softmax) +]) + +# Optimizer: Adam constructed with its default arguments +optimizer = tf.keras.optimizers.Adam() + +# Decorate the optimizer with Horovod's distributed optimizer +optimizer = hvd.DistributedOptimizer(optimizer) + +# Horovod: adjust number of epochs based on number of GPUs (4 epochs divided by hvd.size(), rounded up). +epochs = int(math.ceil(4.0 / hvd.size())) + +# Compile the model +model.compile( + optimizer=optimizer, + loss='sparse_categorical_crossentropy', + metrics=['accuracy'] +) + +# Training callbacks +callbacks = [ + # Horovod: broadcast initial variable states from rank 0 to all other processes. + # This is necessary to ensure consistent initialization of all workers when + # training is started with random weights or restored from a checkpoint. 
+ hvd.callbacks.BroadcastGlobalVariablesCallback(0) +] + +# Train the model using the training set (verbose=1 on rank 0 only, 0 on all other ranks) +model.fit( + x=x_train, + y=y_train, + batch_size=32, + epochs=epochs, + verbose=1 if hvd.rank() == 0 else 0, + callbacks=callbacks +) + +# Run the test on the root rank (rank 0) only +if hvd.rank() == 0: + # Test the model on the test set + score = model.evaluate(x=x_test, y=y_test, verbose=0) + print(f'Test loss: {score[0]}') + print(f'Test accuracy: {score[1]}') diff --git a/course_material/examples/mnist_single_gpu.py b/course_material/examples/mnist_single_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..794150fe230348b0001d86158d32a9a9e5e52cbd --- /dev/null +++ b/course_material/examples/mnist_single_gpu.py @@ -0,0 +1,67 @@ +# Copyright (c) 2019 Forschungszentrum Juelich GmbH. +# This code is licensed under MIT license (see the LICENSE file for details). +# This code is derived from Tensorflow tutorials, which is licensed under the Apache License, +# Version 2.0 (see the NOTICE file for details). + +""" + This program is an adaptation of the code sample available at + https://www.tensorflow.org/tutorials/. The program creates + and trains a shallow ANN for handwritten digit classification + using the MNIST dataset. + + To run this sample use the following command on your + workstation/laptop equipped with a GPU: + + python -u mnist_single_gpu.py + + The code has been tested with Python 3.7.5 and tensorflow-gpu 1.13.1. + + Note: This code will NOT work on the supercomputers. 
+ +""" + +import tensorflow as tf + + +# Reference to the MNIST data object +mnist = tf.keras.datasets.mnist + +# Load the MNIST dataset, split between train and test sets +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +# Normalize input samples by dividing them by 255.0 +x_train, x_test = x_train / 255.0, x_test / 255.0 + +# Define the model, i.e., the network: Flatten -> Dense(512, ReLU) -> Dense(10, softmax) +model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(512, activation=tf.nn.relu), + tf.keras.layers.Dense(10, activation=tf.nn.softmax) +]) + +# Optimizer: Adam constructed with its default arguments +optimizer = tf.keras.optimizers.Adam() + +# Number of training epochs +epochs = 4 + +# Compile the model +model.compile( + optimizer=optimizer, + loss='sparse_categorical_crossentropy', + metrics=['accuracy'] +) + +# Train the model using the training set +model.fit( + x=x_train, + y=y_train, + batch_size=32, + epochs=epochs, + verbose=1 +) + +# Test the model using the test set +score = model.evaluate(x=x_test, y=y_test, verbose=0) +print(f'Test loss: {score[0]}') +print(f'Test accuracy: {score[1]}') diff --git a/course_material/slides/.gitkeep b/course_material/slides/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/utils/data_utils.py b/utils/data_utils.py index bab6e035ea82e22108998da561519a70eea94eac..090337b579f9f531855db370bca7d2fb90eda0bd 100644 --- a/utils/data_utils.py +++ b/utils/data_utils.py @@ -32,15 +32,9 @@ class DataValidator: recognized input data directory locations. If the check is passed, returns the fully qualified path to the input data directory. - Parameters - ---------- - filename: - Name of the data file to be checked - - Returns - ------- - string: - Fully qualified path to the input data directory + :param filename: Name of the data file to be checked. + + :return: str. Fully qualified path to the input data directory. """