Skip to content
Snippets Groups Projects
Select Git revision
  • e1965bc6d99d2654266a662f085a746f06bc9f09
  • master default protected
  • tf2
  • tf2_pytorch
  • issue_3
  • issue_2
  • 2019a
  • juwels_2019a
  • jureca_2019_a
9 results

data_utils.py

Blame
  • user avatar
    Fahad Khalid authored
    456e3c27
    History
    data_utils.py 2.26 KiB
    # Copyright (c) 2019 Forschungszentrum Juelich GmbH.
    # This code is licensed under MIT license (see the LICENSE file for details).
    
    """
        A collection of utilities for data manipulation.
    
        It was created to simplify the process of working with pre-downloaded
        datasets.
    
    """
    
    import os
    
    
    class DatasetNotFoundError(Exception):
        """
        Error signalling that a requested dataset could not be located.

        Adds nothing on top of the built-in Exception; it exists so callers
        can catch dataset lookup failures specifically.
        """

        pass
    
    
    class DataValidator:
        """
        This class provides functions for validation of input data.

        All functionality is exposed through static methods, so no instance
        state is required. The constructor is kept so that callers which
        instantiate the class keep working.

        """

        def __init__(self):
            """ No-op constructor. """

        @staticmethod
        def validated_data_dir(filename):
            """
            Locates the input data directory and checks that 'filename' is
            available inside it.

            The directory is read from the 'DL_TEST_DATA_HOME' environment
            variable when that variable is set. Otherwise '../datasets' and
            then '../../datasets' (both resolved against the current working
            directory) are tried, and the selected path is printed.

            Parameters
            ----------
            filename:
                Name of the data file to be checked

            Returns
            -------
            string:
                Path to the input data directory. The value of
                'DL_TEST_DATA_HOME' is returned unchanged; the fallback
                locations are returned as absolute paths.

            Raises
            ------
            DatasetNotFoundError:
                If the selected data directory is not an existing directory,
                or if 'filename' cannot be found inside it.

            """

            # The environment variable, when set, always takes precedence
            data_dir = os.environ.get('DL_TEST_DATA_HOME')

            if data_dir is None:
                # Set path to the 'datasets' directory in the project root
                data_dir = os.path.abspath('../datasets')

                # We are two levels deep when executing Horovod samples
                if not os.path.isdir(data_dir):
                    data_dir = os.path.abspath('../../datasets')

                print('Using {} as the data directory.'.format(data_dir))

            # Use isdir rather than exists: a regular file at this path is not
            # a usable data directory and must be reported as such, instead of
            # falling through to the misleading "Unable to locate" error below.
            if not os.path.isdir(data_dir):
                raise DatasetNotFoundError(
                    '{} refers to a non-existing directory. Please either correctly set '
                    'the DL_TEST_DATA_HOME environment variable, or make sure the datasets are '
                    'available in the project root.'.format(data_dir)
                )

            if not os.path.exists(os.path.join(data_dir, filename)):
                raise DatasetNotFoundError(
                    'Unable to locate {} in {}'.format(filename, data_dir)
                )

            return data_dir