Skip to content
Snippets Groups Projects
Select Git revision
  • c60e29e2b33c260fb62e27a80f1b5166f02affab
  • master default protected
  • 67-multithreading-is-plattform-dependent
  • cmake_windows
  • v0.8.4
  • v0.8.3
  • v0.8.2
  • v0.8
  • v0.7
  • v0.6
  • v0.5-alpha
  • v0.4
12 results

SystemSettings.h

Blame
  • system.h 4.71 KiB
    /****************************************************************************
    **  LinkTest                                                               **
    *****************************************************************************
    **  Copyright (c) 2008-2022                                                **
    **  Forschungszentrum Juelich, Juelich Supercomputing Centre               **
    **                                                                         **
    **  See the file COPYRIGHT in the package base directory for details       **
    ****************************************************************************/
    #ifndef LINKTEST_SYSTEM_H
    #define LINKTEST_SYSTEM_H
    
    #include <string>
    #include <array>
    #include <initializer_list>
    #include "utils.h"
    #include "error.h"
    
    
    namespace linktest
    {
    
    /* Representation of the system on which the benchmark is executed. Used for
     * programmatic customizations as long as we cannot handle them through
     * configuration files or application-internal scripting.
     *
     * This is an abstract interface: apart from `change_environ()`, every
     * virtual member is pure and has to be provided by a concrete system class.
     */
    class System
    {

    public:
        /* Virtual destructor: instances are handled through `System*` (see
         * `store_singleton()` / `singleton()`), so destroying a derived system
         * through a base pointer must run the derived destructor.
         */
        virtual ~System() = default;

        /* Management of the singleton instance. The definitions live outside
         * this header.
         * NOTE(review): ownership of the pointer passed to `store_singleton()`
         * and the selection logic of `setup_singleton_instance()` are not
         * visible here - confirm against the implementation.
         */
        static void     store_singleton(System* system);
        static System*  singleton();
        static void     setup_singleton_instance();

        /* Callback function that allows to modify the process environment
         * at an early stage in the application. The callback is executed
         * before the first VirtualCluster instance is created.
         *
         * The default implementation changes nothing and returns SUCCESS.
         */
        virtual int change_environ() { return SUCCESS; }

        /* Returns `true` if the calling process should use a specific
         * InfiniBand network device.
         */
        [[nodiscard]] virtual bool has_closest_hca() const = 0;

        /* Name of the closest InfiniBand network device for the calling process.
         * The result is written to the output parameter `name`.
         * NOTE(review): the function returns void, so "no preference" cannot be
         * reported through a return value; presumably callers are expected to
         * consult `has_closest_hca()` first - confirm against the implementation.
         */
        virtual void closest_hca_name(std::string& name) = 0;

        /* InfiniBand network device port to use. By default the first port
         * is used.
         */
        [[nodiscard]] virtual int closest_hca_port() = 0;

        // Retrieve the closest GPU device for the calling process.
        [[nodiscard]] virtual int closest_gpu_device() = 0;

        /* Get an IP address of the local host that can be reached from other
         * nodes in the system. Some systems have a hierarchical infrastructure
         * and not all IP subnets are globally routed. In this case we need to
         * pick the right IP.
         *
         * The address is stored in `*addr`.
         * NOTE(review): the int return value is presumably a status code such
         * as SUCCESS - confirm against the implementation.
         */
        [[nodiscard]] virtual int ip_address(IpAddr* addr) = 0;

    };
    
    /* Generic HPC system. The HCA and GPU that are reported to the caller can be
     * overridden from the environment:
     *   - `LINKTEST_SYSTEM_HCA_NAME`  name of the HCA  (e.g. `mlx5_0`)
     *   - `LINKTEST_SYSTEM_HCA_PORT`  port of the HCA  (e.g. `1`)
     *   - `LINKTEST_SYSTEM_GPU`       GPU device       (e.g. `0`)
     * Setting `LINKTEST_SYSTEM_NODENAME_SUFFIX` additionally allows the code to
     * resolve the right IP for the setup of the IP-based out-of-band
     * communication channel.
     */
    class GenericSystem : public System
    {
    public:
        // Defined outside this header.
        GenericSystem();

        // A system object can be neither copy- nor move-constructed.
        GenericSystem(const GenericSystem&) = delete;
        GenericSystem(GenericSystem&&) = delete;

        // --- Implementation of the System interface ---
        [[nodiscard]] bool has_closest_hca() const override;
        void closest_hca_name(std::string& name) override;
        [[nodiscard]] int closest_hca_port() override;
        [[nodiscard]] int closest_gpu_device() override;
        [[nodiscard]] int ip_address(IpAddr* addr) override;

    protected:
        // State that is accessible to derived system classes.
        // NOTE(review): presumably populated from the LINKTEST_SYSTEM_*
        // variables listed above - confirm in the constructor.
        std::string closest_hca_name_;  // name of the HCA
        int closest_hca_port_;          // port of the HCA
        int closest_gpu_;               // GPU device
        std::string nodename_suffix_;   // node name suffix
    };
    
    // System with multiple GPUs and one HCA per GPU
    class MultiGpuMultiHcaSystem : public GenericSystem
    {
    
    public:
        static const int        kNumGpus = 4;
    
        MultiGpuMultiHcaSystem(const std::array<int, kNumGpus>& local_rank_to_gpu, const std::array<int, kNumGpus>& local_rank_to_hca);
        MultiGpuMultiHcaSystem(const MultiGpuMultiHcaSystem&) = delete;
        MultiGpuMultiHcaSystem(MultiGpuMultiHcaSystem&&) = delete;
    
        virtual int     change_environ();
    
    private:
        // The rank on the node
        int                             local_rank_;
        // Mapping of the local rank to the nearest GPU
        std::array<int, kNumGpus>       local_rank_to_gpu_;
        // Mapping of the local rank to the right HCA
        std::array<int, kNumGpus>       local_rank_to_hca_;
        void            verify_local_rank_();
    
    };
    
    /* The JUWELS Booster system at JSC.
     *
     * A MultiGpuMultiHcaSystem that is default-constructible; the constructor
     * is defined outside this header.
     * NOTE(review): presumably the constructor supplies the machine-specific
     * rank-to-GPU and rank-to-HCA tables to the base class - confirm.
     */
    class System_JUWELS_Booster : public MultiGpuMultiHcaSystem
    {
    public:
        System_JUWELS_Booster();

        // Neither copy- nor move-constructible.
        System_JUWELS_Booster(const System_JUWELS_Booster&) = delete;
        System_JUWELS_Booster(System_JUWELS_Booster&&) = delete;
    };
    
    /* The JURECA-DC system at JSC.
     *
     * A MultiGpuMultiHcaSystem that is default-constructible; the constructor
     * is defined outside this header.
     * NOTE(review): presumably the constructor supplies the machine-specific
     * rank-to-GPU and rank-to-HCA tables to the base class - confirm.
     */
    class System_JURECA_DC_GPU : public MultiGpuMultiHcaSystem
    {
    public:
        System_JURECA_DC_GPU();

        // Neither copy- nor move-constructible.
        System_JURECA_DC_GPU(const System_JURECA_DC_GPU&) = delete;
        System_JURECA_DC_GPU(System_JURECA_DC_GPU&&) = delete;
    };
    
    }
    
    #endif