Skip to content
Snippets Groups Projects
Select Git revision
  • c4fc33c827850b6555ee128a93a2aed19962ef0d
  • 2023 default protected
2 results

change_libdwarf_path.patch

Blame
  • system.cc 5.08 KiB
    /****************************************************************************
    **  LinkTest                                                               **
    *****************************************************************************
    **  Copyright (c) 2008-2022                                                **
    **  Forschungszentrum Juelich, Juelich Supercomputing Centre               **
    **                                                                         **
    **  See the file COPYRIGHT in the package base directory for details       **
    ****************************************************************************/
    #include "config.h"
    #include "compiler.h"
    #include "error.h"
    #include "environ.h"
    #include "system.h"
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netdb.h>
    #include <cstring>
    #include <stdexcept>
    
    
    namespace linktest
    {
    
    /* Return the short (unqualified) name of this host with `suffix` appended.
     *
     * The name reported by gethostname() is cut at the first '.', so only the
     * host part of a fully qualified name is kept. If gethostname() fails, the
     * host part is empty and the result is just `suffix`.
     */
    static std::string hostname(const std::string& suffix)
    {
        // Zero-filled and one byte larger than the length handed to
        // gethostname(): POSIX leaves it unspecified whether a truncated name
        // is null-terminated, so the last byte is reserved as the terminator.
        char name[256] = {};
        if (0 != gethostname(name, sizeof(name) - 1)) {
            // Do not use whatever partial content the call may have left behind.
            name[0] = '\0';
        }

        // Get only the hostname part of the string
        std::string host(name);
        const auto dot = host.find('.');
        if (std::string::npos != dot) {
            host.erase(dot);
        }

        return host + suffix;
    }
    
    /* Use the hostname of the node to retrieve the IP to which other hosts should
     * connect.
     *
     * The node name is hostname(suffix). It is resolved with getaddrinfo()
     * restricted to IPv4/TCP, and the first entry of the result list is copied
     * into *addr. *addr is zeroed and its family set to AF_INET before the
     * lookup. If the lookup fails, fatal() is called with the resolver's error
     * text (whether fatal() returns is not visible from this file; the early
     * returns keep *addr in its zeroed state if it does).
     */
    static void retrieve_addr_from_hostname(const std::string& suffix, 
                        linktest::IpAddr* addr)
    {
        std::memset(addr, 0, sizeof(*addr));
        addr->sin_family = AF_INET;

        struct addrinfo hints;
        std::memset(&hints, 0, sizeof(hints));
        hints.ai_flags    = AI_CANONNAME;
        hints.ai_family   = AF_INET;
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_protocol = IPPROTO_TCP;

        // Build the node name once; it is needed for the lookup and for any
        // error message.
        const std::string node = hostname(suffix);

        struct addrinfo* ailist = nullptr;
        const int rc = getaddrinfo(node.c_str(), nullptr, &hints, &ailist);
        if (0 != rc) {
            // The result pointer must not be trusted after a failed lookup.
            fatal("getaddrinfo(\"%s\") failed: %s", 
                node.c_str(), gai_strerror(rc));
            return;
        }
        if (nullptr == ailist) {
            fatal("getaddrinfo(\"%s\") returned nullptr", 
                node.c_str());
            return;
        }

        *addr = *reinterpret_cast<const linktest::IpAddr*>(ailist->ai_addr);
        freeaddrinfo(ailist);
    }
    
    
    namespace
    {
    // File-local pointer to the System instance handed to store_singleton().
    // It is nullptr until store_singleton() has been called.
    System* _system_singleton = nullptr;
    }

    /* Remember `system` as the instance returned by singleton().
     *
     * The pointer is stored as-is; a previously stored instance is neither
     * deleted nor otherwise touched.
     */
    void System::store_singleton(System* system)
    {
        _system_singleton = system;
    }

    /* Return the pointer last passed to store_singleton(), or nullptr if
     * nothing has been stored yet.
     */
    System* System::singleton()
    {
        return _system_singleton;
    }
    
    void System::setup_singleton_instance()
    {
        if (std::string("JUWELS Booster") == std::string(LINKTEST_SYSTEM)) {
            store_singleton(new linktest::System_JUWELS_Booster);
        } else
        if (std::string("JURECA-DC GPU") == std::string(LINKTEST_SYSTEM)) {
            store_singleton(new linktest::System_JURECA_DC_GPU);
        } else {
            store_singleton(new linktest::GenericSystem);
        }
    }
    
    
    /* Read the environment variable LINKTEST_ENVIRON_PREFIX + k into `val`.
     *
     * `val` is only assigned when read_environ_str() returns 0 and hands back
     * a non-null string; otherwise it keeps its previous content.
     */
    void read_environ_string(const std::string& k, std::string& val)
    {
        const std::string key = std::string(LINKTEST_ENVIRON_PREFIX) + k;

        // Start from nullptr so a helper that reports success without setting
        // the pointer cannot lead to assigning from an indeterminate or null
        // pointer (both undefined for std::string).
        const char* p = nullptr;

        const auto ret = read_environ_str(key.c_str(), &p);
        if ((0 == ret) && (nullptr != p)) {
            val = p;
        }
    }
    
    
    GenericSystem::GenericSystem()
    : closest_hca_port_(1), closest_gpu_(0)
    {
        read_environ_string("SYSTEM_HCA_NAME", closest_hca_name_);
        read_environ_int(LINKTEST_ENVIRON_PREFIX "SYSTEM_HCA_PORT", &closest_hca_port_);
        read_environ_int(LINKTEST_ENVIRON_PREFIX "SYSTEM_GPU", &closest_gpu_);
        read_environ_string("SYSTEM_NODENAME_SUFFIX", nodename_suffix_);
    }
    
    /* Report whether an HCA name is configured, i.e. the stored name is
     * non-empty.
     */
    bool GenericSystem::has_closest_hca() const
    {
        return !closest_hca_name_.empty();
    }
    
    /* Copy the configured HCA name into `name` (empty if none is configured). */
    void GenericSystem::closest_hca_name(std::string& name)
    {
        name.assign(closest_hca_name_);
    }
    
    /* Return the stored HCA port number (initialised to 1 by the constructor
     * before the environment is consulted).
     */
    int GenericSystem::closest_hca_port()
    {
        return closest_hca_port_;
    }
    
    /* Return the stored GPU index (initialised to 0 by the constructor before
     * the environment is consulted).
     */
    int GenericSystem::closest_gpu_device()
    {
        return closest_gpu_;
    }
    
    /* Fill *addr with the address obtained by resolving this node's host name
     * with nodename_suffix_ appended.
     *
     * Always returns SUCCESS; lookup failures are reported inside
     * retrieve_addr_from_hostname() via fatal().
     */
    int GenericSystem::ip_address(IpAddr* addr)
    {
        retrieve_addr_from_hostname(nodename_suffix_, addr);
        return SUCCESS;
    }
    
    
    /* Select the GPU and HCA for this process from its node-local rank.
     *
     * local_rank_to_gpu / local_rank_to_hca map a local rank (index) to the
     * GPU index and to the N of the "mlx5_N" HCA name respectively. The local
     * rank is read from the environment variable MPI_LOCALRANKID; an unreadable
     * value is logged and an out-of-range value makes verify_local_rank_()
     * throw std::runtime_error before either table is indexed.
     */
    MultiGpuMultiHcaSystem::MultiGpuMultiHcaSystem(const std::array<int, kNumGpus>& local_rank_to_gpu,
                               const std::array<int, kNumGpus>& local_rank_to_hca)
    : local_rank_(-1),
      local_rank_to_gpu_(local_rank_to_gpu),
      local_rank_to_hca_(local_rank_to_hca)
    {
        if (unlikely(read_environ_int("MPI_LOCALRANKID", &local_rank_))) {
            error("Failed to read \"MPI_LOCALRANKID\" from environment");
        }

        // Must run before the tables below are indexed with local_rank_.
        verify_local_rank_();

        const int hca_index = local_rank_to_hca_[local_rank_];
        closest_hca_name_ = "mlx5_" + std::to_string(hca_index);
        closest_hca_port_ = 1;

        closest_gpu_ = local_rank_to_gpu_[local_rank_];
    }
    
    /* Export UCX settings that pin communication to the selected HCA.
     *
     * Sets UCX_MAX_RNDV_RAILS to "1" and UCX_NET_DEVICES to "<hca name>:<port>".
     * Both calls pass overwrite = 0, so values already present in the
     * environment are left as they are. The results of setenv() are not
     * checked. Always returns SUCCESS.
     */
    int MultiGpuMultiHcaSystem::change_environ()
    {
        // Built in a std::string rather than a fixed-size buffer so a long HCA
        // name cannot be silently truncated.
        const std::string net_device =
            std::string(closest_hca_name_.c_str()) + ":" + std::to_string(closest_hca_port_);

        setenv("UCX_MAX_RNDV_RAILS", "1", 0);
        setenv("UCX_NET_DEVICES", net_device.c_str(), 0);

        return SUCCESS;
    }
    
    /* Ensure local_rank_ is a valid index, i.e. within [0, kNumGpus).
     *
     * Logs the offending value and throws std::runtime_error otherwise.
     */
    void MultiGpuMultiHcaSystem::verify_local_rank_()
    {
        const bool out_of_range = (local_rank_ < 0) || (local_rank_ >= kNumGpus);

        if (unlikely(out_of_range)) {
            error("Invalid local rank = %d", local_rank_);
            throw std::runtime_error("Invalid local rank");
        }
    }
    
    /* JUWELS Booster: local rank i is mapped to GPU i and to HCA index i
     * (first table is rank -> GPU, second is rank -> HCA).
     */
    System_JUWELS_Booster::System_JUWELS_Booster()
    : MultiGpuMultiHcaSystem({0, 1, 2, 3}, {0, 1, 2, 3})
    {
        // Suffix appended to the short host name for this system.
        nodename_suffix_ = "i";
    }
    
    /* JURECA-DC GPU: local rank i is mapped to GPU i, while the HCA indices
     * are swapped pairwise (ranks 0,1,2,3 -> HCA 1,0,3,2). The first table is
     * rank -> GPU, the second is rank -> HCA.
     */
    System_JURECA_DC_GPU::System_JURECA_DC_GPU()
    : MultiGpuMultiHcaSystem({0, 1, 2, 3}, {1, 0, 3, 2})
    {
        // Suffix appended to the short host name for this system.
        nodename_suffix_ = "i";
    }
    
    }