diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index ddb78ec98fca4a2ba0a8b190636554f52b7ea027..0a45937fdeb76eecee60b003cda5301f93eeacc5 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -659,7 +659,9 @@ int Benchmark::retest_slow_pairs(slow_pair* const sp,const int n,const int iter) const int to = sp[i].to; if (0 == rank()) { - std::printf(" %6d: Retest %6d <-> %6d:", i, from, to); + const char* from_host = cl->hostnamesAndRanks.hostForRank[from].c_str(); + const char* to_host = cl->hostnamesAndRanks.hostForRank[to].c_str(); + std::printf(" %6d: Retest %6d (%s) <-> %6d (%s):", i, from, from_host, to, to_host); std::fflush(stdout); } @@ -839,9 +841,8 @@ int Benchmark::init() { randomNumberEngineSteps = std::mt19937(0); } - if(args->do_group_processes_by_hostname > 0) { - cl->getHostAndLocalRank(); - } + // Run it always to gather hostname information + cl->getHostAndLocalRank(); return SUCCESS; } diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index 15f8dee60965f16eaabf6390725c6a71c25508da..722d39a7caa2b62259f250aa68797c83d2c4bc58 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -514,7 +514,7 @@ const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std const std::string dot{"."}; auto pos = executableName.find(dot); if(pos != std::string::npos) { - requestedImpl = executableName.substr(pos); + requestedImpl = executableName.substr(pos+1); } // Check environment @@ -598,7 +598,10 @@ void VirtualCluster::getHostAndLocalRank(){ for(int i=1;i<size();i++){ int j=uhostnames.size()-1; for(;j>=0;j--){ //Iterate backwards as hostnames are likely to repeat - if(hostnames[i]!=uhostnames[j]) continue; + // Compare based on c strings to avoid mismatches due to control characters + if(strcmp(hostnames[i].c_str(), uhostnames[j].c_str()) != 0){ + continue; + } else{ ranks[j].push_back(i); break; @@ -614,14 +617,25 @@ void VirtualCluster::getHostAndLocalRank(){ std::stringstream buf; 
for(std::vector<std::string>::size_type i=0;i<uhostnames.size();i++){ buf << uhostnames[i].c_str() << ": " << ranks[i]; - debug("[%3d] Ranks on %s", 0, buf.str().c_str());buf.str(std::string()); + debug("[%3d] Ranks on %s", 0, buf.str().c_str()); + buf.str(std::string()); } #endif /******************************************************/ /* Check that all hosts have the same number of ranks */ /******************************************************/ - if(ranks.size()%2!=0) fatal("An even number of hosts is required!"); - for(std::vector<std::vector<int>>::size_type i=0;i<ranks.size();i++) if(ranks[0].size()!=ranks[i].size()) fatal("Hosts have differing amounts of ranks!"); + // One host is allowed for intranode testing + if(ranks.size()%2 != 0 and ranks.size() != 1){ + fatal("An even number of hosts is required!"); + } + else if(size()%2 != 0){ + fatal("An even number of ranks is required!"); + } + for(std::vector<std::vector<int>>::size_type i=0;i<ranks.size();i++){ + if(ranks[0].size()!=ranks[i].size()){ + fatal("Hosts have differing amounts of ranks!"); + } + } /**************************************/ /* Broadcast information to all ranks */ @@ -661,6 +675,17 @@ void VirtualCluster::getHostAndLocalRank(){ } } bcast(0, hostLocalRanks_.get(), buf64x4[0]*buf64x4[2]); + + /* + * Fill in the hostnamesAndRanks struct to be queried later on + */ + for(int rank = 0; rank < size(); rank++){ + hostnamesAndRanks.hostForRank.push_back(hostnames[rank]); + } + for(std::vector<std::string>::size_type host=0;host<uhostnames.size();host++){ + hostnamesAndRanks.ranksForHost.push_back(ranks[host]); + } + }else{ gather(0, &tmp32, &tmp32, 1); //Gather hostname lengths send(0, hostname().c_str(), hostnameSize()); //Send hostname diff --git a/benchmark/vcluster.h b/benchmark/vcluster.h index b516073ed5a576c26509ecadd8bbf8856843d8d4..d2c65e5f1f29e5a2d02a40b54699fe4e5d33b30f 100644 --- a/benchmark/vcluster.h +++ b/benchmark/vcluster.h @@ -73,6 +73,11 @@ public: int localRank() ; 
std::shared_ptr<int[]> hostLocalRanks() ; + struct { + std::vector<std::string> hostForRank; + std::vector<std::vector<int>> ranksForHost; + } hostnamesAndRanks; + /*! \brief send wrapped data in buf to rank dst (communication layer undefined) * * The data may or may not be routed through the communication layer of this cluster diff --git a/exampleBuildJEDI.sh b/exampleBuildJEDI.sh new file mode 100755 index 0000000000000000000000000000000000000000..3356cd46fdc772bf89ff2831d831d24926d806bc --- /dev/null +++ b/exampleBuildJEDI.sh @@ -0,0 +1,35 @@ +#!/bin/bash +############################################################################# +## LinkTest ## +############################################################################# +## Copyright (c) 2008-2021 ## +## Forschungszentrum Juelich, Juelich Supercomputing Centre ## +## ## +## See the file COPYRIGHT in the package base directory for details ## +############################################################################# + + +# The example uses a system that supports MPI, TCP, UCX and IBVerbs. +# Minipmi is already installed in ~/.local + +# Set-Up Environment +ml GCC OpenMPI SciPy-Stack SIONlib; + +# Install linktest in folder install_jedi +mkdir -p install_jedi; +cd benchmark; +make clean +make -j HAVE_SION=1 HAVE_TCP=0 HAVE_UCP=0 PREFIX=../install_jedi install; +cd ..; +# Install linktest-report +export CPATH=$EBROOTSCIPYMINBUNDLE/lib/python3*/site-packages/numpy/core/include:$CPATH +cd install_jedi; +python3 -m venv linktest-report-venv; +source linktest-report-venv/bin/activate +cd ../python; +python3 -m pip install .; #TODO: Add --use-feature=in-tree-build if using pip 21.0.X to 21.2.X (default from 21.3 onwards) +deactivate; +cd ..; +# Notice that we close the virtual environment, since this script is likely not sourced +# To use python-report one has to source linktest-report-venv/bin/activate again +# To uninstall: pip uninstall linktest, or remove the virtual environment completely