From 51df38fcf75173df03c34f06f1f047200b3d0079 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 8 Jul 2022 14:51:29 +0200 Subject: [PATCH 01/47] Added vcluster_portals.h/.cc Added portals support to Makefile Cleaned up Makefile --- benchmark/Makefile | 79 ++++++++++++++++++++++++++++++----- benchmark/vcluster_portals.cc | 31 ++++++++++++++ benchmark/vcluster_portals.h | 23 ++++++++++ 3 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 benchmark/vcluster_portals.cc create mode 100644 benchmark/vcluster_portals.h diff --git a/benchmark/Makefile b/benchmark/Makefile index f28a140..05f9e24 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -6,21 +6,35 @@ #** ** #** See the file COPYRIGHT in the package base directory for details ** #****************************************************************************/ -PREFIX = /usr/local/bin -USE_POSIX = 1 +# DEFAULTS +PREFIX = /usr/local/bin -HAVE_SION = 1 -HAVE_MPI = 1 -HAVE_MINIPMI = 0 -HAVE_TCP = 1 -HAVE_IBVERBS = 0 -HAVE_PSM2 = 0 -HAVE_CUDA = 0 -HAVE_UCP = 0 +USE_POSIX = 1 +HAVE_SION = 1 +HAVE_MPI = 1 +HAVE_MINIPMI = 0 +HAVE_TCP = 1 +HAVE_IBVERBS = 0 +HAVE_PSM2 = 0 +HAVE_CUDA = 0 +HAVE_UCP = 0 +HAVE_PORTALS = 0 FSANITIZE = address +SYSTEM = generic +GIT_HASH = $(shell git rev-parse --verify HEAD) +GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) +CC = mpicxx +CFLAGS = -std=c++17 -Wall -O2 +CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" +LD = $(CC) +LDFLAGS = +LIBS = + +# Handle Dependencies +# ========================================= ifeq (1, $(HAVE_IBVERBS)) HAVE_MINIPMI = 1 HAVE_TCP = 1 @@ -37,6 +51,10 @@ ifeq (1, $(HAVE_UCP)) HAVE_MINIPMI = 1 HAVE_TCP = 1 endif +ifeq (1, $(HAVE_PORTALS)) + HAVE_MINIPMI = 1 + HAVE_TCP = 1 +endif ifdef V $(info USE_POSIX = $(USE_POSIX)) @@ -48,7 +66,9 @@ $(info HAVE_IBVERBS = $(HAVE_IBVERBS)) $(info
HAVE_PSM2 = $(HAVE_PSM2)) $(info HAVE_CUDA = $(HAVE_CUDA)) $(info HAVE_UCP = $(HAVE_UCP)) +$(info HAVE_PORTALS = $(HAVE_PORTALS)) endif +# ========================================= SYSTEM = generic GIT_HASH = $(shell git rev-parse --verify HEAD) @@ -103,6 +123,8 @@ $(error CUARCH is not set) LIBS += -lcuda -lcudart endif +# DEFINE EXECUTABLES +# ========================================= linktest-versions = ifeq (1, $(HAVE_MPI)) linktest-versions += linktest.mpi @@ -120,6 +142,9 @@ ifeq (1, $(HAVE_MINIPMI)) ifeq (1, $(HAVE_UCP)) linktest-versions += linktest.ucp endif + ifeq (1, $(HAVE_PORTALS)) + linktest-versions += linktest.portals + endif ifeq (1, $(HAVE_CUDA)) linktest-versions += linktest.cuda endif @@ -128,7 +153,11 @@ endif ifdef V $(info linktest-versions = $(linktest-versions)) endif +# ========================================= + +# DEFINE OBJECT FILES AND FLAGS +# ========================================= linktest-obj = linktest.o \ system.o \ benchmark.o \ @@ -159,6 +188,9 @@ ifeq (1, $(HAVE_TCP)) CFLAGS += -DHAVE_VCLUSTER_TCP=1 endif ifeq (1, $(HAVE_MINIPMI)) + CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1 + LDFLAGS += -Lminipmi + LIBS += -lminipmi ifeq (1, $(HAVE_IBVERBS)) linktest-obj += vcluster_ibverbs.o \ ibverbs_mr.o \ @@ -179,19 +211,39 @@ ifeq (1, $(HAVE_MINIPMI)) CFLAGS += -DHAVE_VCLUSTER_UCP=1 LIBS += -lucp endif + ifeq (1, $(HAVE_PORTALS)) + linktest-obj += vcluster_portals.o + CFLAGS += -DHAVE_VCLUSTER_PORTALS=1 + LDFLAGS += -Lportals + LIBS += -lportals + endif ifeq (1, $(HAVE_CUDA)) linktest-obj += vcluster_cuda.o \ cuda_kernels.o \ gpu_nvidia.o \ memory_cuda.o - CFLAGS += -DHAVE_VCLUSTER_CUDA=1 + CU = nvcc + CUARCH = + CUFLAGS = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1 + CPPFLAGS += -I$(CUDA)/include -DHAVE_CUDA=1 + LDFLAGS += -L$(CUDA)/lib + LIBS += -lcuda -lcudart endif endif ifeq (1, $(HAVE_SION)) linktest-obj += vcluster_sion_generic_adapter.o + CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags 
--mpi) + LIBS += $(shell sionconfig --64 --gcc --libs --mpi) endif +ifeq (1, ${USE_POSIX}) + CPPFLAGS += -D__USE_POSIX +endif +# ========================================= + +# DEFINE MAKE RULES +# ========================================= ifdef V Q = else @@ -260,6 +312,10 @@ linktest.ucp: linktest @echo " "LN $@ $(call link,ucp) +linktest.portals: linktest + @echo " "LN $@ + $(call link,portals) + linktest.cuda: linktest @echo " "LN $@ $(call link,cuda) @@ -274,3 +330,4 @@ install: linktest $(linktest-versions) for f in $^ ; do \ cp -d $$f $(PREFIX)/$$f ; \ done +# ========================================= \ No newline at end of file diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc new file mode 100644 index 0000000..1f49078 --- /dev/null +++ b/benchmark/vcluster_portals.cc @@ -0,0 +1,31 @@ +/**************************************************************************** +** LinkTest ** +***************************************************************************** +** Copyright (c) 2008-2022 ** +** Forschungszentrum Juelich, Juelich Supercomputing Centre ** +** ** +** See the file COPYRIGHT in the package base directory for details ** +****************************************************************************/ +#include "vcluster_portals.h" +extern "C" { +#include <minipmi.h> +#include <portals4.h> +} + +#include "error.h" + +int VirtualClusterPortals::init() +{ + int ret = PtlInit(); + if (ret == PTL_FAIL) { + fatal("Portals failed to initialize"); + return ERROR; + } + return SUCCESS; +} + +int VirtualClusterPortals::finalize() +{ + PtlFini(); + return SUCCESS; +} diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h new file mode 100644 index 0000000..09b07e7 --- /dev/null +++ b/benchmark/vcluster_portals.h @@ -0,0 +1,23 @@ +/**************************************************************************** +** LinkTest ** +***************************************************************************** +** Copyright (c) 
2008-2022 ** +** Forschungszentrum Juelich, Juelich Supercomputing Centre ** +** ** +** See the file COPYRIGHT in the package base directory for details ** +****************************************************************************/ +#ifndef LINKTEST_VCLUSTER_PORTALS_H +#define LINKTEST_VCLUSTER_PORTALS_H + +#include "vcluster.h" + +// VirtualCluster implementation based on a Portals 4 +class VirtualClusterPortals : public VirtualClusterWithHelper +{ + +public: + virtual int init() override; + virtual int finalize() override; +}; + +#endif -- GitLab From 67b165b31d3e42134c127f6806d541a62467070a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Jul 2022 14:24:41 +0200 Subject: [PATCH 02/47] tmp --- benchmark/vcluster.cc | 19 +++++++++++++++---- benchmark/vcluster.h | 15 ++++++++------- benchmark/vcluster_portals.cc | 22 +++++++++++++++++++--- benchmark/vcluster_portals.h | 2 ++ benchmark/vcluster_tcp.cc | 4 ++-- 5 files changed, 46 insertions(+), 16 deletions(-) diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index c493241..735f8a3 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -25,6 +25,9 @@ #if HAVE_VCLUSTER_UCP == 1 #include "vcluster_ucp.h" #endif +#if HAVE_VCLUSTER_PORTALS == 1 +#include "vcluster_portals.h" +#endif #if HAVE_VCLUSTER_CUDA == 1 #include "vcluster_cuda.h" #endif @@ -419,6 +422,11 @@ VirtualCluster* VirtualCluster::factory(const std::string& name){ return new VirtualClusterUCP(name); } else #endif +#if 1 == HAVE_VCLUSTER_PORTALS + if (VirtualClusterPortals::NAME == name) { + return new VirtualClusterPortals(); + } else +#endif #if 1 == HAVE_VCLUSTER_CUDA if ("cuda" == name) { return new VirtualClusterCUDA(name); @@ -444,28 +452,28 @@ void VirtualClusterWithHelper::set_helper_pointer(VirtualCluster* helper) int VirtualClusterWithHelper::rank() { if (unlikely(!helper_)) - throw; // Simply returning -1 will result in complicated bugs + fatal("rank() called on a 
VirtualClusterWithHelper with undefined helper"); return helper_->rank(); } int VirtualClusterWithHelper::size() { if (unlikely(!helper_)) - throw; // Simply returning -1 will result in complicated bugs + fatal("size() called on a VirtualClusterWithHelper with undefined helper"); return helper_->size(); } int VirtualClusterWithHelper::send(int dst, MemoryBuffer& buf) { if (unlikely(!helper_)) - return -1; + fatal("send() called on a VirtualClusterWithHelper with undefined helper"); return helper_->send(dst, buf); } int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf) { if (unlikely(!helper_)) - return -1; + fatal("recv() called on a VirtualClusterWithHelper with undefined helper"); return helper_->recv(src, buf); } @@ -486,6 +494,9 @@ const char* VirtualCluster::impls[] = #if 1 == HAVE_VCLUSTER_UCP "ucp", #endif + #if 1 == HAVE_VCLUSTER_PORTALS + VirtualClusterPortals::NAME, + #endif #if 1 == HAVE_VCLUSTER_CUDA "cuda", #endif diff --git a/benchmark/vcluster.h b/benchmark/vcluster.h index 751c4fe..f758b75 100644 --- a/benchmark/vcluster.h +++ b/benchmark/vcluster.h @@ -296,13 +296,14 @@ private: std::shared_ptr<int[]> hostLocalRanks_; }; -/* Since a full implementation of send()/recv() logic on top of some transport layer is +/* VirtualClusterWithHelper + * delegates calls to rank(), size(), send() and recv() to another VirtualCluster (the helper) + * executes calls to benchmark kernels directly + * + * Since a full implementation of send()/recv() logic on top of some transport layer is * more complicated than the logic required for the implementation of the communication - * in kpingpong() it make sense to use a different VirtualCluster for the management - * communication than for the actual benchmark. - * VirtualClusterWithHelper allows derived classes to easily re-use another VirtualCluster - * instance. We do not use inheritance since the helper logic is not really an "is-a" - * relation. 
+ * in our kernels it make sense to use a different VirtualCluster for the management + * communication than for the actual benchmark. */ class VirtualClusterWithHelper : public VirtualCluster { @@ -320,7 +321,7 @@ public: int recv(int src, MemoryBuffer& buf) override; protected: - void set_helper_pointer(VirtualCluster* helper); + void set_helper_pointer(VirtualCluster* helper); VirtualCluster* helper_; }; diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 1f49078..db4a1e0 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -7,20 +7,30 @@ ** See the file COPYRIGHT in the package base directory for details ** ****************************************************************************/ #include "vcluster_portals.h" +#include "error.h" extern "C" { #include <minipmi.h> #include <portals4.h> } -#include "error.h" - int VirtualClusterPortals::init() { - int ret = PtlInit(); + auto ret = PtlInit(); if (ret == PTL_FAIL) { fatal("Portals failed to initialize"); return ERROR; } + helper_ = nullptr; + set_helper_pointer(VirtualCluster::factory("tcp")); + if (helper_ == nullptr) { + fatal("Portals helper (VirtualClusterTCP) not constructed"); + return ERROR; + } + ret = helper_->init(); + if (ret != SUCCESS) { + fatal("Portals helper (VirtualClusterTCP) failed to initialize"); + return ERROR; + } return SUCCESS; } @@ -29,3 +39,9 @@ int VirtualClusterPortals::finalize() PtlFini(); return SUCCESS; } + + +VirtualClusterPortals::VirtualClusterPortals() +: VirtualClusterWithHelper(VirtualClusterPortals::NAME) +{ +} diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 09b07e7..7b91507 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -16,6 +16,8 @@ class VirtualClusterPortals : public VirtualClusterWithHelper { public: + inline static const char * NAME = "portals"; + VirtualClusterPortals(); virtual int init() override; virtual int finalize() override; }; diff --git 
a/benchmark/vcluster_tcp.cc b/benchmark/vcluster_tcp.cc index 494d4c4..2e7d001 100644 --- a/benchmark/vcluster_tcp.cc +++ b/benchmark/vcluster_tcp.cc @@ -599,9 +599,9 @@ int VirtualClusterTCP::init() { auto ret = linktest_minipmi_context_borrow(&pmi_); #if 1 == HAVE_MINIPMI - if (unlikely(ret)) { + if (ret != SUCCESS) { error("linktest_minipmi_context_borrow() failed."); - return ERROR; + return ret; } #endif -- GitLab From b4cd105bdf6a4a78a9944777673b4a4e447201d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 13 Jul 2022 17:13:16 +0200 Subject: [PATCH 03/47] Added Network Interface initialization --- benchmark/vcluster_portals.cc | 49 ++++++++++++++++++++++++++-------- benchmark/vcluster_portals.h | 50 +++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 11 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index db4a1e0..9068a91 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -8,38 +8,65 @@ ****************************************************************************/ #include "vcluster_portals.h" #include "error.h" -extern "C" { -#include <minipmi.h> -#include <portals4.h> -} int VirtualClusterPortals::init() { - auto ret = PtlInit(); - if (ret == PTL_FAIL) { - fatal("Portals failed to initialize"); - return ERROR; - } helper_ = nullptr; set_helper_pointer(VirtualCluster::factory("tcp")); if (helper_ == nullptr) { fatal("Portals helper (VirtualClusterTCP) not constructed"); return ERROR; } - ret = helper_->init(); + auto ret = helper_->init(); if (ret != SUCCESS) { fatal("Portals helper (VirtualClusterTCP) failed to initialize"); return ERROR; } + + if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) { + warn("Portals versions other than 4.0 may not be suppported"); + } + ret = PTL_OK; + ret = PtlInit(); + ret &= PtlNIInit( + PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README" + PTL_NI_PHYSICAL | PTL_NI_MATCHING, + rank(), 
+ nullptr,// &mni_limits_desired + &mni_limits_actual, + &mni_handle); + ret &= PtlNIInit( + PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README" + PTL_NI_PHYSICAL | PTL_NI_NO_MATCHING, + rank(), + nullptr, // &ni_limits_desired + &nni_limits_actual, + &nni_handle); + if (ret != PTL_OK) { + fatal("Portals failed to initialize"); + return ERROR; + } + return SUCCESS; } int VirtualClusterPortals::finalize() { - PtlFini(); + auto ret = PtlNIFini(nni_handle); + ret &= PtlNIFini(mni_handle); + ret &= PtlFini(); + if (ret != PTL_OK) { + fatal("Portals failed to finalize"); + return ERROR; + } + this->helper_->finalize(); return SUCCESS; } +int VirtualClusterPortals::kpingpong_send(int other, MemoryBuffer& buf) { + return -1; +} + VirtualClusterPortals::VirtualClusterPortals() : VirtualClusterWithHelper(VirtualClusterPortals::NAME) diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 7b91507..e89f17b 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -9,6 +9,11 @@ #ifndef LINKTEST_VCLUSTER_PORTALS_H #define LINKTEST_VCLUSTER_PORTALS_H +extern "C" { +#include <minipmi.h> +#include <portals4.h> +} + #include "vcluster.h" // VirtualCluster implementation based on a Portals 4 @@ -20,6 +25,51 @@ public: VirtualClusterPortals(); virtual int init() override; virtual int finalize() override; + + virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, + const int num_msg, double* const timing) override { + throw("Not Implemented"); + }; + + virtual int kUniDir(const int from, const int to, + MemoryBuffer& buf1, MemoryBuffer& buf2, + const int num_msg, double* const timing, + const bool doBarrier) override { + throw("Not Implemented"); + }; + virtual int kUniDirMultiBuf(const int from,const int to, + MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, + const int num_msg, double* const timing, + const bool doBarrier) override { + throw("Not Implemented"); + }; + virtual int kUniDirLimitedMultiBuf(const int from,const 
int to, + MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, + const int num_msg, double* const timing, + const bool doBarrier) override { + throw("Not Implemented"); + }; + + virtual int kbipingpong(const int from, const int to, + MemoryBuffer& buf1, MemoryBuffer& buf2, + const int num_msg, double* const timing) override { + throw("Not Implemented"); + }; + +private: + int kpingpong_send(int other, MemoryBuffer& buf); + int kpingpong_recv(int other, MemoryBuffer& buf); + + // matching (send/recv) Network Interface (ni) + ptl_ni_limits_t mni_limits_requested; + ptl_ni_limits_t mni_limits_actual; + ptl_handle_ni_t mni_handle; + + // Non matching (put) Network Interface (ni) + ptl_ni_limits_t nni_limits_requested; + ptl_ni_limits_t nni_limits_actual; + ptl_handle_ni_t nni_handle; + }; #endif -- GitLab From 87c0fcf13a042362fadf874ac1fc7d24f1b3604a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 15 Jul 2022 17:52:50 +0200 Subject: [PATCH 04/47] To be reverted --- benchmark/Makefile | 4 ++-- exampleBuild.sh | 2 +- exampleRun.sh | 9 +++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index 05f9e24..c7a2d10 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -27,8 +27,8 @@ SYSTEM = generic GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) CC = mpicxx -CFLAGS = -std=c++17 -Wall -O2 -CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" +CFLAGS = -std=c++17 -Wall -g -rdynamic +CPPFLAGS = -DDEBUG_PORTALS -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" LD = $(CC) LDFLAGS = LIBS = diff --git a/exampleBuild.sh b/exampleBuild.sh index 00ac73b..492d10b 100755 --- a/exampleBuild.sh +++ b/exampleBuild.sh @@ -23,7 +23,7 @@ export 
CPATH=$CPATH:~/.local/include/; mkdir -p install; cd benchmark; make clean -make -j HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install; +make -j HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install; make clean cd ..; # Install linktest-report diff --git a/exampleRun.sh b/exampleRun.sh index 0600c26..fc15465 100755 --- a/exampleRun.sh +++ b/exampleRun.sh @@ -10,13 +10,14 @@ ml GCC ParaStationMPI SIONlib salloc \ ---partition devel \ ---account cstao \ +--partition dp-bxi \ +--reservation maint-bxi \ +--account deepsea \ --nodes 2 \ srun \ --ntasks 4 \ install/linktest \ - --mode mpi \ + --mode portals \ --num-warmup-messages 10 \ --num-messages 100 \ - --size-messages $((16*1024*1024)); \ No newline at end of file + --size-messages $((16)); \ No newline at end of file -- GitLab From 74a13f4befd532a4ef3ea03b9def157014f88b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 15 Jul 2022 17:54:18 +0200 Subject: [PATCH 05/47] Implemented Initialization incl nid/pid to rank mapping --- benchmark/portals4_macros.h | 17 ++++++ benchmark/vcluster_portals.cc | 109 +++++++++++++++++++++++----------- benchmark/vcluster_portals.h | 14 ++--- 3 files changed, 96 insertions(+), 44 deletions(-) create mode 100644 benchmark/portals4_macros.h diff --git a/benchmark/portals4_macros.h b/benchmark/portals4_macros.h new file mode 100644 index 0000000..6175265 --- /dev/null +++ b/benchmark/portals4_macros.h @@ -0,0 +1,17 @@ +#ifndef LINKTEST_PORTALS4MACROS_H +#define LINKTEST_PORTALS4MACROS_H + +#define CHECK_RETURNVAL(x) do { int ret; \ + switch (ret = x) { \ + case PTL_IGNORED: \ + case PTL_OK: break; \ + case PTL_FAIL: fprintf(stderr, "=> %s returned PTL_FAIL (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + case PTL_NO_SPACE: fprintf(stderr, "=> %s returned PTL_NO_SPACE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + case PTL_ARG_INVALID: fprintf(stderr, "=> %s returned PTL_ARG_INVALID (line 
%u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + case PTL_NO_INIT: fprintf(stderr, "=> %s returned PTL_NO_INIT (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + case PTL_PT_IN_USE: fprintf(stderr, "=> %s returned PTL_PT_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + case PTL_IN_USE: fprintf(stderr, "=> %s returned PTL_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ + default: fprintf(stderr, "=> %s returned failcode %i (line %u)\n", #x, ret, (unsigned int)__LINE__); abort(); break; \ + } } while (0) + +#endif //PORTALS4MACROS \ No newline at end of file diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 9068a91..b798479 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -7,54 +7,57 @@ ** See the file COPYRIGHT in the package base directory for details ** ****************************************************************************/ #include "vcluster_portals.h" +#include "portals4_macros.h" +#include "memory.h" #include "error.h" +void VirtualClusterPortals::initPhysicalFromRank() { + ptl_process_t physId; + CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) ); + + #if defined(DEBUG_PORTALS) + info("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d", + rank(), + hostname().c_str(), + physId.phys.nid, + physId.phys.pid); + #endif + + physicalFromRank.resize(size()); + gather(0, physicalFromRank.data(), &physId, 1); + bcast(0, physicalFromRank.data(), size()); + + if(physicalFromRank.at(rank()).phys.nid != physId.phys.nid) fatal("Failed to broadcast physicalFromRank"); + if(physicalFromRank.at(rank()).phys.pid != physId.phys.pid) fatal("Failed to broadcast physicalFromRank"); +} + int VirtualClusterPortals::init() { - helper_ = nullptr; set_helper_pointer(VirtualCluster::factory("tcp")); - if (helper_ == nullptr) { - fatal("Portals helper (VirtualClusterTCP) not constructed"); - return ERROR; - } - auto ret = helper_->init(); - if (ret != 
SUCCESS) { - fatal("Portals helper (VirtualClusterTCP) failed to initialize"); - return ERROR; - } - + EXEC_NOFAIL(helper_->init()); + if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) { warn("Portals versions other than 4.0 may not be suppported"); } - ret = PTL_OK; - ret = PtlInit(); - ret &= PtlNIInit( + CHECK_RETURNVAL(PtlInit()); + CHECK_RETURNVAL(PtlNIInit( PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README" - PTL_NI_PHYSICAL | PTL_NI_MATCHING, - rank(), + PTL_NI_LOGICAL | PTL_NI_MATCHING, // Logical => using ranks, Matching => using send/recv semantics + PTL_PID_ANY, nullptr,// &mni_limits_desired &mni_limits_actual, - &mni_handle); - ret &= PtlNIInit( - PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README" - PTL_NI_PHYSICAL | PTL_NI_NO_MATCHING, - rank(), - nullptr, // &ni_limits_desired - &nni_limits_actual, - &nni_handle); - if (ret != PTL_OK) { - fatal("Portals failed to initialize"); - return ERROR; - } + &mni_handle)); + + initPhysicalFromRank(); + CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data())); return SUCCESS; } int VirtualClusterPortals::finalize() { - auto ret = PtlNIFini(nni_handle); - ret &= PtlNIFini(mni_handle); - ret &= PtlFini(); + auto ret = PtlNIFini(mni_handle); + PtlFini(); if (ret != PTL_OK) { fatal("Portals failed to finalize"); return ERROR; @@ -63,9 +66,47 @@ int VirtualClusterPortals::finalize() return SUCCESS; } -int VirtualClusterPortals::kpingpong_send(int other, MemoryBuffer& buf) { - return -1; -} +// int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) +// { +// std::printf("VirtualClusterPortals::kpingpong"); +// // Sender only +// ptl_md_t md; // Memory Descriptor +// ptl_handle_md_t md_handle; +// if (rank() == from) { +// md.start = buf.p(); +// md.length = buf.len(); +// md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; +// md.eq_handle = PTL_EQ_NONE; +// #if defined(DEBUG_PORTALS) +// 
std::printf("PtlMDBind"); +// #endif +// PtlMDBind(this->mni_handle, nullptr, &md_handle); +// } +// // Receiver only +// ptl_me_t me; // Match List Entry +// ptl_handle_me_t me_handle; +// if (rank() == to) { +// // Allow puts to buf from anyone +// me.start = buf.p(); +// me.length = buf.len(); +// me.uid = PTL_UID_ANY; +// me.match_id.phys.nid = PTL_NID_ANY; +// me.match_id.phys.pid = PTL_PID_ANY; +// me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; +// #if defined(DEBUG_PORTALS) +// std::printf("PtlMEAppend"); +// #endif +// PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle); +// } + +// if (rank() == to) { +// ptl_ct_event_t ct; // event counter +// //ret = PtlCTWait(me.ct_handle, num_msg, &ct); //TODO this is unidirectional by default +// } else if (rank() == from) { +// //PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, rankToNidPid.at(rank()), pt_index, 1, 0, NULL, 0)); +// //PtlCTWait(write_md.ct_handle, 2, &ctc)); +// } +// } VirtualClusterPortals::VirtualClusterPortals() diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index e89f17b..e3eadf3 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -13,7 +13,7 @@ extern "C" { #include <minipmi.h> #include <portals4.h> } - +#include <vector> #include "vcluster.h" // VirtualCluster implementation based on a Portals 4 @@ -57,19 +57,13 @@ public: }; private: - int kpingpong_send(int other, MemoryBuffer& buf); - int kpingpong_recv(int other, MemoryBuffer& buf); - // matching (send/recv) Network Interface (ni) - ptl_ni_limits_t mni_limits_requested; + ptl_ni_limits_t mni_limits_desired; ptl_ni_limits_t mni_limits_actual; ptl_handle_ni_t mni_handle; + std::vector<ptl_process_t> physicalFromRank; - // Non matching (put) Network Interface (ni) - ptl_ni_limits_t nni_limits_requested; - ptl_ni_limits_t nni_limits_actual; - ptl_handle_ni_t nni_handle; - + void initPhysicalFromRank(); }; #endif -- GitLab From 
111608eef935312bea0ea25152e84bf4f4abb003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Mon, 18 Jul 2022 14:44:24 +0200 Subject: [PATCH 06/47] Implemented put --- benchmark/vcluster_portals.cc | 116 +++++++++++++++++++++------------- benchmark/vcluster_portals.h | 8 +-- 2 files changed, 75 insertions(+), 49 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index b798479..8254d2c 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -11,7 +11,7 @@ #include "memory.h" #include "error.h" -void VirtualClusterPortals::initPhysicalFromRank() { +std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { ptl_process_t physId; CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) ); @@ -23,12 +23,14 @@ void VirtualClusterPortals::initPhysicalFromRank() { physId.phys.pid); #endif - physicalFromRank.resize(size()); + std::vector<ptl_process_t> physicalFromRank(size()); gather(0, physicalFromRank.data(), &physId, 1); bcast(0, physicalFromRank.data(), size()); if(physicalFromRank.at(rank()).phys.nid != physId.phys.nid) fatal("Failed to broadcast physicalFromRank"); if(physicalFromRank.at(rank()).phys.pid != physId.phys.pid) fatal("Failed to broadcast physicalFromRank"); + + return physicalFromRank; } int VirtualClusterPortals::init() @@ -48,7 +50,7 @@ int VirtualClusterPortals::init() &mni_limits_actual, &mni_handle)); - initPhysicalFromRank(); + auto physicalFromRank = getPhysicalFromRank(); CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data())); return SUCCESS; @@ -66,47 +68,75 @@ int VirtualClusterPortals::finalize() return SUCCESS; } -// int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) -// { -// std::printf("VirtualClusterPortals::kpingpong"); -// // Sender only -// ptl_md_t md; // Memory Descriptor -// ptl_handle_md_t md_handle; -// if 
(rank() == from) { -// md.start = buf.p(); -// md.length = buf.len(); -// md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; -// md.eq_handle = PTL_EQ_NONE; -// #if defined(DEBUG_PORTALS) -// std::printf("PtlMDBind"); -// #endif -// PtlMDBind(this->mni_handle, nullptr, &md_handle); -// } -// // Receiver only -// ptl_me_t me; // Match List Entry -// ptl_handle_me_t me_handle; -// if (rank() == to) { -// // Allow puts to buf from anyone -// me.start = buf.p(); -// me.length = buf.len(); -// me.uid = PTL_UID_ANY; -// me.match_id.phys.nid = PTL_NID_ANY; -// me.match_id.phys.pid = PTL_PID_ANY; -// me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; -// #if defined(DEBUG_PORTALS) -// std::printf("PtlMEAppend"); -// #endif -// PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle); -// } +int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) +{ + ptl_pt_index_t pt_index; + CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); + + // Sender only + ptl_md_t md; // Memory Descriptor + ptl_handle_md_t md_handle; + if (rank() == from) { + md.start = buf.p(); + md.length = buf.len(); + md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; + md.eq_handle = PTL_EQ_NONE; + #if defined(DEBUG_PORTALS) + std::printf("PtlMDBind"); + #endif + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) ); // Activate event counter + CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? 
TODO + } + // Receiver only + ptl_me_t me; // Match List Entry + ptl_handle_me_t me_handle; + if (rank() == to) { + me.start = buf.p(); + me.length = buf.len(); + me.uid = PTL_UID_ANY; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; + #if defined(DEBUG_PORTALS) + std::printf("PtlMEAppend"); + #endif + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) ); // Activate event counter + CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone + } + barrier(); + ptl_ct_event_t ct; // event counter + if (rank() == to) { + for(auto n = 0; n < num_msg; n++) { + #if defined(DEBUG_PORTALS) + barrier(); + PtlCTGet(md.ct_handle, &ct); + info("Received puts %d. Failed puts %d", ct.success, ct.failure); + #endif + } + CHECK_RETURNVAL( PtlCTWait(me.ct_handle, num_msg, &ct) ); //TODO is this unidirectional by default + } else if (rank() == from) { + ptl_process_t target; + target.rank = to; + const ptl_size_t localOffset = 0; + const ptl_size_t remoteOffset = 0; + const ptl_match_bits_t match_bits = 1; //TDOD why? + const ptl_hdr_data_t header_data = 0; + for(auto n = 1; n <= num_msg; n++) { + #if defined(DEBUG_PORTALS) + info("Sending message number %d", n); + #endif + CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); + #if defined(DEBUG_PORTALS) + barrier(); + PtlCTGet(md.ct_handle, &ct); + info("Successful puts %d. 
Failed puts %d", ct.success, ct.failure); + #endif + } + CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) ); + } -// if (rank() == to) { -// ptl_ct_event_t ct; // event counter -// //ret = PtlCTWait(me.ct_handle, num_msg, &ct); //TODO this is unidirectional by default -// } else if (rank() == from) { -// //PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, rankToNidPid.at(rank()), pt_index, 1, 0, NULL, 0)); -// //PtlCTWait(write_md.ct_handle, 2, &ctc)); -// } -// } + return SUCCESS; +} VirtualClusterPortals::VirtualClusterPortals() diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index e3eadf3..4afa003 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -10,7 +10,6 @@ #define LINKTEST_VCLUSTER_PORTALS_H extern "C" { -#include <minipmi.h> #include <portals4.h> } #include <vector> @@ -27,9 +26,7 @@ public: virtual int finalize() override; virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, - const int num_msg, double* const timing) override { - throw("Not Implemented"); - }; + const int num_msg, double* const timing) override; virtual int kUniDir(const int from, const int to, MemoryBuffer& buf1, MemoryBuffer& buf2, @@ -61,9 +58,8 @@ private: ptl_ni_limits_t mni_limits_desired; ptl_ni_limits_t mni_limits_actual; ptl_handle_ni_t mni_handle; - std::vector<ptl_process_t> physicalFromRank; - void initPhysicalFromRank(); + std::vector<ptl_process_t> getPhysicalFromRank(); }; #endif -- GitLab From aff5aa34f7f8541617c9b534b871d054a9205eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 5 Aug 2022 14:10:16 +0200 Subject: [PATCH 07/47] Added quick test scripts --- build.sh | 12 ++++++++++++ run.sh | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100755 build.sh create mode 100755 run.sh diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..24d12ec --- /dev/null +++ b/build.sh @@ -0,0 +1,12 @@ +# 1. 
Set-Up Environment +ml Stages/2020 GCC/10.3.0 ParaStationMPI/5.4.9-1 SIONlib/1.7.7; + +# 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support +#export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/; +#export CPATH=$CPATH:~/.local/include/; + +# 3. Install linktest in folder install +mkdir -p install; +cd benchmark; +make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install clean; +cd ..; \ No newline at end of file diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..621c112 --- /dev/null +++ b/run.sh @@ -0,0 +1,18 @@ +#!/bin/sh +#SBATCH --partition dp-bxi +#SBATCH --reservation maint-bxi +#SBATCH --account deepsea +#SBATCH --nodes 2 +ml Stages/2020 GCC/10.3.0 ParaStationMPI/5.4.9-1 SIONlib/1.7.7; + +export PORTALS4_DEBUG=3 + +srun \ +--label \ +--ntasks 4 \ + install/linktest \ + --mode portals \ + --num-warmup-messages 10 \ + --num-messages 100 \ + --size-messages $((16)) \ + --no-sion-file; \ No newline at end of file -- GitLab From 089dd17ab1eb1d46d0c2562204d731fdecced7a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 5 Aug 2022 15:05:30 +0200 Subject: [PATCH 08/47] switch to on partition install for openmpi --- build.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 24d12ec..6173108 100755 --- a/build.sh +++ b/build.sh @@ -1,10 +1,14 @@ # 1. Set-Up Environment -ml Stages/2020 GCC/10.3.0 ParaStationMPI/5.4.9-1 SIONlib/1.7.7; +ml Stages/2020 GCC/10.3.0 SIONlib/1.7.7; # 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support #export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/; #export CPATH=$CPATH:~/.local/include/; +source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh +## BXI profile +export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf + # 3. 
Install linktest in folder install mkdir -p install; cd benchmark; -- GitLab From 5372dcd463b6f9ec3364e4806f9c88908ba4b0dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Mon, 8 Aug 2022 13:56:14 +0200 Subject: [PATCH 09/47] Update to build run config --- build.sh | 11 ----------- loadEnv.sh | 10 ++++++++++ loadPath.sh | 7 +++++++ run.sh | 1 - 4 files changed, 17 insertions(+), 12 deletions(-) create mode 100644 loadEnv.sh create mode 100644 loadPath.sh diff --git a/build.sh b/build.sh index 6173108..745a9e4 100755 --- a/build.sh +++ b/build.sh @@ -1,14 +1,3 @@ -# 1. Set-Up Environment -ml Stages/2020 GCC/10.3.0 SIONlib/1.7.7; - -# 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support -#export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/; -#export CPATH=$CPATH:~/.local/include/; - -source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh -## BXI profile -export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf - # 3. 
Install linktest in folder install mkdir -p install; cd benchmark; diff --git a/loadEnv.sh b/loadEnv.sh new file mode 100644 index 0000000..05a4e2f --- /dev/null +++ b/loadEnv.sh @@ -0,0 +1,10 @@ +#!/bin/bash -x + + +## MPI +#source loadPath.sh /opt/mpi/openmpi/4.1.1.2/ +source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh +## BXI profile +export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf +## SIONlib and linktest +source loadPath.sh install \ No newline at end of file diff --git a/loadPath.sh b/loadPath.sh new file mode 100644 index 0000000..332eec7 --- /dev/null +++ b/loadPath.sh @@ -0,0 +1,7 @@ +#!/bin/bash +INSTALL_DIR=$(readlink -mn $1) +echo Loading $INSTALL_DIR +export LIBRARY_PATH=$LIBRARY_PATH:$INSTALL_DIR/lib/; +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INSTALL_DIR/lib/; +export CPATH=$CPATH:$INSTALL_DIR/include/; +export PATH=$PATH:$INSTALL_DIR/bin; \ No newline at end of file diff --git a/run.sh b/run.sh index 621c112..1fb98c8 100755 --- a/run.sh +++ b/run.sh @@ -3,7 +3,6 @@ #SBATCH --reservation maint-bxi #SBATCH --account deepsea #SBATCH --nodes 2 -ml Stages/2020 GCC/10.3.0 ParaStationMPI/5.4.9-1 SIONlib/1.7.7; export PORTALS4_DEBUG=3 -- GitLab From 9bb35f360a73c18514518e70ce95d6357ad5c50a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Mon, 8 Aug 2022 14:50:51 +0200 Subject: [PATCH 10/47] Added timing measurement --- benchmark/vcluster_portals.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 8254d2c..692efdb 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -10,6 +10,7 @@ #include "portals4_macros.h" #include "memory.h" #include "error.h" +#include "timing.h" std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { ptl_process_t physId; @@ -103,6 +104,7 @@ int VirtualClusterPortals::kpingpong(const int from, const int 
to, MemoryBuffer& CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) ); // Activate event counter CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone } + barrier(); ptl_ct_event_t ct; // event counter if (rank() == to) { @@ -121,9 +123,10 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& const ptl_size_t remoteOffset = 0; const ptl_match_bits_t match_bits = 1; //TDOD why? const ptl_hdr_data_t header_data = 0; + auto sendTime = walltime(); for(auto n = 1; n <= num_msg; n++) { #if defined(DEBUG_PORTALS) - info("Sending message number %d", n); + info("Put from %d to %d #%d", from, to, n); #endif CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); #if defined(DEBUG_PORTALS) @@ -132,9 +135,13 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& info("Successful puts %d. 
Failed puts %d", ct.success, ct.failure); #endif } + sendTime = walltime() - sendTime; + if(timing) *timing = sendTime; CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) ); } + barrier(); + info("Kernel finished"); return SUCCESS; } -- GitLab From 3a19ab5030ee71c2dcde49b7ae14b02b998bff12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Mon, 15 Aug 2022 16:37:06 +0200 Subject: [PATCH 11/47] attach gdb --- run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run.sh b/run.sh index 1fb98c8..70a1514 100755 --- a/run.sh +++ b/run.sh @@ -9,7 +9,7 @@ export PORTALS4_DEBUG=3 srun \ --label \ --ntasks 4 \ - install/linktest \ + gdb -ex run -ex bt -ex cont -ex quit --args install/linktest \ --mode portals \ --num-warmup-messages 10 \ --num-messages 100 \ -- GitLab From 5484b9f542d298a0875bdde1fff393a2e8b79757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 29 Sep 2022 16:32:00 +0200 Subject: [PATCH 12/47] Fix DEBUG defines --- build.sh | 2 +- run.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 745a9e4..363483c 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ # 3. 
Install linktest in folder install mkdir -p install; cd benchmark; -make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install clean; +make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -DDEBUG_PORTALS=True" PREFIX=../install install; cd ..; \ No newline at end of file diff --git a/run.sh b/run.sh index 70a1514..846f282 100755 --- a/run.sh +++ b/run.sh @@ -8,10 +8,10 @@ export PORTALS4_DEBUG=3 srun \ --label \ ---ntasks 4 \ - gdb -ex run -ex bt -ex cont -ex quit --args install/linktest \ +--ntasks 2 \ + install/linktest \ --mode portals \ - --num-warmup-messages 10 \ - --num-messages 100 \ + --num-warmup-messages 0 \ + --num-messages 3 \ --size-messages $((16)) \ --no-sion-file; \ No newline at end of file -- GitLab From e8bdcb0439833b2041ceb5b547e0bec2cf423489 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 29 Sep 2022 16:52:21 +0200 Subject: [PATCH 13/47] Moved initialization of structs in seperate functions --- benchmark/vcluster_portals.cc | 86 ++++++++++++++--------------------- benchmark/vcluster_portals.h | 27 +++++++++++ 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 692efdb..bbacd9c 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -69,54 +69,43 @@ int VirtualClusterPortals::finalize() return SUCCESS; } +void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) { + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) ); + CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) ); + md.start = buf.p(); + md.length = buf.len(); + md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; + md.eq_handle = md_eq_handle; + md.ct_handle = md_ct_handle; + CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? 
TODO +} + +void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { + CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) ); + me.start = buf.p(); + me.length = buf.len(); + me.uid = PTL_UID_ANY; + me.match_id.phys.nid = PTL_NID_ANY; + me.match_id.phys.pid = PTL_PID_ANY; + me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_EVENT_CT_COMM; + me.ct_handle = me_ct_handle; + CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone +} + int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) { - ptl_pt_index_t pt_index; - CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); + if (rank() == from) prepareSendStructs(buf, num_msg); + if (rank() == to) prepareRecvStructs(buf); + barrier(); - // Sender only - ptl_md_t md; // Memory Descriptor - ptl_handle_md_t md_handle; - if (rank() == from) { - md.start = buf.p(); - md.length = buf.len(); - md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; - md.eq_handle = PTL_EQ_NONE; - #if defined(DEBUG_PORTALS) - std::printf("PtlMDBind"); - #endif - CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) ); // Activate event counter - CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? 
TODO - } - // Receiver only - ptl_me_t me; // Match List Entry - ptl_handle_me_t me_handle; if (rank() == to) { - me.start = buf.p(); - me.length = buf.len(); - me.uid = PTL_UID_ANY; - me.match_id.phys.nid = PTL_NID_ANY; - me.match_id.phys.pid = PTL_PID_ANY; - me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; + PtlCTWait(me_ct_handle, num_msg, &recv_ct); #if defined(DEBUG_PORTALS) - std::printf("PtlMEAppend"); + info("Recv: success %d - failure %d", recv_ct.success, recv_ct.failure); #endif - CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) ); // Activate event counter - CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone } - - barrier(); - ptl_ct_event_t ct; // event counter - if (rank() == to) { - for(auto n = 0; n < num_msg; n++) { - #if defined(DEBUG_PORTALS) - barrier(); - PtlCTGet(md.ct_handle, &ct); - info("Received puts %d. Failed puts %d", ct.success, ct.failure); - #endif - } - CHECK_RETURNVAL( PtlCTWait(me.ct_handle, num_msg, &ct) ); //TODO is this unidirectional by default - } else if (rank() == from) { + if (rank() == from) { ptl_process_t target; target.rank = to; const ptl_size_t localOffset = 0; @@ -125,23 +114,16 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& const ptl_hdr_data_t header_data = 0; auto sendTime = walltime(); for(auto n = 1; n <= num_msg; n++) { - #if defined(DEBUG_PORTALS) - info("Put from %d to %d #%d", from, to, n); - #endif CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); - #if defined(DEBUG_PORTALS) - barrier(); - PtlCTGet(md.ct_handle, &ct); - info("Successful puts %d. 
Failed puts %d", ct.success, ct.failure); - #endif } + PtlCTWait(md_ct_handle, num_msg, &send_ct); sendTime = walltime() - sendTime; + #if defined(DEBUG_PORTALS) + info("Send: success %d - failure %d", send_ct.success, send_ct.failure); + #endif if(timing) *timing = sendTime; - CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) ); } - barrier(); - info("Kernel finished"); return SUCCESS; } diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 4afa003..fc34c02 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -59,6 +59,33 @@ private: ptl_ni_limits_t mni_limits_actual; ptl_handle_ni_t mni_handle; + // Sender only + /** @brief Memory Descriptor (MD) */ + ptl_md_t md; + /** @brief MD Handle */ + ptl_handle_md_t md_handle; + /** @brief MD Event Counter Handle */ + ptl_handle_ct_t md_ct_handle; + /** @brief MD Event Queue Handle */ + ptl_handle_eq_t md_eq_handle; + /** @brief Send Counter */ + ptl_ct_event_t send_ct; + void prepareSendStructs(const MemoryBuffer& buf, const int num_msg); + + // Receiver only + /** @brief Portal Table Index */ + ptl_pt_index_t pt_index; + /** @brief Match List Entry (ME) */ + ptl_me_t me; + /** @brief ME Handle */ + ptl_handle_me_t me_handle; + /** @brief ME Event Counter Handle */ + ptl_handle_ct_t me_ct_handle; + /** @brief Receive Counter */ + ptl_ct_event_t recv_ct; + void prepareRecvStructs(const MemoryBuffer& buf); + + std::vector<ptl_process_t> getPhysicalFromRank(); }; -- GitLab From 1ce3d6fa60d18fa8baeb8a99a43751377024a3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 29 Sep 2022 16:53:03 +0200 Subject: [PATCH 14/47] Added tcp helper debug since i get barrier fails --- benchmark/vcluster_tcp.cc | 8 ++++---- build.sh | 2 +- run.sh | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchmark/vcluster_tcp.cc b/benchmark/vcluster_tcp.cc index 2e7d001..3dfeac9 100644 --- a/benchmark/vcluster_tcp.cc +++ 
b/benchmark/vcluster_tcp.cc @@ -113,12 +113,14 @@ int VirtualClusterTCP::read_tcp_environ_rank_and_size() #if 1 == HAVE_MINIPMI EXEC_IFFAIL(minipmi_get_size(pmi_, &size), error("minipmi_get_size() failed."); return ERROR); - EXEC_IFFAIL(minipmi_get_rank(pmi_, &rank), error("minipmi_get_rank() failed."); return ERROR); rank_ = rank; size_ = size; + #if defined(DEBUG_MINIPMI) + info("PMI rank: %d, PMI size: %d", rank, size); + #endif return SUCCESS; #else EXEC_IFFAIL(read_environ_int(TCP_ENVIRON_PREFIX "SIZE", &size), error("Failed to read environment variable " TCP_ENVIRON_PREFIX "SIZE"); return ERROR); @@ -606,9 +608,7 @@ int VirtualClusterTCP::init() #endif EXEC_NOFAIL(read_tcp_environ()); - EXEC_NOFAIL(read_tcp_environ_rank_and_size()); - EXEC_NOFAIL(connect_to_all()); disable_nagles_algorithm(); @@ -665,7 +665,7 @@ int VirtualClusterTCP::send(int dst, MemoryBuffer& buf) } if (unlikely((dst < 0) || (dst >= size_ ))) { - error("Invalid rank."); + error("Invalid rank: 0 < %d < %d", dst, size_); return ERROR; } diff --git a/build.sh b/build.sh index 363483c..e80c062 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ # 3. 
Install linktest in folder install mkdir -p install; cd benchmark; -make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -DDEBUG_PORTALS=True" PREFIX=../install install; +make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -D DEBUG_PORTALS=True -D DEBUG_MINIPMI=True" PREFIX=../install install; cd ..; \ No newline at end of file diff --git a/run.sh b/run.sh index 846f282..34472ce 100755 --- a/run.sh +++ b/run.sh @@ -4,7 +4,7 @@ #SBATCH --account deepsea #SBATCH --nodes 2 -export PORTALS4_DEBUG=3 +#export PORTALS4_DEBUG=3 srun \ --label \ -- GitLab From f5f319986cc92d80f5dca9d92e98fd973bde3199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 4 Oct 2022 18:02:25 +0200 Subject: [PATCH 15/47] Fixed call to deleted constructor MemoryBuffer --- benchmark/memory.cc | 2 +- benchmark/memory.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/memory.cc b/benchmark/memory.cc index e67dd17..b76663e 100644 --- a/benchmark/memory.cc +++ b/benchmark/memory.cc @@ -89,7 +89,7 @@ void MemoryBuffer::memory_copy(MemoryBuffer& dst, MemoryBuffer& src){ } MemoryBuffer MemoryBuffer::wrap(void* p, std::size_t len, AddressSpace::ID addr_space_id){ - return MemoryBuffer(p, len, addr_space_id); + return {p, len, addr_space_id}; } void MemoryBuffer::fill(){ diff --git a/benchmark/memory.h b/benchmark/memory.h index aa08943..6774f1a 100644 --- a/benchmark/memory.h +++ b/benchmark/memory.h @@ -165,8 +165,8 @@ class MemoryBuffer{ MemoryBuffer(const MemoryBuffer& other) = delete; MemoryBuffer& operator=(const MemoryBuffer& other) = delete; - MemoryBuffer(MemoryBuffer&& other) = delete; - MemoryBuffer& operator=(const MemoryBuffer&& other) = delete; + MemoryBuffer(MemoryBuffer&& other) = default; + MemoryBuffer& operator=(MemoryBuffer&& other) = default; /* Wrap an existing pointer into a memory buffer. We do not know the allocator * and hence have to ingore it. 
This is acceptable since the allocator is not -- GitLab From f820ae7cdfcac2be4660bf690164328bb2961399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 5 Oct 2022 17:18:36 +0200 Subject: [PATCH 16/47] Fixed a bug that caused empty impl list. Fix makefile doube definitions Fix CFLAGS overriding does not work Fix circular dependency --- benchmark/Makefile | 90 +++++++++------------------ benchmark/cmdline.cc | 10 +-- benchmark/linktest.cc | 6 +- benchmark/vcluster.cc | 116 +++++++++++++++-------------------- benchmark/vcluster.h | 18 ++---- benchmark/vcluster_portals.h | 14 ++--- build.sh | 2 +- 7 files changed, 96 insertions(+), 160 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index c7a2d10..d10be49 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -8,6 +8,7 @@ #****************************************************************************/ # DEFAULTS +# ========================================= PREFIX = /usr/local/bin USE_POSIX = 1 @@ -28,10 +29,16 @@ GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) CC = mpicxx CFLAGS = -std=c++17 -Wall -g -rdynamic -CPPFLAGS = -DDEBUG_PORTALS -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" +CPPFLAGS = -DDEBUG_PORTALS=True \ + -D_GNU_SOURCE \ + -DLINKTEST_LINUX=1 \ + -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \ + -DGIT_HASH=\"$(GIT_HASH)\" + -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" LD = $(CC) LDFLAGS = LIBS = +# ========================================= # Handle Dependencies # ========================================= @@ -56,7 +63,7 @@ ifeq (1, $(HAVE_PORTALS)) HAVE_TCP = 1 endif -ifdef V +ifdef VERBOSE $(info USE_POSIX = $(USE_POSIX)) $(info HAVE_SION = $(HAVE_SION)) $(info HAVE_MPI = $(HAVE_MPI)) @@ -70,58 +77,6 @@ $(info HAVE_PORTALS = $(HAVE_PORTALS)) endif # ========================================= -SYSTEM = generic 
-GIT_HASH = $(shell git rev-parse --verify HEAD) -GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) -CC = mpicxx -CFLAGS = -std=c++17 -Wall -CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" -LD = $(CC) -LDFLAGS = -LIBS = - -# Use POSIX -ifeq (1, ${USE_POSIX}) - CPPFLAGS += -D__USE_POSIX -endif - -# SIONlib Options -ifeq (1, $(HAVE_SION)) -# CFLAGS += - CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi) -# LDFLAGS += - LIBS += $(shell sionconfig --64 --gcc --libs --mpi) -endif - -# MINIPMI Options -ifeq (1, $(HAVE_MINIPMI)) -# CFLAGS += - CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1 - LDFLAGS += -Lminipmi - LIBS += -lminipmi -endif - -# UCP Options -ifeq (1, $(HAVE_UCP)) -# CFLAGS += -# CPPFLAGS += -# LDFLAGS += -# LIBS += -endif - -# CUDA Options -ifeq (1, $(HAVE_CUDA)) - CU = nvcc - CUARCH = - ifeq (, $(CUARCH)) -$(error CUARCH is not set) - endif - CUFLAGS = --gpu-architecture $(CUARCH) -# CFLAGS += - CPPFLAGS += -I$(CUDA)/include -DHAVE_CUDA=1 - LDFLAGS += -L$(CUDA)/lib - LIBS += -lcuda -lcudart -endif # DEFINE EXECUTABLES # ========================================= @@ -150,7 +105,7 @@ ifeq (1, $(HAVE_MINIPMI)) endif endif -ifdef V +ifdef VERBOSE $(info linktest-versions = $(linktest-versions)) endif # ========================================= @@ -181,11 +136,11 @@ linktest-obj = linktest.o \ ifeq (1, $(HAVE_MPI)) linktest-obj += vcluster_mpi.o - CFLAGS += -DHAVE_VCLUSTER_MPI=1 + CPPFLAGS += -DHAVE_VCLUSTER_MPI=1 endif ifeq (1, $(HAVE_TCP)) linktest-obj += vcluster_tcp.o - CFLAGS += -DHAVE_VCLUSTER_TCP=1 + CPPFLAGS += -DHAVE_VCLUSTER_TCP=1 endif ifeq (1, $(HAVE_MINIPMI)) CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1 @@ -198,22 +153,22 @@ ifeq (1, $(HAVE_MINIPMI)) ibverbs_cq.o \ ibverbs_pd.o \ ibverbs_ctx.o - CFLAGS += -DHAVE_VCLUSTER_IBVERBS=1 -DIBVERBS_SEND_INLINE=1 + CPPFLAGS += -DHAVE_VCLUSTER_IBVERBS=1 
-DIBVERBS_SEND_INLINE=1 LIBS += -libverbs endif ifeq (1, $(HAVE_PSM2)) linktest-obj += vcluster_psm2.o - CFLAGS += -DHAVE_VCLUSTER_PSM2=1 + CPPFLAGS += -DHAVE_VCLUSTER_PSM2=1 LIBS += -lpsm2 endif ifeq (1, $(HAVE_UCP)) linktest-obj += vcluster_ucp.o - CFLAGS += -DHAVE_VCLUSTER_UCP=1 + CPPFLAGS += -DHAVE_VCLUSTER_UCP=1 LIBS += -lucp endif ifeq (1, $(HAVE_PORTALS)) linktest-obj += vcluster_portals.o - CFLAGS += -DHAVE_VCLUSTER_PORTALS=1 + CPPFLAGS += -DHAVE_VCLUSTER_PORTALS=1 LDFLAGS += -Lportals LIBS += -lportals endif @@ -224,6 +179,9 @@ ifeq (1, $(HAVE_MINIPMI)) memory_cuda.o CU = nvcc CUARCH = + ifeq (, $(CUARCH)) +$(error CUARCH is not set) + endif CUFLAGS = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1 CPPFLAGS += -I$(CUDA)/include -DHAVE_CUDA=1 LDFLAGS += -L$(CUDA)/lib @@ -240,11 +198,19 @@ endif ifeq (1, ${USE_POSIX}) CPPFLAGS += -D__USE_POSIX endif + +ifdef VERBOSE +$(info linktest-obj = $(linktest-obj)) +$(info CFLAGS = $(CFLAGS)) +$(info CPPFLAGS = $(CPPFLAGS)) +$(info LDFLAGS = $(LDFLAGS)) +$(info LIBS = $(LIBS)) +endif # ========================================= # DEFINE MAKE RULES # ========================================= -ifdef V +ifdef VERBOSE Q = else Q = @ diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc index 0baea8e..92063a0 100644 --- a/benchmark/cmdline.cc +++ b/benchmark/cmdline.cc @@ -782,11 +782,11 @@ void print_cmdline_usage(const std::string& prog) } std::string modeList = "["; - if(VirtualCluster::impls[0] != nullptr) { - for(auto i=0;VirtualCluster::impls[i];i++) { - modeList = modeList + VirtualCluster::impls[i] + ", "; - } - modeList.erase(modeList.size()-2); + if(VirtualCluster::impls.size() > 0) { + for(const auto& name: VirtualCluster::impls) { + modeList = modeList + name + ", "; + } + modeList.erase(modeList.size()-2); } modeList += "]"; std::fprintf(stderr, diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc index 02d8623..16070dd 100644 --- a/benchmark/linktest.cc +++ b/benchmark/linktest.cc @@ -60,11 +60,7 
@@ int main(int argc, char *argv[]){ { /* Determine Virtual Cluster Type */ - const auto name=get_vcluster_impl_name(argv,cmdline_args->virtual_cluster_implementation.c_str()); - if (unlikely(!name)){ - error("Failed to determine virtual-cluster implementation."); - return ERROR; - } + const auto name=VirtualCluster::get_vcluster_impl_name(argv,cmdline_args->virtual_cluster_implementation); /* Create Virtual Cluster */ bench.cl.reset(VirtualCluster::factory(name)); diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index 735f8a3..bcc8fe5 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -396,6 +396,30 @@ int VirtualCluster::linktest_kbipingpong(const int from, const int to, return SUCCESS; } +const std::vector<std::string> VirtualCluster::impls = { + #if 1 == HAVE_VCLUSTER_TCP + "tcp", + #endif + #if 1 == HAVE_VCLUSTER_MPI + "mpi", + #endif + #if 1 == HAVE_VCLUSTER_IBVERBS + "ibverbs", + #endif + #if 1 == HAVE_VCLUSTER_PSM2 + "psm2", + #endif + #if 1 == HAVE_VCLUSTER_UCP + "ucp", + #endif + #if 1 == HAVE_VCLUSTER_PORTALS + VirtualClusterPortals::NAME, + #endif + #if 1 == HAVE_VCLUSTER_CUDA + "cuda", + #endif +}; + VirtualCluster* VirtualCluster::factory(const std::string& name){ #if 1 == HAVE_VCLUSTER_TCP if ("tcp" == name) { @@ -477,80 +501,38 @@ int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf) return helper_->recv(src, buf); } -const char* VirtualCluster::impls[] = - { - #if 1 == HAVE_VCLUSTER_TCP - "tcp", - #endif - #if 1 == HAVE_VCLUSTER_MPI - "mpi", - #endif - #if 1 == HAVE_VCLUSTER_IBVERBS - "ibverbs", - #endif - #if 1 == HAVE_VCLUSTER_PSM2 - "psm2", - #endif - #if 1 == HAVE_VCLUSTER_UCP - "ucp", - #endif - #if 1 == HAVE_VCLUSTER_PORTALS - VirtualClusterPortals::NAME, - #endif - #if 1 == HAVE_VCLUSTER_CUDA - "cuda", - #endif - nullptr - }; - -const char* get_vcluster_impl_name(char** argv, const char* name) +const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std::string& name) { - /* Check If 
Virtual-Cluster Implementation Given In 'name' */ - if(name[0]){ //Check if 'name' is a null string - for(auto i=0;VirtualCluster::impls[i];i++){ //Loop over possible virtual cluster implementations - /* Loop termination is handled by the fact that the last 'VirtualCluster::impls' is - * null pointer. - */ - if (!strcmp(VirtualCluster::impls[i],name)){ //Compare command-line implementation to possible implementations - return VirtualCluster::impls[i]; - } + + std::string requestedImpl; + + // check 'name' + if(name != "") { + requestedImpl = name; } - // If this point is reached an unknown/unsupported implementation was encountered. - error("Unknown/Unsupported command-line implementation encountered."); - return(NULL); + + // check executable extension + std::string executableName(argv[0]); + const std::string dot{"."}; + auto pos = executableName.find(dot); + if(pos != std::string::npos) { + requestedImpl = executableName.substr(pos); } - /* Check Executable Extension For Virtual-Cluster Implementation */ - // Determine suffix start - int i=std::strlen(argv[0])-1; - if(likely(i>=0)){ - int suffix_offset=-1; //Integer indicating suffix start - while(true){ - if(unlikely(argv[0][i]=='.')){ - suffix_offset=i; //Identify suffix start. - break; - } - if(i--==0) break; + // Check environment + const char* envName; + if(read_environ_str(LINKTEST_ENVIRON_PREFIX "VCLUSTER_IMPL", &envName) == SUCCESS) { + requestedImpl = {envName}; } - if(likely(suffix_offset>=0)){ //Check if suffix was encountered - // Compare suffix to supported virtual cluster implementations - for(auto i=0;VirtualCluster::impls[i];i++){ //Loop over possible virtual cluster implementations - /* Loop termination is handled by the fact that the last 'VirtualCluster::impls' is - * null pointer. 
- */ - if (unlikely(!strcmp(VirtualCluster::impls[i],&(argv[0][suffix_offset+1])))){ //Compare suffix to possible implementations - return VirtualCluster::impls[i]; //Return identified virtual-cluster implementation - } + + for(const auto& impl : VirtualCluster::impls) { + if(impl == requestedImpl) { + return impl; } - }else if(unlikely(suffix_offset<-1)){ //This should not happen logically! - error("Internal Error: Unexpected negative executable-name-suffix offset encountered."); - return(NULL); } - } - - /* Check If Environment Specifies Virtual-Cluster Implementation */ - if(unlikely(read_environ_str(LINKTEST_ENVIRON_PREFIX "VCLUSTER_IMPL",&name))) return NULL; - return name; + + error("Internal Error: Unexpected negative executable-name-suffix offset encountered."); + return VirtualCluster::impls.at(0); // unreachable code to supress warning } int VirtualCluster::write_parallel(const linktest_args* args, const std::vector<LinktestStats>& statsVec) diff --git a/benchmark/vcluster.h b/benchmark/vcluster.h index f758b75..b516073 100644 --- a/benchmark/vcluster.h +++ b/benchmark/vcluster.h @@ -16,7 +16,6 @@ #include <string> #include <memory> - struct linktest_args; /* A virtual view of our cluster. The VirtualCluster @@ -275,12 +274,11 @@ public: virtual int write_parallel(const linktest_args* args, const std::vector<LinktestStats>& statsVec); virtual int write_funnelled(const linktest_args* args, const std::vector<LinktestStats>& statsVec); - /* Given the name of the vcluster implementation create an instance. This - * function accesses an internal database to map the name of the implementation - * to a function that creates the instance. In order for this to work, the - * implementation needs to be properly registered (see linktest_vcluster.c). 
- */ - static const char* impls[]; + /** \brief List of supported transport protocol (build at compile time) */ + static const std::vector<std::string> impls; + /** \return name of the requested transport protocol aka the virtual cluster imlplementation */ + static const std::string& get_vcluster_impl_name(char** argv, const std::string& name); + /** \return pointer to requested VirtualCluster implementation */ static VirtualCluster* factory(const std::string& name); private: @@ -339,12 +337,6 @@ enum vcluster_reduce_op { SUM_DOUBLE }; -/* Get the name of the vcluster implementation to be used. The implementation - * can be chosen by means of argv[0] or an environment variable set by the - * spawner. - */ -const char* get_vcluster_impl_name(char** argv, const char* name); - template<typename T> int VirtualCluster::send(const int dst,const T* const vals,const int len){ auto tmp = MemoryBuffer::wrap<T>(const_cast<T*>(vals), len, AddressSpace::ID::Local); diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index fc34c02..5acafcb 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -22,32 +22,32 @@ class VirtualClusterPortals : public VirtualClusterWithHelper public: inline static const char * NAME = "portals"; VirtualClusterPortals(); - virtual int init() override; - virtual int finalize() override; + int init() override; + int finalize() override; - virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, + int kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) override; - virtual int kUniDir(const int from, const int to, + int kUniDir(const int from, const int to, MemoryBuffer& buf1, MemoryBuffer& buf2, const int num_msg, double* const timing, const bool doBarrier) override { throw("Not Implemented"); }; - virtual int kUniDirMultiBuf(const int from,const int to, + int kUniDirMultiBuf(const int from,const int to, MemoryBufferMulti& buf_multi, MemoryBuffer& 
buf2, const int num_msg, double* const timing, const bool doBarrier) override { throw("Not Implemented"); }; - virtual int kUniDirLimitedMultiBuf(const int from,const int to, + int kUniDirLimitedMultiBuf(const int from,const int to, MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, const int num_msg, double* const timing, const bool doBarrier) override { throw("Not Implemented"); }; - virtual int kbipingpong(const int from, const int to, + int kbipingpong(const int from, const int to, MemoryBuffer& buf1, MemoryBuffer& buf2, const int num_msg, double* const timing) override { throw("Not Implemented"); diff --git a/build.sh b/build.sh index e80c062..cd2cb08 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ # 3. Install linktest in folder install mkdir -p install; cd benchmark; -make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -D DEBUG_PORTALS=True -D DEBUG_MINIPMI=True" PREFIX=../install install; +make VERBOSE=1 HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install; cd ..; \ No newline at end of file -- GitLab From e4a0fa250ead2e1fac71020dd64200ffcfb2b182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 5 Oct 2022 17:42:59 +0200 Subject: [PATCH 17/47] add sionlib install description --- benchmark/installSIONlib.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 benchmark/installSIONlib.sh diff --git a/benchmark/installSIONlib.sh b/benchmark/installSIONlib.sh new file mode 100644 index 0000000..01e0cb9 --- /dev/null +++ b/benchmark/installSIONlib.sh @@ -0,0 +1,8 @@ +wget http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7 +tar -axf 'download.php?version=1.7.7' +mkdir install +cd sionlib +./configure --prefix=/p/project/deepsea/mueller24/linktest/install --disable-fortran # Check/Change Path +cd build-linux-gomp10-openmpi +make +make install \ No newline at end of file -- GitLab From 04c03fda706bb1eb154c4a03ee4413f60bce9307 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 6 Oct 2022 12:10:17 +0200 Subject: [PATCH 18/47] remove verbosity --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index cd2cb08..5c082d1 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,5 @@ # 3. Install linktest in folder install mkdir -p install; cd benchmark; -make VERBOSE=1 HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install; +make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install; cd ..; \ No newline at end of file -- GitLab From 0a857909fbd0b1404316f421040fb6014d38161d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 6 Oct 2022 12:36:33 +0200 Subject: [PATCH 19/47] Merged gitignore --- .gitignore | 11 +++++++++++ benchmark/.gitignore | 10 ---------- 2 files changed, 11 insertions(+), 10 deletions(-) delete mode 100644 benchmark/.gitignore diff --git a/.gitignore b/.gitignore index 78a13cd..ff11029 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,14 @@ *.egg-info install/ **.ipynb_checkpoints/ +# Ignore generated source files +benchmark/cuda_kernels.cc +# Ignore executables +**/linktest +**/linktest.mpi +**/linktest.tcp +**/linktest.cuda +**/linktest.psm2 +**/linktest.ucp +**/linktest.ibverbs +**/linktest.portals diff --git a/benchmark/.gitignore b/benchmark/.gitignore deleted file mode 100644 index fe3166c..0000000 --- a/benchmark/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -# Ignore generated source files -cuda_kernels.cc -# Ignore executables -**/linktest -**/linktest.mpi -**/linktest.tcp -**/linktest.cuda -**/linktest.psm2 -**/linktest.ucp -**/linktest.ibverbs \ No newline at end of file -- GitLab From f88453710d10b5284f992655234f0360487eb503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 6 Oct 2022 17:08:54 +0200 Subject: [PATCH 20/47] Fixed me initialization Implemented testPut for comparison --- 
benchmark/vcluster_portals.cc | 158 +++++++++++++++++++++++++++------- benchmark/vcluster_portals.h | 13 ++- 2 files changed, 137 insertions(+), 34 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index bbacd9c..c41dc4d 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -11,6 +11,7 @@ #include "memory.h" #include "error.h" #include "timing.h" +#include <assert.h> std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { ptl_process_t physId; @@ -42,8 +43,8 @@ int VirtualClusterPortals::init() if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) { warn("Portals versions other than 4.0 may not be suppported"); } - CHECK_RETURNVAL(PtlInit()); - CHECK_RETURNVAL(PtlNIInit( + CHECK_RETURNVAL( PtlInit() ); + CHECK_RETURNVAL( PtlNIInit( PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README" PTL_NI_LOGICAL | PTL_NI_MATCHING, // Logical => using ranks, Matching => using send/recv semantics PTL_PID_ANY, @@ -52,7 +53,9 @@ int VirtualClusterPortals::init() &mni_handle)); auto physicalFromRank = getPhysicalFromRank(); - CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data())); + CHECK_RETURNVAL( PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()) ); + const auto DEFAULT_OPTIONS = 0; + CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); return SUCCESS; } @@ -70,6 +73,9 @@ int VirtualClusterPortals::finalize() } void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) { + // send_ct.success = 0; + // send_ct.failure = 0; + // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) ); CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) ); CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) ); md.start = buf.p(); @@ -78,19 +84,129 @@ void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const in md.eq_handle = md_eq_handle; md.ct_handle = 
md_ct_handle; CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO + + #if defined(DEBUG_PORTALS) + CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) ); + info("Send: before success %d - failure %d", recv_ct.success, recv_ct.failure); + #endif } void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { - CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); + // recv_ct.success = 0; + // recv_ct.failure = 0; + // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) ); CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) ); me.start = buf.p(); me.length = buf.len(); me.uid = PTL_UID_ANY; - me.match_id.phys.nid = PTL_NID_ANY; - me.match_id.phys.pid = PTL_PID_ANY; - me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_EVENT_CT_COMM; + me.match_id.rank = PTL_RANK_ANY; + me.match_bits = 1; //TDOD why? + me.ignore_bits = 0; //TDOD why? + me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; me.ct_handle = me_ct_handle; CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone + + #if defined(DEBUG_PORTALS) + CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) ); + info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure); + #endif +} + +void VirtualClusterPortals::recvMessages(const int num_msg) { + CHECK_RETURNVAL( PtlCTWait(me_ct_handle, num_msg, &recv_ct) ); + #if defined(DEBUG_PORTALS) + info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure); + #endif +} + +void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing) { + ptl_process_t target; + target.rank = to; + const ptl_size_t localOffset = 0; + const ptl_size_t remoteOffset = 0; + const ptl_match_bits_t match_bits = 1; //TDOD why? 
+ const ptl_hdr_data_t header_data = 0; + auto sendTime = walltime(); + for(auto n = 1; n <= num_msg; n++) { + CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); + } + CHECK_RETURNVAL( PtlCTWait(md_ct_handle, num_msg, &send_ct) ); + sendTime = walltime() - sendTime; + #if defined(DEBUG_PORTALS) + info("Send: after success %d - failure %d", send_ct.success, send_ct.failure); + #endif + if(timing) *timing = sendTime; +} + +int VirtualClusterPortals::testPut() +{ + if(not first) return 100; + first = false; + ptl_handle_ni_t ni_h = this->mni_handle; + ptl_pt_index_t pt_index = this->pt_index; + uint64_t value; + ptl_me_t value_e; + ptl_handle_me_t value_e_handle; + ptl_md_t write_md; + ptl_handle_md_t write_md_handle; + int num_procs = this->size(); + ptl_ct_event_t ctc; + int rank = this->rank(); + int ret; + ptl_process_t *procs; + + /* This test only succeeds if we have more than one rank */ + if (num_procs < 2) return 77; + + info("pt_index=%d",pt_index); + assert(pt_index == 0); + + if (1 == rank) { + value_e.start = &value; + value_e.length = sizeof(uint64_t); + value_e.uid = PTL_UID_ANY; + value_e.match_id.rank = PTL_RANK_ANY; + value_e.match_bits = 1; + value_e.ignore_bits = 0; + value_e.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM); + CHECK_RETURNVAL(PtlCTAlloc(ni_h, &value_e.ct_handle)); + CHECK_RETURNVAL(PtlMEAppend(ni_h, 0, &value_e, PTL_PRIORITY_LIST, NULL, + &value_e_handle)); + value = 0; + } else if (0 == rank) { + /* set up the landing pad so that I can read others' values */ + write_md.start = &value; + write_md.length = sizeof(uint64_t); + write_md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; + write_md.eq_handle = PTL_EQ_NONE; // i.e. 
don't queue send events + CHECK_RETURNVAL(PtlCTAlloc(ni_h, &write_md.ct_handle)); + CHECK_RETURNVAL(PtlMDBind(ni_h, &write_md, &write_md_handle)); + + value = 0xdeadbeef; + } + + barrier(); + + /* 0 writes to 1 */ + if (1 == rank) { + /* wait for write to arrive */ + ret = PtlCTWait(value_e.ct_handle, 1, &ctc); + assert(ctc.failure == 0); + assert(value == 0xdeadbeef); + } else if (0 == rank) { + /* write to rank 1 */ + ptl_process_t peer; + peer.rank = 1; + CHECK_RETURNVAL(PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, peer, + pt_index, 1, 0, NULL, 0)); + CHECK_RETURNVAL(PtlCTWait(write_md.ct_handle, 2, &ctc)); + assert(ctc.failure == 0); + } + + barrier(); + + info("testPut succeeded"); + return 0; } int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) @@ -98,33 +214,11 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& if (rank() == from) prepareSendStructs(buf, num_msg); if (rank() == to) prepareRecvStructs(buf); barrier(); - - if (rank() == to) { - PtlCTWait(me_ct_handle, num_msg, &recv_ct); - #if defined(DEBUG_PORTALS) - info("Recv: success %d - failure %d", recv_ct.success, recv_ct.failure); - #endif - } - if (rank() == from) { - ptl_process_t target; - target.rank = to; - const ptl_size_t localOffset = 0; - const ptl_size_t remoteOffset = 0; - const ptl_match_bits_t match_bits = 1; //TDOD why? 
- const ptl_hdr_data_t header_data = 0; - auto sendTime = walltime(); - for(auto n = 1; n <= num_msg; n++) { - CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); - } - PtlCTWait(md_ct_handle, num_msg, &send_ct); - sendTime = walltime() - sendTime; - #if defined(DEBUG_PORTALS) - info("Send: success %d - failure %d", send_ct.success, send_ct.failure); - #endif - if(timing) *timing = sendTime; - } + if (rank() == from) sendMessages(to, buf, num_msg, timing); + if (rank() == to) recvMessages(num_msg); return SUCCESS; + } diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 5acafcb..9e75163 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -59,6 +59,11 @@ private: ptl_ni_limits_t mni_limits_actual; ptl_handle_ni_t mni_handle; + /** @brief Portal Table (PT) Index */ + ptl_pt_index_t pt_index; + /** @brief PT Event Queue Handle */ + ptl_handle_eq_t pt_eq_handle; + // Sender only /** @brief Memory Descriptor (MD) */ ptl_md_t md; @@ -73,8 +78,6 @@ private: void prepareSendStructs(const MemoryBuffer& buf, const int num_msg); // Receiver only - /** @brief Portal Table Index */ - ptl_pt_index_t pt_index; /** @brief Match List Entry (ME) */ ptl_me_t me; /** @brief ME Handle */ @@ -85,6 +88,12 @@ private: ptl_ct_event_t recv_ct; void prepareRecvStructs(const MemoryBuffer& buf); + void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing); + void recvMessages(const int num_msg); + + bool first = true; + int testPut(); + std::vector<ptl_process_t> getPhysicalFromRank(); }; -- GitLab From d859830098301594f28045593d3681ea4bdc36fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 7 Oct 2022 11:49:48 +0200 Subject: [PATCH 21/47] Fixed succes counter reading assumes start at 0 --- benchmark/vcluster_portals.cc | 33 ++++++++++++++++++--------------- 
benchmark/vcluster_portals.h | 8 ++++---- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index c41dc4d..6efbe63 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -72,7 +72,7 @@ int VirtualClusterPortals::finalize() return SUCCESS; } -void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) { +ptl_size_t VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) { // send_ct.success = 0; // send_ct.failure = 0; // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) ); @@ -84,14 +84,14 @@ void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const in md.eq_handle = md_eq_handle; md.ct_handle = md_ct_handle; CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO - - #if defined(DEBUG_PORTALS) CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) ); - info("Send: before success %d - failure %d", recv_ct.success, recv_ct.failure); + #if defined(DEBUG_PORTALS) + info("Send: before success %d - failure %d", send_ct.success, send_ct.failure); #endif + return send_ct.success; } -void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { +ptl_size_t VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { // recv_ct.success = 0; // recv_ct.failure = 0; // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) ); @@ -105,21 +105,21 @@ void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; me.ct_handle = me_ct_handle; CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. 
allow puts to buf from anyone - - #if defined(DEBUG_PORTALS) CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) ); + #if defined(DEBUG_PORTALS) info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure); #endif + return recv_ct.success; } -void VirtualClusterPortals::recvMessages(const int num_msg) { - CHECK_RETURNVAL( PtlCTWait(me_ct_handle, num_msg, &recv_ct) ); +void VirtualClusterPortals::recvMessages(const int num_msg, ptl_size_t start_count) { + CHECK_RETURNVAL( PtlCTWait(me_ct_handle, start_count + num_msg, &recv_ct) ); #if defined(DEBUG_PORTALS) info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure); #endif } -void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing) { +void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing) { ptl_process_t target; target.rank = to; const ptl_size_t localOffset = 0; @@ -130,7 +130,7 @@ void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const for(auto n = 1; n <= num_msg; n++) { CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); } - CHECK_RETURNVAL( PtlCTWait(md_ct_handle, num_msg, &send_ct) ); + CHECK_RETURNVAL( PtlCTWait(md_ct_handle, start_count + num_msg, &send_ct) ); sendTime = walltime() - sendTime; #if defined(DEBUG_PORTALS) info("Send: after success %d - failure %d", send_ct.success, send_ct.failure); @@ -211,11 +211,14 @@ int VirtualClusterPortals::testPut() int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) { - if (rank() == from) prepareSendStructs(buf, num_msg); - if (rank() == to) prepareRecvStructs(buf); + ptl_size_t success_count; + if (rank() == from) success_count = prepareSendStructs(buf, num_msg); + if (rank() == to) success_count = 
prepareRecvStructs(buf); barrier(); - if (rank() == from) sendMessages(to, buf, num_msg, timing); - if (rank() == to) recvMessages(num_msg); + if (rank() == from) sendMessages(to, buf, num_msg, success_count, timing); + if (rank() == to) recvMessages(num_msg, success_count); + + info("kpingpong ended"); return SUCCESS; diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 9e75163..7fb6309 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -75,7 +75,7 @@ private: ptl_handle_eq_t md_eq_handle; /** @brief Send Counter */ ptl_ct_event_t send_ct; - void prepareSendStructs(const MemoryBuffer& buf, const int num_msg); + ptl_size_t prepareSendStructs(const MemoryBuffer& buf, const int num_msg); // Receiver only /** @brief Match List Entry (ME) */ @@ -86,10 +86,10 @@ private: ptl_handle_ct_t me_ct_handle; /** @brief Receive Counter */ ptl_ct_event_t recv_ct; - void prepareRecvStructs(const MemoryBuffer& buf); + ptl_size_t prepareRecvStructs(const MemoryBuffer& buf); - void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing); - void recvMessages(const int num_msg); + void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing); + void recvMessages(const int num_msg, ptl_size_t start_count); bool first = true; int testPut(); -- GitLab From 7ba5fcb6e5dbeeafc9fdbca2f0eb0221c9b31d69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 14 Apr 2023 15:22:47 +0200 Subject: [PATCH 22/47] Updated to ParaStationMPI module --- build.sh | 3 ++- loadEnv.sh | 2 +- run.sh | 12 ++++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index 5c082d1..f1726d1 100755 --- a/build.sh +++ b/build.sh @@ -1,5 +1,6 @@ # 3. 
Install linktest in folder install +ml GCC ParaStationMPI SIONlib mkdir -p install; cd benchmark; -make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install; +make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install; cd ..; \ No newline at end of file diff --git a/loadEnv.sh b/loadEnv.sh index 05a4e2f..0e3a2e1 100644 --- a/loadEnv.sh +++ b/loadEnv.sh @@ -7,4 +7,4 @@ source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh ## BXI profile export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf ## SIONlib and linktest -source loadPath.sh install \ No newline at end of file +source ./loadPath.sh install \ No newline at end of file diff --git a/run.sh b/run.sh index 34472ce..a2e8029 100755 --- a/run.sh +++ b/run.sh @@ -1,8 +1,10 @@ #!/bin/sh #SBATCH --partition dp-bxi -#SBATCH --reservation maint-bxi #SBATCH --account deepsea -#SBATCH --nodes 2 +#SBATCH --nodes 2 +#SBATCH --tasks-per-node 1 + +ml GCC ParaStationMPI SIONlib #export PORTALS4_DEBUG=3 @@ -12,6 +14,8 @@ srun \ install/linktest \ --mode portals \ --num-warmup-messages 0 \ - --num-messages 3 \ - --size-messages $((16)) \ + --num-messages 1 \ + --size-messages 16777216 \ + --serial-tests \ + --num-slowest 0 \ --no-sion-file; \ No newline at end of file -- GitLab From b19b86c75a00b6abbd485e6fff2d7079b315a077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 26 May 2023 12:49:52 +0200 Subject: [PATCH 23/47] Rewrote as in portals-test Implemented kpingpong kpingpong synchronizes before send recv Added slow retest workaround for global barrier in portals kernel Marked future TODOs/Discussions --- benchmark/Makefile | 3 +- benchmark/benchmark.cc | 45 +++---- benchmark/benchmark.h | 2 + benchmark/linktest.cc | 4 +- benchmark/vcluster.cc | 5 +- benchmark/vcluster_portals.cc | 213 ++++++++++++++-------------------- benchmark/vcluster_portals.h | 21 ++-- run.sh | 13 +-- 8 files changed, 128 insertions(+), 178 
deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index d10be49..df3b16d 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -29,8 +29,7 @@ GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) CC = mpicxx CFLAGS = -std=c++17 -Wall -g -rdynamic -CPPFLAGS = -DDEBUG_PORTALS=True \ - -D_GNU_SOURCE \ +CPPFLAGS = -D_GNU_SOURCE \ -DLINKTEST_LINUX=1 \ -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \ -DGIT_HASH=\"$(GIT_HASH)\" diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index d913e12..acaa6e3 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -19,6 +19,7 @@ #include "environ.h" #include "format_units.h" #include "format_print.h" +#include "vcluster_portals.h" #include <cstdlib> #include <cstdio> #include <cstring> @@ -479,38 +480,18 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){ for (auto i = 0; i < size(); ++i) { barrier(); - if (i == rank()) { EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false)); - if ( 0 != i ){ - EXEC_NOFAIL(cl->send(0, &partner , 1)); - EXEC_NOFAIL(cl->send(0, time_per_msg, 1)); - } - } else if (i == partner) { - EXEC_NOFAIL(kernel(partner, rank(), nullptr, false)); - } - - if (rank() == 0){ - int buddy; - double time; - if(likely(0!=i)){ - EXEC_NOFAIL(cl->recv(i, &buddy, 1)); - EXEC_NOFAIL(cl->recv(i, &time , 1)); - }else{ - buddy=partner; - time =*time_per_msg; - } - std::printf("%6d->%6d: %ss (%sB/s) (l=%d)\n", + std::printf("%6d->%6d: %ss (%sB/s)\n", i, - buddy, - UnitPrefix::SI_prefix(time, 12).get(), - UnitPrefix::IEC_prefix(args->len_msg/time, 10).get(), - i + partner, + UnitPrefix::SI_prefix(*time_per_msg, 12).get(), + UnitPrefix::IEC_prefix(args->len_msg / *time_per_msg, 10).get() ); - std::fflush(stdout); + } else if (i == partner) { + EXEC_NOFAIL(kernel(partner, rank(), nullptr, false)); } } - barrier(); return SUCCESS; 
@@ -533,7 +514,6 @@ int Benchmark::work_pingpong(const int step,double* const min_time,double* const } else { EXEC_IFFAIL(work_pingpong_serial(partner, &time_per_msg), fatal("work_pingpong_serial failed.")); } - stats->accesspattern[partner] = step + 1; // In SION file steps are numbered starting with 1. stats->ptimings[partner] = time_per_msg; @@ -649,14 +629,21 @@ int Benchmark::gather_slow_pairs(struct slow_pair* const sp,const int n){ } int Benchmark::retest_one_slow_pair(const int from,const int to, double* const time){ + debug("Benchmark::retest_one_slow_pair(%d, %d)",from, to); double tv; - barrier(); if ((from == rank()) || (to == rank())) { EXEC_NOFAIL(kernel(from, to, &tv, false)); + } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels + if(cl->nameRef() == VirtualClusterPortals::NAME) { + // 2 per kernel, warup kernel and measuring kernel + barrier(); + barrier(); + barrier(); + barrier(); + } } - barrier(); if (0 != from) { diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index 2c68f8f..feaabc1 100644 --- a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -15,6 +15,7 @@ #include <random> #include "stats.h" #include "slow_pairs.h" +#include "error.h" #if HAVE_CUDA == 1 #include "gpu_nvidia.h" @@ -38,6 +39,7 @@ namespace linktest{ Benchmark() = default; Benchmark(const Benchmark&) = delete; Benchmark(Benchmark&&) = delete; + ~Benchmark() = default; int main_cmdline(); int benchmark(); // Run the main benchmark [[nodiscard]] int rank() const; diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc index 16070dd..1282211 100644 --- a/benchmark/linktest.cc +++ b/benchmark/linktest.cc @@ -16,6 +16,7 @@ #include "system.h" #include <thread> #include <memory> +#include <iostream> void print_linktest_version() { @@ -90,7 +91,6 @@ int main(int argc, char *argv[]){ error("Failed to execute benchmark."); return ERROR; } - /* Finalize Benchmark */ if (unlikely(bench.cl->finalize())) { 
error("Failed to finalize communication operations."); @@ -103,5 +103,5 @@ int main(int argc, char *argv[]){ * until the very end. */ - return SUCCESS; + return SUCCESS; } diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index bcc8fe5..d5a9a29 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -241,7 +241,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to, const struct linktest_args* const args, double* const time){ int err; - + info("kpingpong from %d to %d", from, to); // Warmup if(args->num_warmup_msg!=0){ //No use doing this if there are no warm-up messages. double dummy; @@ -250,8 +250,9 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to, &dummy); if(unlikely(err))return ERROR; } - + info("kpingpong warmup complete"); err=kpingpong(from,to,buf,args->num_msg,time); + info("kpingpong kernel complete"); if(unlikely(err))return err; if(args->check_buffers){ if(unlikely(buf.check())){ diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 6efbe63..656b915 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -11,6 +11,7 @@ #include "memory.h" #include "error.h" #include "timing.h" +#include "stopwatch.h" #include <assert.h> std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { @@ -18,7 +19,7 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) ); #if defined(DEBUG_PORTALS) - info("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d", + debug("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d", rank(), hostname().c_str(), physId.phys.nid, @@ -37,7 +38,7 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { int VirtualClusterPortals::init() { - set_helper_pointer(VirtualCluster::factory("tcp")); + set_helper_pointer(VirtualCluster::factory("mpi")); EXEC_NOFAIL(helper_->init()); if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) { @@ 
-55,171 +56,135 @@ int VirtualClusterPortals::init() auto physicalFromRank = getPhysicalFromRank(); CHECK_RETURNVAL( PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()) ); const auto DEFAULT_OPTIONS = 0; - CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) ); + CHECK_RETURNVAL( PtlEQAlloc(mni_handle, 1000, &pt_eq_handle) ); + CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, pt_eq_handle, PTL_PT_ANY, &pt_index) ); return SUCCESS; } int VirtualClusterPortals::finalize() { - auto ret = PtlNIFini(mni_handle); + CHECK_RETURNVAL( PtlPTFree(mni_handle, pt_index) ); + CHECK_RETURNVAL( PtlNIFini(mni_handle) ); PtlFini(); - if (ret != PTL_OK) { - fatal("Portals failed to finalize"); - return ERROR; - } - this->helper_->finalize(); + EXEC_NOFAIL(helper_->finalize()); return SUCCESS; } -ptl_size_t VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) { - // send_ct.success = 0; - // send_ct.failure = 0; - // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) ); - CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) ); - CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) ); +void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) { md.start = buf.p(); md.length = buf.len(); - md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; - md.eq_handle = md_eq_handle; - md.ct_handle = md_ct_handle; - CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO - CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) ); - #if defined(DEBUG_PORTALS) - info("Send: before success %d - failure %d", send_ct.success, send_ct.failure); - #endif - return send_ct.success; + md.options = PTL_MD_EVENT_CT_ACK; + md.eq_handle = PTL_EQ_NONE; // i.e. 
don't queue send events + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) ); + CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor } -ptl_size_t VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { - // recv_ct.success = 0; - // recv_ct.failure = 0; - // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) ); - CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) ); +void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { me.start = buf.p(); me.length = buf.len(); me.uid = PTL_UID_ANY; me.match_id.rank = PTL_RANK_ANY; - me.match_bits = 1; //TDOD why? - me.ignore_bits = 0; //TDOD why? - me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM; - me.ct_handle = me_ct_handle; - CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone - CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) ); + me.match_bits = MATCH_BITS; + me.ignore_bits = IGNORE_BITS; + me.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_COMM_DISABLE ); // React to puts, count communication events, do not generate full communication events + CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle)); + CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle)); + ptl_event_t event; + CHECK_RETURNVAL( PtlEQWait(pt_eq_handle, &event) ); // TODO allow PTL_EQ_DROPPED + if (! 
(event.type == PTL_EVENT_LINK && event.ni_fail_type == PTL_NI_OK)) { + // TODO Check for overflow/dropped events + error("PtlMEAppend failed"); + } +} + +void VirtualClusterPortals::recvMessages(const int num_msg) { + CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) ); + const ptl_size_t start_count = recv_ct.success; + #if defined(DEBUG_PORTALS) - info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure); + debug("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure); #endif - return recv_ct.success; -} -void VirtualClusterPortals::recvMessages(const int num_msg, ptl_size_t start_count) { - CHECK_RETURNVAL( PtlCTWait(me_ct_handle, start_count + num_msg, &recv_ct) ); + CHECK_RETURNVAL( PtlCTWait(me.ct_handle, start_count + static_cast<unsigned long>(num_msg), &recv_ct) ); + #if defined(DEBUG_PORTALS) - info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure); + debug("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure); #endif } -void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing) { +void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg) { ptl_process_t target; target.rank = to; const ptl_size_t localOffset = 0; const ptl_size_t remoteOffset = 0; - const ptl_match_bits_t match_bits = 1; //TDOD why? 
const ptl_hdr_data_t header_data = 0; - auto sendTime = walltime(); + + #if defined(DEBUG_PORTALS) + CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) ); + const ptl_size_t start_count = send_ct.success; + debug("Send: before success %d - failure %d", send_ct.success, send_ct.failure); + #endif + for(auto n = 1; n <= num_msg; n++) { - CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) ); + CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, MATCH_BITS, remoteOffset, nullptr, header_data) ); } - CHECK_RETURNVAL( PtlCTWait(md_ct_handle, start_count + num_msg, &send_ct) ); - sendTime = walltime() - sendTime; + + // TODO Discuss if checking for send error (and handling) makes sense #if defined(DEBUG_PORTALS) - info("Send: after success %d - failure %d", send_ct.success, send_ct.failure); + CHECK_RETURNVAL( PtlCTWait(md.ct_handle, start_count + static_cast<unsigned long>(num_msg), &send_ct) ); + debug("Send: after success %d - failure %d", send_ct.success, send_ct.failure); #endif - if(timing) *timing = sendTime; } -int VirtualClusterPortals::testPut() -{ - if(not first) return 100; - first = false; - ptl_handle_ni_t ni_h = this->mni_handle; - ptl_pt_index_t pt_index = this->pt_index; - uint64_t value; - ptl_me_t value_e; - ptl_handle_me_t value_e_handle; - ptl_md_t write_md; - ptl_handle_md_t write_md_handle; - int num_procs = this->size(); - ptl_ct_event_t ctc; - int rank = this->rank(); - int ret; - ptl_process_t *procs; - - /* This test only succeeds if we have more than one rank */ - if (num_procs < 2) return 77; - - info("pt_index=%d",pt_index); - assert(pt_index == 0); - - if (1 == rank) { - value_e.start = &value; - value_e.length = sizeof(uint64_t); - value_e.uid = PTL_UID_ANY; - value_e.match_id.rank = PTL_RANK_ANY; - value_e.match_bits = 1; - value_e.ignore_bits = 0; - value_e.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM); - 
CHECK_RETURNVAL(PtlCTAlloc(ni_h, &value_e.ct_handle)); - CHECK_RETURNVAL(PtlMEAppend(ni_h, 0, &value_e, PTL_PRIORITY_LIST, NULL, - &value_e_handle)); - value = 0; - } else if (0 == rank) { - /* set up the landing pad so that I can read others' values */ - write_md.start = &value; - write_md.length = sizeof(uint64_t); - write_md.options = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK; - write_md.eq_handle = PTL_EQ_NONE; // i.e. don't queue send events - CHECK_RETURNVAL(PtlCTAlloc(ni_h, &write_md.ct_handle)); - CHECK_RETURNVAL(PtlMDBind(ni_h, &write_md, &write_md_handle)); - - value = 0xdeadbeef; - } - - barrier(); - - /* 0 writes to 1 */ - if (1 == rank) { - /* wait for write to arrive */ - ret = PtlCTWait(value_e.ct_handle, 1, &ctc); - assert(ctc.failure == 0); - assert(value == 0xdeadbeef); - } else if (0 == rank) { - /* write to rank 1 */ - ptl_process_t peer; - peer.rank = 1; - CHECK_RETURNVAL(PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, peer, - pt_index, 1, 0, NULL, 0)); - CHECK_RETURNVAL(PtlCTWait(write_md.ct_handle, 2, &ctc)); - assert(ctc.failure == 0); - } - - barrier(); +void VirtualClusterPortals::releaseRecvStructs() { + CHECK_RETURNVAL( PtlMEUnlink(me_handle) ); + CHECK_RETURNVAL( PtlCTFree(me.ct_handle) ); +}; - info("testPut succeeded"); - return 0; -} +void VirtualClusterPortals::releaseSendStructs() { + CHECK_RETURNVAL( PtlMDRelease(md_handle) ); + CHECK_RETURNVAL( PtlCTFree(md.ct_handle) ); +}; int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing) { - ptl_size_t success_count; - if (rank() == from) success_count = prepareSendStructs(buf, num_msg); - if (rank() == to) success_count = prepareRecvStructs(buf); + bool isSender = rank() == from; + bool isReceiver = rank() == to; + + auto watch = Stopwatchfactory::getRankWatch(rank(), from); + debug("kpingpong: prepareSendStructs"); + prepareSendStructs(buf); + debug("kpingpong: prepareRecvStructs"); + 
prepareRecvStructs(buf); barrier(); - if (rank() == from) sendMessages(to, buf, num_msg, success_count, timing); - if (rank() == to) recvMessages(num_msg, success_count); - info("kpingpong ended"); + if(isSender) { + watch->start(); + debug("kpingpong: send"); + sendMessages(to, buf, num_msg); + debug("kpingpong: recv"); + recvMessages(num_msg); + watch->stop(); + } + if(isReceiver) { + watch->start(); + debug("kpingpong: recv"); + recvMessages(num_msg); + debug("kpingpong: send"); + sendMessages(from, buf, num_msg); + watch->stop(); + } + barrier(); + debug("kpingpong: releaseRecvStructs"); + releaseRecvStructs(); + debug("kpingpong: releaseSendStructs"); + releaseSendStructs(); + if(timing) { + *timing = watch->getDuration().count() / (2*num_msg); + } + return SUCCESS; } diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 7fb6309..abc02c5 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -38,7 +38,7 @@ public: MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, const int num_msg, double* const timing, const bool doBarrier) override { - throw("Not Implemented"); + throw("Not Implemented"); // Use PTL_ME_MANAGE_LOCAL }; int kUniDirLimitedMultiBuf(const int from,const int to, MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, @@ -64,32 +64,31 @@ private: /** @brief PT Event Queue Handle */ ptl_handle_eq_t pt_eq_handle; + const ptl_match_bits_t MATCH_BITS = 1; // TODO when/how to use these? 
+ const ptl_match_bits_t IGNORE_BITS = ~0; // ignore all bits + // Sender only /** @brief Memory Descriptor (MD) */ ptl_md_t md; /** @brief MD Handle */ ptl_handle_md_t md_handle; - /** @brief MD Event Counter Handle */ - ptl_handle_ct_t md_ct_handle; - /** @brief MD Event Queue Handle */ - ptl_handle_eq_t md_eq_handle; /** @brief Send Counter */ ptl_ct_event_t send_ct; - ptl_size_t prepareSendStructs(const MemoryBuffer& buf, const int num_msg); + void prepareSendStructs(const MemoryBuffer& buf); + void releaseSendStructs(); // Receiver only /** @brief Match List Entry (ME) */ ptl_me_t me; /** @brief ME Handle */ ptl_handle_me_t me_handle; - /** @brief ME Event Counter Handle */ - ptl_handle_ct_t me_ct_handle; /** @brief Receive Counter */ ptl_ct_event_t recv_ct; - ptl_size_t prepareRecvStructs(const MemoryBuffer& buf); + void prepareRecvStructs(const MemoryBuffer& buf); + void releaseRecvStructs(); - void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing); - void recvMessages(const int num_msg, ptl_size_t start_count); + void sendMessages(const int to, MemoryBuffer& buf, const int num_msg); + void recvMessages(const int num_msg); bool first = true; int testPut(); diff --git a/run.sh b/run.sh index a2e8029..bb7818c 100755 --- a/run.sh +++ b/run.sh @@ -1,21 +1,18 @@ #!/bin/sh #SBATCH --partition dp-bxi #SBATCH --account deepsea -#SBATCH --nodes 2 +#SBATCH --nodes 4 #SBATCH --tasks-per-node 1 ml GCC ParaStationMPI SIONlib #export PORTALS4_DEBUG=3 -srun \ ---label \ ---ntasks 2 \ - install/linktest \ +srun install/linktest \ --mode portals \ - --num-warmup-messages 0 \ - --num-messages 1 \ + --num-warmup-messages 3 \ + --num-messages 100 \ --size-messages 16777216 \ --serial-tests \ - --num-slowest 0 \ + --num-slowest 1 \ --no-sion-file; \ No newline at end of file -- GitLab From 7255cb00d4400d1005177217e8ee1733a8b81594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> 
Date: Wed, 21 Jun 2023 11:57:50 +0200 Subject: [PATCH 24/47] Added report level control --- benchmark/error.cc | 20 ++++++++++++++++---- benchmark/error.h | 11 +++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/benchmark/error.cc b/benchmark/error.cc index aaaae93..8915d20 100644 --- a/benchmark/error.cc +++ b/benchmark/error.cc @@ -28,54 +28,66 @@ static void report(const char* prefix, const char* file, void linktest_fatal(const char* file, const char* func, long line, const char* fmt, ...) { + #if REPORT_LEVEL >= REPORT_FATAL va_list vl; va_start(vl, fmt); report("fatal: ", file, func, line, fmt, vl); va_end(vl); - std::fflush(NULL); + std::fflush(nullptr); + #endif std::terminate(); } void linktest_error(const char* file, const char* func, long line, const char* fmt, ...) { + #if REPORT_LEVEL >= REPORT_ERROR va_list vl; va_start(vl, fmt); report("error: ", file, func, line, fmt, vl); va_end(vl); - std::fflush(NULL); + std::fflush(nullptr); + #endif } void linktest_warn(const char* file, const char* func, long line, const char* fmt, ...) { + #if REPORT_LEVEL >= REPORT_WARN va_list vl; va_start(vl, fmt); report("warning: ", file, func, line, fmt, vl); va_end(vl); - std::fflush(NULL); + std::fflush(nullptr); + #endif } void linktest_info(const char* file, const char* func, long line, const char* fmt, ...) { + #if REPORT_LEVEL >= REPORT_INFO va_list vl; va_start(vl, fmt); report("info: ", file, func, line, fmt, vl); va_end(vl); + + std::fflush(nullptr); + #endif } void linktest_debug(const char* file, const char* func, long line, const char* fmt, ...) 
{ + #if REPORT_LEVEL >= REPORT_DEBUG va_list vl; va_start(vl, fmt); report("debug: ", file, func, line, fmt, vl); va_end(vl); - std::fflush(NULL); + std::fflush(nullptr); + #endif } \ No newline at end of file diff --git a/benchmark/error.h b/benchmark/error.h index 92160ac..2807e34 100644 --- a/benchmark/error.h +++ b/benchmark/error.h @@ -9,6 +9,17 @@ #ifndef LINKTEST_ERROR_H #define LINKTEST_ERROR_H +#define REPORT_NONE 0 +#define REPORT_FATAL 1 +#define REPORT_ERROR 2 +#define REPORT_WARN 3 +#define REPORT_INFO 4 +#define REPORT_DEBUG 5 + +#ifndef REPORT_LEVEL +#define REPORT_LEVEL REPORT_WARN +#endif + constexpr int SUCCESS = 0; constexpr int ERROR = 1; -- GitLab From 45bbc3726fc6baed77fd07560cdbf97b5c501728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 22 Jun 2023 13:16:58 +0200 Subject: [PATCH 25/47] Added DEBUG_BARRIER --- benchmark/Makefile | 3 ++- benchmark/vcluster.cc | 4 +--- benchmark/vcluster_helper.cc | 7 ++++++- benchmark/vcluster_mpi.cc | 7 ++++++- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index df3b16d..8e8e07b 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -29,7 +29,8 @@ GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) CC = mpicxx CFLAGS = -std=c++17 -Wall -g -rdynamic -CPPFLAGS = -D_GNU_SOURCE \ +CPPFLAGS = -DREPORT_LEVEL=5 \ + -D_GNU_SOURCE \ -DLINKTEST_LINUX=1 \ -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \ -DGIT_HASH=\"$(GIT_HASH)\" diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index d5a9a29..b334bde 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -241,7 +241,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to, const struct linktest_args* const args, double* const time){ int err; - info("kpingpong from %d to %d", from, to); + debug("VirtualCluster::linktest_kpingpong from %d to %d", from, to); // Warmup 
if(args->num_warmup_msg!=0){ //No use doing this if there are no warm-up messages. double dummy; @@ -250,9 +250,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to, &dummy); if(unlikely(err))return ERROR; } - info("kpingpong warmup complete"); err=kpingpong(from,to,buf,args->num_msg,time); - info("kpingpong kernel complete"); if(unlikely(err))return err; if(args->check_buffers){ if(unlikely(buf.check())){ diff --git a/benchmark/vcluster_helper.cc b/benchmark/vcluster_helper.cc index c284fa9..28bd22e 100644 --- a/benchmark/vcluster_helper.cc +++ b/benchmark/vcluster_helper.cc @@ -32,11 +32,16 @@ std::string determineHostname(){ int determineCPUID(){ return (std::int32_t)sched_getcpu(); } - +#ifdef DEBUG_BARRIER +static int barrierCounter=1; +#endif int vcluster_helper_barrier(VirtualCluster* cl){ /* We do not actually send data but we still need to have * a non-NULL buffer pointer */ + #ifdef DEBUG_BARRIER + info("vcluster_helper_barrier %d", barrierCounter++); + #endif char sp = 0; MemoryBuffer buf = MemoryBuffer::wrap<char>(&sp, 0, AddressSpace::ID::Local); diff --git a/benchmark/vcluster_mpi.cc b/benchmark/vcluster_mpi.cc index b481925..e7efc78 100644 --- a/benchmark/vcluster_mpi.cc +++ b/benchmark/vcluster_mpi.cc @@ -133,9 +133,14 @@ int VirtualClusterMPI::recv(int src, MemoryBuffer& buf) src, 0, world_, MPI_STATUS_IGNORE)); } - +#ifdef DEBUG_BARRIER +static int counter = 1; +#endif int VirtualClusterMPI::barrier() { + #ifdef DEBUG_BARRIER + debug("VirtualClusterMPI::barrier %d",counter++); + #endif return _mpi_(MPI_Barrier(world_)); } -- GitLab From 8fe76cceaa6266c4dd477c95268177d6814dd6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 22 Jun 2023 14:36:56 +0200 Subject: [PATCH 26/47] Fix some weird indentations --- benchmark/cmdline.cc | 2 +- benchmark/linktest.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc index 
92063a0..9b51db3 100644 --- a/benchmark/cmdline.cc +++ b/benchmark/cmdline.cc @@ -467,7 +467,7 @@ static bool special_cmdline_args(const std::string& prog, const std::vector<std: } } - return false; + return false; } static bool arg_match(const std::string& arg, const Argument& argdef){ diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc index 1282211..1968a08 100644 --- a/benchmark/linktest.cc +++ b/benchmark/linktest.cc @@ -20,10 +20,10 @@ void print_linktest_version() { - std::fprintf(stderr, "LinkTest (version %d.%d.%d)\n", - VERSION_MAJOR, - VERSION_MINOR, - VERSION_PATCH); + std::fprintf(stderr, "LinkTest (version %d.%d.%d)\n", + VERSION_MAJOR, + VERSION_MINOR, + VERSION_PATCH); } /* Errors are propagated up the backtrace as far as possible until -- GitLab From af388a956fa061e2fbd2f93f93cc1b559414a6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 27 Jun 2023 13:53:05 +0200 Subject: [PATCH 27/47] Turned off debug messages --- benchmark/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index 8e8e07b..df3b16d 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -29,8 +29,7 @@ GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) CC = mpicxx CFLAGS = -std=c++17 -Wall -g -rdynamic -CPPFLAGS = -DREPORT_LEVEL=5 \ - -D_GNU_SOURCE \ +CPPFLAGS = -D_GNU_SOURCE \ -DLINKTEST_LINUX=1 \ -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \ -DGIT_HASH=\"$(GIT_HASH)\" -- GitLab From e0da49d0c378328fff4c5e6b997bad736b9e3dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 7 Jul 2023 16:59:00 +0200 Subject: [PATCH 28/47] Implemented kUnidir Done Implemented kbipingpong WIP --- benchmark/benchmark.cc | 2 +- benchmark/vcluster_portals.cc | 90 +++++++++++++++++++++++++++++++++++ benchmark/vcluster_portals.h | 9 ++-- 3 files changed, 94 insertions(+), 7
deletions(-) diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index acaa6e3..182c1a5 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -637,7 +637,7 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t EXEC_NOFAIL(kernel(from, to, &tv, false)); } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels if(cl->nameRef() == VirtualClusterPortals::NAME) { - // 2 per kernel, warup kernel and measuring kernel + // 2 per kernel, warmup kernel and measuring kernel barrier(); barrier(); barrier(); diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 656b915..7ce01fe 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -185,6 +185,96 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& *timing = watch->getDuration().count() / (2*num_msg); } + return SUCCESS; +} + +int VirtualClusterPortals::kUniDir( + const int from, const int to, + MemoryBuffer& buf1, MemoryBuffer& buf2, + const int num_msg, double* const timing, + const bool doBarrier) +{ + bool isSender = rank() == from; + bool isReceiver = rank() == to; + + auto watch = Stopwatchfactory::getRankWatch(rank(), from); + debug("kUnidir: prepareSendStructs"); + prepareSendStructs(buf1); + debug("kUnidir: prepareRecvStructs"); + prepareRecvStructs(buf2); + barrier(); + + if(isSender) { + watch->start(); + debug("kUnidir: send"); + sendMessages(to, buf1, num_msg); + debug("kUnidir: recv"); + recvMessages(1); + watch->stop(); + } + if(isReceiver) { + watch->start(); + debug("kUnidir: recv"); + recvMessages(num_msg); + debug("kUnidir: send"); + sendMessages(from, buf1, 1); + watch->stop(); + } + + barrier(); + debug("kUnidir: releaseRecvStructs"); + releaseRecvStructs(); + debug("kUnidir: releaseSendStructs"); + releaseSendStructs(); + if(timing) { + *timing = watch->getDuration().count() / num_msg; + } + + return SUCCESS; + +} 
+ +int VirtualClusterPortals::kbipingpong( + const int from, const int to, + MemoryBuffer& buf1, MemoryBuffer& buf2, + const int num_msg, double* const timing) +{ + bool isSender = rank() == from; + bool isReceiver = rank() == to; + + auto watch = Stopwatchfactory::getRankWatch(rank(), from); + debug("kbipingpong: prepareSendStructs"); + prepareSendStructs(buf1); + debug("kbipingpong: prepareRecvStructs"); + prepareRecvStructs(buf2); + barrier(); + + if(isSender) { + watch->start(); + debug("kbipingpong: send"); + sendMessages(to, buf1, num_msg); + debug("kbipingpong: recv"); + recvMessages(num_msg); + watch->stop(); + } + if(isReceiver) { + watch->start(); + debug("kbipingpong: recv"); + sendMessages(from, buf1, num_msg); + debug("kbipingpong: send"); + recvMessages(num_msg); + watch->stop(); + } + + barrier(); + debug("kbipingpong: releaseRecvStructs"); + releaseRecvStructs(); + debug("kbipingpong: releaseSendStructs"); + releaseSendStructs(); + if(timing) { + *timing = watch->getDuration().count() / (2.0 * num_msg); + } + return SUCCESS; } diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index abc02c5..f58bbb2 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -31,9 +31,8 @@ public: int kUniDir(const int from, const int to, MemoryBuffer& buf1, MemoryBuffer& buf2, const int num_msg, double* const timing, - const bool doBarrier) override { - throw("Not Implemented"); - }; + const bool doBarrier) override; + int kUniDirMultiBuf(const int from,const int to, MemoryBufferMulti& buf_multi, MemoryBuffer& buf2, const int num_msg, double* const timing, @@ -49,9 +48,7 @@ public: int kbipingpong(const int from, const int to, MemoryBuffer& buf1, MemoryBuffer& buf2, - const int num_msg, double* const timing) override { - throw("Not Implemented"); - }; + const int num_msg, double* const timing) override; private: // matching (send/recv) Network Interface (ni) -- GitLab From c4179376c7bf779380e86d73eec3c53512467ea6 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 11 Jul 2023 15:14:55 +0200 Subject: [PATCH 29/47] Fixed synchronization missing in pingpong_serial --- benchmark/benchmark.cc | 27 ++++++++++++++++++++++++++- benchmark/output_sion.cc | 5 ++++- benchmark/vcluster_helper.cc | 2 +- benchmark/vcluster_portals.cc | 25 ++++++------------------- 4 files changed, 37 insertions(+), 22 deletions(-) diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index 182c1a5..8f9d85f 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -100,7 +100,7 @@ int Benchmark::kernel(const int from, const int to, double* const time_per_msg, return cl->linktest_kbipingpong(from, to, *buf1, *buf2, args, time_per_msg); } else if (args->do_unidir) { if (args->use_multi_buf) { - auto M=((args->num_msg>args->num_warmup_msg)?args->num_msg:args->num_warmup_msg); + auto M = std::max(args->num_msg, args->num_warmup_msg); if(args->num_multi_buf==M){ return cl->linktest_kUniDirMultiBuf(from, to, *buf_multi, *buf2, args, time_per_msg, doBarrier); }else{ @@ -398,6 +398,7 @@ int Benchmark::printIterationResults(const int iter){ } } + debug("Benchmark::printIterationResults->barrier()"); EXEC_NOFAIL(cl->barrier()); return SUCCESS; @@ -438,6 +439,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* auto from = (sign < 0) ? partner : rank(); auto to = (sign < 0) ? 
rank() : partner; + debug("Benchmark::work_pingpong_parallel 1->barrier()"); barrier(); #ifdef DEBUG_KERNEL_SYNCHRONIZATION std::unique_ptr<StopwatchI> rootWatch = Stopwatchfactory::getRootWatch(rank()); @@ -449,6 +451,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* rootWatch->stop(); tBeforeBarrier=rootWatch->getDuration(); #endif + debug("Benchmark::work_pingpong_parallel 2->barrier()"); barrier(); #ifdef DEBUG_KERNEL_SYNCHRONIZATION rootWatch->stop(); @@ -462,6 +465,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* rootWatch->stop(); tBeforeBarrier=rootWatch->getDuration(); #endif + debug("Benchmark::work_pingpong_parallel 3->barrier()"); barrier(); #ifdef DEBUG_KERNEL_SYNCHRONIZATION rootWatch->stop(); @@ -479,6 +483,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* */ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){ for (auto i = 0; i < size(); ++i) { + debug("Benchmark::work_pingpong_serial 1->barrier()"); barrier(); if (i == rank()) { EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false)); @@ -490,8 +495,21 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms ); } else if (i == partner) { EXEC_NOFAIL(kernel(partner, rank(), nullptr, false)); + } else { + if(cl->nameRef() == VirtualClusterPortals::NAME) { + // 2 per kernel, warmup kernel and measuring kernel + debug("Benchmark::work_pingpong_serial 2->barrier()"); + barrier(); + debug("Benchmark::work_pingpong_serial 3->barrier()"); + barrier(); + debug("Benchmark::work_pingpong_serial 4->barrier()"); + barrier(); + debug("Benchmark::work_pingpong_serial 5->barrier()"); + barrier(); + } } } + debug("Benchmark::work_pingpong_serial 6->barrier()"); barrier(); return SUCCESS; @@ -549,6 +567,7 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain double runningSumBandwidth=0.0; /* Execute all-to-all test if 
desired */ + debug("Benchmark::run_iteration 1->barrier()"); barrier(); if (args->do_alltoall){ EXEC_NOFAIL(work_alltoall()); @@ -570,6 +589,7 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain if(!args->do_serial) std::printf(fmt[!!args->do_serial], nDigits, step+1); //Start print out early so user knows step has started std::fflush(stdout); const double stepStartTime = walltime(); + debug("Benchmark::run_iteration 2->barrier()"); barrier(); EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth)); *tWork += (walltime() - stepStartTime); @@ -579,12 +599,14 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain } }else{ for (unsigned int step: stepPermutation) { + debug("Benchmark::run_iteration 3->barrier()"); barrier(); EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth)); } } /* Execute all-to-all test if desired */ + debug("Benchmark::run_iteration 4->barrier()"); barrier(); if (args->do_alltoall){ EXEC_NOFAIL(work_alltoall()); @@ -638,12 +660,14 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. 
Bann all barriers from kernels if(cl->nameRef() == VirtualClusterPortals::NAME) { // 2 per kernel, warmup kernel and measuring kernel + debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()"); barrier(); barrier(); barrier(); barrier(); } } + debug("Benchmark::retest_one_slow_pair 5->barrier()"); barrier(); if (0 != from) { @@ -969,6 +993,7 @@ int Benchmark::benchmark(){ prepareBuffers(); rootWatch->start(); + debug("Benchmark::benchmark->barrier()"); cl->barrier(); rootWatch->stop(); diff --git a/benchmark/output_sion.cc b/benchmark/output_sion.cc index f671e1f..0748a50 100644 --- a/benchmark/output_sion.cc +++ b/benchmark/output_sion.cc @@ -199,7 +199,7 @@ static int linktest_output_sion_funnelled_root(VirtualCluster* cl, return ERROR; } } - + debug("linktest_output_sion_funnelled_root->barrier"); EXEC_NOFAIL(cl->barrier()); printTimingIfRoot(cl->rank(), "[sioncollect]", std::chrono::duration<double>(walltime() - begin)); @@ -326,11 +326,13 @@ int linktest_output_sion_parallel(VirtualCluster* cl, }; auto sion_api = create_and_register_api(args->virtual_cluster_implementation); + debug("linktest_output_sion_parallel 1->barrier"); cl->barrier(); char* buffer; long long sz; EXEC_IFFAIL(linktest_output_sion_collect_local_data(cl, args, statsVec, &buffer, &sz), error("linktest_output_sion_collect_local_data failed."); return ERROR); + debug("linktest_output_sion_parallel 2->barrier"); cl->barrier(); auto filename = args->output.c_str(); @@ -362,6 +364,7 @@ int linktest_output_sion_parallel(VirtualCluster* cl, &fp, //fileptr &newfname //newfname ); + debug("linktest_output_sion_parallel 3->barrier"); cl->barrier(); rootWatch->stop(); printTiming("[sionopen]"); diff --git a/benchmark/vcluster_helper.cc b/benchmark/vcluster_helper.cc index 28bd22e..b977cf1 100644 --- a/benchmark/vcluster_helper.cc +++ b/benchmark/vcluster_helper.cc @@ -40,7 +40,7 @@ int vcluster_helper_barrier(VirtualCluster* cl){ * a non-NULL buffer pointer */ #ifdef DEBUG_BARRIER - 
info("vcluster_helper_barrier %d", barrierCounter++); + debug("vcluster_helper_barrier %d", barrierCounter++); #endif char sp = 0; MemoryBuffer buf = MemoryBuffer::wrap<char>(&sp, 0, AddressSpace::ID::Local); diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 7ce01fe..0f6cdd5 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -239,9 +239,6 @@ int VirtualClusterPortals::kbipingpong( MemoryBuffer& buf1, MemoryBuffer& buf2, const int num_msg, double* const timing) { - bool isSender = rank() == from; - bool isReceiver = rank() == to; - auto watch = Stopwatchfactory::getRankWatch(rank(), from); debug("kbipingpong: prepareSendStructs"); prepareSendStructs(buf1); @@ -249,22 +246,12 @@ int VirtualClusterPortals::kbipingpong( prepareRecvStructs(buf2); barrier(); - if(isSender) { - watch->start(); - debug("kbipingpong: send"); - sendMessages(to, buf1, num_msg); - debug("kbipingpong: recv"); - recvMessages(num_msg); - watch->stop(); - } - if(isReceiver) { - watch->start(); - debug("kbipingpong: recv"); - sendMessages(from, buf1, num_msg); - debug("kbipingpong: send"); - recvMessages(num_msg); - watch->stop(); - } + watch->start(); + debug("kbipingpong: send"); + sendMessages(from, buf1, num_msg); + debug("kbipingpong: recv"); + recvMessages(num_msg); + watch->stop(); barrier(); debug("kbipingpong: releaseRecvStructs"); -- GitLab From 52f38e1d7a0a0c9ead6b96eb92adcd05e37602b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 14 Jul 2023 00:35:18 +0200 Subject: [PATCH 30/47] Fixed initial counter value for bidir Moved all debugs inside function calls Removed DEBUG_PORTALS --- benchmark/vcluster_portals.cc | 136 +++++++++++++++++----------------- benchmark/vcluster_portals.h | 20 +++-- run.sh | 20 +++++ 3 files changed, 102 insertions(+), 74 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index 0f6cdd5..d5f494f 100644 --- 
a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -18,13 +18,11 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() { ptl_process_t physId; CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) ); - #if defined(DEBUG_PORTALS) debug("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d", rank(), hostname().c_str(), physId.phys.nid, physId.phys.pid); - #endif std::vector<ptl_process_t> physicalFromRank(size()); gather(0, physicalFromRank.data(), &physId, 1); @@ -64,6 +62,7 @@ int VirtualClusterPortals::init() int VirtualClusterPortals::finalize() { + debug("VirtualClusterPortals::finalize()"); CHECK_RETURNVAL( PtlPTFree(mni_handle, pt_index) ); CHECK_RETURNVAL( PtlNIFini(mni_handle) ); PtlFini(); @@ -72,6 +71,7 @@ int VirtualClusterPortals::finalize() } void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) { + debug("VirtualClusterPortals::prepareSendStructs(%p)", buf.p()); md.start = buf.p(); md.length = buf.len(); md.options = PTL_MD_EVENT_CT_ACK; @@ -81,6 +81,7 @@ void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) { } void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { + debug("VirtualClusterPortals::prepareRecvStructs(%p)", buf.p()); me.start = buf.p(); me.length = buf.len(); me.uid = PTL_UID_ANY; @@ -98,51 +99,53 @@ void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) { } } -void VirtualClusterPortals::recvMessages(const int num_msg) { - CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) ); - const ptl_size_t start_count = recv_ct.success; - - #if defined(DEBUG_PORTALS) - debug("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure); - #endif +ptl_size_t VirtualClusterPortals::getSendCounter() { + debug("VirtualClusterPortals::getSendCounter()"); + CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) ); + debug("Send (MD): success %d - failure %d", send_ct.success, send_ct.failure); + if(send_ct.failure > 0) { + error("Failed 
operation on MD"); + } + return send_ct.success; +} - CHECK_RETURNVAL( PtlCTWait(me.ct_handle, start_count + static_cast<unsigned long>(num_msg), &recv_ct) ); +ptl_size_t VirtualClusterPortals::getRecvCounter() { + debug("VirtualClusterPortals::getRecvCounter()"); + CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) ); + debug("Recv (ME): success %d - failure %d", recv_ct.success, recv_ct.failure); + if(recv_ct.failure > 0) { + error("Failed operation on ME"); + } + return recv_ct.success; +} - #if defined(DEBUG_PORTALS) - debug("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure); - #endif +void VirtualClusterPortals::recvMessages(const unsigned long num_msg, const unsigned long counter_start) { + debug("VirtualClusterPortals::recvMessages(%lu, %lu)", num_msg, counter_start); + CHECK_RETURNVAL( PtlCTWait(me.ct_handle, counter_start + num_msg, &recv_ct) ); } -void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg) { - ptl_process_t target; - target.rank = to; +void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const unsigned long num_msg, const unsigned long counter_start) { + debug("VirtualClusterPortals::sendMessages(%d, %p, %lu, %lu)", to, num_msg, buf.p() , counter_start); const ptl_size_t localOffset = 0; const ptl_size_t remoteOffset = 0; const ptl_hdr_data_t header_data = 0; - - #if defined(DEBUG_PORTALS) - CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) ); - const ptl_size_t start_count = send_ct.success; - debug("Send: before success %d - failure %d", send_ct.success, send_ct.failure); - #endif + ptl_process_t target; + target.rank = to; - for(auto n = 1; n <= num_msg; n++) { + for(unsigned long n = 1; n <= num_msg; n++) { CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, MATCH_BITS, remoteOffset, nullptr, header_data) ); } - - // TODO Discuss if checking for send error (and handling) makes sense - #if defined(DEBUG_PORTALS) - 
CHECK_RETURNVAL( PtlCTWait(md.ct_handle, start_count + static_cast<unsigned long>(num_msg), &send_ct) ); - debug("Send: after success %d - failure %d", send_ct.success, send_ct.failure); - #endif + CHECK_RETURNVAL( PtlCTWait(md.ct_handle, counter_start + num_msg, &send_ct) ); } void VirtualClusterPortals::releaseRecvStructs() { + debug("releaseRecvStructs()"); CHECK_RETURNVAL( PtlMEUnlink(me_handle) ); CHECK_RETURNVAL( PtlCTFree(me.ct_handle) ); }; void VirtualClusterPortals::releaseSendStructs() { + debug("releaseSendStructs()"); CHECK_RETURNVAL( PtlMDRelease(md_handle) ); CHECK_RETURNVAL( PtlCTFree(md.ct_handle) ); }; @@ -151,36 +154,33 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& { bool isSender = rank() == from; bool isReceiver = rank() == to; - auto watch = Stopwatchfactory::getRankWatch(rank(), from); - debug("kpingpong: prepareSendStructs"); prepareSendStructs(buf); - debug("kpingpong: prepareRecvStructs"); prepareRecvStructs(buf); + auto sendCounterBeforeKernel = getSendCounter(); + auto recvCounterBeforeKernel = getRecvCounter(); + barrier(); if(isSender) { watch->start(); - debug("kpingpong: send"); - sendMessages(to, buf, num_msg); - debug("kpingpong: recv"); - recvMessages(num_msg); + sendMessages(to, buf, num_msg, sendCounterBeforeKernel); + recvMessages(num_msg, recvCounterBeforeKernel); watch->stop(); } if(isReceiver) { watch->start(); - debug("kpingpong: recv"); - recvMessages(num_msg); - debug("kpingpong: send"); - sendMessages(from, buf, num_msg); + recvMessages(num_msg, recvCounterBeforeKernel); + sendMessages(from, buf, num_msg, sendCounterBeforeKernel); watch->stop(); } barrier(); - debug("kpingpong: releaseRecvStructs"); - releaseRecvStructs(); - debug("kpingpong: releaseSendStructs"); + + getSendCounter(); + getRecvCounter(); releaseSendStructs(); + releaseRecvStructs(); if(timing) { *timing = watch->getDuration().count() / (2*num_msg); } @@ -196,36 +196,33 @@ int VirtualClusterPortals::kUniDir( { bool 
isSender = rank() == from; bool isReceiver = rank() == to; - auto watch = Stopwatchfactory::getRankWatch(rank(), from); - debug("kUnidir: prepareSendStructs"); prepareSendStructs(buf1); - debug("kUnidir: prepareRecvStructs"); prepareRecvStructs(buf2); + auto sendCounterBeforeKernel = getSendCounter(); + auto recvCounterBeforeKernel = getRecvCounter(); + barrier(); if(isSender) { watch->start(); - debug("kUnidir: send"); - sendMessages(to, buf1, num_msg); - debug("kUnidir: recv"); - recvMessages(1); + sendMessages(to, buf1, num_msg, sendCounterBeforeKernel); + recvMessages(1, recvCounterBeforeKernel); watch->stop(); } if(isReceiver) { watch->start(); - debug("kUnidir: recv"); - recvMessages(num_msg); - debug("kUnidir: send"); - sendMessages(from, buf1, 1); + recvMessages(num_msg, recvCounterBeforeKernel); + sendMessages(from, buf1, 1, sendCounterBeforeKernel); watch->stop(); } barrier(); - debug("kUnidir: releaseRecvStructs"); - releaseRecvStructs(); - debug("kUnidir: releaseSendStructs"); + + getSendCounter(); + getRecvCounter(); releaseSendStructs(); + releaseRecvStructs(); if(timing) { *timing = watch->getDuration().count() / num_msg; } @@ -239,25 +236,32 @@ int VirtualClusterPortals::kbipingpong( MemoryBuffer& buf1, MemoryBuffer& buf2, const int num_msg, double* const timing) { + int partner; + if(rank() == from) { + partner = to; + } + if(rank() == to) { + partner = from; + } auto watch = Stopwatchfactory::getRankWatch(rank(), from); - debug("kbipingpong: prepareSendStructs"); prepareSendStructs(buf1); - debug("kbipingpong: prepareRecvStructs"); prepareRecvStructs(buf2); + auto sendCounterBeforeKernel = getSendCounter(); + auto recvCounterBeforeKernel = getRecvCounter(); + barrier(); watch->start(); - debug("kbipingpong: send"); - sendMessages(from, buf1, num_msg); - debug("kbipingpong: recv"); - recvMessages(num_msg); + sendMessages(partner, buf1, num_msg, sendCounterBeforeKernel); + recvMessages(num_msg, recvCounterBeforeKernel); watch->stop(); barrier(); - 
debug("kbipingpong: releaseRecvStructs"); - releaseRecvStructs(); - debug("kbipingpong: releaseSendStructs"); + + getSendCounter(); + getRecvCounter(); releaseSendStructs(); + releaseRecvStructs(); if(timing) { *timing = watch->getDuration().count() / (2.0 * num_msg); } diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index f58bbb2..0002629 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -64,32 +64,36 @@ private: const ptl_match_bits_t MATCH_BITS = 1; // TODO when/how to use these? const ptl_match_bits_t IGNORE_BITS = ~0; // ignore all bits - // Sender only + // Sender /** @brief Memory Descriptor (MD) */ ptl_md_t md; /** @brief MD Handle */ ptl_handle_md_t md_handle; /** @brief Send Counter */ ptl_ct_event_t send_ct; + /** @brief Prepare portals data structures on sender side (MD) */ void prepareSendStructs(const MemoryBuffer& buf); + /** @brief Read current send (MD) counter value */ + ptl_size_t getSendCounter(); + /** @brief Free portals data structures on sender side (MD) */ void releaseSendStructs(); - // Receiver only + // Receiver /** @brief Match List Entry (ME) */ ptl_me_t me; /** @brief ME Handle */ ptl_handle_me_t me_handle; /** @brief Receive Counter */ ptl_ct_event_t recv_ct; + /** @brief Prepare portals data structures on receiver side (ME) */ void prepareRecvStructs(const MemoryBuffer& buf); + /** @brief Read current recv (ME) counter value */ + ptl_size_t getRecvCounter(); + /** @brief Free portals data structures on receiver side (ME) */ void releaseRecvStructs(); - void sendMessages(const int to, MemoryBuffer& buf, const int num_msg); - void recvMessages(const int num_msg); - - bool first = true; - int testPut(); - + void sendMessages(const int to, MemoryBuffer& buf, const unsigned long num_msg, const unsigned long counter_start); + void recvMessages(const unsigned long num_msg, const unsigned long counter_start); std::vector<ptl_process_t> getPhysicalFromRank(); }; diff --git a/run.sh 
b/run.sh index bb7818c..e8f908f 100755 --- a/run.sh +++ b/run.sh @@ -9,6 +9,26 @@ ml GCC ParaStationMPI SIONlib #export PORTALS4_DEBUG=3 srun install/linktest \ + --mode portals \ + --num-warmup-messages 3 \ + --num-messages 100 \ + --size-messages 16777216 \ + --serial-tests \ + --num-slowest 1 \ + --no-sion-file; + +srun install/linktest \ + --unidirectional \ + --mode portals \ + --num-warmup-messages 3 \ + --num-messages 100 \ + --size-messages 16777216 \ + --serial-tests \ + --num-slowest 1 \ + --no-sion-file; + +srun install/linktest \ + --bidirectional \ --mode portals \ --num-warmup-messages 3 \ --num-messages 100 \ -- GitLab From 3e0907d28bad2f59d2aa982defd992057ca43b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Thu, 27 Jul 2023 16:59:21 +0200 Subject: [PATCH 31/47] Add missing ifdefs for portals code --- benchmark/benchmark.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index 8f9d85f..17a1d2f 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -19,7 +19,9 @@ #include "environ.h" #include "format_units.h" #include "format_print.h" +#if HAVE_VCLUSTER_PORTALS == 1 #include "vcluster_portals.h" +#endif #include <cstdlib> #include <cstdio> #include <cstring> @@ -496,6 +498,7 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms } else if (i == partner) { EXEC_NOFAIL(kernel(partner, rank(), nullptr, false)); } else { + #if HAVE_VCLUSTER_PORTALS == 1 if(cl->nameRef() == VirtualClusterPortals::NAME) { // 2 per kernel, warmup kernel and measuring kernel debug("Benchmark::work_pingpong_serial 2->barrier()"); @@ -507,6 +510,7 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms debug("Benchmark::work_pingpong_serial 5->barrier()"); barrier(); } + #endif } } debug("Benchmark::work_pingpong_serial 6->barrier()"); @@ -658,6 +662,7 @@ int Benchmark::retest_one_slow_pair(const int 
from,const int to, double* const t if ((from == rank()) || (to == rank())) { EXEC_NOFAIL(kernel(from, to, &tv, false)); } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels + #if HAVE_VCLUSTER_PORTALS == 1 if(cl->nameRef() == VirtualClusterPortals::NAME) { // 2 per kernel, warmup kernel and measuring kernel debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()"); @@ -666,6 +671,7 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t barrier(); barrier(); } + #endif } debug("Benchmark::retest_one_slow_pair 5->barrier()"); barrier(); -- GitLab From 4720cfb033b5c0e27320713be71f425f8b390b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 2 Aug 2023 17:10:13 +0200 Subject: [PATCH 32/47] Added compile failure table --- test/LinktestMain.xml | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml index a55d84c..74c8d51 100644 --- a/test/LinktestMain.xml +++ b/test/LinktestMain.xml @@ -60,7 +60,7 @@ if [ $? -eq 0 ]; then touch ../ready; else - echo "Linktest compile failed" >> ../error; + echo "linktest compile failed" >> ../error; fi set +x </do> @@ -99,7 +99,7 @@ if [ $? -eq 0 ]; then touch ready; else - echo "linktest-report compile failed" >> error; + echo "inktest-report compile failed" >> error; fi deactivate set +x @@ -123,7 +123,7 @@ if [ $? 
-eq 0 ]; then touch ready; else - echo "python-report run failed" >> error; + echo "linktest-report run failed" >> error; fi deactivate set +x @@ -148,6 +148,12 @@ </patternset> <!-- Analyse --> + <analyser name="analyseCompiles"> + <analyse step="Compile"> + <file use="errorFilePatterns">error</file> + </analyse> + </analyser> + <analyser name="analyseRuns"> <analyse step="LayerTest" tag="!noLayerTest"> <file use="LinktestOutPatterns">linktest.log</file> @@ -202,6 +208,19 @@ <column title="Errors">error_msg</column> </table> </result> + <result> + <use>analyseCompiles</use> + <table name="CompileErrors" style="pretty" sort="jube_step_name"> + <column title="Test">jube_step_name</column> + <column title="Compiler">Compiler</column> + <column title="MPI">MPI</column> + <column title="Setting">Transport_Layer_Settings</column> + <column title="Layer">Messaging_Layer</column> + <column title="Srun Args">SRUN_Arguments</column> + <column title="Options">Options</column> + <column title="Errors">error_msg</column> + </table> + </result> </benchmark> </jube> -- GitLab From de721ba9ffca9527a43564be18ecaf1d8d0499c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 4 Aug 2023 12:41:53 +0200 Subject: [PATCH 33/47] Ported JUBE tests to deep. 
Added Portals Layer Test --- test/Default.xml | 25 +++++++++++++++++++------ test/LayerTest.xml | 25 +++++++++++++++++++++++-- test/LinktestMain.xml | 4 ++-- 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/test/Default.xml b/test/Default.xml index c0abbfe..6e5a458 100644 --- a/test/Default.xml +++ b/test/Default.xml @@ -29,9 +29,9 @@ <parameter name="DefaultCompiler">GCC</parameter> <parameter name="Compiler" tag="!noCompileRunTest">GCC,Intel,NVHPC</parameter> <parameter name="Compiler" tag="noCompileRunTest">${DefaultCompiler}</parameter> - <parameter name="DefaultMPI">OpenMPI</parameter> + <parameter name="DefaultMPI">ParaStationMPI</parameter> <parameter name="MPI" mode="python" tag="!noCompileRunTest"> - { + "ParaStationMPI" if "${System_Name}" == "deep" else { "GCC": "ParaStationMPI,OpenMPI", "Intel": "ParaStationMPI,OpenMPI,IntelMPI", "NVHPC": "ParaStationMPI,OpenMPI" @@ -68,7 +68,13 @@ </parameter> </parameterset> <parameterset name="Slurm"> <!-- depends on Linktest_Args, System and Environment parameters --> - <parameter name="Account">cstao</parameter> + <parameter name="Account" mode="python"> + { + "juwels": "cstao", + "jurecadc": "cstao", + "deep": "deepsea" + }["${System_Name}"] + </parameter> <parameter name="Partition" mode="python"> { "juwels": { @@ -78,14 +84,20 @@ "jurecadc": { False: "dc-cpu-devel", True : "dc-gpu-devel" + }, + "deep": { + False: "dp-cn", + True : "dp-esb" } }["${System_Name}"][ ${WithGPUs} ] </parameter> <parameter name="Max_WallClock_Time">00:01:00</parameter> <parameter name="Number_Of_Nodes" mode="python">1 if "${Messaging_Layer}" == "cuda" else 2</parameter> - <parameter name="Number_Of_Tasks_Per_Node">4</parameter> + <parameter name="Number_Of_Tasks_Per_Node" mode="python"> + "1" if (${WithGPUs} and "${System_Name}" == "deep") else "4" + </parameter> <parameter name="Number_Of_Cores_Per_Task">1</parameter> - <parameter name="Gres" mode="python">"#SBATCH --gres=gpu:4" if ${WithGPUs} else ""</parameter> + 
<parameter name="Gres" mode="python">"#SBATCH --gres=gpu:${Number_Of_Tasks_Per_Node}" if ${WithGPUs} else ""</parameter> <parameter name="SRUN_Arguments" mode="python"> "" if "${Messaging_Layer}" == "mpi" else { "ParaStationMPI": "--mpi=pspmi", @@ -98,7 +110,8 @@ <parameter name="CuArch" mode="python"> { "juwels": "sm_70", - "jurecadc": "sm_80" + "jurecadc": "sm_80", + "deep": "sm_70", }[ "${System_Name}" ] </parameter> <parameter name="Enable_Layer" mode="python"> diff --git a/test/LayerTest.xml b/test/LayerTest.xml index 5ca1410..4594ede 100644 --- a/test/LayerTest.xml +++ b/test/LayerTest.xml @@ -4,8 +4,29 @@ <parameter name="Messaging_Layer" mode="python"> { "juwels": "ibverbs,ucp,tcp,cuda", - "jurecadc": "ibverbs,ucp,tcp,cuda" <!-- TODO add psm2 which is available only on jureca booster which shares login node --> + "jurecadc": "ibverbs,ucp,tcp,cuda", + "deep": "ibverbs,portals" }[ "${System_Name}" ] - </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,tcp --> + </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,portals,tcp --> + +</parameterset> +<parameterset name="Slurm" init_with="Default.xml"> + <parameter name="Partition" mode="python"> + "dp-bxi" if "${System_Name}" == "portals" else { + "juwels": { + False: "devel", + True : "develgpus" + }, + "jurecadc": { + False: "dc-cpu-devel", + True : "dc-gpu-devel" + }, + "deep": { + False: "dp-cn", + True : "dc-esb" + } + }["${System_Name}"][ ${WithGPUs} ] + </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,portals,tcp --> + </parameterset> </jube> \ No newline at end of file diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml index 74c8d51..50b23c2 100644 --- a/test/LinktestMain.xml +++ b/test/LinktestMain.xml @@ -71,8 +71,8 @@ </step> <step name="LayerTest" depend="Compile" active="'$Stack' == '$Default_Stack' and ${WithCUDA} == ${WithGPUs}" suffix="${Messaging_Layer}" tag="!noLayerTest"> - <use from="LayerTest.xml">Linktest_Args</use> - <use from="Default.xml">System, Environment, Slurm, 
Misc</use> + <use from="LayerTest.xml">Linktest_Args, Slurm</use> + <use from="Default.xml">System, Environment, Misc</use> <use>ExecutionScript</use> <use>SubstituteInputParameters</use> <do done_file="ready" error_file="error" tag="!dryRun">sbatch execute.sbatch</do> -- GitLab From f14e93c0f7336860db0a3ce4c39a3828bcb3ad01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Mon, 7 Aug 2023 16:50:28 +0200 Subject: [PATCH 34/47] Fixed HAVE_CUDA -> HAVE_VCLUSTER_CUDA --- benchmark/Makefile | 2 +- benchmark/benchmark.cc | 2 +- benchmark/benchmark.h | 4 ++-- benchmark/cmdline.cc | 6 +++--- benchmark/gpu_nvidia.h | 4 ++-- benchmark/memory.cc | 8 ++++---- benchmark/memory.h | 4 ++-- benchmark/memory_multi.cc | 10 +++++----- benchmark/vcluster.cc | 24 +++++++++++++++++++++++- benchmark/vcluster_cuda.cc | 2 +- benchmark/vcluster_portals.h | 2 +- 11 files changed, 45 insertions(+), 23 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index df3b16d..8d3404a 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -182,7 +182,7 @@ ifeq (1, $(HAVE_MINIPMI)) $(error CUARCH is not set) endif CUFLAGS = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1 - CPPFLAGS += -I$(CUDA)/include -DHAVE_CUDA=1 + CPPFLAGS += -I$(CUDA)/include -DHAVE_VCLUSTER_CUDA=1 LDFLAGS += -L$(CUDA)/lib LIBS += -lcuda -lcudart endif diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index 17a1d2f..7b22af7 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -844,7 +844,7 @@ int Benchmark::init() { alloc.reset(new PosixMemAlignedAllocator()); break; case(AllocatorCUDA): - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 if(cl->rank()==0){info("Using CUDA memory allocator"); std::fflush(stdout);} gpudev.reset(new cuda::GpuDevice(System::singleton()->closest_gpu_device())); gpuctx.reset(new cuda::GpuContext(gpudev.get())); diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h index feaabc1..22bfe18 100644 --- 
a/benchmark/benchmark.h +++ b/benchmark/benchmark.h @@ -17,7 +17,7 @@ #include "slow_pairs.h" #include "error.h" -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 #include "gpu_nvidia.h" #endif @@ -52,7 +52,7 @@ namespace linktest{ void barrier() const; const struct linktest_args* args; std::unique_ptr<VirtualCluster> cl; - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 std::unique_ptr<cuda::GpuDevice> gpudev; std::unique_ptr<cuda::GpuContext> gpuctx; // Declaration order important! MemoryBuffer~ needs to be called before before GpuContext~ #endif diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc index 9b51db3..6c3ec9b 100644 --- a/benchmark/cmdline.cc +++ b/benchmark/cmdline.cc @@ -661,13 +661,13 @@ const struct linktest_args* parse_cmdline_args(int argc, char **argv){ } if(cmdline_args.alloc_typ==AllocatorCUDA){ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 #else fatal("Requested CUDA memory-allocator type, but compiled without CUDA support."); #endif }else{ if(cmdline_args.do_use_gpus||cmdline_args.virtual_cluster_implementation=="cuda"){ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 if(cmdline_args.alloc_typ==AllocatorDefault){ cmdline_args.alloc_typ=AllocatorCUDA; } else { @@ -821,7 +821,7 @@ void print_cmdline_args(const struct linktest_args* args){ case(AllocatorPOSIXAlignedMalloc): return "posix_memalign"; case(AllocatorCUDA): - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 return "CUDA"; #else return "No CUDA"; diff --git a/benchmark/gpu_nvidia.h b/benchmark/gpu_nvidia.h index fbf77a9..0d3b386 100644 --- a/benchmark/gpu_nvidia.h +++ b/benchmark/gpu_nvidia.h @@ -9,8 +9,8 @@ #ifndef LINKTEST_GPU_NVIDIA_H #define LINKTEST_GPU_NVIDIA_H -#if 1 != HAVE_CUDA -#error gpu_nvidia can only compile with HAVE_CUDA=1 +#if 1 != HAVE_VCLUSTER_CUDA +#error gpu_nvidia can only compile with HAVE_VCLUSTER_CUDA=1 #endif #include "config.h" diff --git a/benchmark/memory.cc b/benchmark/memory.cc index b76663e..a9245b5 100644 --- a/benchmark/memory.cc +++ 
b/benchmark/memory.cc @@ -8,7 +8,7 @@ ****************************************************************************/ #include "memory.h" #include "compiler.h" -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 #include "memory_cuda.h" #include "gpu_nvidia.h" #endif @@ -109,7 +109,7 @@ void MemoryBuffer::fill(){ if(use_mt()){ throw std::runtime_error("Not Implemented!"); }else{ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 linktest::cuda::fill<char>(linktest::cuda::GpuContext::singleton(), pointer<char>(), pointer<char>() + len(), (char )0xff); @@ -147,7 +147,7 @@ int MemoryBuffer::check(){ } break; case AddressSpace::ID::CudaDeviceLocal: - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 throw std::runtime_error("check() called for local CUDA address space"); #else throw std::runtime_error("check() called on a CUDA address space but LinkTest was compiled without CUDA support"); @@ -231,7 +231,7 @@ int PosixMemAlignedAllocator::free(void* p, std::size_t len){ return SUCCESS; } -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 CudaDeviceAllocator::CudaDeviceAllocator(linktest::cuda::GpuContext* ctx):ctx_(ctx){} AddressSpace::ID CudaDeviceAllocator::address_space_id() const{ return AddressSpace::ID::CudaDeviceLocal; diff --git a/benchmark/memory.h b/benchmark/memory.h index 6774f1a..1e15a67 100644 --- a/benchmark/memory.h +++ b/benchmark/memory.h @@ -14,7 +14,7 @@ #include <cstdint> #include <unistd.h> -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 namespace linktest{ namespace cuda{ class Allocator; @@ -129,7 +129,7 @@ class PosixMemAlignedAllocator : public Allocator { size_t pgsize_ = sysconf(_SC_PAGESIZE); }; -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 // A memory allocation on a GPU class CudaDeviceAllocator : public Allocator{ public: diff --git a/benchmark/memory_multi.cc b/benchmark/memory_multi.cc index ab85a0f..488486e 100644 --- a/benchmark/memory_multi.cc +++ b/benchmark/memory_multi.cc @@ -8,7 +8,7 @@ 
****************************************************************************/ #include "memory_multi.h" #include "compiler.h" -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 #include "memory_cuda.h" #include "gpu_nvidia.h" #endif @@ -91,13 +91,13 @@ void MemoryBufferMulti::fill(){ break; }case AddressSpace::ID::CudaDeviceLocal:{ if(use_mt()){ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 throw std::runtime_error("fill() for multiple buffers in CUDA address spaces not yet implemented"); #else throw std::runtime_error("fill() called on CUDA address space but linktest was compiled without CUDA"); #endif }else{ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 throw std::runtime_error("fill() for multiple buffers in CUDA address spaces not yet implemented"); #else throw std::runtime_error("fill() called on CUDA address space but linktest was compiled without CUDA"); @@ -146,13 +146,13 @@ int MemoryBufferMulti::check(std::size_t* buffer, std::size_t* byte){ break; }case AddressSpace::ID::CudaDeviceLocal:{ if(use_mt()){ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 throw std::runtime_error("check(buffer,byte) for multiple buffers in CUDA address spaces not yet implemented"); #else throw std::runtime_error("check(buffer,byte) called on CUDA address space but linktest was compiled without CUDA"); #endif }else{ - #if HAVE_CUDA == 1 + #if HAVE_VCLUSTER_CUDA == 1 throw std::runtime_error("check(buffer,byte) for multiple buffers in CUDA address spaces not yet implemented"); #else throw std::runtime_error("check(buffer,byte) called on CUDA address space but linktest was compiled without CUDA"); diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc index b334bde..90d40b3 100644 --- a/benchmark/vcluster.cc +++ b/benchmark/vcluster.cc @@ -502,7 +502,6 @@ int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf) const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std::string& name) { - std::string requestedImpl; // check 'name' @@ 
-524,6 +523,29 @@ const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std requestedImpl = {envName}; } + #if 1 == HAVE_VCLUSTER_TCP + debug("HAVE_VCLUSTER_TCP == 1"); + #endif + #if 1 == HAVE_VCLUSTER_MPI + debug("HAVE_VCLUSTER_MPI == 1"); + #endif + #if 1 == HAVE_VCLUSTER_IBVERBS + debug("HAVE_VCLUSTER_IBVERBS == 1"); + #endif + #if 1 == HAVE_VCLUSTER_PSM2 + debug("HAVE_VCLUSTER_PSM2 == 1"); + #endif + #if 1 == HAVE_VCLUSTER_UCP + debug("HAVE_VCLUSTER_UCP == 1"); + #endif + #if 1 == HAVE_VCLUSTER_PORTALS + debug("HAVE_VCLUSTER_PORTALS == 1"); + #endif + #if 1 == HAVE_VCLUSTER_CUDA + debug("HAVE_VCLUSTER_CUDA == 1"); + #endif + debug("requestedImpl = %s", requestedImpl.c_str()); + for(const auto& impl : VirtualCluster::impls) { if(impl == requestedImpl) { return impl; diff --git a/benchmark/vcluster_cuda.cc b/benchmark/vcluster_cuda.cc index 57ccc21..970d892 100644 --- a/benchmark/vcluster_cuda.cc +++ b/benchmark/vcluster_cuda.cc @@ -16,7 +16,7 @@ #include "error.h" #include "output_sion.h" #include "pmi.h" -#if HAVE_CUDA == 1 +#if HAVE_VCLUSTER_CUDA == 1 #include "gpu_nvidia.h" #endif #include <cassert> diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h index 0002629..0a00c3d 100644 --- a/benchmark/vcluster_portals.h +++ b/benchmark/vcluster_portals.h @@ -20,7 +20,7 @@ class VirtualClusterPortals : public VirtualClusterWithHelper { public: - inline static const char * NAME = "portals"; + static constexpr char NAME[] = "portals"; VirtualClusterPortals(); int init() override; int finalize() override; -- GitLab From 8bf55419f8f91d022437b8728a20014b00d50cf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 16 Aug 2023 14:39:53 +0200 Subject: [PATCH 35/47] Removed temporary helper files --- build.sh | 6 ------ loadEnv.sh | 10 ---------- loadPath.sh | 7 ------- run.sh | 38 -------------------------------------- 4 files changed, 61 deletions(-) delete mode 100755 build.sh 
delete mode 100644 loadEnv.sh delete mode 100644 loadPath.sh delete mode 100755 run.sh diff --git a/build.sh b/build.sh deleted file mode 100755 index f1726d1..0000000 --- a/build.sh +++ /dev/null @@ -1,6 +0,0 @@ -# 3. Install linktest in folder install -ml GCC ParaStationMPI SIONlib -mkdir -p install; -cd benchmark; -make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install; -cd ..; \ No newline at end of file diff --git a/loadEnv.sh b/loadEnv.sh deleted file mode 100644 index 0e3a2e1..0000000 --- a/loadEnv.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -x - - -## MPI -#source loadPath.sh /opt/mpi/openmpi/4.1.1.2/ -source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh -## BXI profile -export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf -## SIONlib and linktest -source ./loadPath.sh install \ No newline at end of file diff --git a/loadPath.sh b/loadPath.sh deleted file mode 100644 index 332eec7..0000000 --- a/loadPath.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -INSTALL_DIR=$(readlink -mn $1) -echo Loading $INSTALL_DIR -export LIBRARY_PATH=$LIBRARY_PATH:$INSTALL_DIR/lib/; -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INSTALL_DIR/lib/; -export CPATH=$CPATH:$INSTALL_DIR/include/; -export PATH=$PATH:$INSTALL_DIR/bin; \ No newline at end of file diff --git a/run.sh b/run.sh deleted file mode 100755 index e8f908f..0000000 --- a/run.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh -#SBATCH --partition dp-bxi -#SBATCH --account deepsea -#SBATCH --nodes 4 -#SBATCH --tasks-per-node 1 - -ml GCC ParaStationMPI SIONlib - -#export PORTALS4_DEBUG=3 - -srun install/linktest \ - --mode portals \ - --num-warmup-messages 3 \ - --num-messages 100 \ - --size-messages 16777216 \ - --serial-tests \ - --num-slowest 1 \ - --no-sion-file; - -srun install/linktest \ - --unidirectional \ - --mode portals \ - --num-warmup-messages 3 \ - --num-messages 100 \ - --size-messages 16777216 \ - --serial-tests \ - --num-slowest 1 \ - --no-sion-file; - 
-srun install/linktest \ - --bidirectional \ - --mode portals \ - --num-warmup-messages 3 \ - --num-messages 100 \ - --size-messages 16777216 \ - --serial-tests \ - --num-slowest 1 \ - --no-sion-file; \ No newline at end of file -- GitLab From af8a02921adda2488352728d050afaccafebdbae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 16 Aug 2023 14:40:29 +0200 Subject: [PATCH 36/47] fixed --group-processes-by-hostname used too often --- test/execute_base.sbatch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/execute_base.sbatch b/test/execute_base.sbatch index e89c7dd..02dbf38 100644 --- a/test/execute_base.sbatch +++ b/test/execute_base.sbatch @@ -50,7 +50,7 @@ fi if [ §NUM_RANDOMIZE_TASKS§ -ne 0 ]; then args+=" --num-randomize-tasks §NUM_RANDOMIZE_TASKS§" fi -if [ §HOSTNAME_GROUPING§ ]; then +if [ §HOSTNAME_GROUPING§ -ne 0 ]; then args+=" --group-processes-by-hostname" fi set -x # echos commands before executing -- GitLab From 0d601ea7295091c673271111aba7d5281fbd1526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 16 Aug 2023 16:11:07 +0200 Subject: [PATCH 37/47] Revert exampleBuild and exampleRun --- exampleBuild.sh | 16 ++-------------- exampleRun.sh | 9 ++++----- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/exampleBuild.sh b/exampleBuild.sh index 492d10b..fd88411 100755 --- a/exampleBuild.sh +++ b/exampleBuild.sh @@ -23,19 +23,7 @@ export CPATH=$CPATH:~/.local/include/; mkdir -p install; cd benchmark; make clean -make -j HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install; +make -j 12 HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install; make clean cd ..; -# Install linktest-report -# FIX for JSC Systems -#export CPATH=/p/software/<SYSTEM>/stages/2022/software/SciPy-bundle/2021.10-gcccoremkl-11.2.0-2021.4.0/lib/python3.9/site-packages/numpy/core/include:$CPATH -cd install; -python3 -m venv 
linktest-report-venv; -source linktest-report-venv/bin/activate -cd ../python; -python3 -m pip install .; #TODO: Add --use-feature=in-tree-build if using pip 21.0.X to 21.2.X (default from 21.3 onwards) -deactivate; -cd ..; -# Notice that we close the virtual environment, since this script is likely not sourced -# To use python-report one has to source linktest-report-venv/bin/activate again -# To uninstall: pip uninstall linktest, or remove the virtual environment completly + diff --git a/exampleRun.sh b/exampleRun.sh index fc15465..0600c26 100755 --- a/exampleRun.sh +++ b/exampleRun.sh @@ -10,14 +10,13 @@ ml GCC ParaStationMPI SIONlib salloc \ ---partition dp-bxi \ ---reservation maint-bxi \ ---account deepsea \ +--partition devel \ +--account cstao \ --nodes 2 \ srun \ --ntasks 4 \ install/linktest \ - --mode portals \ + --mode mpi \ --num-warmup-messages 10 \ --num-messages 100 \ - --size-messages $((16)); \ No newline at end of file + --size-messages $((16*1024*1024)); \ No newline at end of file -- GitLab From c702e66ae0797027337ffe01e8558efb6c05339b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Wed, 16 Aug 2023 16:26:00 +0200 Subject: [PATCH 38/47] Added back linktest-report-venv in exampleBuild --- exampleBuild.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/exampleBuild.sh b/exampleBuild.sh index fd88411..07eac17 100755 --- a/exampleBuild.sh +++ b/exampleBuild.sh @@ -26,4 +26,16 @@ make clean make -j 12 HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install; make clean cd ..; - +# Install linktest-report +# FIX for JSC Systems +#export CPATH=/p/software/<SYSTEM>/stages/2022/software/SciPy-bundle/2021.10-gcccoremkl-11.2.0-2021.4.0/lib/python3.9/site-packages/numpy/core/include:$CPATH +cd install; +python3 -m venv linktest-report-venv; +source linktest-report-venv/bin/activate +cd ../python; +python3 -m pip install .; #TODO: Add --use-feature=in-tree-build if using 
pip 21.0.X to 21.2.X (default from 21.3 onwards) +deactivate; +cd ..; +# Notice that we close the virtual environment, since this script is likely not sourced +# To use python-report one has to source linktest-report-venv/bin/activate again +# To uninstall: pip uninstall linktest, or remove the virtual environment completly -- GitLab From 135bd4eb9164a57761341dd6fc9617880d794399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 29 Aug 2023 16:12:52 +0200 Subject: [PATCH 39/47] Fixed mpi-settings no longer exists on deep --- test/Default.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Default.xml b/test/Default.xml index 6e5a458..a65c93e 100644 --- a/test/Default.xml +++ b/test/Default.xml @@ -50,7 +50,7 @@ <parameter name="Default_Stack">$DefaultCompiler $DefaultMPI</parameter> <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else ""</parameter> <parameter name="Transport_Layer_Settings" mode="python"> - "" if not ${WithCUDA} else { + "" if not ${WithCUDA} or "${System_Name}" == "deep" else { "ParaStationMPI": "mpi-settings/CUDA", "OpenMPI": "UCX-settings/RC-CUDA", "IntelMPI": "" @@ -126,7 +126,7 @@ "": "" }[ "${CUDA}" ] </parameter> - <parameter name="Make">make -j ${Enable_Layer} ${DefineCuArch}</parameter> + <parameter name="Make">make -j24 ${Enable_Layer} ${DefineCuArch}</parameter> </parameterset> <parameterset name="Misc"> <!-- depends on Linktest_Args parameters --> <parameter name="Report_Name">linktest_${Messaging_Layer}_${Number_Of_Nodes}nx${Number_Of_Tasks_Per_Node}c</parameter> -- GitLab From c9dd5ae17851f972c48ec6c2fac41d10ed872396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 29 Aug 2023 16:14:09 +0200 Subject: [PATCH 40/47] Improved compile step suffix Renamed Makefile variables to reflect C++ usage --- benchmark/Makefile | 32 ++++++++++++++++---------------- 
test/LinktestMain.xml | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/benchmark/Makefile b/benchmark/Makefile index 8d3404a..7a27cfc 100644 --- a/benchmark/Makefile +++ b/benchmark/Makefile @@ -27,14 +27,14 @@ FSANITIZE = address SYSTEM = generic GIT_HASH = $(shell git rev-parse --verify HEAD) GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD) -CC = mpicxx -CFLAGS = -std=c++17 -Wall -g -rdynamic +CXX = mpicxx +CXXFLAGS = -std=c++17 -Wall -g -rdynamic CPPFLAGS = -D_GNU_SOURCE \ -DLINKTEST_LINUX=1 \ -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \ -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\" -LD = $(CC) +LD = $(CXX) LDFLAGS = LIBS = # ========================================= @@ -190,7 +190,7 @@ endif ifeq (1, $(HAVE_SION)) linktest-obj += vcluster_sion_generic_adapter.o - CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi) + CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --CXXFLAGS --mpi) LIBS += $(shell sionconfig --64 --gcc --libs --mpi) endif @@ -200,7 +200,7 @@ endif ifdef VERBOSE $(info linktest-obj = $(linktest-obj)) -$(info CFLAGS = $(CFLAGS)) +$(info CXXFLAGS = $(CXXFLAGS)) $(info CPPFLAGS = $(CPPFLAGS)) $(info LDFLAGS = $(LDFLAGS)) $(info LIBS = $(LIBS)) @@ -210,12 +210,12 @@ endif # DEFINE MAKE RULES # ========================================= ifdef VERBOSE - Q = + QUIET = else - Q = @ + QUIET = @ endif -link = $(Q)ln -s linktest linktest.$(1) +link = $(QUIET)ln -s linktest linktest.$(1) SYMB_EXE := $(shell find . 
-type l -iname "linktest.*") @@ -226,36 +226,36 @@ all: optimized compile: linktest $(linktest-versions) .PHONY: optimized -optimized: CFLAGS += -O3 +optimized: CXXFLAGS += -O3 optimized: compile .PHONY: debug -debug: CFLAGS += -O0 -g +debug: CXXFLAGS += -O0 -g debug: compile .PHONY: sanitized sanitized: debug -sanitized: CFLAGS += -fsanitize=$(FSANITIZE) -static-libasan -fno-omit-frame-pointer +sanitized: CXXFLAGS += -fsanitize=$(FSANITIZE) -static-libasan -fno-omit-frame-pointer sanitized: LDFLAGS += -fsanitize=$(FSANITIZE) -static-libasan sanitized: compile memory_cuda.cc: cuda_kernels.cc %.o: %.cc - @echo " "CC $@ - $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ + @echo " "CXX $@ + $(QUIET)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ %.cubin: %.cu @echo " "CU $@ - $(Q)$(CU) $(CUFLAGS) --cubin $< -o $@ + $(QUIET)$(CU) $(CUFLAGS) --cubin $< -o $@ %.cc: %.cubin convert.py @echo " "CONVERT $@ - $(Q)python3 convert.py $< $@ $(basename $@) + $(QUIET)python3 convert.py $< $@ $(basename $@) linktest: $(linktest-obj) @echo " "LD $@ - $(Q)$(LD) $(LDFLAGS) $^ $(LIBS) -o $@ + $(QUIET)$(LD) $(LDFLAGS) $^ $(LIBS) -o $@ linktest.tcp: linktest @echo " "LN $@ diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml index 50b23c2..54a1a69 100644 --- a/test/LinktestMain.xml +++ b/test/LinktestMain.xml @@ -49,7 +49,7 @@ <sub source="§SRUN_ARGS§" dest="${SRUN_Arguments}" /> </substituteset> - <step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}"> + <step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack} ${CUDA}"> <use>Sources</use> <use from="Default.xml">System, Environment, Build</use> <do done_file="ready" error_file="error" tag="!dryRun"> -- GitLab From b82056f72796cdc52a7fd200bc92ed0a258247e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 29 Aug 2023 16:59:54 +0200 Subject: [PATCH 41/47] Removed cluttering debug code --- benchmark/benchmark.cc | 42 
------------------------------------------ 1 file changed, 42 deletions(-) diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc index 7b22af7..444c346 100644 --- a/benchmark/benchmark.cc +++ b/benchmark/benchmark.cc @@ -400,7 +400,6 @@ int Benchmark::printIterationResults(const int iter){ } } - debug("Benchmark::printIterationResults->barrier()"); EXEC_NOFAIL(cl->barrier()); return SUCCESS; @@ -441,39 +440,11 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* auto from = (sign < 0) ? partner : rank(); auto to = (sign < 0) ? rank() : partner; - debug("Benchmark::work_pingpong_parallel 1->barrier()"); barrier(); -#ifdef DEBUG_KERNEL_SYNCHRONIZATION - std::unique_ptr<StopwatchI> rootWatch = Stopwatchfactory::getRootWatch(rank()); - duration_t tBeforeBarrier; - rootWatch->start(); -#endif EXEC_NOFAIL(kernel(from, to, &tmp1, true)); -#ifdef DEBUG_KERNEL_SYNCHRONIZATION - rootWatch->stop(); - tBeforeBarrier=rootWatch->getDuration(); -#endif - debug("Benchmark::work_pingpong_parallel 2->barrier()"); barrier(); -#ifdef DEBUG_KERNEL_SYNCHRONIZATION - rootWatch->stop(); - printTimingIfRoot(rank(), "[Kernel A->B Before Barrier]", tBeforeBarrier ); - printTimingIfRoot(rank(), "[Kernel A->B After Barrier]", rootWatch->getDuration()); - barrier(); //Additional barrier to reduce desynchronization due to printing - rootWatch->start(); -#endif EXEC_NOFAIL(kernel(to, from, &tmp2, true)); -#ifdef DEBUG_KERNEL_SYNCHRONIZATION - rootWatch->stop(); - tBeforeBarrier=rootWatch->getDuration(); -#endif - debug("Benchmark::work_pingpong_parallel 3->barrier()"); barrier(); -#ifdef DEBUG_KERNEL_SYNCHRONIZATION - rootWatch->stop(); - printTimingIfRoot(rank(), "[Kernel B->A Before Barrier]", tBeforeBarrier ); - printTimingIfRoot(rank(), "[Kernel B->A After Barrier]", rootWatch->getDuration()); -#endif *time_per_msg = (sign > 0) ? 
tmp1 : tmp2; @@ -485,7 +456,6 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double* */ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){ for (auto i = 0; i < size(); ++i) { - debug("Benchmark::work_pingpong_serial 1->barrier()"); barrier(); if (i == rank()) { EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false)); @@ -501,19 +471,14 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms #if HAVE_VCLUSTER_PORTALS == 1 if(cl->nameRef() == VirtualClusterPortals::NAME) { // 2 per kernel, warmup kernel and measuring kernel - debug("Benchmark::work_pingpong_serial 2->barrier()"); barrier(); - debug("Benchmark::work_pingpong_serial 3->barrier()"); barrier(); - debug("Benchmark::work_pingpong_serial 4->barrier()"); barrier(); - debug("Benchmark::work_pingpong_serial 5->barrier()"); barrier(); } #endif } } - debug("Benchmark::work_pingpong_serial 6->barrier()"); barrier(); return SUCCESS; @@ -571,7 +536,6 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain double runningSumBandwidth=0.0; /* Execute all-to-all test if desired */ - debug("Benchmark::run_iteration 1->barrier()"); barrier(); if (args->do_alltoall){ EXEC_NOFAIL(work_alltoall()); @@ -593,7 +557,6 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain if(!args->do_serial) std::printf(fmt[!!args->do_serial], nDigits, step+1); //Start print out early so user knows step has started std::fflush(stdout); const double stepStartTime = walltime(); - debug("Benchmark::run_iteration 2->barrier()"); barrier(); EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth)); *tWork += (walltime() - stepStartTime); @@ -603,14 +566,12 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain } }else{ for (unsigned int step: stepPermutation) { - debug("Benchmark::run_iteration 3->barrier()"); barrier(); 
EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth)); } } /* Execute all-to-all test if desired */ - debug("Benchmark::run_iteration 4->barrier()"); barrier(); if (args->do_alltoall){ EXEC_NOFAIL(work_alltoall()); @@ -665,7 +626,6 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t #if HAVE_VCLUSTER_PORTALS == 1 if(cl->nameRef() == VirtualClusterPortals::NAME) { // 2 per kernel, warmup kernel and measuring kernel - debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()"); barrier(); barrier(); barrier(); @@ -673,7 +633,6 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t } #endif } - debug("Benchmark::retest_one_slow_pair 5->barrier()"); barrier(); if (0 != from) { @@ -999,7 +958,6 @@ int Benchmark::benchmark(){ prepareBuffers(); rootWatch->start(); - debug("Benchmark::benchmark->barrier()"); cl->barrier(); rootWatch->stop(); -- GitLab From f7aecaec75940acb66a92dc080dc3ef719d58119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Sep 2023 10:45:47 +0200 Subject: [PATCH 42/47] Added NOLINT for macros --- benchmark/error.h | 3 ++- benchmark/portals4_macros.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/error.h b/benchmark/error.h index 2807e34..e1db667 100644 --- a/benchmark/error.h +++ b/benchmark/error.h @@ -35,10 +35,11 @@ void linktest_debug(const char* file, const char* func, long line, const char* f * The names are pretty generic so we have to be careful to avoid naming conflicts * that result in hard to understand compiler errors. */ +// NOLINTBEGIN #define fatal(fmt, ...) linktest_fatal(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__) #define error(fmt, ...) linktest_error(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__) #define warn(fmt, ...) linktest_warn(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__) #define info(fmt, ...) 
linktest_info(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__) #define debug(fmt, ...) linktest_debug(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__) - +// NOLINTEND #endif \ No newline at end of file diff --git a/benchmark/portals4_macros.h b/benchmark/portals4_macros.h index 6175265..91fb342 100644 --- a/benchmark/portals4_macros.h +++ b/benchmark/portals4_macros.h @@ -1,6 +1,6 @@ #ifndef LINKTEST_PORTALS4MACROS_H #define LINKTEST_PORTALS4MACROS_H - +// NOLINTBEGIN #define CHECK_RETURNVAL(x) do { int ret; \ switch (ret = x) { \ case PTL_IGNORED: \ @@ -12,6 +12,7 @@ case PTL_PT_IN_USE: fprintf(stderr, "=> %s returned PTL_PT_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ case PTL_IN_USE: fprintf(stderr, "=> %s returned PTL_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \ default: fprintf(stderr, "=> %s returned failcode %i (line %u)\n", #x, ret, (unsigned int)__LINE__); abort(); break; \ - } } while (0) + } } while (0) +// NOLINTEND #endif //PORTALS4MACROS \ No newline at end of file -- GitLab From c6f7f8cf5273acf3f9350075497c594941ae87da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Sep 2023 10:57:59 +0200 Subject: [PATCH 43/47] Tidied up implicit conversion and unused arguments --- benchmark/vcluster_portals.cc | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc index d5f494f..f408683 100644 --- a/benchmark/vcluster_portals.cc +++ b/benchmark/vcluster_portals.cc @@ -181,18 +181,19 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& getRecvCounter(); releaseSendStructs(); releaseRecvStructs(); - if(timing) { + if (timing != nullptr) + { *timing = watch->getDuration().count() / (2*num_msg); } - + return SUCCESS; } int VirtualClusterPortals::kUniDir( const int from, const int to, - MemoryBuffer& buf1, MemoryBuffer& buf2, - const int 
num_msg, double* const timing, - const bool doBarrier) + MemoryBuffer &buf1, MemoryBuffer &buf2, + const int num_msg, double *const timing, + const bool /*doBarrier*/) { bool isSender = rank() == from; bool isReceiver = rank() == to; @@ -223,10 +224,11 @@ int VirtualClusterPortals::kUniDir( getRecvCounter(); releaseSendStructs(); releaseRecvStructs(); - if(timing) { + if (timing != nullptr) + { *timing = watch->getDuration().count() / num_msg; } - + return SUCCESS; } @@ -262,10 +264,11 @@ int VirtualClusterPortals::kbipingpong( getRecvCounter(); releaseSendStructs(); releaseRecvStructs(); - if(timing) { + if (timing != nullptr) + { *timing = watch->getDuration().count() / (2.0 * num_msg); } - + return SUCCESS; } -- GitLab From 59e38a7c9d7ab78025c073eef9445fc860556df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Fri, 4 Aug 2023 15:39:12 +0200 Subject: [PATCH 44/47] Fix Stage 2023 Intel only has IntelMPI --- test/Default.xml | 6 +++--- test/LayerTest.xml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/Default.xml b/test/Default.xml index a65c93e..68c12e9 100644 --- a/test/Default.xml +++ b/test/Default.xml @@ -33,7 +33,7 @@ <parameter name="MPI" mode="python" tag="!noCompileRunTest"> "ParaStationMPI" if "${System_Name}" == "deep" else { "GCC": "ParaStationMPI,OpenMPI", - "Intel": "ParaStationMPI,OpenMPI,IntelMPI", + "Intel": "IntelMPI", "NVHPC": "ParaStationMPI,OpenMPI" }[ "${Compiler}" ] </parameter> @@ -46,8 +46,8 @@ }[ "${Compiler}" ] </parameter> <parameter name="WithCUDA">("${CUDA}" == "CUDA")</parameter> - <parameter name="Stack">$Compiler $MPI</parameter> - <parameter name="Default_Stack">$DefaultCompiler $DefaultMPI</parameter> + <parameter name="Stack">${Compiler}_${MPI}</parameter> + <parameter name="Default_Stack">${DefaultCompiler}_${DefaultMPI}</parameter> <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else 
""</parameter> <parameter name="Transport_Layer_Settings" mode="python"> "" if not ${WithCUDA} or "${System_Name}" == "deep" else { diff --git a/test/LayerTest.xml b/test/LayerTest.xml index 4594ede..4ab7775 100644 --- a/test/LayerTest.xml +++ b/test/LayerTest.xml @@ -5,14 +5,14 @@ { "juwels": "ibverbs,ucp,tcp,cuda", "jurecadc": "ibverbs,ucp,tcp,cuda", - "deep": "ibverbs,portals" + "deep": "ibverbs,ucp,tcp,cuda,portals" }[ "${System_Name}" ] </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,portals,tcp --> </parameterset> <parameterset name="Slurm" init_with="Default.xml"> <parameter name="Partition" mode="python"> - "dp-bxi" if "${System_Name}" == "portals" else { + "dp-bxi" if "${Messaging_Layer}" == "portals" else { "juwels": { False: "devel", True : "develgpus" -- GitLab From 591b7d0f0add4f3119b365054c734e20960e774d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Sep 2023 11:18:14 +0200 Subject: [PATCH 45/47] Replaced space with underscore in suffix --- test/LinktestMain.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml index 54a1a69..5f1982c 100644 --- a/test/LinktestMain.xml +++ b/test/LinktestMain.xml @@ -49,7 +49,7 @@ <sub source="§SRUN_ARGS§" dest="${SRUN_Arguments}" /> </substituteset> - <step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack} ${CUDA}"> + <step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}_${CUDA}"> <use>Sources</use> <use from="Default.xml">System, Environment, Build</use> <do done_file="ready" error_file="error" tag="!dryRun"> -- GitLab From fb09bdf17d661f7e9432479f2b4a6dd82e24cdf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Sep 2023 16:52:15 +0200 Subject: [PATCH 46/47] Cleaned up error table generation --- test/Default.xml | 2 + test/LinktestMain.xml | 84 
++++++++++++++++++++++------------------ test/execute_base.sbatch | 2 +- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/test/Default.xml b/test/Default.xml index 68c12e9..734aaad 100644 --- a/test/Default.xml +++ b/test/Default.xml @@ -46,7 +46,9 @@ }[ "${Compiler}" ] </parameter> <parameter name="WithCUDA">("${CUDA}" == "CUDA")</parameter> + <parameter name="WithCUDATxt" mode="python">"Yes" if ${WithCUDA} else "No"</parameter> <parameter name="Stack">${Compiler}_${MPI}</parameter> + <parameter name="StackWithCuda">${Stack}_${CUDA}</parameter> <parameter name="Default_Stack">${DefaultCompiler}_${DefaultMPI}</parameter> <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else ""</parameter> <parameter name="Transport_Layer_Settings" mode="python"> diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml index 5f1982c..cf67976 100644 --- a/test/LinktestMain.xml +++ b/test/LinktestMain.xml @@ -3,6 +3,18 @@ <benchmark name="JSC Linktest Test Suite" outpath="runs"> <comment>Testing compilation and common usages of JSC Linktest</comment> + <parameterset name="JUBE_Extra"> + <parameter name="JUBE_REPORT_LAST_CMD" update_mode="step"> + if [ $? -eq 0 ]; then + touch "${jube_wp_abspath}/ready"; + else + echo "${jube_step_name} failed" >> "${jube_wp_abspath}/error"; + fi + </parameter> + </parameterset> + + + <fileset name="Sources"> <copy>../benchmark</copy> </fileset> @@ -50,23 +62,19 @@ </substituteset> <step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}_${CUDA}"> - <use>Sources</use> + <use>JUBE_Extra,Sources</use> <use from="Default.xml">System, Environment, Build</use> <do done_file="ready" error_file="error" tag="!dryRun"> set -x $Load_Modules cd benchmark $Make - if [ $? 
-eq 0 ]; then - touch ../ready; - else - echo "linktest compile failed" >> ../error; - fi + $JUBE_REPORT_LAST_CMD set +x </do> <do done_file="ready" error_file="error" tag="dryRun"> echo "Assume succesful compile" - touch ready + $JUBE_REPORT_LAST_CMD </do> </step> @@ -88,7 +96,7 @@ <step name="CompileLinktestReport" active="'$Stack' == '$Default_Stack'" tag="!noLinktestReportTest"> <use from="Default.xml">Environment</use> - <use>ReportSources</use> + <use>JUBE_Extra,ReportSources</use> <do done_file="ready" error_file="error"> set -x $Load_Modules @@ -96,17 +104,13 @@ python3 -m venv venvLinktest source venvLinktest/bin/activate pip install ./python - if [ $? -eq 0 ]; then - touch ready; - else - echo "inktest-report compile failed" >> error; - fi + $JUBE_REPORT_LAST_CMD deactivate set +x </do> </step> - <step name="CompileRunTest" procs="9" depend="Compile" active="${WithCUDA} == ${WithGPUs}" suffix="${Stack}_${CUDA}" tag="!noCompileRunTest"> + <step name="CompileRunTest" procs="9" depend="Compile" active="${WithCUDA} == ${WithGPUs}" suffix="${StackWithCuda}" tag="!noCompileRunTest"> <use from="CompileRunTest.xml">Linktest_Args</use> <use from="Default.xml">System, Environment, Slurm, Misc</use> <use>ExecutionScript</use> @@ -115,16 +119,13 @@ </step> <step name="LinktestReportTest" procs="7" depend="ModeTest,CompileLinktestReport" active="$No_Sion_File == 0" suffix="${Mode}" tag="!(noLinktestReportTest|noModeTest)"> + <use>JUBE_Extra</use> <do done_file="ready" error_file="error" tag="!dryRun"> set -x $Load_Modules source CompileLinktestReport/venvLinktest/bin/activate linktest-report -i ModeTest/${Report_Name}.sion -o report.pdf - if [ $? 
-eq 0 ]; then - touch ready; - else - echo "linktest-report run failed" >> error; - fi + $JUBE_REPORT_LAST_CMD deactivate set +x </do> @@ -143,38 +144,46 @@ <pattern name="Options">\+ srun .*?\.sion (.*?)\n</pattern> </patternset> - <patternset name="errorFilePatterns"> - <pattern name="error_msg">.*</pattern> + <patternset name="genericPatterns"> + <pattern name="all">.*</pattern> </patternset> <!-- Analyse --> <analyser name="analyseCompiles"> <analyse step="Compile"> - <file use="errorFilePatterns">error</file> + <file use="genericPatterns">error</file> + <file use="genericPatterns">ready</file> </analyse> </analyser> <analyser name="analyseRuns"> - <analyse step="LayerTest" tag="!noLayerTest"> + <analyse step="Compile"> + <file use="genericPatterns">error</file> + <file use="genericPatterns">ready</file> + </analyse> + <analyse step="CompileRunTest" tag="!noCompileRunTest"> <file use="LinktestOutPatterns">linktest.log</file> <file use="LinktestErrPatterns">linktest.error</file> - <file use="errorFilePatterns">error</file> + <file use="genericPatterns">error</file> + <file use="genericPatterns">ready</file> </analyse> - <analyse step="ModeTest" tag="!noModeTest"> + <analyse step="LayerTest" tag="!noLayerTest"> <file use="LinktestOutPatterns">linktest.log</file> <file use="LinktestErrPatterns">linktest.error</file> - <file use="errorFilePatterns">error</file> + <file use="genericPatterns">error</file> + <file use="genericPatterns">ready</file> </analyse> - <analyse step="CompileRunTest" tag="!noCompileRunTest"> + <analyse step="ModeTest" tag="!noModeTest"> <file use="LinktestOutPatterns">linktest.log</file> <file use="LinktestErrPatterns">linktest.error</file> - <file use="errorFilePatterns">error</file> + <file use="genericPatterns">error</file> + <file use="genericPatterns">ready</file> </analyse> </analyser> <analyser name="analyseReports"> <analyse step="LinktestReportTest" tag="!(noLinktestReportTest|noModeTest)"> - <file use="errorFilePatterns">error</file> 
+ <file use="genericPatterns">error</file> </analyse> </analyser> @@ -197,28 +206,27 @@ </result> <result> <use>analyseRuns,analyseReports</use> - <table name="ErrorResult" style="pretty" sort="jube_step_name"> + <table name="RunErrors" style="pretty" sort="jube_step_name,Compiler,MPI,Transport_Layer_Settings,WithCUDATxt,Messaging_Layer,SRUN_Arguments,Options"> <column title="Test">jube_step_name</column> <column title="Compiler">Compiler</column> <column title="MPI">MPI</column> - <column title="Setting">Transport_Layer_Settings</column> + <column title="MPI Settings">Transport_Layer_Settings</column> + <column title="CUDA">WithCUDATxt</column> <column title="Layer">Messaging_Layer</column> <column title="Srun Args">SRUN_Arguments</column> <column title="Options">Options</column> - <column title="Errors">error_msg</column> + <column title="Errors">all</column> </table> </result> <result> <use>analyseCompiles</use> - <table name="CompileErrors" style="pretty" sort="jube_step_name"> + <table name="CompileErrors" style="pretty" sort="jube_step_name,Compiler,MPI,CUDA"> <column title="Test">jube_step_name</column> <column title="Compiler">Compiler</column> <column title="MPI">MPI</column> - <column title="Setting">Transport_Layer_Settings</column> - <column title="Layer">Messaging_Layer</column> - <column title="Srun Args">SRUN_Arguments</column> - <column title="Options">Options</column> - <column title="Errors">error_msg</column> + <column title="MPI Settings">Transport_Layer_Settings</column> + <column title="CUDA">WithCUDATxt</column> + <column title="Errors">all</column> </table> </result> diff --git a/test/execute_base.sbatch b/test/execute_base.sbatch index 02dbf38..21cfb76 100644 --- a/test/execute_base.sbatch +++ b/test/execute_base.sbatch @@ -61,7 +61,7 @@ srun --ntasks=${SLURM_NTASKS} \ # Indicate Success to jube if [ $? 
-ne 0 ]; then - echo "linktest run failed" >> error; + echo "LinkTest run failed" >> error; else touch ready; fi -- GitLab From 3c63dce8deb5089673a9461596caa06c5b6977c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de> Date: Tue, 12 Sep 2023 16:56:45 +0200 Subject: [PATCH 47/47] Removed example install for sionlib --- benchmark/installSIONlib.sh | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 benchmark/installSIONlib.sh diff --git a/benchmark/installSIONlib.sh b/benchmark/installSIONlib.sh deleted file mode 100644 index 01e0cb9..0000000 --- a/benchmark/installSIONlib.sh +++ /dev/null @@ -1,8 +0,0 @@ -wget http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7 -tar -axf 'download.php?version=1.7.7' -mkdir install -cd sionlib -./configure --prefix=/p/project/deepsea/mueller24/linktest/install --disable-fortran # Check/Change Path -cd build-linux-gomp10-openmpi -make -make install \ No newline at end of file -- GitLab