From 51df38fcf75173df03c34f06f1f047200b3d0079 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 8 Jul 2022 14:51:29 +0200
Subject: [PATCH 01/47] Added vcluster_portals.h/.cc Added portals support to
 Makefile Cleaned up Makefile

---
 benchmark/Makefile            | 79 ++++++++++++++++++++++++++++++-----
 benchmark/vcluster_portals.cc | 31 ++++++++++++++
 benchmark/vcluster_portals.h  | 23 ++++++++++
 3 files changed, 122 insertions(+), 11 deletions(-)
 create mode 100644 benchmark/vcluster_portals.cc
 create mode 100644 benchmark/vcluster_portals.h

diff --git a/benchmark/Makefile b/benchmark/Makefile
index f28a140..05f9e24 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -6,21 +6,35 @@
 #**                                                                         **
 #**  See the file COPYRIGHT in the package base directory for details       **
 #****************************************************************************/
-PREFIX    = /usr/local/bin
 
-USE_POSIX                 = 1
+# DEFAULTS
+PREFIX    = /usr/local/bin
 
-HAVE_SION                 = 1
-HAVE_MPI                  = 1
-HAVE_MINIPMI              = 0
-HAVE_TCP                  = 1
-HAVE_IBVERBS              = 0
-HAVE_PSM2                 = 0
-HAVE_CUDA                 = 0
-HAVE_UCP                  = 0
+USE_POSIX     = 1
+HAVE_SION     = 1
+HAVE_MPI      = 1
+HAVE_MINIPMI  = 0
+HAVE_TCP      = 1
+HAVE_IBVERBS  = 0
+HAVE_PSM2     = 0
+HAVE_CUDA     = 0
+HAVE_UCP      = 0
+HAVE_PORTALS  = 0
 
 FSANITIZE = address
 
+SYSTEM   = generic
+GIT_HASH = $(shell git rev-parse --verify HEAD)
+GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
+CC       = mpicxx
+CFLAGS   = -std=c++17 -Wall -O2
+CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
+LD       = $(CC)
+LDFLAGS  =
+LIBS     =
+
+# Handle Dependencies
+# =========================================
 ifeq (1, $(HAVE_IBVERBS))
 	HAVE_MINIPMI = 1
 	HAVE_TCP     = 1
@@ -37,6 +51,10 @@ ifeq (1, $(HAVE_UCP))
 	HAVE_MINIPMI = 1
 	HAVE_TCP     = 1
 endif
+ifeq (1, $(HAVE_PORTALS))
+	HAVE_MINIPMI = 1
+	HAVE_TCP     = 1
+endif
 
 ifdef V
 $(info USE_POSIX    = $(USE_POSIX))
@@ -48,7 +66,9 @@ $(info HAVE_IBVERBS = $(HAVE_IBVERBS))
 $(info HAVE_PSM2    = $(HAVE_PSM2))
 $(info HAVE_CUDA    = $(HAVE_CUDA))
 $(info HAVE_UCP     = $(HAVE_UCP))
+$(info HAVE_PORTALS = $(HAVE_PORTALS))
 endif
+# =========================================
 
 SYSTEM   = generic
 GIT_HASH = $(shell git rev-parse --verify HEAD)
@@ -103,6 +123,8 @@ $(error CUARCH is not set)
 	LIBS     += -lcuda -lcudart
 endif
 
+# DEFINE EXECUTABLES
+# =========================================
 linktest-versions = 
 ifeq (1, $(HAVE_MPI))
 	linktest-versions += linktest.mpi
@@ -120,6 +142,9 @@ ifeq (1, $(HAVE_MINIPMI))
 	ifeq (1, $(HAVE_UCP))
 		linktest-versions += linktest.ucp
 	endif
+	ifeq (1, $(HAVE_PORTALS))
+		linktest-versions += linktest.portals
+	endif
 	ifeq (1, $(HAVE_CUDA))
 		linktest-versions += linktest.cuda
 	endif
@@ -128,7 +153,11 @@ endif
 ifdef V
 $(info linktest-versions = $(linktest-versions))
 endif
+# =========================================
+
 
+# DEFINE OBJECT FILES AND FLAGS
+# =========================================
 linktest-obj = linktest.o \
                system.o \
                benchmark.o \
@@ -159,6 +188,9 @@ ifeq (1, $(HAVE_TCP))
 	CFLAGS       += -DHAVE_VCLUSTER_TCP=1
 endif
 ifeq (1, $(HAVE_MINIPMI))
+	CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1
+	LDFLAGS  += -Lminipmi
+	LIBS     += -lminipmi
 	ifeq (1, $(HAVE_IBVERBS))
 		linktest-obj += vcluster_ibverbs.o \
 		                ibverbs_mr.o \
@@ -179,19 +211,39 @@ ifeq (1, $(HAVE_MINIPMI))
 		CFLAGS       += -DHAVE_VCLUSTER_UCP=1
 		LIBS         += -lucp
 	endif
+	ifeq (1, $(HAVE_PORTALS))
+		linktest-obj += vcluster_portals.o
+		CFLAGS       += -DHAVE_VCLUSTER_PORTALS=1 
+		LDFLAGS      += -Lportals
+		LIBS         += -lportals
+	endif
 	ifeq (1, $(HAVE_CUDA))
 		linktest-obj += vcluster_cuda.o \
 		                cuda_kernels.o \
 		                gpu_nvidia.o \
 		                memory_cuda.o
-		CFLAGS       += -DHAVE_VCLUSTER_CUDA=1
+		CU            = nvcc
+		CUARCH        = 
+		CUFLAGS       = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1
+		CPPFLAGS     += -I$(CUDA)/include -DHAVE_CUDA=1
+		LDFLAGS      += -L$(CUDA)/lib
+		LIBS         += -lcuda -lcudart
 	endif
 endif
 
 ifeq (1, $(HAVE_SION))
 	linktest-obj += vcluster_sion_generic_adapter.o
+	CPPFLAGS     += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi)
+	LIBS         += $(shell sionconfig --64 --gcc --libs --mpi)
 endif
 
+ifeq (1, ${USE_POSIX})
+	CPPFLAGS += -D__USE_POSIX
+endif
+# =========================================
+
+# DEFINE MAKE RULES
+# =========================================
 ifdef V
 	Q =
 else
@@ -260,6 +312,10 @@ linktest.ucp: linktest
 	@echo " "LN $@
 	$(call link,ucp)
 
+linktest.portals: linktest
+	@echo " "LN $@
+	$(call link,portals)
+
 linktest.cuda: linktest
 	@echo " "LN $@
 	$(call link,cuda)
@@ -274,3 +330,4 @@ install: linktest $(linktest-versions)
 	for f in $^ ; do              \
 		cp -d $$f $(PREFIX)/$$f ; \
 	done
+# =========================================
\ No newline at end of file
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
new file mode 100644
index 0000000..1f49078
--- /dev/null
+++ b/benchmark/vcluster_portals.cc
@@ -0,0 +1,31 @@
+/****************************************************************************
+**  LinkTest                                                               **
+*****************************************************************************
+**  Copyright (c) 2008-2022                                                **
+**  Forschungszentrum Juelich, Juelich Supercomputing Centre               **
+**                                                                         **
+**  See the file COPYRIGHT in the package base directory for details       **
+****************************************************************************/
+#include "vcluster_portals.h"
+extern "C" {
+#include <minipmi.h>
+#include <portals4.h>
+}
+
+#include "error.h"
+
+int VirtualClusterPortals::init()
+{
+    int ret = PtlInit();
+    if (ret == PTL_FAIL) {
+        fatal("Portals failed to initialize");
+        return ERROR;
+    }
+    return SUCCESS;
+}
+
+int VirtualClusterPortals::finalize()
+{
+    PtlFini();
+    return SUCCESS;
+}
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
new file mode 100644
index 0000000..09b07e7
--- /dev/null
+++ b/benchmark/vcluster_portals.h
@@ -0,0 +1,23 @@
+/****************************************************************************
+**  LinkTest                                                               **
+*****************************************************************************
+**  Copyright (c) 2008-2022                                                **
+**  Forschungszentrum Juelich, Juelich Supercomputing Centre               **
+**                                                                         **
+**  See the file COPYRIGHT in the package base directory for details       **
+****************************************************************************/
+#ifndef LINKTEST_VCLUSTER_PORTALS_H
+#define LINKTEST_VCLUSTER_PORTALS_H
+
+#include "vcluster.h"
+
+// VirtualCluster implementation based on a Portals 4
+class VirtualClusterPortals : public VirtualClusterWithHelper
+{
+
+public:
+    virtual int init() override;
+    virtual int finalize() override;
+};
+
+#endif
-- 
GitLab


From 67b165b31d3e42134c127f6806d541a62467070a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Jul 2022 14:24:41 +0200
Subject: [PATCH 02/47] tmp

---
 benchmark/vcluster.cc         | 19 +++++++++++++++----
 benchmark/vcluster.h          | 15 ++++++++-------
 benchmark/vcluster_portals.cc | 22 +++++++++++++++++++---
 benchmark/vcluster_portals.h  |  2 ++
 benchmark/vcluster_tcp.cc     |  4 ++--
 5 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc
index c493241..735f8a3 100644
--- a/benchmark/vcluster.cc
+++ b/benchmark/vcluster.cc
@@ -25,6 +25,9 @@
 #if HAVE_VCLUSTER_UCP == 1
 #include "vcluster_ucp.h"
 #endif
+#if HAVE_VCLUSTER_PORTALS == 1
+#include "vcluster_portals.h"
+#endif
 #if HAVE_VCLUSTER_CUDA == 1
 #include "vcluster_cuda.h"
 #endif
@@ -419,6 +422,11 @@ VirtualCluster*  VirtualCluster::factory(const std::string& name){
         return new VirtualClusterUCP(name);
     } else
 #endif
+#if 1 == HAVE_VCLUSTER_PORTALS
+    if (VirtualClusterPortals::NAME == name) {
+        return new VirtualClusterPortals();
+    } else
+#endif
 #if 1 == HAVE_VCLUSTER_CUDA
     if ("cuda" == name) {
         return new VirtualClusterCUDA(name);
@@ -444,28 +452,28 @@ void VirtualClusterWithHelper::set_helper_pointer(VirtualCluster* helper)
 int VirtualClusterWithHelper::rank()
 {
     if (unlikely(!helper_))
-        throw;  // Simply returning -1 will result in complicated bugs
+        fatal("rank() called on a VirtualClusterWithHelper with undefined helper");
     return helper_->rank();
 }
 
 int VirtualClusterWithHelper::size()
 {
     if (unlikely(!helper_))
-        throw;  // Simply returning -1 will result in complicated bugs
+        fatal("size() called on a VirtualClusterWithHelper with undefined helper");
     return helper_->size();
 }
 
 int VirtualClusterWithHelper::send(int dst, MemoryBuffer& buf)
 {
     if (unlikely(!helper_))
-        return -1;
+        fatal("send() called on a VirtualClusterWithHelper with undefined helper");
     return helper_->send(dst, buf);
 }
 
 int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf)
 {
     if (unlikely(!helper_))
-        return -1;
+        fatal("recv() called on a VirtualClusterWithHelper with undefined helper");
     return helper_->recv(src, buf);
 }
 
@@ -486,6 +494,9 @@ const char* VirtualCluster::impls[] =
         #if 1 == HAVE_VCLUSTER_UCP
         "ucp",
         #endif
+        #if 1 == HAVE_VCLUSTER_PORTALS
+        VirtualClusterPortals::NAME,
+        #endif
         #if 1 == HAVE_VCLUSTER_CUDA
         "cuda",
         #endif
diff --git a/benchmark/vcluster.h b/benchmark/vcluster.h
index 751c4fe..f758b75 100644
--- a/benchmark/vcluster.h
+++ b/benchmark/vcluster.h
@@ -296,13 +296,14 @@ private:
           std::shared_ptr<int[]> hostLocalRanks_;
 };
 
-/* Since a full implementation of send()/recv() logic on top of some transport layer is
+/* VirtualClusterWithHelper 
+ * delegates calls to rank(), size(), send() and recv() to another VirtualCluster (the helper)
+ * executes calls to benchmark kernels directly
+ *
+ * Since a full implementation of send()/recv() logic on top of some transport layer is
  * more complicated than the logic required for the implementation of the communication
- * in kpingpong() it make sense to use a different VirtualCluster for the management 
- * communication than for the actual benchmark. 
- * VirtualClusterWithHelper allows derived classes to easily re-use another VirtualCluster
- * instance. We do not use inheritance since the helper logic is not really an "is-a"
- * relation.
+ * in our kernels it make sense to use a different VirtualCluster for the management 
+ * communication than for the actual benchmark.
  */
 class VirtualClusterWithHelper : public VirtualCluster {
 
@@ -320,7 +321,7 @@ public:
     int                    recv(int src, MemoryBuffer& buf) override;
 
 protected:
-    void    set_helper_pointer(VirtualCluster* helper);
+    void set_helper_pointer(VirtualCluster* helper);
     VirtualCluster*         helper_;
 
 };
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 1f49078..db4a1e0 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -7,20 +7,30 @@
 **  See the file COPYRIGHT in the package base directory for details       **
 ****************************************************************************/
 #include "vcluster_portals.h"
+#include "error.h"
 extern "C" {
 #include <minipmi.h>
 #include <portals4.h>
 }
 
-#include "error.h"
-
 int VirtualClusterPortals::init()
 {
-    int ret = PtlInit();
+    auto ret = PtlInit();
     if (ret == PTL_FAIL) {
         fatal("Portals failed to initialize");
         return ERROR;
     }
+    helper_ = nullptr;
+    set_helper_pointer(VirtualCluster::factory("tcp"));
+    if (helper_ == nullptr) {
+        fatal("Portals helper (VirtualClusterTCP) not constructed");
+        return ERROR;
+    }
+    ret = helper_->init();
+    if (ret != SUCCESS) {
+        fatal("Portals helper (VirtualClusterTCP) failed to initialize");
+        return ERROR;
+    }
     return SUCCESS;
 }
 
@@ -29,3 +39,9 @@ int VirtualClusterPortals::finalize()
     PtlFini();
     return SUCCESS;
 }
+
+
+VirtualClusterPortals::VirtualClusterPortals()
+: VirtualClusterWithHelper(VirtualClusterPortals::NAME)
+{
+}
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 09b07e7..7b91507 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -16,6 +16,8 @@ class VirtualClusterPortals : public VirtualClusterWithHelper
 {
 
 public:
+    inline static const char * NAME = "portals";
+    VirtualClusterPortals();
     virtual int init() override;
     virtual int finalize() override;
 };
diff --git a/benchmark/vcluster_tcp.cc b/benchmark/vcluster_tcp.cc
index 494d4c4..2e7d001 100644
--- a/benchmark/vcluster_tcp.cc
+++ b/benchmark/vcluster_tcp.cc
@@ -599,9 +599,9 @@ int VirtualClusterTCP::init()
 {
     auto ret = linktest_minipmi_context_borrow(&pmi_);
 #if 1 == HAVE_MINIPMI
-    if (unlikely(ret)) {
+    if (ret != SUCCESS) {
         error("linktest_minipmi_context_borrow() failed.");
-        return ERROR;
+        return ret;
     }
 #endif
 
-- 
GitLab


From b4cd105bdf6a4a78a9944777673b4a4e447201d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 13 Jul 2022 17:13:16 +0200
Subject: [PATCH 03/47] Added Network Interface initialization

---
 benchmark/vcluster_portals.cc | 49 ++++++++++++++++++++++++++--------
 benchmark/vcluster_portals.h  | 50 +++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index db4a1e0..9068a91 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -8,38 +8,65 @@
 ****************************************************************************/
 #include "vcluster_portals.h"
 #include "error.h"
-extern "C" {
-#include <minipmi.h>
-#include <portals4.h>
-}
 
 int VirtualClusterPortals::init()
 {
-    auto ret = PtlInit();
-    if (ret == PTL_FAIL) {
-        fatal("Portals failed to initialize");
-        return ERROR;
-    }
     helper_ = nullptr;
     set_helper_pointer(VirtualCluster::factory("tcp"));
     if (helper_ == nullptr) {
         fatal("Portals helper (VirtualClusterTCP) not constructed");
         return ERROR;
     }
-    ret = helper_->init();
+    auto ret = helper_->init();
     if (ret != SUCCESS) {
         fatal("Portals helper (VirtualClusterTCP) failed to initialize");
         return ERROR;
     }
+
+    if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) {
+        warn("Portals versions other than 4.0 may not be suppported");
+    }
+    ret = PTL_OK;
+    ret = PtlInit();
+    ret &= PtlNIInit(
+        PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README"
+        PTL_NI_PHYSICAL | PTL_NI_MATCHING,
+        rank(),
+        nullptr,// &mni_limits_desired
+        &mni_limits_actual,
+        &mni_handle);
+    ret &= PtlNIInit(
+        PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README"
+        PTL_NI_PHYSICAL | PTL_NI_NO_MATCHING, 
+        rank(), 
+        nullptr, // &ni_limits_desired
+        &nni_limits_actual, 
+        &nni_handle);
+    if (ret != PTL_OK) {
+        fatal("Portals failed to initialize");
+        return ERROR;
+    }
+
     return SUCCESS;
 }
 
 int VirtualClusterPortals::finalize()
 {
-    PtlFini();
+    auto ret = PtlNIFini(nni_handle);
+    ret &= PtlNIFini(mni_handle);
+    ret &= PtlFini();
+    if (ret != PTL_OK) {
+        fatal("Portals failed to finalize");
+        return ERROR;
+    }
+    this->helper_->finalize();
     return SUCCESS;
 }
 
+int VirtualClusterPortals::kpingpong_send(int other, MemoryBuffer& buf) {
+    return -1;
+}
+
 
 VirtualClusterPortals::VirtualClusterPortals()
 : VirtualClusterWithHelper(VirtualClusterPortals::NAME)
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 7b91507..e89f17b 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -9,6 +9,11 @@
 #ifndef LINKTEST_VCLUSTER_PORTALS_H
 #define LINKTEST_VCLUSTER_PORTALS_H
 
+extern "C" {
+#include <minipmi.h>
+#include <portals4.h>
+}
+
 #include "vcluster.h"
 
 // VirtualCluster implementation based on a Portals 4
@@ -20,6 +25,51 @@ public:
     VirtualClusterPortals();
     virtual int init() override;
     virtual int finalize() override;
+
+    virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, 
+                          const int num_msg, double* const timing) override {
+        throw("Not Implemented");
+    };
+
+    virtual int kUniDir(const int from, const int to,
+                        MemoryBuffer& buf1, MemoryBuffer& buf2,
+                        const int num_msg, double* const timing,
+                        const bool doBarrier) override {
+        throw("Not Implemented");
+    };
+    virtual int kUniDirMultiBuf(const int from,const int to,
+                                MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
+                                const int num_msg, double* const timing,
+                                const bool doBarrier) override {
+        throw("Not Implemented");
+    };
+    virtual int kUniDirLimitedMultiBuf(const int from,const int to,
+                                       MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
+                                       const int num_msg, double* const timing,
+                                       const bool doBarrier) override {
+        throw("Not Implemented");
+    };
+
+    virtual int kbipingpong(const int from, const int to,
+                            MemoryBuffer& buf1, MemoryBuffer& buf2,
+                            const int num_msg, double* const timing) override {
+        throw("Not Implemented");
+    };
+
+private:
+    int kpingpong_send(int other, MemoryBuffer& buf);
+    int kpingpong_recv(int other, MemoryBuffer& buf);
+
+    // matching (send/recv) Network Interface (ni)
+    ptl_ni_limits_t mni_limits_requested;
+    ptl_ni_limits_t mni_limits_actual;
+    ptl_handle_ni_t mni_handle;
+
+    // Non matching (put) Network Interface (ni)
+    ptl_ni_limits_t nni_limits_requested;
+    ptl_ni_limits_t nni_limits_actual;
+    ptl_handle_ni_t nni_handle;
+
 };
 
 #endif
-- 
GitLab


From 87c0fcf13a042362fadf874ac1fc7d24f1b3604a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 15 Jul 2022 17:52:50 +0200
Subject: [PATCH 04/47] To be reverted

---
 benchmark/Makefile | 4 ++--
 exampleBuild.sh    | 2 +-
 exampleRun.sh      | 9 +++++----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index 05f9e24..c7a2d10 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -27,8 +27,8 @@ SYSTEM   = generic
 GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
 CC       = mpicxx
-CFLAGS   = -std=c++17 -Wall -O2
-CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
+CFLAGS   = -std=c++17 -Wall -g -rdynamic
+CPPFLAGS = -DDEBUG_PORTALS -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
 LD       = $(CC)
 LDFLAGS  =
 LIBS     =
diff --git a/exampleBuild.sh b/exampleBuild.sh
index 00ac73b..492d10b 100755
--- a/exampleBuild.sh
+++ b/exampleBuild.sh
@@ -23,7 +23,7 @@ export CPATH=$CPATH:~/.local/include/;
 mkdir -p install;
 cd benchmark;
 make clean
-make -j HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install;
+make -j HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install;
 make clean
 cd ..;
 # Install linktest-report
diff --git a/exampleRun.sh b/exampleRun.sh
index 0600c26..fc15465 100755
--- a/exampleRun.sh
+++ b/exampleRun.sh
@@ -10,13 +10,14 @@
 ml GCC ParaStationMPI SIONlib
 
 salloc \
---partition devel \
---account cstao \
+--partition dp-bxi \
+--reservation maint-bxi \
+--account deepsea \
 --nodes 2 \
 	srun \
 	--ntasks 4 \
 		install/linktest \
-		--mode mpi \
+		--mode portals \
 		--num-warmup-messages 10 \
 		--num-messages 100 \
-		--size-messages $((16*1024*1024));
\ No newline at end of file
+		--size-messages $((16));
\ No newline at end of file
-- 
GitLab


From 74a13f4befd532a4ef3ea03b9def157014f88b90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 15 Jul 2022 17:54:18 +0200
Subject: [PATCH 05/47] Implemented Initialization incl nid/pid to rank mapping

---
 benchmark/portals4_macros.h   |  17 ++++++
 benchmark/vcluster_portals.cc | 109 +++++++++++++++++++++++-----------
 benchmark/vcluster_portals.h  |  14 ++---
 3 files changed, 96 insertions(+), 44 deletions(-)
 create mode 100644 benchmark/portals4_macros.h

diff --git a/benchmark/portals4_macros.h b/benchmark/portals4_macros.h
new file mode 100644
index 0000000..6175265
--- /dev/null
+++ b/benchmark/portals4_macros.h
@@ -0,0 +1,17 @@
+#ifndef LINKTEST_PORTALS4MACROS_H
+#define LINKTEST_PORTALS4MACROS_H
+
+#define CHECK_RETURNVAL(x) do { int ret; \
+    switch (ret = x) { \
+        case PTL_IGNORED: \
+        case PTL_OK: break; \
+        case PTL_FAIL: fprintf(stderr, "=> %s returned PTL_FAIL (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        case PTL_NO_SPACE: fprintf(stderr, "=> %s returned PTL_NO_SPACE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        case PTL_ARG_INVALID: fprintf(stderr, "=> %s returned PTL_ARG_INVALID (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        case PTL_NO_INIT: fprintf(stderr, "=> %s returned PTL_NO_INIT (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        case PTL_PT_IN_USE: fprintf(stderr, "=> %s returned PTL_PT_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        case PTL_IN_USE: fprintf(stderr, "=> %s returned PTL_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
+        default: fprintf(stderr, "=> %s returned failcode %i (line %u)\n", #x, ret, (unsigned int)__LINE__); abort(); break; \
+    } } while (0)
+
+#endif //PORTALS4MACROS
\ No newline at end of file
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 9068a91..b798479 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -7,54 +7,57 @@
 **  See the file COPYRIGHT in the package base directory for details       **
 ****************************************************************************/
 #include "vcluster_portals.h"
+#include "portals4_macros.h"
+#include "memory.h"
 #include "error.h"
 
+void VirtualClusterPortals::initPhysicalFromRank() {
+    ptl_process_t physId;
+    CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) );
+
+    #if defined(DEBUG_PORTALS)
+    info("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d",
+        rank(),
+        hostname().c_str(),
+        physId.phys.nid,
+        physId.phys.pid);
+    #endif
+    
+    physicalFromRank.resize(size());
+    gather(0, physicalFromRank.data(), &physId, 1);
+    bcast(0, physicalFromRank.data(), size());
+
+    if(physicalFromRank.at(rank()).phys.nid != physId.phys.nid) fatal("Failed to broadcast physicalFromRank");
+    if(physicalFromRank.at(rank()).phys.pid != physId.phys.pid) fatal("Failed to broadcast physicalFromRank");
+}
+
 int VirtualClusterPortals::init()
 {
-    helper_ = nullptr;
     set_helper_pointer(VirtualCluster::factory("tcp"));
-    if (helper_ == nullptr) {
-        fatal("Portals helper (VirtualClusterTCP) not constructed");
-        return ERROR;
-    }
-    auto ret = helper_->init();
-    if (ret != SUCCESS) {
-        fatal("Portals helper (VirtualClusterTCP) failed to initialize");
-        return ERROR;
-    }
-
+    EXEC_NOFAIL(helper_->init());
+    
     if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) {
         warn("Portals versions other than 4.0 may not be suppported");
     }
-    ret = PTL_OK;
-    ret = PtlInit();
-    ret &= PtlNIInit(
+    CHECK_RETURNVAL(PtlInit());
+    CHECK_RETURNVAL(PtlNIInit(
         PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README"
-        PTL_NI_PHYSICAL | PTL_NI_MATCHING,
-        rank(),
+        PTL_NI_LOGICAL | PTL_NI_MATCHING, // Logical => using ranks, Matching => using send/recv semantics
+        PTL_PID_ANY,
         nullptr,// &mni_limits_desired
         &mni_limits_actual,
-        &mni_handle);
-    ret &= PtlNIInit(
-        PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README"
-        PTL_NI_PHYSICAL | PTL_NI_NO_MATCHING, 
-        rank(), 
-        nullptr, // &ni_limits_desired
-        &nni_limits_actual, 
-        &nni_handle);
-    if (ret != PTL_OK) {
-        fatal("Portals failed to initialize");
-        return ERROR;
-    }
+        &mni_handle));
+
+    initPhysicalFromRank();
+    CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()));
 
     return SUCCESS;
 }
 
 int VirtualClusterPortals::finalize()
 {
-    auto ret = PtlNIFini(nni_handle);
-    ret &= PtlNIFini(mni_handle);
-    ret &= PtlFini();
+    auto ret = PtlNIFini(mni_handle);
+    PtlFini();
     if (ret != PTL_OK) {
         fatal("Portals failed to finalize");
         return ERROR;
@@ -63,9 +66,47 @@ int VirtualClusterPortals::finalize()
     return SUCCESS;
 }
 
-int VirtualClusterPortals::kpingpong_send(int other, MemoryBuffer& buf) {
-    return -1;
-}
+// int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
+// {   
+//     std::printf("VirtualClusterPortals::kpingpong");
+//     // Sender only
+//     ptl_md_t        md; // Memory Descriptor
+//     ptl_handle_md_t md_handle;
+//     if (rank() == from) {
+//         md.start  = buf.p();
+//         md.length = buf.len();
+//         md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
+//         md.eq_handle = PTL_EQ_NONE;
+//         #if defined(DEBUG_PORTALS)
+//         std::printf("PtlMDBind");
+//         #endif
+//         PtlMDBind(this->mni_handle, nullptr, &md_handle);
+//     }
+//     // Receiver only
+//     ptl_me_t        me; // Match List Entry
+//     ptl_handle_me_t me_handle; 
+//     if (rank() == to) {
+//         // Allow puts to buf from anyone
+//         me.start  = buf.p();
+//         me.length = buf.len();
+//         me.uid    = PTL_UID_ANY;
+//         me.match_id.phys.nid = PTL_NID_ANY;
+//         me.match_id.phys.pid = PTL_PID_ANY;
+//         me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
+//         #if defined(DEBUG_PORTALS)
+//         std::printf("PtlMEAppend");
+//         #endif
+//         PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle);
+//     }
+
+//     if (rank() == to) {
+//         ptl_ct_event_t  ct; // event counter
+//         //ret = PtlCTWait(me.ct_handle, num_msg, &ct); //TODO this is unidirectional by default
+//     } else if (rank() == from) {
+//         //PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, rankToNidPid.at(rank()), pt_index, 1, 0, NULL, 0));
+//         //PtlCTWait(write_md.ct_handle, 2, &ctc));
+//     }
+// }
 
 
 VirtualClusterPortals::VirtualClusterPortals()
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index e89f17b..e3eadf3 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -13,7 +13,7 @@ extern "C" {
 #include <minipmi.h>
 #include <portals4.h>
 }
-
+#include <vector>
 #include "vcluster.h"
 
 // VirtualCluster implementation based on a Portals 4
@@ -57,19 +57,13 @@ public:
     };
 
 private:
-    int kpingpong_send(int other, MemoryBuffer& buf);
-    int kpingpong_recv(int other, MemoryBuffer& buf);
-
     // matching (send/recv) Network Interface (ni)
-    ptl_ni_limits_t mni_limits_requested;
+    ptl_ni_limits_t mni_limits_desired;
     ptl_ni_limits_t mni_limits_actual;
     ptl_handle_ni_t mni_handle;
+    std::vector<ptl_process_t> physicalFromRank;
 
-    // Non matching (put) Network Interface (ni)
-    ptl_ni_limits_t nni_limits_requested;
-    ptl_ni_limits_t nni_limits_actual;
-    ptl_handle_ni_t nni_handle;
-
+    void initPhysicalFromRank();
 };
 
 #endif
-- 
GitLab


From 111608eef935312bea0ea25152e84bf4f4abb003 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Mon, 18 Jul 2022 14:44:24 +0200
Subject: [PATCH 06/47] Implemented put

---
 benchmark/vcluster_portals.cc | 116 +++++++++++++++++++++-------------
 benchmark/vcluster_portals.h  |   8 +--
 2 files changed, 75 insertions(+), 49 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index b798479..8254d2c 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -11,7 +11,7 @@
 #include "memory.h"
 #include "error.h"
 
-void VirtualClusterPortals::initPhysicalFromRank() {
+std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
     ptl_process_t physId;
     CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) );
 
@@ -23,12 +23,14 @@ void VirtualClusterPortals::initPhysicalFromRank() {
         physId.phys.pid);
     #endif
     
-    physicalFromRank.resize(size());
+    std::vector<ptl_process_t> physicalFromRank(size());
     gather(0, physicalFromRank.data(), &physId, 1);
     bcast(0, physicalFromRank.data(), size());
 
     if(physicalFromRank.at(rank()).phys.nid != physId.phys.nid) fatal("Failed to broadcast physicalFromRank");
     if(physicalFromRank.at(rank()).phys.pid != physId.phys.pid) fatal("Failed to broadcast physicalFromRank");
+
+    return physicalFromRank;
 }
 
 int VirtualClusterPortals::init()
@@ -48,7 +50,7 @@ int VirtualClusterPortals::init()
         &mni_limits_actual,
         &mni_handle));
 
-    initPhysicalFromRank();
+    auto physicalFromRank = getPhysicalFromRank();
     CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()));
 
     return SUCCESS;
@@ -66,47 +68,75 @@ int VirtualClusterPortals::finalize()
     return SUCCESS;
 }
 
-// int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
-// {   
-//     std::printf("VirtualClusterPortals::kpingpong");
-//     // Sender only
-//     ptl_md_t        md; // Memory Descriptor
-//     ptl_handle_md_t md_handle;
-//     if (rank() == from) {
-//         md.start  = buf.p();
-//         md.length = buf.len();
-//         md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
-//         md.eq_handle = PTL_EQ_NONE;
-//         #if defined(DEBUG_PORTALS)
-//         std::printf("PtlMDBind");
-//         #endif
-//         PtlMDBind(this->mni_handle, nullptr, &md_handle);
-//     }
-//     // Receiver only
-//     ptl_me_t        me; // Match List Entry
-//     ptl_handle_me_t me_handle; 
-//     if (rank() == to) {
-//         // Allow puts to buf from anyone
-//         me.start  = buf.p();
-//         me.length = buf.len();
-//         me.uid    = PTL_UID_ANY;
-//         me.match_id.phys.nid = PTL_NID_ANY;
-//         me.match_id.phys.pid = PTL_PID_ANY;
-//         me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
-//         #if defined(DEBUG_PORTALS)
-//         std::printf("PtlMEAppend");
-//         #endif
-//         PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle);
-//     }
+int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
+{
+    ptl_pt_index_t  pt_index;
+    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
+
+    // Sender only
+    ptl_md_t        md; // Memory Descriptor
+    ptl_handle_md_t md_handle;
+    if (rank() == from) {
+        md.start  = buf.p();
+        md.length = buf.len();
+        md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
+        md.eq_handle = PTL_EQ_NONE;
+        #if defined(DEBUG_PORTALS)
+        std::printf("PtlMDBind");
+        #endif
+        CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) );  // Activate event counter
+        CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
+    }
+    // Receiver only
+    ptl_me_t        me; // Match List Entry
+    ptl_handle_me_t me_handle; 
+    if (rank() == to) {
+        me.start  = buf.p();
+        me.length = buf.len();
+        me.uid    = PTL_UID_ANY;
+        me.match_id.phys.nid = PTL_NID_ANY;
+        me.match_id.phys.pid = PTL_PID_ANY;
+        me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
+        #if defined(DEBUG_PORTALS)
+        std::printf("PtlMEAppend");
+        #endif
+        CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) );                                   // Activate event counter
+        CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
+    }
+    barrier();
+    ptl_ct_event_t  ct; // event counter
+    if (rank() == to) {
+        for(auto n = 0; n < num_msg; n++) {
+            #if defined(DEBUG_PORTALS)
+            barrier();
+            PtlCTGet(md.ct_handle, &ct);
+            info("Received puts %d. Failed puts %d", ct.success, ct.failure);
+            #endif
+        }
+        CHECK_RETURNVAL( PtlCTWait(me.ct_handle, num_msg, &ct) ); //TODO is this unidirectional by default
+    } else if (rank() == from) {
+        ptl_process_t target;
+        target.rank = to;
+        const ptl_size_t localOffset = 0;
+        const ptl_size_t remoteOffset = 0;
+        const ptl_match_bits_t match_bits = 1; //TDOD why?
+        const ptl_hdr_data_t header_data = 0;
+        for(auto n = 1; n <= num_msg; n++) {
+            #if defined(DEBUG_PORTALS)
+            info("Sending message number %d", n);
+            #endif
+            CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
+            #if defined(DEBUG_PORTALS)
+            barrier();
+            PtlCTGet(md.ct_handle, &ct);
+            info("Successful puts %d. Failed puts %d", ct.success, ct.failure);
+            #endif
+        }
+        CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) );
+    }
 
-//     if (rank() == to) {
-//         ptl_ct_event_t  ct; // event counter
-//         //ret = PtlCTWait(me.ct_handle, num_msg, &ct); //TODO this is unidirectional by default
-//     } else if (rank() == from) {
-//         //PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, rankToNidPid.at(rank()), pt_index, 1, 0, NULL, 0));
-//         //PtlCTWait(write_md.ct_handle, 2, &ctc));
-//     }
-// }
+    return SUCCESS;
+}
 
 
 VirtualClusterPortals::VirtualClusterPortals()
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index e3eadf3..4afa003 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -10,7 +10,6 @@
 #define LINKTEST_VCLUSTER_PORTALS_H
 
 extern "C" {
-#include <minipmi.h>
 #include <portals4.h>
 }
 #include <vector>
@@ -27,9 +26,7 @@ public:
     virtual int finalize() override;
 
     virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, 
-                          const int num_msg, double* const timing) override {
-        throw("Not Implemented");
-    };
+                          const int num_msg, double* const timing) override;
 
     virtual int kUniDir(const int from, const int to,
                         MemoryBuffer& buf1, MemoryBuffer& buf2,
@@ -61,9 +58,8 @@ private:
     ptl_ni_limits_t mni_limits_desired;
     ptl_ni_limits_t mni_limits_actual;
     ptl_handle_ni_t mni_handle;
-    std::vector<ptl_process_t> physicalFromRank;
 
-    void initPhysicalFromRank();
+    std::vector<ptl_process_t> getPhysicalFromRank();
 };
 
 #endif
-- 
GitLab


From aff5aa34f7f8541617c9b534b871d054a9205eee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 5 Aug 2022 14:10:16 +0200
Subject: [PATCH 07/47] Added quick test scripts

---
 build.sh | 12 ++++++++++++
 run.sh   | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100755 build.sh
 create mode 100755 run.sh

diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..24d12ec
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,12 @@
+# 1. Set-Up Environment
+ml Stages/2020  GCC/10.3.0  ParaStationMPI/5.4.9-1 SIONlib/1.7.7;
+
+# 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support
+#export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/;
+#export CPATH=$CPATH:~/.local/include/;
+
+# 3. Install linktest in folder install
+mkdir -p install;
+cd benchmark;
+make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install clean;
+cd ..;
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..621c112
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+#SBATCH --partition dp-bxi
+#SBATCH --reservation maint-bxi
+#SBATCH --account deepsea
+#SBATCH --nodes 2 
+ml Stages/2020  GCC/10.3.0  ParaStationMPI/5.4.9-1 SIONlib/1.7.7;
+
+export PORTALS4_DEBUG=3
+
+srun \
+--label \
+--ntasks 4 \
+	install/linktest \
+	--mode portals \
+	--num-warmup-messages 10 \
+	--num-messages 100 \
+	--size-messages $((16)) \
+	--no-sion-file;
\ No newline at end of file
-- 
GitLab


From 089dd17ab1eb1d46d0c2562204d731fdecced7a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 5 Aug 2022 15:05:30 +0200
Subject: [PATCH 08/47] switch to on partition install for openmpi

---
 build.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/build.sh b/build.sh
index 24d12ec..6173108 100755
--- a/build.sh
+++ b/build.sh
@@ -1,10 +1,14 @@
 # 1. Set-Up Environment
-ml Stages/2020  GCC/10.3.0  ParaStationMPI/5.4.9-1 SIONlib/1.7.7;
+ml Stages/2020 GCC/10.3.0 SIONlib/1.7.7;
 
 # 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support
 #export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/;
 #export CPATH=$CPATH:~/.local/include/;
 
+source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh
+## BXI profile
+export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf
+
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
-- 
GitLab


From 5372dcd463b6f9ec3364e4806f9c88908ba4b0dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Mon, 8 Aug 2022 13:56:14 +0200
Subject: [PATCH 09/47] Update to build run config

---
 build.sh    | 11 -----------
 loadEnv.sh  | 10 ++++++++++
 loadPath.sh |  7 +++++++
 run.sh      |  1 -
 4 files changed, 17 insertions(+), 12 deletions(-)
 create mode 100644 loadEnv.sh
 create mode 100644 loadPath.sh

diff --git a/build.sh b/build.sh
index 6173108..745a9e4 100755
--- a/build.sh
+++ b/build.sh
@@ -1,14 +1,3 @@
-# 1. Set-Up Environment
-ml Stages/2020 GCC/10.3.0 SIONlib/1.7.7;
-
-# 2. Use locallly installed minipmi, needed since we build with UCX and IBVerbs support
-#export LIBRARY_PATH=$LIBRARY_PATH:~/.local/lib/;
-#export CPATH=$CPATH:~/.local/include/;
-
-source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh
-## BXI profile
-export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf
-
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
diff --git a/loadEnv.sh b/loadEnv.sh
new file mode 100644
index 0000000..05a4e2f
--- /dev/null
+++ b/loadEnv.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -x
+
+
+## MPI
+#source loadPath.sh /opt/mpi/openmpi/4.1.1.2/
+source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh
+## BXI profile
+export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf
+## SIONlib and linktest
+source loadPath.sh install
\ No newline at end of file
diff --git a/loadPath.sh b/loadPath.sh
new file mode 100644
index 0000000..332eec7
--- /dev/null
+++ b/loadPath.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+INSTALL_DIR=$(readlink -mn $1)
+echo Loading $INSTALL_DIR
+export LIBRARY_PATH=$LIBRARY_PATH:$INSTALL_DIR/lib/;
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INSTALL_DIR/lib/;
+export CPATH=$CPATH:$INSTALL_DIR/include/;
+export PATH=$PATH:$INSTALL_DIR/bin;
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 621c112..1fb98c8 100755
--- a/run.sh
+++ b/run.sh
@@ -3,7 +3,6 @@
 #SBATCH --reservation maint-bxi
 #SBATCH --account deepsea
 #SBATCH --nodes 2 
-ml Stages/2020  GCC/10.3.0  ParaStationMPI/5.4.9-1 SIONlib/1.7.7;
 
 export PORTALS4_DEBUG=3
 
-- 
GitLab


From 9bb35f360a73c18514518e70ce95d6357ad5c50a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Mon, 8 Aug 2022 14:50:51 +0200
Subject: [PATCH 10/47] Added timing measurement

---
 benchmark/vcluster_portals.cc | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 8254d2c..692efdb 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -10,6 +10,7 @@
 #include "portals4_macros.h"
 #include "memory.h"
 #include "error.h"
+#include "timing.h"
 
 std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
     ptl_process_t physId;
@@ -103,6 +104,7 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
         CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) );                                   // Activate event counter
         CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
     }
+
     barrier();
     ptl_ct_event_t  ct; // event counter
     if (rank() == to) {
@@ -121,9 +123,10 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
         const ptl_size_t remoteOffset = 0;
         const ptl_match_bits_t match_bits = 1; //TDOD why?
         const ptl_hdr_data_t header_data = 0;
+        auto sendTime = walltime();
         for(auto n = 1; n <= num_msg; n++) {
             #if defined(DEBUG_PORTALS)
-            info("Sending message number %d", n);
+            info("Put from %d to %d #%d", from, to, n);
             #endif
             CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
             #if defined(DEBUG_PORTALS)
@@ -132,9 +135,13 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
             info("Successful puts %d. Failed puts %d", ct.success, ct.failure);
             #endif
         }
+        sendTime = walltime() - sendTime;
+        if(timing) *timing = sendTime;
         CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) );
     }
 
+    barrier();
+    info("Kernel finished");
     return SUCCESS;
 }
 
-- 
GitLab


From 3a19ab5030ee71c2dcde49b7ae14b02b998bff12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Mon, 15 Aug 2022 16:37:06 +0200
Subject: [PATCH 11/47] attach gdb

---
 run.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run.sh b/run.sh
index 1fb98c8..70a1514 100755
--- a/run.sh
+++ b/run.sh
@@ -9,7 +9,7 @@ export PORTALS4_DEBUG=3
 srun \
 --label \
 --ntasks 4 \
-	install/linktest \
+	gdb -ex run -ex bt -ex cont -ex quit --args install/linktest \
 	--mode portals \
 	--num-warmup-messages 10 \
 	--num-messages 100 \
-- 
GitLab


From 5484b9f542d298a0875bdde1fff393a2e8b79757 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 29 Sep 2022 16:32:00 +0200
Subject: [PATCH 12/47] Fix DEBUG defines

---
 build.sh | 2 +-
 run.sh   | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/build.sh b/build.sh
index 745a9e4..363483c 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,5 @@
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
-make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install clean;
+make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -DDEBUG_PORTALS=True" PREFIX=../install install;
 cd ..;
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 70a1514..846f282 100755
--- a/run.sh
+++ b/run.sh
@@ -8,10 +8,10 @@ export PORTALS4_DEBUG=3
 
 srun \
 --label \
---ntasks 4 \
-	gdb -ex run -ex bt -ex cont -ex quit --args install/linktest \
+--ntasks 2 \
+	install/linktest \
 	--mode portals \
-	--num-warmup-messages 10 \
-	--num-messages 100 \
+	--num-warmup-messages 0 \
+	--num-messages 3 \
 	--size-messages $((16)) \
 	--no-sion-file;
\ No newline at end of file
-- 
GitLab


From e8bdcb0439833b2041ceb5b547e0bec2cf423489 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 29 Sep 2022 16:52:21 +0200
Subject: [PATCH 13/47] Moved initialization of structs into separate functions

---
 benchmark/vcluster_portals.cc | 86 ++++++++++++++---------------------
 benchmark/vcluster_portals.h  | 27 +++++++++++
 2 files changed, 61 insertions(+), 52 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 692efdb..bbacd9c 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -69,54 +69,43 @@ int VirtualClusterPortals::finalize()
     return SUCCESS;
 }
 
+void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) {
+    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) );
+    CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) );
+    md.start  = buf.p();
+    md.length = buf.len();
+    md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
+    md.eq_handle = md_eq_handle;
+    md.ct_handle = md_ct_handle;
+    CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
+}
+
+void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
+    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
+    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) );
+    me.start  = buf.p();
+    me.length = buf.len();
+    me.uid    = PTL_UID_ANY;
+    me.match_id.phys.nid = PTL_NID_ANY;
+    me.match_id.phys.pid = PTL_PID_ANY;
+    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_EVENT_CT_COMM;
+    me.ct_handle = me_ct_handle;
+    CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
+}
+
 int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
 {
-    ptl_pt_index_t  pt_index;
-    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
+    if (rank() == from) prepareSendStructs(buf, num_msg);
+    if (rank() == to)   prepareRecvStructs(buf);
+    barrier();
 
-    // Sender only
-    ptl_md_t        md; // Memory Descriptor
-    ptl_handle_md_t md_handle;
-    if (rank() == from) {
-        md.start  = buf.p();
-        md.length = buf.len();
-        md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
-        md.eq_handle = PTL_EQ_NONE;
-        #if defined(DEBUG_PORTALS)
-        std::printf("PtlMDBind");
-        #endif
-        CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) );  // Activate event counter
-        CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
-    }
-    // Receiver only
-    ptl_me_t        me; // Match List Entry
-    ptl_handle_me_t me_handle; 
     if (rank() == to) {
-        me.start  = buf.p();
-        me.length = buf.len();
-        me.uid    = PTL_UID_ANY;
-        me.match_id.phys.nid = PTL_NID_ANY;
-        me.match_id.phys.pid = PTL_PID_ANY;
-        me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
+        PtlCTWait(me_ct_handle, num_msg, &recv_ct);
         #if defined(DEBUG_PORTALS)
-        std::printf("PtlMEAppend");
+        info("Recv: success %d - failure %d", recv_ct.success, recv_ct.failure);
         #endif
-        CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle) );                                   // Activate event counter
-        CHECK_RETURNVAL( PtlMEAppend(mni_handle, 0, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
     }
-
-    barrier();
-    ptl_ct_event_t  ct; // event counter
-    if (rank() == to) {
-        for(auto n = 0; n < num_msg; n++) {
-            #if defined(DEBUG_PORTALS)
-            barrier();
-            PtlCTGet(md.ct_handle, &ct);
-            info("Received puts %d. Failed puts %d", ct.success, ct.failure);
-            #endif
-        }
-        CHECK_RETURNVAL( PtlCTWait(me.ct_handle, num_msg, &ct) ); //TODO is this unidirectional by default
-    } else if (rank() == from) {
+    if (rank() == from) {
         ptl_process_t target;
         target.rank = to;
         const ptl_size_t localOffset = 0;
@@ -125,23 +114,16 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
         const ptl_hdr_data_t header_data = 0;
         auto sendTime = walltime();
         for(auto n = 1; n <= num_msg; n++) {
-            #if defined(DEBUG_PORTALS)
-            info("Put from %d to %d #%d", from, to, n);
-            #endif
             CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
-            #if defined(DEBUG_PORTALS)
-            barrier();
-            PtlCTGet(md.ct_handle, &ct);
-            info("Successful puts %d. Failed puts %d", ct.success, ct.failure);
-            #endif
         }
+        PtlCTWait(md_ct_handle, num_msg, &send_ct);
         sendTime = walltime() - sendTime;
+        #if defined(DEBUG_PORTALS)
+        info("Send: success %d - failure %d", send_ct.success, send_ct.failure);
+        #endif
         if(timing) *timing = sendTime;
-        CHECK_RETURNVAL( PtlCTWait(md.ct_handle, num_msg, &ct) );
     }
 
-    barrier();
-    info("Kernel finished");
     return SUCCESS;
 }
 
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 4afa003..fc34c02 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -59,6 +59,33 @@ private:
     ptl_ni_limits_t mni_limits_actual;
     ptl_handle_ni_t mni_handle;
 
+    // Sender only
+    /** @brief Memory Descriptor (MD) */
+    ptl_md_t md;
+    /** @brief MD Handle */
+    ptl_handle_md_t md_handle;
+    /** @brief MD Event Counter Handle */
+    ptl_handle_ct_t md_ct_handle;
+    /** @brief MD Event Queue Handle */
+    ptl_handle_eq_t md_eq_handle;
+    /** @brief Send Counter */
+    ptl_ct_event_t send_ct;
+    void prepareSendStructs(const MemoryBuffer& buf, const int num_msg);
+
+    // Receiver only
+    /** @brief Portal Table Index */
+    ptl_pt_index_t  pt_index;
+    /** @brief Match List Entry (ME) */
+    ptl_me_t  me;
+    /** @brief ME Handle */
+    ptl_handle_me_t me_handle;
+    /** @brief ME Event Counter Handle */
+    ptl_handle_ct_t me_ct_handle;
+    /** @brief Receive Counter */
+    ptl_ct_event_t recv_ct;
+    void prepareRecvStructs(const MemoryBuffer& buf);
+
+
     std::vector<ptl_process_t> getPhysicalFromRank();
 };
 
-- 
GitLab


From 1ce3d6fa60d18fa8baeb8a99a43751377024a3d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 29 Sep 2022 16:53:03 +0200
Subject: [PATCH 14/47] Added TCP helper debug output since I get barrier failures

---
 benchmark/vcluster_tcp.cc | 8 ++++----
 build.sh                  | 2 +-
 run.sh                    | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/benchmark/vcluster_tcp.cc b/benchmark/vcluster_tcp.cc
index 2e7d001..3dfeac9 100644
--- a/benchmark/vcluster_tcp.cc
+++ b/benchmark/vcluster_tcp.cc
@@ -113,12 +113,14 @@ int VirtualClusterTCP::read_tcp_environ_rank_and_size()
 
 #if 1 == HAVE_MINIPMI
     EXEC_IFFAIL(minipmi_get_size(pmi_, &size), error("minipmi_get_size() failed."); return ERROR);
-
     EXEC_IFFAIL(minipmi_get_rank(pmi_, &rank), error("minipmi_get_rank() failed."); return ERROR);
 
     rank_ = rank;
     size_ = size;
 
+    #if defined(DEBUG_MINIPMI)
+    info("PMI rank: %d, PMI size: %d", rank, size);
+    #endif
     return SUCCESS;
 #else
     EXEC_IFFAIL(read_environ_int(TCP_ENVIRON_PREFIX "SIZE", &size), error("Failed to read environment variable " TCP_ENVIRON_PREFIX "SIZE"); return ERROR);
@@ -606,9 +608,7 @@ int VirtualClusterTCP::init()
 #endif
 
     EXEC_NOFAIL(read_tcp_environ());
-
     EXEC_NOFAIL(read_tcp_environ_rank_and_size());
-
     EXEC_NOFAIL(connect_to_all());
 
     disable_nagles_algorithm();
@@ -665,7 +665,7 @@ int VirtualClusterTCP::send(int dst, MemoryBuffer& buf)
     }
 
     if (unlikely((dst < 0) || (dst >= size_ ))) {
-        error("Invalid rank.");
+        error("Invalid rank: 0 < %d < %d", dst, size_);
         return ERROR;
     }
 
diff --git a/build.sh b/build.sh
index 363483c..e80c062 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,5 @@
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
-make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -DDEBUG_PORTALS=True" PREFIX=../install install;
+make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -D DEBUG_PORTALS=True -D DEBUG_MINIPMI=True" PREFIX=../install install;
 cd ..;
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 846f282..34472ce 100755
--- a/run.sh
+++ b/run.sh
@@ -4,7 +4,7 @@
 #SBATCH --account deepsea
 #SBATCH --nodes 2 
 
-export PORTALS4_DEBUG=3
+#export PORTALS4_DEBUG=3
 
 srun \
 --label \
-- 
GitLab


From f5f319986cc92d80f5dca9d92e98fd973bde3199 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 4 Oct 2022 18:02:25 +0200
Subject: [PATCH 15/47] Fixed call to deleted constructor MemoryBuffer

---
 benchmark/memory.cc | 2 +-
 benchmark/memory.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/benchmark/memory.cc b/benchmark/memory.cc
index e67dd17..b76663e 100644
--- a/benchmark/memory.cc
+++ b/benchmark/memory.cc
@@ -89,7 +89,7 @@ void MemoryBuffer::memory_copy(MemoryBuffer& dst, MemoryBuffer& src){
 }
 
 MemoryBuffer MemoryBuffer::wrap(void* p, std::size_t len, AddressSpace::ID addr_space_id){
-    return MemoryBuffer(p, len, addr_space_id);
+    return {p, len, addr_space_id};
 }
 
 void MemoryBuffer::fill(){
diff --git a/benchmark/memory.h b/benchmark/memory.h
index aa08943..6774f1a 100644
--- a/benchmark/memory.h
+++ b/benchmark/memory.h
@@ -165,8 +165,8 @@ class MemoryBuffer{
 
 		MemoryBuffer(const MemoryBuffer& other)             = delete;
 		MemoryBuffer& operator=(const MemoryBuffer& other)  = delete;
-		MemoryBuffer(MemoryBuffer&& other)                  = delete;
-		MemoryBuffer& operator=(const MemoryBuffer&& other) = delete;
+		MemoryBuffer(MemoryBuffer&& other)                  = default;
+		MemoryBuffer& operator=(MemoryBuffer&& other)       = default;
 
 		/* Wrap an existing pointer into a memory buffer. We do not know the allocator
 		 * and hence have to ingore it. This is acceptable since the allocator is not
-- 
GitLab


From f820ae7cdfcac2be4660bf690164328bb2961399 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 5 Oct 2022 17:18:36 +0200
Subject: [PATCH 16/47] Fixed a bug that caused empty impl list.

Fix Makefile double definitions

Fix CFLAGS overriding not working

Fix circular dependency
---
 benchmark/Makefile           |  90 +++++++++------------------
 benchmark/cmdline.cc         |  10 +--
 benchmark/linktest.cc        |   6 +-
 benchmark/vcluster.cc        | 116 +++++++++++++++--------------------
 benchmark/vcluster.h         |  18 ++----
 benchmark/vcluster_portals.h |  14 ++---
 build.sh                     |   2 +-
 7 files changed, 96 insertions(+), 160 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index c7a2d10..d10be49 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -8,6 +8,7 @@
 #****************************************************************************/
 
 # DEFAULTS
+# =========================================
 PREFIX    = /usr/local/bin
 
 USE_POSIX     = 1
@@ -28,10 +29,16 @@ GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
 CC       = mpicxx
 CFLAGS   = -std=c++17 -Wall -g -rdynamic
-CPPFLAGS = -DDEBUG_PORTALS -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
+CPPFLAGS =	-DDEBUG_PORTALS=True \
+			-D_GNU_SOURCE \
+			-DLINKTEST_LINUX=1 \
+			-DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \
+			-DGIT_HASH=\"$(GIT_HASH)\" \
+			-DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
 LD       = $(CC)
 LDFLAGS  =
 LIBS     =
+# =========================================
 
 # Handle Dependencies
 # =========================================
@@ -56,7 +63,7 @@ ifeq (1, $(HAVE_PORTALS))
 	HAVE_TCP     = 1
 endif
 
-ifdef V
+ifdef VERBOSE
 $(info USE_POSIX    = $(USE_POSIX))
 $(info HAVE_SION    = $(HAVE_SION))
 $(info HAVE_MPI     = $(HAVE_MPI))
@@ -70,58 +77,6 @@ $(info HAVE_PORTALS = $(HAVE_PORTALS))
 endif
 # =========================================
 
-SYSTEM   = generic
-GIT_HASH = $(shell git rev-parse --verify HEAD)
-GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
-CC       = mpicxx
-CFLAGS   = -std=c++17 -Wall
-CPPFLAGS = -D_GNU_SOURCE -DLINKTEST_LINUX=1 -DLINKTEST_SYSTEM="\"$(SYSTEM)\"" -DGIT_HASH=\"$(GIT_HASH)\" -DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
-LD       = $(CC)
-LDFLAGS  = 
-LIBS     =
-
-# Use POSIX
-ifeq (1, ${USE_POSIX})
-	CPPFLAGS += -D__USE_POSIX
-endif
-
-# SIONlib Options
-ifeq (1, $(HAVE_SION))
-#	CFLAGS   +=
-	CPPFLAGS += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi)
-#	LDFLAGS  +=
-	LIBS     += $(shell sionconfig --64 --gcc --libs --mpi)
-endif
-
-# MINIPMI Options
-ifeq (1, $(HAVE_MINIPMI))
-#	CFLAGS   +=
-	CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1
-	LDFLAGS  += -Lminipmi
-	LIBS     += -lminipmi
-endif
-
-# UCP Options
-ifeq (1, $(HAVE_UCP))
-#	CFLAGS   +=
-#	CPPFLAGS +=
-#	LDFLAGS  +=
-#	LIBS     +=
-endif
-
-# CUDA Options
-ifeq (1, $(HAVE_CUDA))
-	CU       = nvcc
-	CUARCH   = 
-	ifeq (, $(CUARCH))
-$(error CUARCH is not set)
-	endif
-	CUFLAGS  = --gpu-architecture $(CUARCH)
-#	CFLAGS   +=
-	CPPFLAGS += -I$(CUDA)/include -DHAVE_CUDA=1
-	LDFLAGS  += -L$(CUDA)/lib
-	LIBS     += -lcuda -lcudart
-endif
 
 # DEFINE EXECUTABLES
 # =========================================
@@ -150,7 +105,7 @@ ifeq (1, $(HAVE_MINIPMI))
 	endif
 endif
 
-ifdef V
+ifdef VERBOSE
 $(info linktest-versions = $(linktest-versions))
 endif
 # =========================================
@@ -181,11 +136,11 @@ linktest-obj = linktest.o \
 
 ifeq (1, $(HAVE_MPI))
 	linktest-obj += vcluster_mpi.o
-	CFLAGS       += -DHAVE_VCLUSTER_MPI=1
+	CPPFLAGS     += -DHAVE_VCLUSTER_MPI=1
 endif
 ifeq (1, $(HAVE_TCP))
 	linktest-obj += vcluster_tcp.o
-	CFLAGS       += -DHAVE_VCLUSTER_TCP=1
+	CPPFLAGS     += -DHAVE_VCLUSTER_TCP=1
 endif
 ifeq (1, $(HAVE_MINIPMI))
 	CPPFLAGS += -Iminipmi -DHAVE_MINIPMI=1
@@ -198,22 +153,22 @@ ifeq (1, $(HAVE_MINIPMI))
 		                ibverbs_cq.o \
 		                ibverbs_pd.o \
 		                ibverbs_ctx.o
-		CFLAGS       += -DHAVE_VCLUSTER_IBVERBS=1 -DIBVERBS_SEND_INLINE=1
+		CPPFLAGS     += -DHAVE_VCLUSTER_IBVERBS=1 -DIBVERBS_SEND_INLINE=1
 		LIBS         += -libverbs
 	endif
 	ifeq (1, $(HAVE_PSM2))
 		linktest-obj += vcluster_psm2.o
-		CFLAGS       += -DHAVE_VCLUSTER_PSM2=1
+		CPPFLAGS     += -DHAVE_VCLUSTER_PSM2=1
 		LIBS         += -lpsm2
 	endif
 	ifeq (1, $(HAVE_UCP))
 		linktest-obj += vcluster_ucp.o
-		CFLAGS       += -DHAVE_VCLUSTER_UCP=1
+		CPPFLAGS     += -DHAVE_VCLUSTER_UCP=1
 		LIBS         += -lucp
 	endif
 	ifeq (1, $(HAVE_PORTALS))
 		linktest-obj += vcluster_portals.o
-		CFLAGS       += -DHAVE_VCLUSTER_PORTALS=1 
+		CPPFLAGS     += -DHAVE_VCLUSTER_PORTALS=1 
 		LDFLAGS      += -Lportals
 		LIBS         += -lportals
 	endif
@@ -224,6 +179,9 @@ ifeq (1, $(HAVE_MINIPMI))
 		                memory_cuda.o
 		CU            = nvcc
 		CUARCH        = 
+		ifeq (, $(CUARCH))
+$(error CUARCH is not set)
+		endif
 		CUFLAGS       = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1
 		CPPFLAGS     += -I$(CUDA)/include -DHAVE_CUDA=1
 		LDFLAGS      += -L$(CUDA)/lib
@@ -240,11 +198,19 @@ endif
 ifeq (1, ${USE_POSIX})
 	CPPFLAGS += -D__USE_POSIX
 endif
+
+ifdef VERBOSE
+$(info linktest-obj = $(linktest-obj))
+$(info CFLAGS = $(CFLAGS))
+$(info CPPFLAGS = $(CPPFLAGS))
+$(info LDFLAGS = $(LDFLAGS))
+$(info LIBS = $(LIBS))
+endif
 # =========================================
 
 # DEFINE MAKE RULES
 # =========================================
-ifdef V
+ifdef VERBOSE
 	Q =
 else
 	Q = @
diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc
index 0baea8e..92063a0 100644
--- a/benchmark/cmdline.cc
+++ b/benchmark/cmdline.cc
@@ -782,11 +782,11 @@ void print_cmdline_usage(const std::string& prog)
         }
 
         std::string modeList = "[";
-        if(VirtualCluster::impls[0] != nullptr) {
-                for(auto i=0;VirtualCluster::impls[i];i++) {
-                        modeList = modeList + VirtualCluster::impls[i] + ", ";
-                }
-                modeList.erase(modeList.size()-2);
+        if(VirtualCluster::impls.size() > 0) {
+            for(const auto& name: VirtualCluster::impls) {
+                modeList = modeList + name + ", ";
+            }
+            modeList.erase(modeList.size()-2);
         }
         modeList += "]";
         std::fprintf(stderr,
diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc
index 02d8623..16070dd 100644
--- a/benchmark/linktest.cc
+++ b/benchmark/linktest.cc
@@ -60,11 +60,7 @@ int main(int argc, char *argv[]){
 
         {
             /* Determine Virtual Cluster Type */
-            const auto name=get_vcluster_impl_name(argv,cmdline_args->virtual_cluster_implementation.c_str());
-            if (unlikely(!name)){
-                error("Failed to determine virtual-cluster implementation.");
-                return ERROR;
-            }
+            const auto name=VirtualCluster::get_vcluster_impl_name(argv,cmdline_args->virtual_cluster_implementation);
             
             /* Create Virtual Cluster */
             bench.cl.reset(VirtualCluster::factory(name));
diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc
index 735f8a3..bcc8fe5 100644
--- a/benchmark/vcluster.cc
+++ b/benchmark/vcluster.cc
@@ -396,6 +396,30 @@ int VirtualCluster::linktest_kbipingpong(const int from, const int to,
     return SUCCESS;
 }
 
+const std::vector<std::string> VirtualCluster::impls = {
+    #if 1 == HAVE_VCLUSTER_TCP
+    "tcp",
+    #endif
+    #if 1 == HAVE_VCLUSTER_MPI
+    "mpi",
+    #endif
+    #if 1 == HAVE_VCLUSTER_IBVERBS
+    "ibverbs",
+    #endif
+    #if 1 == HAVE_VCLUSTER_PSM2
+    "psm2",
+    #endif
+    #if 1 == HAVE_VCLUSTER_UCP
+    "ucp",
+    #endif
+    #if 1 == HAVE_VCLUSTER_PORTALS
+    VirtualClusterPortals::NAME,
+    #endif
+    #if 1 == HAVE_VCLUSTER_CUDA
+    "cuda",
+    #endif
+};
+
 VirtualCluster*  VirtualCluster::factory(const std::string& name){
 #if 1 == HAVE_VCLUSTER_TCP
     if ("tcp" == name) {
@@ -477,80 +501,38 @@ int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf)
     return helper_->recv(src, buf);
 }
 
-const char* VirtualCluster::impls[] = 
-    {
-        #if 1 == HAVE_VCLUSTER_TCP
-        "tcp",
-        #endif
-        #if 1 == HAVE_VCLUSTER_MPI
-        "mpi",
-        #endif
-        #if 1 == HAVE_VCLUSTER_IBVERBS
-        "ibverbs",
-        #endif
-        #if 1 == HAVE_VCLUSTER_PSM2
-        "psm2",
-        #endif
-        #if 1 == HAVE_VCLUSTER_UCP
-        "ucp",
-        #endif
-        #if 1 == HAVE_VCLUSTER_PORTALS
-        VirtualClusterPortals::NAME,
-        #endif
-        #if 1 == HAVE_VCLUSTER_CUDA
-        "cuda",
-        #endif
-        nullptr
-    };
-
-const char* get_vcluster_impl_name(char** argv, const char* name)
+const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std::string& name)
 {
-    /* Check If Virtual-Cluster Implementation Given In 'name' */
-    if(name[0]){ //Check if 'name' is a null string
-    for(auto i=0;VirtualCluster::impls[i];i++){ //Loop over possible virtual cluster implementations
-        /* Loop termination is handled by the fact that the last 'VirtualCluster::impls' is
-         * null pointer.
-         */
-        if (!strcmp(VirtualCluster::impls[i],name)){ //Compare command-line implementation to possible implementations
-        return VirtualCluster::impls[i];
-        }
+
+    std::string requestedImpl;
+
+    // check 'name'
+    if(name != "") {
+        requestedImpl = name;
     }
-    // If this point is reached an unknown/unsupported implementation was encountered.
-    error("Unknown/Unsupported command-line implementation encountered.");
-    return(NULL);
+
+    // check executable extension: the text after the last '.' of the file name
+    // (dots in the directory part are ignored); an explicit 'name' wins over it
+    const std::string executableName(argv[0]);
+    const auto pos = executableName.find_last_of("./");
+    if(requestedImpl.empty() && pos != std::string::npos && executableName[pos] == '.') {
+        requestedImpl = executableName.substr(pos + 1);
     }
 
-    /* Check Executable Extension For Virtual-Cluster Implementation */
-    // Determine suffix start
-    int i=std::strlen(argv[0])-1;
-    if(likely(i>=0)){
-    int suffix_offset=-1; //Integer indicating suffix start
-    while(true){
-        if(unlikely(argv[0][i]=='.')){
-        suffix_offset=i; //Identify suffix start.
-        break;
-        }
-        if(i--==0) break;
+    // Check environment
+    const char* envName;
+    if(read_environ_str(LINKTEST_ENVIRON_PREFIX "VCLUSTER_IMPL", &envName) == SUCCESS) {
+        requestedImpl = {envName};
     }
-    if(likely(suffix_offset>=0)){ //Check if suffix was encountered
-        // Compare suffix to supported virtual cluster implementations
-        for(auto i=0;VirtualCluster::impls[i];i++){ //Loop over possible virtual cluster implementations
-        /* Loop termination is handled by the fact that the last 'VirtualCluster::impls' is
-         * null pointer.
-         */
-        if (unlikely(!strcmp(VirtualCluster::impls[i],&(argv[0][suffix_offset+1])))){ //Compare suffix to possible implementations
-            return VirtualCluster::impls[i]; //Return identified virtual-cluster implementation
-        }
+
+    for(const auto& impl : VirtualCluster::impls) {
+        if(impl == requestedImpl) {
+            return impl;
         }
-    }else if(unlikely(suffix_offset<-1)){ //This should not happen logically!
-        error("Internal Error: Unexpected negative executable-name-suffix offset encountered.");
-        return(NULL);
     }
-    }
-
-    /* Check If Environment Specifies Virtual-Cluster Implementation */
-    if(unlikely(read_environ_str(LINKTEST_ENVIRON_PREFIX "VCLUSTER_IMPL",&name))) return NULL;
-    return name;
+    
+    error("Unknown/Unsupported virtual-cluster implementation requested.");
+    return VirtualCluster::impls.at(0); // NOTE(review): only reached if error() returns; then falls back to the first compiled-in implementation (at() throws if there is none)
 }
 
 int VirtualCluster::write_parallel(const linktest_args* args, const std::vector<LinktestStats>& statsVec)
diff --git a/benchmark/vcluster.h b/benchmark/vcluster.h
index f758b75..b516073 100644
--- a/benchmark/vcluster.h
+++ b/benchmark/vcluster.h
@@ -16,7 +16,6 @@
 #include <string>
 #include <memory>
 
-
 struct linktest_args;
 
 /* A virtual view of our cluster. The VirtualCluster
@@ -275,12 +274,11 @@ public:
     virtual int write_parallel(const linktest_args* args, const std::vector<LinktestStats>& statsVec);
     virtual int write_funnelled(const linktest_args* args, const std::vector<LinktestStats>& statsVec);
 
-    /* Given the name of the vcluster implementation create an instance. This
-        * function accesses an internal database to map the name of the implementation
-        * to a function that creates the instance. In order for this to work, the
-        * implementation needs to be properly registered (see linktest_vcluster.c).
-        */
-    static const char* impls[];
+    /** \brief List of supported transport protocols (built at compile time) */
+    static const std::vector<std::string> impls;
+    /** \return name of the requested transport protocol, i.e. the virtual cluster implementation (taken from 'name', the executable extension or the environment) */
+    static const std::string& get_vcluster_impl_name(char** argv, const std::string& name);
+    /** \return pointer to requested VirtualCluster implementation */
     static VirtualCluster* factory(const std::string& name);
 
 private:
@@ -339,12 +337,6 @@ enum vcluster_reduce_op {
     SUM_DOUBLE
 };
 
-/* Get the name of the vcluster implementation to be used. The implementation
- * can be chosen by means of argv[0] or an environment variable set by the
- * spawner.
- */
-const char* get_vcluster_impl_name(char** argv, const char* name);
-
 template<typename T>
 int VirtualCluster::send(const int dst,const T* const vals,const int len){
     auto tmp = MemoryBuffer::wrap<T>(const_cast<T*>(vals), len, AddressSpace::ID::Local);
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index fc34c02..5acafcb 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -22,32 +22,32 @@ class VirtualClusterPortals : public VirtualClusterWithHelper
 public:
     inline static const char * NAME = "portals";
     VirtualClusterPortals();
-    virtual int init() override;
-    virtual int finalize() override;
+    int init() override;
+    int finalize() override;
 
-    virtual int kpingpong(const int from, const int to, MemoryBuffer& buf, 
+    int kpingpong(const int from, const int to, MemoryBuffer& buf, 
                           const int num_msg, double* const timing) override;
 
-    virtual int kUniDir(const int from, const int to,
+    int kUniDir(const int from, const int to,
                         MemoryBuffer& buf1, MemoryBuffer& buf2,
                         const int num_msg, double* const timing,
                         const bool doBarrier) override {
         throw("Not Implemented");
     };
-    virtual int kUniDirMultiBuf(const int from,const int to,
+    int kUniDirMultiBuf(const int from,const int to,
                                 MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
                                 const int num_msg, double* const timing,
                                 const bool doBarrier) override {
         throw("Not Implemented");
     };
-    virtual int kUniDirLimitedMultiBuf(const int from,const int to,
+    int kUniDirLimitedMultiBuf(const int from,const int to,
                                        MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
                                        const int num_msg, double* const timing,
                                        const bool doBarrier) override {
         throw("Not Implemented");
     };
 
-    virtual int kbipingpong(const int from, const int to,
+    int kbipingpong(const int from, const int to,
                             MemoryBuffer& buf1, MemoryBuffer& buf2,
                             const int num_msg, double* const timing) override {
         throw("Not Implemented");
diff --git a/build.sh b/build.sh
index e80c062..cd2cb08 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,5 @@
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
-make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 CFLAGS="-std=c++17 -Wall -D DEBUG_PORTALS=True -D DEBUG_MINIPMI=True" PREFIX=../install install;
+make VERBOSE=1 HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install;
 cd ..;
\ No newline at end of file
-- 
GitLab


From e4a0fa250ead2e1fac71020dd64200ffcfb2b182 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 5 Oct 2022 17:42:59 +0200
Subject: [PATCH 17/47] add sionlib install description

---
 benchmark/installSIONlib.sh | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 benchmark/installSIONlib.sh

diff --git a/benchmark/installSIONlib.sh b/benchmark/installSIONlib.sh
new file mode 100644
index 0000000..01e0cb9
--- /dev/null
+++ b/benchmark/installSIONlib.sh
@@ -0,0 +1,8 @@
+wget 'http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7' &&
+tar -axf 'download.php?version=1.7.7' &&
+mkdir -p install &&
+cd sionlib &&
+./configure --prefix=/p/project/deepsea/mueller24/linktest/install --disable-fortran && # Check/Change Path
+cd build-linux-gomp10-openmpi &&
+make &&
+make install
\ No newline at end of file
-- 
GitLab


From 04c03fda706bb1eb154c4a03ee4413f60bce9307 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 6 Oct 2022 12:10:17 +0200
Subject: [PATCH 18/47] remove verbosity

---
 build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.sh b/build.sh
index cd2cb08..5c082d1 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,5 @@
 # 3. Install linktest in folder install
 mkdir -p install;
 cd benchmark;
-make VERBOSE=1 HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install;
+make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install;
 cd ..;
\ No newline at end of file
-- 
GitLab


From 0a857909fbd0b1404316f421040fb6014d38161d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 6 Oct 2022 12:36:33 +0200
Subject: [PATCH 19/47] Merged gitignore

---
 .gitignore           | 11 +++++++++++
 benchmark/.gitignore | 10 ----------
 2 files changed, 11 insertions(+), 10 deletions(-)
 delete mode 100644 benchmark/.gitignore

diff --git a/.gitignore b/.gitignore
index 78a13cd..ff11029 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,14 @@
 *.egg-info
 install/
 **.ipynb_checkpoints/
+# Ignore generated source files
+benchmark/cuda_kernels.cc
+# Ignore executables
+**/linktest
+**/linktest.mpi
+**/linktest.tcp
+**/linktest.cuda
+**/linktest.psm2
+**/linktest.ucp
+**/linktest.ibverbs
+**/linktest.portals
diff --git a/benchmark/.gitignore b/benchmark/.gitignore
deleted file mode 100644
index fe3166c..0000000
--- a/benchmark/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-# Ignore generated source files
-cuda_kernels.cc
-# Ignore executables
-**/linktest
-**/linktest.mpi
-**/linktest.tcp
-**/linktest.cuda
-**/linktest.psm2
-**/linktest.ucp
-**/linktest.ibverbs
\ No newline at end of file
-- 
GitLab


From f88453710d10b5284f992655234f0360487eb503 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 6 Oct 2022 17:08:54 +0200
Subject: [PATCH 20/47] Fixed ME initialization; implemented testPut for
 comparison

---
 benchmark/vcluster_portals.cc | 158 +++++++++++++++++++++++++++-------
 benchmark/vcluster_portals.h  |  13 ++-
 2 files changed, 137 insertions(+), 34 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index bbacd9c..c41dc4d 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -11,6 +11,7 @@
 #include "memory.h"
 #include "error.h"
 #include "timing.h"
+#include <assert.h>
 
 std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
     ptl_process_t physId;
@@ -42,8 +43,8 @@ int VirtualClusterPortals::init()
     if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) {
         warn("Portals versions other than 4.0 may not be suppported");
     }
-    CHECK_RETURNVAL(PtlInit());
-    CHECK_RETURNVAL(PtlNIInit(
+    CHECK_RETURNVAL( PtlInit() );
+    CHECK_RETURNVAL( PtlNIInit(
         PTL_IFACE_DEFAULT, // Manual 3.3.5: "Check README"
         PTL_NI_LOGICAL | PTL_NI_MATCHING, // Logical => using ranks, Matching => using send/recv semantics
         PTL_PID_ANY,
@@ -52,7 +53,9 @@ int VirtualClusterPortals::init()
         &mni_handle));
 
     auto physicalFromRank = getPhysicalFromRank();
-    CHECK_RETURNVAL(PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()));
+    CHECK_RETURNVAL( PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()) );
+    const auto DEFAULT_OPTIONS = 0;
+    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
 
     return SUCCESS;
 }
@@ -70,6 +73,9 @@ int VirtualClusterPortals::finalize()
 }
 
 void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) {
+    // send_ct.success = 0;
+    // send_ct.failure = 0;
+    // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) );
     CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) );
     CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) );
     md.start  = buf.p();
@@ -78,19 +84,129 @@ void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const in
     md.eq_handle = md_eq_handle;
     md.ct_handle = md_ct_handle;
     CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
+
+    #if defined(DEBUG_PORTALS)
+    CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) );
+    info("Send: before success %d - failure %d", recv_ct.success, recv_ct.failure);
+    #endif
 }
 
 void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
-    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, 0, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
+    // recv_ct.success = 0;
+    // recv_ct.failure = 0;
+    // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) );
     CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) );
     me.start  = buf.p();
     me.length = buf.len();
     me.uid    = PTL_UID_ANY;
-    me.match_id.phys.nid = PTL_NID_ANY;
-    me.match_id.phys.pid = PTL_PID_ANY;
-    me.options = PTL_ME_OP_PUT | PTL_ME_OP_GET | PTL_ME_EVENT_CT_COMM;
+    me.match_id.rank = PTL_RANK_ANY;
+    me.match_bits    = 1; //TDOD why?
+    me.ignore_bits   = 0; //TDOD why?
+    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
     me.ct_handle = me_ct_handle;
     CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
+
+    #if defined(DEBUG_PORTALS)
+    CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) );
+    info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure);
+    #endif
+}
+
+void VirtualClusterPortals::recvMessages(const int num_msg) {
+    CHECK_RETURNVAL( PtlCTWait(me_ct_handle, num_msg, &recv_ct) );
+    #if defined(DEBUG_PORTALS)
+    info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure);
+    #endif
+}
+
+void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing) {
+    ptl_process_t target;
+    target.rank = to;
+    const ptl_size_t localOffset = 0;
+    const ptl_size_t remoteOffset = 0;
+    const ptl_match_bits_t match_bits = 1; //TDOD why?
+    const ptl_hdr_data_t header_data = 0;
+    auto sendTime = walltime();
+    for(auto n = 1; n <= num_msg; n++) {
+        CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
+    }
+    CHECK_RETURNVAL( PtlCTWait(md_ct_handle, num_msg, &send_ct) );
+    sendTime = walltime() - sendTime;
+    #if defined(DEBUG_PORTALS)
+    info("Send: after success %d - failure %d", send_ct.success, send_ct.failure);
+    #endif
+    if(timing) *timing = sendTime;
+}
+
+int VirtualClusterPortals::testPut() 
+{
+    if(not first) return 100;
+    first = false;
+    ptl_handle_ni_t ni_h = this->mni_handle;
+    ptl_pt_index_t  pt_index = this->pt_index;
+    uint64_t        value;
+    ptl_me_t        value_e;
+    ptl_handle_me_t value_e_handle;
+    ptl_md_t        write_md;
+    ptl_handle_md_t write_md_handle;
+    int             num_procs = this->size();
+    ptl_ct_event_t ctc;
+    int rank = this->rank();
+    int ret;
+    ptl_process_t *procs;
+
+    /* This test only succeeds if we have more than one rank */
+    if (num_procs < 2) return 77;
+
+    info("pt_index=%d",pt_index);
+    assert(pt_index == 0);
+
+    if (1 == rank) {
+        value_e.start  = &value;
+        value_e.length = sizeof(uint64_t);
+        value_e.uid    = PTL_UID_ANY;
+        value_e.match_id.rank = PTL_RANK_ANY;
+        value_e.match_bits    = 1;
+        value_e.ignore_bits   = 0;
+        value_e.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM);
+        CHECK_RETURNVAL(PtlCTAlloc(ni_h, &value_e.ct_handle));
+        CHECK_RETURNVAL(PtlMEAppend(ni_h, 0, &value_e, PTL_PRIORITY_LIST, NULL,
+                               &value_e_handle));
+        value = 0;
+    } else if (0 == rank) {
+        /* set up the landing pad so that I can read others' values */
+        write_md.start     = &value;
+        write_md.length    = sizeof(uint64_t);
+        write_md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
+        write_md.eq_handle = PTL_EQ_NONE;   // i.e. don't queue send events
+        CHECK_RETURNVAL(PtlCTAlloc(ni_h, &write_md.ct_handle));
+        CHECK_RETURNVAL(PtlMDBind(ni_h, &write_md, &write_md_handle));
+
+        value = 0xdeadbeef;
+    }
+
+    barrier();
+
+    /* 0 writes to 1 */
+    if (1 == rank) {
+        /* wait for write to arrive */
+        ret = PtlCTWait(value_e.ct_handle, 1, &ctc);
+        assert(ctc.failure == 0);
+        assert(value == 0xdeadbeef);
+    } else if (0 == rank) {
+        /* write to rank 1 */
+        ptl_process_t peer;
+	peer.rank = 1;
+        CHECK_RETURNVAL(PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, peer,
+                               pt_index, 1, 0, NULL, 0));
+        CHECK_RETURNVAL(PtlCTWait(write_md.ct_handle, 2, &ctc));
+        assert(ctc.failure == 0);
+    }
+
+    barrier();
+
+    info("testPut succeeded");
+    return 0;
 }
 
 int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
@@ -98,33 +214,11 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
     if (rank() == from) prepareSendStructs(buf, num_msg);
     if (rank() == to)   prepareRecvStructs(buf);
     barrier();
-
-    if (rank() == to) {
-        PtlCTWait(me_ct_handle, num_msg, &recv_ct);
-        #if defined(DEBUG_PORTALS)
-        info("Recv: success %d - failure %d", recv_ct.success, recv_ct.failure);
-        #endif
-    }
-    if (rank() == from) {
-        ptl_process_t target;
-        target.rank = to;
-        const ptl_size_t localOffset = 0;
-        const ptl_size_t remoteOffset = 0;
-        const ptl_match_bits_t match_bits = 1; //TDOD why?
-        const ptl_hdr_data_t header_data = 0;
-        auto sendTime = walltime();
-        for(auto n = 1; n <= num_msg; n++) {
-            CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
-        }
-        PtlCTWait(md_ct_handle, num_msg, &send_ct);
-        sendTime = walltime() - sendTime;
-        #if defined(DEBUG_PORTALS)
-        info("Send: success %d - failure %d", send_ct.success, send_ct.failure);
-        #endif
-        if(timing) *timing = sendTime;
-    }
+    if (rank() == from) sendMessages(to, buf, num_msg, timing);
+    if (rank() == to)   recvMessages(num_msg);
 
     return SUCCESS;
+
 }
 
 
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 5acafcb..9e75163 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -59,6 +59,11 @@ private:
     ptl_ni_limits_t mni_limits_actual;
     ptl_handle_ni_t mni_handle;
 
+    /** @brief Portal Table (PT) Index */
+    ptl_pt_index_t  pt_index;
+    /** @brief PT Event Queue Handle  */
+    ptl_handle_eq_t pt_eq_handle;
+
     // Sender only
     /** @brief Memory Descriptor (MD) */
     ptl_md_t md;
@@ -73,8 +78,6 @@ private:
     void prepareSendStructs(const MemoryBuffer& buf, const int num_msg);
 
     // Receiver only
-    /** @brief Portal Table Index */
-    ptl_pt_index_t  pt_index;
     /** @brief Match List Entry (ME) */
     ptl_me_t  me;
     /** @brief ME Handle */
@@ -85,6 +88,12 @@ private:
     ptl_ct_event_t recv_ct;
     void prepareRecvStructs(const MemoryBuffer& buf);
 
+    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing);
+    void recvMessages(const int num_msg);
+
+    bool first = true;
+    int testPut();
+
 
     std::vector<ptl_process_t> getPhysicalFromRank();
 };
-- 
GitLab


From d859830098301594f28045593d3681ea4bdc36fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 7 Oct 2022 11:49:48 +0200
Subject: [PATCH 21/47] Fixed success counter reading, which assumed a start at 0

---
 benchmark/vcluster_portals.cc | 33 ++++++++++++++++++---------------
 benchmark/vcluster_portals.h  |  8 ++++----
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index c41dc4d..6efbe63 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -72,7 +72,7 @@ int VirtualClusterPortals::finalize()
     return SUCCESS;
 }
 
-void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) {
+ptl_size_t VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) {
     // send_ct.success = 0;
     // send_ct.failure = 0;
     // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) );
@@ -84,14 +84,14 @@ void VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const in
     md.eq_handle = md_eq_handle;
     md.ct_handle = md_ct_handle;
     CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
-
-    #if defined(DEBUG_PORTALS)
     CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) );
-    info("Send: before success %d - failure %d", recv_ct.success, recv_ct.failure);
+    #if defined(DEBUG_PORTALS)
+    info("Send: before success %d - failure %d", send_ct.success, send_ct.failure);
     #endif
+    return send_ct.success;
 }
 
-void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
+ptl_size_t VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
     // recv_ct.success = 0;
     // recv_ct.failure = 0;
     // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) );
@@ -105,21 +105,21 @@ void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
     me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
     me.ct_handle = me_ct_handle;
     CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
-
-    #if defined(DEBUG_PORTALS)
     CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) );
+    #if defined(DEBUG_PORTALS)
     info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure);
     #endif
+    return recv_ct.success;
 }
 
-void VirtualClusterPortals::recvMessages(const int num_msg) {
-    CHECK_RETURNVAL( PtlCTWait(me_ct_handle, num_msg, &recv_ct) );
+void VirtualClusterPortals::recvMessages(const int num_msg, ptl_size_t start_count) {
+    CHECK_RETURNVAL( PtlCTWait(me_ct_handle, start_count + num_msg, &recv_ct) );
     #if defined(DEBUG_PORTALS)
     info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure);
     #endif
 }
 
-void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing) {
+void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing) {
     ptl_process_t target;
     target.rank = to;
     const ptl_size_t localOffset = 0;
@@ -130,7 +130,7 @@ void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const
     for(auto n = 1; n <= num_msg; n++) {
         CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
     }
-    CHECK_RETURNVAL( PtlCTWait(md_ct_handle, num_msg, &send_ct) );
+    CHECK_RETURNVAL( PtlCTWait(md_ct_handle, start_count + num_msg, &send_ct) );
     sendTime = walltime() - sendTime;
     #if defined(DEBUG_PORTALS)
     info("Send: after success %d - failure %d", send_ct.success, send_ct.failure);
@@ -211,11 +211,14 @@ int VirtualClusterPortals::testPut()
 
 int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
 {
-    if (rank() == from) prepareSendStructs(buf, num_msg);
-    if (rank() == to)   prepareRecvStructs(buf);
+    ptl_size_t success_count;
+    if (rank() == from) success_count = prepareSendStructs(buf, num_msg);
+    if (rank() == to)   success_count = prepareRecvStructs(buf);
     barrier();
-    if (rank() == from) sendMessages(to, buf, num_msg, timing);
-    if (rank() == to)   recvMessages(num_msg);
+    if (rank() == from) sendMessages(to, buf, num_msg, success_count, timing);
+    if (rank() == to)   recvMessages(num_msg, success_count);
+
+    info("kpingpong ended");
 
     return SUCCESS;
 
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 9e75163..7fb6309 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -75,7 +75,7 @@ private:
     ptl_handle_eq_t md_eq_handle;
     /** @brief Send Counter */
     ptl_ct_event_t send_ct;
-    void prepareSendStructs(const MemoryBuffer& buf, const int num_msg);
+    ptl_size_t prepareSendStructs(const MemoryBuffer& buf, const int num_msg);
 
     // Receiver only
     /** @brief Match List Entry (ME) */
@@ -86,10 +86,10 @@ private:
     ptl_handle_ct_t me_ct_handle;
     /** @brief Receive Counter */
     ptl_ct_event_t recv_ct;
-    void prepareRecvStructs(const MemoryBuffer& buf);
+    ptl_size_t prepareRecvStructs(const MemoryBuffer& buf);
 
-    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, double* const timing);
-    void recvMessages(const int num_msg);
+    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing);
+    void recvMessages(const int num_msg, ptl_size_t start_count);
 
     bool first = true;
     int testPut();
-- 
GitLab


From 7ba5fcb6e5dbeeafc9fdbca2f0eb0221c9b31d69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 14 Apr 2023 15:22:47 +0200
Subject: [PATCH 22/47] Updated to ParaStationMPI module

---
 build.sh   |  3 ++-
 loadEnv.sh |  2 +-
 run.sh     | 12 ++++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/build.sh b/build.sh
index 5c082d1..f1726d1 100755
--- a/build.sh
+++ b/build.sh
@@ -1,5 +1,6 @@
 # 3. Install linktest in folder install
+ml GCC ParaStationMPI SIONlib
 mkdir -p install;
 cd benchmark;
-make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install;
+make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install;
 cd ..;
\ No newline at end of file
diff --git a/loadEnv.sh b/loadEnv.sh
index 05a4e2f..0e3a2e1 100644
--- a/loadEnv.sh
+++ b/loadEnv.sh
@@ -7,4 +7,4 @@ source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh
 ## BXI profile
 export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf
 ## SIONlib and linktest
-source loadPath.sh install
\ No newline at end of file
+source ./loadPath.sh install
\ No newline at end of file
diff --git a/run.sh b/run.sh
index 34472ce..a2e8029 100755
--- a/run.sh
+++ b/run.sh
@@ -1,8 +1,10 @@
 #!/bin/sh
 #SBATCH --partition dp-bxi
-#SBATCH --reservation maint-bxi
 #SBATCH --account deepsea
-#SBATCH --nodes 2 
+#SBATCH --nodes 2
+#SBATCH --tasks-per-node 1
+
+ml GCC ParaStationMPI SIONlib
 
 #export PORTALS4_DEBUG=3
 
@@ -12,6 +14,8 @@ srun \
 	install/linktest \
 	--mode portals \
 	--num-warmup-messages 0 \
-	--num-messages 3 \
-	--size-messages $((16)) \
+	--num-messages 1 \
+	--size-messages 16777216 \
+	--serial-tests \
+	--num-slowest 0 \
 	--no-sion-file;
\ No newline at end of file
-- 
GitLab


From b19b86c75a00b6abbd485e6fff2d7079b315a077 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 26 May 2023 12:49:52 +0200
Subject: [PATCH 23/47] Rewrote as in portals-test Implemented kpingpong
 kpingpong synchronizes before send recv Added slow retest workaround for
 global barrier in portals kernel Marked future TODOs/Discussions

---
 benchmark/Makefile            |   3 +-
 benchmark/benchmark.cc        |  45 +++----
 benchmark/benchmark.h         |   2 +
 benchmark/linktest.cc         |   4 +-
 benchmark/vcluster.cc         |   5 +-
 benchmark/vcluster_portals.cc | 213 ++++++++++++++--------------------
 benchmark/vcluster_portals.h  |  21 ++--
 run.sh                        |  13 +--
 8 files changed, 128 insertions(+), 178 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index d10be49..df3b16d 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -29,8 +29,7 @@ GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
 CC       = mpicxx
 CFLAGS   = -std=c++17 -Wall -g -rdynamic
-CPPFLAGS =	-DDEBUG_PORTALS=True \
-			-D_GNU_SOURCE \
+CPPFLAGS =	-D_GNU_SOURCE \
 			-DLINKTEST_LINUX=1 \
 			-DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \
 			-DGIT_HASH=\"$(GIT_HASH)\" 
diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index d913e12..acaa6e3 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -19,6 +19,7 @@
 #include "environ.h"
 #include "format_units.h"
 #include "format_print.h"
+#include "vcluster_portals.h"
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
@@ -479,38 +480,18 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
 int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){
     for (auto i = 0; i < size(); ++i) {
         barrier();
-
         if (i == rank()) {
             EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false));
-            if ( 0 != i ){
-                EXEC_NOFAIL(cl->send(0, &partner     , 1));
-                EXEC_NOFAIL(cl->send(0,  time_per_msg, 1));
-            }
-        } else if (i == partner) {
-            EXEC_NOFAIL(kernel(partner, rank(), nullptr, false));
-        }
-
-        if (rank() == 0){
-            int buddy;
-            double time;
-            if(likely(0!=i)){
-                EXEC_NOFAIL(cl->recv(i, &buddy, 1));
-                EXEC_NOFAIL(cl->recv(i, &time , 1));
-            }else{
-                buddy=partner;
-                time =*time_per_msg;
-            }
-            std::printf("%6d->%6d: %ss (%sB/s) (l=%d)\n",
+            std::printf("%6d->%6d: %ss (%sB/s)\n",
                 i, 
-                buddy,
-                UnitPrefix::SI_prefix(time, 12).get(), 
-                UnitPrefix::IEC_prefix(args->len_msg/time, 10).get(), 
-                i
+                partner,
+                UnitPrefix::SI_prefix(*time_per_msg, 12).get(), 
+                UnitPrefix::IEC_prefix(args->len_msg / *time_per_msg, 10).get()
             );
-            std::fflush(stdout);
+        } else if (i == partner) {
+            EXEC_NOFAIL(kernel(partner, rank(), nullptr, false));
         }
     }
-
     barrier();
 
     return SUCCESS;
@@ -533,7 +514,6 @@ int Benchmark::work_pingpong(const int step,double* const min_time,double* const
     } else {
         EXEC_IFFAIL(work_pingpong_serial(partner, &time_per_msg), fatal("work_pingpong_serial failed."));
     }
-
     stats->accesspattern[partner] = step + 1; // In SION file steps are numbered starting with 1.
     stats->ptimings[partner] = time_per_msg;
 
@@ -649,14 +629,21 @@ int Benchmark::gather_slow_pairs(struct slow_pair* const sp,const int n){
 }
 
 int Benchmark::retest_one_slow_pair(const int from,const int to, double* const time){
+    debug("Benchmark::retest_one_slow_pair(%d, %d)",from, to);
     double tv;
-
     barrier();
 
     if ((from == rank()) || (to == rank())) {
         EXEC_NOFAIL(kernel(from, to, &tv, false));
+    } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels
+        if(cl->nameRef() == VirtualClusterPortals::NAME) {
+            // 2 per kernel, warup kernel and measuring kernel
+            barrier();
+            barrier();
+            barrier();
+            barrier();
+        }
     }
-
     barrier();
 
     if (0 != from) {
diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h
index 2c68f8f..feaabc1 100644
--- a/benchmark/benchmark.h
+++ b/benchmark/benchmark.h
@@ -15,6 +15,7 @@
 #include <random>
 #include "stats.h"
 #include "slow_pairs.h"
+#include "error.h"
 
 #if HAVE_CUDA == 1
     #include "gpu_nvidia.h"
@@ -38,6 +39,7 @@ namespace linktest{
             Benchmark() = default;
             Benchmark(const Benchmark&) = delete;
             Benchmark(Benchmark&&) = delete;
+            ~Benchmark() = default;
             int  main_cmdline();
             int  benchmark(); // Run the main benchmark
             [[nodiscard]] int                    rank()           const;
diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc
index 16070dd..1282211 100644
--- a/benchmark/linktest.cc
+++ b/benchmark/linktest.cc
@@ -16,6 +16,7 @@
 #include "system.h"
 #include <thread>
 #include <memory>
+#include <iostream>
 
 void print_linktest_version()
 {
@@ -90,7 +91,6 @@ int main(int argc, char *argv[]){
         error("Failed to execute benchmark.");
         return ERROR;
     }
-
     /* Finalize Benchmark */
     if (unlikely(bench.cl->finalize())) {
         error("Failed to finalize communication operations.");
@@ -103,5 +103,5 @@ int main(int argc, char *argv[]){
      * until the very end.
      */
 
-     return SUCCESS;
+    return SUCCESS;
 }
diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc
index bcc8fe5..d5a9a29 100644
--- a/benchmark/vcluster.cc
+++ b/benchmark/vcluster.cc
@@ -241,7 +241,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to,
                                        const struct linktest_args* const args,
                                        double* const time){
     int err;
-
+    info("kpingpong from %d to %d", from, to);
     // Warmup
     if(args->num_warmup_msg!=0){ //No use doing this if there are no warm-up messages.
         double dummy;
@@ -250,8 +250,9 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to,
                       &dummy);
         if(unlikely(err))return ERROR;
     }
-
+    info("kpingpong warmup complete");
     err=kpingpong(from,to,buf,args->num_msg,time);
+    info("kpingpong kernel complete");
     if(unlikely(err))return err;
     if(args->check_buffers){
         if(unlikely(buf.check())){
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 6efbe63..656b915 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -11,6 +11,7 @@
 #include "memory.h"
 #include "error.h"
 #include "timing.h"
+#include "stopwatch.h"
 #include <assert.h>
 
 std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
@@ -18,7 +19,7 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
     CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) );
 
     #if defined(DEBUG_PORTALS)
-    info("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d",
+    debug("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d",
         rank(),
         hostname().c_str(),
         physId.phys.nid,
@@ -37,7 +38,7 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
 
 int VirtualClusterPortals::init()
 {
-    set_helper_pointer(VirtualCluster::factory("tcp"));
+    set_helper_pointer(VirtualCluster::factory("mpi"));
     EXEC_NOFAIL(helper_->init());
     
     if(PTL_MAJOR_VERSION != 4 || PTL_MINOR_VERSION != 0) {
@@ -55,171 +56,135 @@ int VirtualClusterPortals::init()
     auto physicalFromRank = getPhysicalFromRank();
     CHECK_RETURNVAL( PtlSetMap(mni_handle, physicalFromRank.size(), physicalFromRank.data()) );
     const auto DEFAULT_OPTIONS = 0;
-    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, PTL_EQ_NONE, PTL_PT_ANY, &pt_index) );
+    CHECK_RETURNVAL( PtlEQAlloc(mni_handle, 1000, &pt_eq_handle) );
+    CHECK_RETURNVAL( PtlPTAlloc(mni_handle, DEFAULT_OPTIONS, pt_eq_handle, PTL_PT_ANY, &pt_index) );
 
     return SUCCESS;
 }
 
 int VirtualClusterPortals::finalize()
 {
-    auto ret = PtlNIFini(mni_handle);
+    CHECK_RETURNVAL( PtlPTFree(mni_handle, pt_index) );
+    CHECK_RETURNVAL( PtlNIFini(mni_handle) );
     PtlFini();
-    if (ret != PTL_OK) {
-        fatal("Portals failed to finalize");
-        return ERROR;
-    }
-    this->helper_->finalize();
+    EXEC_NOFAIL(helper_->finalize());
     return SUCCESS;
 }
 
-ptl_size_t VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf, const int num_msg) {
-    // send_ct.success = 0;
-    // send_ct.failure = 0;
-    // CHECK_RETURNVAL( PtlCTSet(md_ct_handle, send_ct) );
-    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md_ct_handle) );
-    CHECK_RETURNVAL( PtlEQAlloc(mni_handle, num_msg, &md_eq_handle) );
+void  VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) {
     md.start  = buf.p();
     md.length = buf.len();
-    md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
-    md.eq_handle = md_eq_handle;
-    md.ct_handle = md_ct_handle;
-    CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor, aka. ??? TODO
-    CHECK_RETURNVAL( PtlCTGet(md_ct_handle, &send_ct) );
-    #if defined(DEBUG_PORTALS)
-    info("Send: before success %d - failure %d", send_ct.success, send_ct.failure);
-    #endif
-    return send_ct.success;
+    md.options   = PTL_MD_EVENT_CT_ACK;
+    md.eq_handle = PTL_EQ_NONE;   // i.e. don't queue send events
+    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &md.ct_handle) ); 
+    CHECK_RETURNVAL( PtlMDBind(mni_handle, &md, &md_handle) ); // Bind memory descriptor
 }
 
-ptl_size_t VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
-    // recv_ct.success = 0;
-    // recv_ct.failure = 0;
-    // CHECK_RETURNVAL( PtlCTSet(me_ct_handle, recv_ct) );
-    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me_ct_handle) );
+void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
     me.start  = buf.p();
     me.length = buf.len();
     me.uid    = PTL_UID_ANY;
     me.match_id.rank = PTL_RANK_ANY;
-    me.match_bits    = 1; //TDOD why?
-    me.ignore_bits   = 0; //TDOD why?
-    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM;
-    me.ct_handle = me_ct_handle;
-    CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle) ); // Append match entry, aka. allow puts to buf from anyone
-    CHECK_RETURNVAL( PtlCTGet(me_ct_handle, &recv_ct) );
+    me.match_bits    = MATCH_BITS;
+    me.ignore_bits   = IGNORE_BITS;
+    me.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_COMM_DISABLE ); // React to puts, count communication events, do not generate full communication events
+    CHECK_RETURNVAL( PtlCTAlloc(mni_handle, &me.ct_handle));
+    CHECK_RETURNVAL( PtlMEAppend(mni_handle, pt_index, &me, PTL_PRIORITY_LIST, nullptr, &me_handle));
+    ptl_event_t event;
+    CHECK_RETURNVAL( PtlEQWait(pt_eq_handle, &event) ); // TODO allow PTL_EQ_DROPPED 
+    if (! (event.type == PTL_EVENT_LINK && event.ni_fail_type == PTL_NI_OK)) {
+        // TODO Check for overflow/dropped events
+        error("PtlMEAppend failed");
+    }
+}
+
+void VirtualClusterPortals::recvMessages(const int num_msg) {
+    CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) );
+    const ptl_size_t start_count = recv_ct.success;
+
     #if defined(DEBUG_PORTALS)
-    info("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure);
+    debug("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure);
     #endif
-    return recv_ct.success;
-}
 
-void VirtualClusterPortals::recvMessages(const int num_msg, ptl_size_t start_count) {
-    CHECK_RETURNVAL( PtlCTWait(me_ct_handle, start_count + num_msg, &recv_ct) );
+    CHECK_RETURNVAL( PtlCTWait(me.ct_handle, start_count + static_cast<unsigned long>(num_msg), &recv_ct) );
+
     #if defined(DEBUG_PORTALS)
-    info("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure);
+    debug("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure);
     #endif
 }
 
-void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing) {
+void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg) {
     ptl_process_t target;
     target.rank = to;
     const ptl_size_t localOffset = 0;
     const ptl_size_t remoteOffset = 0;
-    const ptl_match_bits_t match_bits = 1; //TDOD why?
     const ptl_hdr_data_t header_data = 0;
-    auto sendTime = walltime();
+
+    #if defined(DEBUG_PORTALS)
+    CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) );
+    const ptl_size_t start_count = send_ct.success;
+    debug("Send: before success %d - failure %d", send_ct.success, send_ct.failure);
+    #endif
+    
     for(auto n = 1; n <= num_msg; n++) {
-        CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, match_bits, remoteOffset, nullptr, header_data) );
+        CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, MATCH_BITS, remoteOffset, nullptr, header_data) );
     }
-    CHECK_RETURNVAL( PtlCTWait(md_ct_handle, start_count + num_msg, &send_ct) );
-    sendTime = walltime() - sendTime;
+
+    // TODO Discuss if checking for send error (and handling) makes sense
     #if defined(DEBUG_PORTALS)
-    info("Send: after success %d - failure %d", send_ct.success, send_ct.failure);
+    CHECK_RETURNVAL( PtlCTWait(md.ct_handle, start_count + static_cast<unsigned long>(num_msg), &send_ct) );
+    debug("Send: after success %d - failure %d", send_ct.success, send_ct.failure);
     #endif
-    if(timing) *timing = sendTime;
 }
 
-int VirtualClusterPortals::testPut() 
-{
-    if(not first) return 100;
-    first = false;
-    ptl_handle_ni_t ni_h = this->mni_handle;
-    ptl_pt_index_t  pt_index = this->pt_index;
-    uint64_t        value;
-    ptl_me_t        value_e;
-    ptl_handle_me_t value_e_handle;
-    ptl_md_t        write_md;
-    ptl_handle_md_t write_md_handle;
-    int             num_procs = this->size();
-    ptl_ct_event_t ctc;
-    int rank = this->rank();
-    int ret;
-    ptl_process_t *procs;
-
-    /* This test only succeeds if we have more than one rank */
-    if (num_procs < 2) return 77;
-
-    info("pt_index=%d",pt_index);
-    assert(pt_index == 0);
-
-    if (1 == rank) {
-        value_e.start  = &value;
-        value_e.length = sizeof(uint64_t);
-        value_e.uid    = PTL_UID_ANY;
-        value_e.match_id.rank = PTL_RANK_ANY;
-        value_e.match_bits    = 1;
-        value_e.ignore_bits   = 0;
-        value_e.options = (PTL_ME_OP_PUT | PTL_ME_EVENT_CT_COMM);
-        CHECK_RETURNVAL(PtlCTAlloc(ni_h, &value_e.ct_handle));
-        CHECK_RETURNVAL(PtlMEAppend(ni_h, 0, &value_e, PTL_PRIORITY_LIST, NULL,
-                               &value_e_handle));
-        value = 0;
-    } else if (0 == rank) {
-        /* set up the landing pad so that I can read others' values */
-        write_md.start     = &value;
-        write_md.length    = sizeof(uint64_t);
-        write_md.options   = PTL_MD_EVENT_CT_SEND | PTL_MD_EVENT_CT_ACK;
-        write_md.eq_handle = PTL_EQ_NONE;   // i.e. don't queue send events
-        CHECK_RETURNVAL(PtlCTAlloc(ni_h, &write_md.ct_handle));
-        CHECK_RETURNVAL(PtlMDBind(ni_h, &write_md, &write_md_handle));
-
-        value = 0xdeadbeef;
-    }
-
-    barrier();
-
-    /* 0 writes to 1 */
-    if (1 == rank) {
-        /* wait for write to arrive */
-        ret = PtlCTWait(value_e.ct_handle, 1, &ctc);
-        assert(ctc.failure == 0);
-        assert(value == 0xdeadbeef);
-    } else if (0 == rank) {
-        /* write to rank 1 */
-        ptl_process_t peer;
-	peer.rank = 1;
-        CHECK_RETURNVAL(PtlPut(write_md_handle, 0, sizeof(uint64_t), PTL_CT_ACK_REQ, peer,
-                               pt_index, 1, 0, NULL, 0));
-        CHECK_RETURNVAL(PtlCTWait(write_md.ct_handle, 2, &ctc));
-        assert(ctc.failure == 0);
-    }
-
-    barrier();
+void VirtualClusterPortals::releaseRecvStructs() {
+    CHECK_RETURNVAL( PtlMEUnlink(me_handle) );
+    CHECK_RETURNVAL( PtlCTFree(me.ct_handle) );
+};
 
-    info("testPut succeeded");
-    return 0;
-}
+void VirtualClusterPortals::releaseSendStructs() {
+    CHECK_RETURNVAL( PtlMDRelease(md_handle) );
+    CHECK_RETURNVAL( PtlCTFree(md.ct_handle) );
+};
 
 int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer& buf, const int num_msg, double* const timing)
 {
-    ptl_size_t success_count;
-    if (rank() == from) success_count = prepareSendStructs(buf, num_msg);
-    if (rank() == to)   success_count = prepareRecvStructs(buf);
+    bool isSender = rank() == from;
+    bool isReceiver = rank() == to;
+
+    auto watch = Stopwatchfactory::getRankWatch(rank(), from);
+    debug("kpingpong: prepareSendStructs");
+    prepareSendStructs(buf);
+    debug("kpingpong: prepareRecvStructs");
+    prepareRecvStructs(buf);
     barrier();
-    if (rank() == from) sendMessages(to, buf, num_msg, success_count, timing);
-    if (rank() == to)   recvMessages(num_msg, success_count);
 
-    info("kpingpong ended");
+    if(isSender) {
+        watch->start();
+        debug("kpingpong: send");
+        sendMessages(to, buf, num_msg);
+        debug("kpingpong: recv");
+        recvMessages(num_msg);
+        watch->stop();
+    }
+    if(isReceiver) {
+        watch->start();
+        debug("kpingpong: recv");
+        recvMessages(num_msg);
+        debug("kpingpong: send");
+        sendMessages(from, buf, num_msg);
+        watch->stop();
+    }
 
+    barrier();
+    debug("kpingpong: releaseRecvStructs");
+    releaseRecvStructs();
+    debug("kpingpong: releaseSendStructs");
+    releaseSendStructs();
+    if(timing) { 
+        *timing = watch->getDuration().count() / (2*num_msg);
+    }
+    
     return SUCCESS;
 
 }
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 7fb6309..abc02c5 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -38,7 +38,7 @@ public:
                                 MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
                                 const int num_msg, double* const timing,
                                 const bool doBarrier) override {
-        throw("Not Implemented");
+        throw("Not Implemented"); // Use PTL_ME_MANAGE_LOCAL
     };
     int kUniDirLimitedMultiBuf(const int from,const int to,
                                        MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
@@ -64,32 +64,31 @@ private:
     /** @brief PT Event Queue Handle  */
     ptl_handle_eq_t pt_eq_handle;
 
+    const ptl_match_bits_t MATCH_BITS = 1; // TODO when/how to use these?
+    const ptl_match_bits_t IGNORE_BITS = ~0; // ignore all bits
+
     // Sender only
     /** @brief Memory Descriptor (MD) */
     ptl_md_t md;
     /** @brief MD Handle */
     ptl_handle_md_t md_handle;
-    /** @brief MD Event Counter Handle */
-    ptl_handle_ct_t md_ct_handle;
-    /** @brief MD Event Queue Handle */
-    ptl_handle_eq_t md_eq_handle;
     /** @brief Send Counter */
     ptl_ct_event_t send_ct;
-    ptl_size_t prepareSendStructs(const MemoryBuffer& buf, const int num_msg);
+    void prepareSendStructs(const MemoryBuffer& buf);
+    void releaseSendStructs();
 
     // Receiver only
     /** @brief Match List Entry (ME) */
     ptl_me_t  me;
     /** @brief ME Handle */
     ptl_handle_me_t me_handle;
-    /** @brief ME Event Counter Handle */
-    ptl_handle_ct_t me_ct_handle;
     /** @brief Receive Counter */
     ptl_ct_event_t recv_ct;
-    ptl_size_t prepareRecvStructs(const MemoryBuffer& buf);
+    void prepareRecvStructs(const MemoryBuffer& buf);
+    void releaseRecvStructs();
 
-    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg, ptl_size_t start_count, double* const timing);
-    void recvMessages(const int num_msg, ptl_size_t start_count);
+    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg);
+    void recvMessages(const int num_msg);
 
     bool first = true;
     int testPut();
diff --git a/run.sh b/run.sh
index a2e8029..bb7818c 100755
--- a/run.sh
+++ b/run.sh
@@ -1,21 +1,18 @@
 #!/bin/sh
 #SBATCH --partition dp-bxi
 #SBATCH --account deepsea
-#SBATCH --nodes 2
+#SBATCH --nodes 4
 #SBATCH --tasks-per-node 1
 
 ml GCC ParaStationMPI SIONlib
 
 #export PORTALS4_DEBUG=3
 
-srun \
---label \
---ntasks 2 \
-	install/linktest \
+srun install/linktest \
 	--mode portals \
-	--num-warmup-messages 0 \
-	--num-messages 1 \
+	--num-warmup-messages 3 \
+	--num-messages 100 \
 	--size-messages 16777216 \
 	--serial-tests \
-	--num-slowest 0 \
+	--num-slowest 1 \
 	--no-sion-file;
\ No newline at end of file
-- 
GitLab


From 7255cb00d4400d1005177217e8ee1733a8b81594 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 21 Jun 2023 11:57:50 +0200
Subject: [PATCH 24/47] Added report level control

---
 benchmark/error.cc | 20 ++++++++++++++++----
 benchmark/error.h  | 11 +++++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/benchmark/error.cc b/benchmark/error.cc
index aaaae93..8915d20 100644
--- a/benchmark/error.cc
+++ b/benchmark/error.cc
@@ -28,54 +28,66 @@ static void report(const char* prefix, const char* file,
 
 void linktest_fatal(const char* file, const char* func, long line, const char* fmt, ...)
 {
+    #if REPORT_LEVEL >= REPORT_FATAL
     va_list vl;
 
     va_start(vl, fmt);
     report("fatal: ", file, func, line, fmt, vl);
     va_end(vl);
 
-    std::fflush(NULL);
+    std::fflush(nullptr);
+    #endif
     std::terminate();
 }
 
 void linktest_error(const char* file, const char* func, long line, const char* fmt, ...)
 {
+    #if REPORT_LEVEL >= REPORT_ERROR
     va_list vl;
 
     va_start(vl, fmt);
     report("error: ", file, func, line, fmt, vl);
     va_end(vl);
 
-    std::fflush(NULL);
+    std::fflush(nullptr);
+    #endif
 }
 
 void linktest_warn(const char* file, const char* func, long line, const char* fmt, ...)
 {
+    #if REPORT_LEVEL >= REPORT_WARN
     va_list vl;
 
     va_start(vl, fmt);
     report("warning: ", file, func, line, fmt, vl);
     va_end(vl);
 
-    std::fflush(NULL);
+    std::fflush(nullptr);
+    #endif
 }
 
 void linktest_info(const char* file, const char* func, long line, const char* fmt, ...)
 {
+    #if REPORT_LEVEL >= REPORT_INFO
     va_list vl;
 
     va_start(vl, fmt);
     report("info: ", file, func, line, fmt, vl);
     va_end(vl);
+
+    std::fflush(nullptr);
+    #endif
 }
 
 void linktest_debug(const char* file, const char* func, long line, const char* fmt, ...)
 {
+    #if REPORT_LEVEL >= REPORT_DEBUG
     va_list vl;
 
     va_start(vl, fmt);
     report("debug: ", file, func, line, fmt, vl);
     va_end(vl);
 
-    std::fflush(NULL);
+    std::fflush(nullptr);
+    #endif
 }
\ No newline at end of file
diff --git a/benchmark/error.h b/benchmark/error.h
index 92160ac..2807e34 100644
--- a/benchmark/error.h
+++ b/benchmark/error.h
@@ -9,6 +9,17 @@
 #ifndef LINKTEST_ERROR_H
 #define LINKTEST_ERROR_H
 
+#define REPORT_NONE 0
+#define REPORT_FATAL 1
+#define REPORT_ERROR 2
+#define REPORT_WARN 3
+#define REPORT_INFO 4
+#define REPORT_DEBUG 5
+
+#ifndef REPORT_LEVEL
+#define REPORT_LEVEL REPORT_WARN
+#endif
+
 constexpr int SUCCESS = 0;
 constexpr int ERROR   = 1;
 
-- 
GitLab


From 45bbc3726fc6baed77fd07560cdbf97b5c501728 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 22 Jun 2023 13:16:58 +0200
Subject: [PATCH 25/47] Added DEBUG_BARRIER

---
 benchmark/Makefile           | 3 ++-
 benchmark/vcluster.cc        | 4 +---
 benchmark/vcluster_helper.cc | 7 ++++++-
 benchmark/vcluster_mpi.cc    | 7 ++++++-
 4 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index df3b16d..8e8e07b 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -29,7 +29,8 @@ GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
 CC       = mpicxx
 CFLAGS   = -std=c++17 -Wall -g -rdynamic
-CPPFLAGS =	-D_GNU_SOURCE \
+CPPFLAGS =	-DREPORT_LEVEL=5 \
+			-D_GNU_SOURCE \
 			-DLINKTEST_LINUX=1 \
 			-DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \
 			-DGIT_HASH=\"$(GIT_HASH)\" 
diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc
index d5a9a29..b334bde 100644
--- a/benchmark/vcluster.cc
+++ b/benchmark/vcluster.cc
@@ -241,7 +241,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to,
                                        const struct linktest_args* const args,
                                        double* const time){
     int err;
-    info("kpingpong from %d to %d", from, to);
+    debug("VirtualCluster::linktest_kpingpong from %d to %d", from, to);
     // Warmup
     if(args->num_warmup_msg!=0){ //No use doing this if there are no warm-up messages.
         double dummy;
@@ -250,9 +250,7 @@ int VirtualCluster::linktest_kpingpong(const int from, const int to,
                       &dummy);
         if(unlikely(err))return ERROR;
     }
-    info("kpingpong warmup complete");
     err=kpingpong(from,to,buf,args->num_msg,time);
-    info("kpingpong kernel complete");
     if(unlikely(err))return err;
     if(args->check_buffers){
         if(unlikely(buf.check())){
diff --git a/benchmark/vcluster_helper.cc b/benchmark/vcluster_helper.cc
index c284fa9..28bd22e 100644
--- a/benchmark/vcluster_helper.cc
+++ b/benchmark/vcluster_helper.cc
@@ -32,11 +32,16 @@ std::string determineHostname(){
 int determineCPUID(){
     return (std::int32_t)sched_getcpu();
 }
-
+#ifdef DEBUG_BARRIER
+static int barrierCounter=1;
+#endif
 int vcluster_helper_barrier(VirtualCluster* cl){
     /* We do not actually send data but we still need to have
      * a non-NULL buffer pointer
      */
+    #ifdef DEBUG_BARRIER
+    info("vcluster_helper_barrier %d", barrierCounter++);
+    #endif
     char sp = 0;
     MemoryBuffer buf = MemoryBuffer::wrap<char>(&sp, 0, AddressSpace::ID::Local);
 
diff --git a/benchmark/vcluster_mpi.cc b/benchmark/vcluster_mpi.cc
index b481925..e7efc78 100644
--- a/benchmark/vcluster_mpi.cc
+++ b/benchmark/vcluster_mpi.cc
@@ -133,9 +133,14 @@ int VirtualClusterMPI::recv(int src, MemoryBuffer& buf)
                   src, 0, world_,
                   MPI_STATUS_IGNORE));
 }
-
+#ifdef DEBUG_BARRIER
+static int counter = 1;
+#endif
 int VirtualClusterMPI::barrier()
 {
+    #ifdef DEBUG_BARRIER
+    debug("VirtualClusterMPI::barrier %d",counter++);
+    #endif
     return _mpi_(MPI_Barrier(world_));
 }
 
-- 
GitLab


From 8fe76cceaa6266c4dd477c95268177d6814dd6f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 22 Jun 2023 14:36:56 +0200
Subject: [PATCH 26/47] Fix some weird indentations

---
 benchmark/cmdline.cc  | 2 +-
 benchmark/linktest.cc | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc
index 92063a0..9b51db3 100644
--- a/benchmark/cmdline.cc
+++ b/benchmark/cmdline.cc
@@ -467,7 +467,7 @@ static bool special_cmdline_args(const std::string& prog, const std::vector<std:
         }
     }
 
-        return false;
+    return false;
 }
 
 static bool arg_match(const std::string& arg, const Argument& argdef){
diff --git a/benchmark/linktest.cc b/benchmark/linktest.cc
index 1282211..1968a08 100644
--- a/benchmark/linktest.cc
+++ b/benchmark/linktest.cc
@@ -20,10 +20,10 @@
 
 void print_linktest_version()
 {
-        std::fprintf(stderr, "LinkTest (version %d.%d.%d)\n",
-                VERSION_MAJOR,
-                VERSION_MINOR,
-                VERSION_PATCH);
+    std::fprintf(stderr, "LinkTest (version %d.%d.%d)\n",
+        VERSION_MAJOR,
+        VERSION_MINOR,
+        VERSION_PATCH);
 }
 
 /* Errors are propagated up the backtrace as far as possible until
-- 
GitLab


From af388a956fa061e2fbd2f93f93cc1b559414a6a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 27 Jun 2023 13:53:05 +0200
Subject: [PATCH 27/47] Turned off debug messages

---
 benchmark/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index 8e8e07b..df3b16d 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -29,8 +29,7 @@ GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
 CC       = mpicxx
 CFLAGS   = -std=c++17 -Wall -g -rdynamic
-CPPFLAGS =	-DREPORT_LEVEL=5 \
-			-D_GNU_SOURCE \
+CPPFLAGS =	-D_GNU_SOURCE \
 			-DLINKTEST_LINUX=1 \
 			-DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \
 			-DGIT_HASH=\"$(GIT_HASH)\" 
-- 
GitLab


From e0da49d0c378328fff4c5e6b997bad736b9e3dcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 7 Jul 2023 16:59:00 +0200
Subject: [PATCH 28/47] Implemented kUniDir (done); implemented kbipingpong (WIP)

---
 benchmark/benchmark.cc        |  2 +-
 benchmark/vcluster_portals.cc | 90 +++++++++++++++++++++++++++++++++++
 benchmark/vcluster_portals.h  |  9 ++--
 3 files changed, 94 insertions(+), 7 deletions(-)

diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index acaa6e3..182c1a5 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -637,7 +637,7 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
         EXEC_NOFAIL(kernel(from, to, &tv, false));
     } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels
         if(cl->nameRef() == VirtualClusterPortals::NAME) {
-            // 2 per kernel, warup kernel and measuring kernel
+            // 2 per kernel, warmup kernel and measuring kernel
             barrier();
             barrier();
             barrier();
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 656b915..7ce01fe 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -185,6 +185,96 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
         *timing = watch->getDuration().count() / (2*num_msg);
     }
     
+    return SUCCESS;
+}
+
+int VirtualClusterPortals::kUniDir(
+    const int from, const int to,
+    MemoryBuffer& buf1, MemoryBuffer& buf2,
+    const int num_msg, double* const timing,
+    const bool doBarrier)
+{
+    bool isSender = rank() == from;
+    bool isReceiver = rank() == to;
+
+    auto watch = Stopwatchfactory::getRankWatch(rank(), from);
+    debug("kUnidir: prepareSendStructs");
+    prepareSendStructs(buf1);
+    debug("kUnidir: prepareRecvStructs");
+    prepareRecvStructs(buf2);
+    barrier();
+
+    if(isSender) {
+        watch->start();
+        debug("kUnidir: send");
+        sendMessages(to, buf1, num_msg);
+        debug("kUnidir: recv");
+        recvMessages(1);
+        watch->stop();
+    }
+    if(isReceiver) {
+        watch->start();
+        debug("kUnidir: recv");
+        recvMessages(num_msg);
+        debug("kUnidir: send");
+        sendMessages(from, buf1, 1);
+        watch->stop();
+    }
+
+    barrier();
+    debug("kUnidir: releaseRecvStructs");
+    releaseRecvStructs();
+    debug("kUnidir: releaseSendStructs");
+    releaseSendStructs();
+    if(timing) { 
+        *timing = watch->getDuration().count() / num_msg;
+    }
+    
+    return SUCCESS;
+
+}
+
+int VirtualClusterPortals::kbipingpong(
+    const int from, const int to,
+    MemoryBuffer& buf1, MemoryBuffer& buf2,
+    const int num_msg, double* const timing)
+{
+    bool isSender = rank() == from;
+    bool isReceiver = rank() == to;
+
+    auto watch = Stopwatchfactory::getRankWatch(rank(), from);
+    debug("kbipingpong: prepareSendStructs");
+    prepareSendStructs(buf1);
+    debug("kbipingpong: prepareRecvStructs");
+    prepareRecvStructs(buf2);
+    barrier();
+
+    if(isSender) {
+        watch->start();
+        debug("kbipingpong: send");
+        sendMessages(to, buf1, num_msg);
+        debug("kbipingpong: recv");
+        recvMessages(num_msg);
+        watch->stop();
+    }
+    if(isReceiver) {
+        watch->start();
+        debug("kbipingpong: recv");
+        sendMessages(from, buf1, num_msg);
+        debug("kbipingpong: send");
+        recvMessages(num_msg);
+        watch->stop();
+    }
+
+    barrier();
+    debug("kbipingpong: releaseRecvStructs");
+    releaseRecvStructs();
+    debug("kbipingpong: releaseSendStructs");
+    releaseSendStructs();
+    if(timing) { 
+        *timing = watch->getDuration().count() / (2.0 * num_msg);
+    }
+    
     return SUCCESS;
 
 }
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index abc02c5..f58bbb2 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -31,9 +31,8 @@ public:
     int kUniDir(const int from, const int to,
                         MemoryBuffer& buf1, MemoryBuffer& buf2,
                         const int num_msg, double* const timing,
-                        const bool doBarrier) override {
-        throw("Not Implemented");
-    };
+                        const bool doBarrier) override;
+                        
     int kUniDirMultiBuf(const int from,const int to,
                                 MemoryBufferMulti& buf_multi, MemoryBuffer& buf2,
                                 const int num_msg, double* const timing,
@@ -49,9 +48,7 @@ public:
 
     int kbipingpong(const int from, const int to,
                             MemoryBuffer& buf1, MemoryBuffer& buf2,
-                            const int num_msg, double* const timing) override {
-        throw("Not Implemented");
-    };
+                            const int num_msg, double* const timing) override;
 
 private:
     // matching (send/recv) Network Interface (ni)
-- 
GitLab


From c4179376c7bf779380e86d73eec3c53512467ea6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 11 Jul 2023 15:14:55 +0200
Subject: [PATCH 29/47] Fixed synchronization missing in pingpong_serial

---
 benchmark/benchmark.cc        | 27 ++++++++++++++++++++++++++-
 benchmark/output_sion.cc      |  5 ++++-
 benchmark/vcluster_helper.cc  |  2 +-
 benchmark/vcluster_portals.cc | 25 ++++++-------------------
 4 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index 182c1a5..8f9d85f 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -100,7 +100,7 @@ int Benchmark::kernel(const int from, const int to, double* const time_per_msg,
         return cl->linktest_kbipingpong(from, to, *buf1, *buf2, args, time_per_msg);
     } else if (args->do_unidir) {
         if (args->use_multi_buf) {
-            auto M=((args->num_msg>args->num_warmup_msg)?args->num_msg:args->num_warmup_msg);
+            auto M = std::max(args->num_msg, args->num_warmup_msg);
             if(args->num_multi_buf==M){
                 return cl->linktest_kUniDirMultiBuf(from, to, *buf_multi, *buf2, args, time_per_msg, doBarrier);
             }else{
@@ -398,6 +398,7 @@ int Benchmark::printIterationResults(const int iter){
         }
     }
 
+    debug("Benchmark::printIterationResults->barrier()");
     EXEC_NOFAIL(cl->barrier());
 
     return SUCCESS;
@@ -438,6 +439,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
     auto from = (sign < 0) ? partner : rank();
     auto to   = (sign < 0) ? rank() : partner;
 
+    debug("Benchmark::work_pingpong_parallel 1->barrier()");
     barrier();
 #ifdef DEBUG_KERNEL_SYNCHRONIZATION
     std::unique_ptr<StopwatchI> rootWatch = Stopwatchfactory::getRootWatch(rank());
@@ -449,6 +451,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
     rootWatch->stop();
     tBeforeBarrier=rootWatch->getDuration();
 #endif
+    debug("Benchmark::work_pingpong_parallel 2->barrier()");
     barrier();
 #ifdef DEBUG_KERNEL_SYNCHRONIZATION
     rootWatch->stop();
@@ -462,6 +465,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
     rootWatch->stop();
     tBeforeBarrier=rootWatch->getDuration();
 #endif
+    debug("Benchmark::work_pingpong_parallel 3->barrier()");
     barrier();
 #ifdef DEBUG_KERNEL_SYNCHRONIZATION
     rootWatch->stop();
@@ -479,6 +483,7 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
  */
 int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){
     for (auto i = 0; i < size(); ++i) {
+        debug("Benchmark::work_pingpong_serial 1->barrier()");
         barrier();
         if (i == rank()) {
             EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false));
@@ -490,8 +495,21 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms
             );
         } else if (i == partner) {
             EXEC_NOFAIL(kernel(partner, rank(), nullptr, false));
+        } else {
+            if(cl->nameRef() == VirtualClusterPortals::NAME) {
+                // 2 per kernel, warmup kernel and measuring kernel
+                debug("Benchmark::work_pingpong_serial 2->barrier()");
+                barrier();
+                debug("Benchmark::work_pingpong_serial 3->barrier()");
+                barrier();
+                debug("Benchmark::work_pingpong_serial 4->barrier()");
+                barrier();
+                debug("Benchmark::work_pingpong_serial 5->barrier()");
+                barrier();
+            }
         }
     }
+    debug("Benchmark::work_pingpong_serial 6->barrier()");
     barrier();
 
     return SUCCESS;
@@ -549,6 +567,7 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
     double runningSumBandwidth=0.0;
 
     /* Execute all-to-all test if desired */
+    debug("Benchmark::run_iteration 1->barrier()");
     barrier();
     if (args->do_alltoall){
         EXEC_NOFAIL(work_alltoall());
@@ -570,6 +589,7 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
             if(!args->do_serial) std::printf(fmt[!!args->do_serial], nDigits, step+1); //Start print out early so user knows step has started
             std::fflush(stdout);
             const double stepStartTime = walltime();
+            debug("Benchmark::run_iteration 2->barrier()");
             barrier();
             EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth));
             *tWork += (walltime() - stepStartTime);
@@ -579,12 +599,14 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
         }
     }else{
         for (unsigned int step: stepPermutation) {
+            debug("Benchmark::run_iteration 3->barrier()");
             barrier();
             EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth));
         }
     }
 
     /* Execute all-to-all test if desired */
+    debug("Benchmark::run_iteration 4->barrier()");
     barrier();
     if (args->do_alltoall){
         EXEC_NOFAIL(work_alltoall());
@@ -638,12 +660,14 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
     } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels
         if(cl->nameRef() == VirtualClusterPortals::NAME) {
             // 2 per kernel, warmup kernel and measuring kernel
+            debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()");
             barrier();
             barrier();
             barrier();
             barrier();
         }
     }
+    debug("Benchmark::retest_one_slow_pair 5->barrier()");
     barrier();
 
     if (0 != from) {
@@ -969,6 +993,7 @@ int Benchmark::benchmark(){
     prepareBuffers();
 
     rootWatch->start();
+    debug("Benchmark::benchmark->barrier()");
     cl->barrier();
 
     rootWatch->stop();
diff --git a/benchmark/output_sion.cc b/benchmark/output_sion.cc
index f671e1f..0748a50 100644
--- a/benchmark/output_sion.cc
+++ b/benchmark/output_sion.cc
@@ -199,7 +199,7 @@ static int linktest_output_sion_funnelled_root(VirtualCluster* cl,
             return ERROR;
         }
     }
-
+    debug("linktest_output_sion_funnelled_root->barrier");
     EXEC_NOFAIL(cl->barrier());
 
     printTimingIfRoot(cl->rank(), "[sioncollect]", std::chrono::duration<double>(walltime() - begin));
@@ -326,11 +326,13 @@ int linktest_output_sion_parallel(VirtualCluster* cl,
     };
 
     auto sion_api = create_and_register_api(args->virtual_cluster_implementation);
+    debug("linktest_output_sion_parallel 1->barrier");
     cl->barrier();
 
     char* buffer;
     long long sz;
     EXEC_IFFAIL(linktest_output_sion_collect_local_data(cl, args, statsVec, &buffer, &sz), error("linktest_output_sion_collect_local_data failed."); return ERROR);
+    debug("linktest_output_sion_parallel 2->barrier");
     cl->barrier();
 
     auto filename = args->output.c_str();
@@ -362,6 +364,7 @@ int linktest_output_sion_parallel(VirtualCluster* cl,
         &fp, //fileptr
         &newfname //newfname
     );
+    debug("linktest_output_sion_parallel 3->barrier");
     cl->barrier();
     rootWatch->stop();
     printTiming("[sionopen]");
diff --git a/benchmark/vcluster_helper.cc b/benchmark/vcluster_helper.cc
index 28bd22e..b977cf1 100644
--- a/benchmark/vcluster_helper.cc
+++ b/benchmark/vcluster_helper.cc
@@ -40,7 +40,7 @@ int vcluster_helper_barrier(VirtualCluster* cl){
      * a non-NULL buffer pointer
      */
     #ifdef DEBUG_BARRIER
-    info("vcluster_helper_barrier %d", barrierCounter++);
+    debug("vcluster_helper_barrier %d", barrierCounter++);
     #endif
     char sp = 0;
     MemoryBuffer buf = MemoryBuffer::wrap<char>(&sp, 0, AddressSpace::ID::Local);
diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 7ce01fe..0f6cdd5 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -239,9 +239,6 @@ int VirtualClusterPortals::kbipingpong(
     MemoryBuffer& buf1, MemoryBuffer& buf2,
     const int num_msg, double* const timing)
 {
-    bool isSender = rank() == from;
-    bool isReceiver = rank() == to;
-
     auto watch = Stopwatchfactory::getRankWatch(rank(), from);
     debug("kbipingpong: prepareSendStructs");
     prepareSendStructs(buf1);
@@ -249,22 +246,12 @@ int VirtualClusterPortals::kbipingpong(
     prepareRecvStructs(buf2);
     barrier();
 
-    if(isSender) {
-        watch->start();
-        debug("kbipingpong: send");
-        sendMessages(to, buf1, num_msg);
-        debug("kbipingpong: recv");
-        recvMessages(num_msg);
-        watch->stop();
-    }
-    if(isReceiver) {
-        watch->start();
-        debug("kbipingpong: recv");
-        sendMessages(from, buf1, num_msg);
-        debug("kbipingpong: send");
-        recvMessages(num_msg);
-        watch->stop();
-    }
+    watch->start();
+    debug("kbipingpong: send");
+    sendMessages(from, buf1, num_msg);
+    debug("kbipingpong: recv");
+    recvMessages(num_msg);
+    watch->stop();
 
     barrier();
     debug("kbipingpong: releaseRecvStructs");
-- 
GitLab


From 52f38e1d7a0a0c9ead6b96eb92adcd05e37602b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 14 Jul 2023 00:35:18 +0200
Subject: [PATCH 30/47] Fixed initial counter value for bidir Moved all debugs
 inside function calls Removed DEBUG_PORTALS

---
 benchmark/vcluster_portals.cc | 136 +++++++++++++++++-----------------
 benchmark/vcluster_portals.h  |  20 +++--
 run.sh                        |  20 +++++
 3 files changed, 102 insertions(+), 74 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index 0f6cdd5..d5f494f 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -18,13 +18,11 @@ std::vector<ptl_process_t> VirtualClusterPortals::getPhysicalFromRank() {
     ptl_process_t physId;
     CHECK_RETURNVAL( PtlGetPhysId(mni_handle, &physId) );
 
-    #if defined(DEBUG_PORTALS)
     debug("PMI Rank=%d, Hostname=%10s, Portals NID=%d PID=%d",
         rank(),
         hostname().c_str(),
         physId.phys.nid,
         physId.phys.pid);
-    #endif
     
     std::vector<ptl_process_t> physicalFromRank(size());
     gather(0, physicalFromRank.data(), &physId, 1);
@@ -64,6 +62,7 @@ int VirtualClusterPortals::init()
 
 int VirtualClusterPortals::finalize()
 {
+    debug("VirtualClusterPortals::finalize()");
     CHECK_RETURNVAL( PtlPTFree(mni_handle, pt_index) );
     CHECK_RETURNVAL( PtlNIFini(mni_handle) );
     PtlFini();
@@ -72,6 +71,7 @@ int VirtualClusterPortals::finalize()
 }
 
 void  VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) {
+    debug("VirtualClusterPortals::prepareSendStructs(%p)", buf.p());
     md.start  = buf.p();
     md.length = buf.len();
     md.options   = PTL_MD_EVENT_CT_ACK;
@@ -81,6 +81,7 @@ void  VirtualClusterPortals::prepareSendStructs(const MemoryBuffer& buf) {
 }
 
 void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
+    debug("VirtualClusterPortals::prepareRecvStructs(%p)", buf.p());
     me.start  = buf.p();
     me.length = buf.len();
     me.uid    = PTL_UID_ANY;
@@ -98,51 +99,53 @@ void VirtualClusterPortals::prepareRecvStructs(const MemoryBuffer& buf) {
     }
 }
 
-void VirtualClusterPortals::recvMessages(const int num_msg) {
-    CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) );
-    const ptl_size_t start_count = recv_ct.success;
-
-    #if defined(DEBUG_PORTALS)
-    debug("Recv: before success %d - failure %d", recv_ct.success, recv_ct.failure);
-    #endif
+ptl_size_t VirtualClusterPortals::getSendCounter() {
+    debug("VirtualClusterPortals::getSendCounter()");
+    CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) );
+    debug("Send (MD): success %lu - failure %lu", static_cast<unsigned long>(send_ct.success), static_cast<unsigned long>(send_ct.failure));
+    if(send_ct.failure > 0) {
+        error("Failed operation on MD");
+    }
+    return send_ct.success;
+}
 
-    CHECK_RETURNVAL( PtlCTWait(me.ct_handle, start_count + static_cast<unsigned long>(num_msg), &recv_ct) );
+ptl_size_t VirtualClusterPortals::getRecvCounter() {
+    debug("VirtualClusterPortals::getRecvCounter()");
+    CHECK_RETURNVAL( PtlCTGet(me.ct_handle, &recv_ct) );
+    debug("Recv (ME): success %lu - failure %lu", static_cast<unsigned long>(recv_ct.success), static_cast<unsigned long>(recv_ct.failure));
+    if(recv_ct.failure > 0) {
+        error("Failed operation on ME");
+    }
+    return recv_ct.success;
+}
 
-    #if defined(DEBUG_PORTALS)
-    debug("Recv: after success %d - failure %d", recv_ct.success, recv_ct.failure);
-    #endif
+void VirtualClusterPortals::recvMessages(const unsigned long num_msg, const unsigned long counter_start) {
+    debug("VirtualClusterPortals::recvMessages(%lu, %lu)", num_msg, counter_start);
+    CHECK_RETURNVAL( PtlCTWait(me.ct_handle, counter_start + num_msg, &recv_ct) );
 }
 
-void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const int num_msg) {
-    ptl_process_t target;
-    target.rank = to;
+void VirtualClusterPortals::sendMessages(const int to, MemoryBuffer& buf, const unsigned long num_msg, const unsigned long counter_start) {
+    debug("VirtualClusterPortals::sendMessages(%d, %p, %lu, %lu)", to, buf.p(), num_msg, counter_start);
     const ptl_size_t localOffset = 0;
     const ptl_size_t remoteOffset = 0;
     const ptl_hdr_data_t header_data = 0;
-
-    #if defined(DEBUG_PORTALS)
-    CHECK_RETURNVAL( PtlCTGet(md.ct_handle, &send_ct) );
-    const ptl_size_t start_count = send_ct.success;
-    debug("Send: before success %d - failure %d", send_ct.success, send_ct.failure);
-    #endif
+    ptl_process_t target;
+    target.rank = to;
     
-    for(auto n = 1; n <= num_msg; n++) {
+    for(unsigned long n = 1; n <= num_msg; n++) {
         CHECK_RETURNVAL( PtlPut(md_handle, localOffset, buf.len(), PTL_CT_ACK_REQ, target, pt_index, MATCH_BITS, remoteOffset, nullptr, header_data) );
     }
-
-    // TODO Discuss if checking for send error (and handling) makes sense
-    #if defined(DEBUG_PORTALS)
-    CHECK_RETURNVAL( PtlCTWait(md.ct_handle, start_count + static_cast<unsigned long>(num_msg), &send_ct) );
-    debug("Send: after success %d - failure %d", send_ct.success, send_ct.failure);
-    #endif
+    CHECK_RETURNVAL( PtlCTWait(md.ct_handle, counter_start + num_msg, &send_ct) );
 }
 
 void VirtualClusterPortals::releaseRecvStructs() {
+    debug("releaseRecvStructs()");
     CHECK_RETURNVAL( PtlMEUnlink(me_handle) );
     CHECK_RETURNVAL( PtlCTFree(me.ct_handle) );
 };
 
 void VirtualClusterPortals::releaseSendStructs() {
+    debug("releaseSendStructs()");
     CHECK_RETURNVAL( PtlMDRelease(md_handle) );
     CHECK_RETURNVAL( PtlCTFree(md.ct_handle) );
 };
@@ -151,36 +154,33 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
 {
     bool isSender = rank() == from;
     bool isReceiver = rank() == to;
-
     auto watch = Stopwatchfactory::getRankWatch(rank(), from);
-    debug("kpingpong: prepareSendStructs");
     prepareSendStructs(buf);
-    debug("kpingpong: prepareRecvStructs");
     prepareRecvStructs(buf);
+    auto sendCounterBeforeKernel = getSendCounter();
+    auto recvCounterBeforeKernel = getRecvCounter();
+    
     barrier();
 
     if(isSender) {
         watch->start();
-        debug("kpingpong: send");
-        sendMessages(to, buf, num_msg);
-        debug("kpingpong: recv");
-        recvMessages(num_msg);
+        sendMessages(to, buf, num_msg, sendCounterBeforeKernel);
+        recvMessages(num_msg, recvCounterBeforeKernel);
         watch->stop();
     }
     if(isReceiver) {
         watch->start();
-        debug("kpingpong: recv");
-        recvMessages(num_msg);
-        debug("kpingpong: send");
-        sendMessages(from, buf, num_msg);
+        recvMessages(num_msg, recvCounterBeforeKernel);
+        sendMessages(from, buf, num_msg, sendCounterBeforeKernel);
         watch->stop();
     }
 
     barrier();
-    debug("kpingpong: releaseRecvStructs");
-    releaseRecvStructs();
-    debug("kpingpong: releaseSendStructs");
+
+    getSendCounter();
+    getRecvCounter();
     releaseSendStructs();
+    releaseRecvStructs();
     if(timing) { 
         *timing = watch->getDuration().count() / (2*num_msg);
     }
@@ -196,36 +196,33 @@ int VirtualClusterPortals::kUniDir(
 {
     bool isSender = rank() == from;
     bool isReceiver = rank() == to;
-
     auto watch = Stopwatchfactory::getRankWatch(rank(), from);
-    debug("kUnidir: prepareSendStructs");
     prepareSendStructs(buf1);
-    debug("kUnidir: prepareRecvStructs");
     prepareRecvStructs(buf2);
+    auto sendCounterBeforeKernel = getSendCounter();
+    auto recvCounterBeforeKernel = getRecvCounter();
+
     barrier();
 
     if(isSender) {
         watch->start();
-        debug("kUnidir: send");
-        sendMessages(to, buf1, num_msg);
-        debug("kUnidir: recv");
-        recvMessages(1);
+        sendMessages(to, buf1, num_msg, sendCounterBeforeKernel);
+        recvMessages(1, recvCounterBeforeKernel);
         watch->stop();
     }
     if(isReceiver) {
         watch->start();
-        debug("kUnidir: recv");
-        recvMessages(num_msg);
-        debug("kUnidir: send");
-        sendMessages(from, buf1, 1);
+        recvMessages(num_msg, recvCounterBeforeKernel);
+        sendMessages(from, buf1, 1, sendCounterBeforeKernel);
         watch->stop();
     }
 
     barrier();
-    debug("kUnidir: releaseRecvStructs");
-    releaseRecvStructs();
-    debug("kUnidir: releaseSendStructs");
+
+    getSendCounter();
+    getRecvCounter();
     releaseSendStructs();
+    releaseRecvStructs();
     if(timing) { 
         *timing = watch->getDuration().count() / num_msg;
     }
@@ -239,25 +236,32 @@ int VirtualClusterPortals::kbipingpong(
     MemoryBuffer& buf1, MemoryBuffer& buf2,
     const int num_msg, double* const timing)
 {
+    int partner;
+    if(rank() == from) {
+        partner = to;
+    }
+    if(rank() == to) {
+        partner = from;
+    }
     auto watch = Stopwatchfactory::getRankWatch(rank(), from);
-    debug("kbipingpong: prepareSendStructs");
     prepareSendStructs(buf1);
-    debug("kbipingpong: prepareRecvStructs");
     prepareRecvStructs(buf2);
+    auto sendCounterBeforeKernel = getSendCounter();
+    auto recvCounterBeforeKernel = getRecvCounter();
+
     barrier();
 
     watch->start();
-    debug("kbipingpong: send");
-    sendMessages(from, buf1, num_msg);
-    debug("kbipingpong: recv");
-    recvMessages(num_msg);
+    sendMessages(partner, buf1, num_msg, sendCounterBeforeKernel);
+    recvMessages(num_msg, recvCounterBeforeKernel);
     watch->stop();
 
     barrier();
-    debug("kbipingpong: releaseRecvStructs");
-    releaseRecvStructs();
-    debug("kbipingpong: releaseSendStructs");
+
+    getSendCounter();
+    getRecvCounter();
     releaseSendStructs();
+    releaseRecvStructs();
     if(timing) { 
         *timing = watch->getDuration().count() / (2.0 * num_msg);
     }
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index f58bbb2..0002629 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -64,32 +64,36 @@ private:
     const ptl_match_bits_t MATCH_BITS = 1; // TODO when/how to use these?
     const ptl_match_bits_t IGNORE_BITS = ~0; // ignore all bits
 
-    // Sender only
+    // Sender
     /** @brief Memory Descriptor (MD) */
     ptl_md_t md;
     /** @brief MD Handle */
     ptl_handle_md_t md_handle;
     /** @brief Send Counter */
     ptl_ct_event_t send_ct;
+    /** @brief Prepare portals data structures on sender side (MD) */
     void prepareSendStructs(const MemoryBuffer& buf);
+    /** @brief Read current send (MD) counter value */
+    ptl_size_t getSendCounter();
+    /** @brief Free portals data structures on sender side (MD) */
     void releaseSendStructs();
 
-    // Receiver only
+    // Receiver
     /** @brief Match List Entry (ME) */
     ptl_me_t  me;
     /** @brief ME Handle */
     ptl_handle_me_t me_handle;
     /** @brief Receive Counter */
     ptl_ct_event_t recv_ct;
+    /** @brief Prepare portals data structures on receiver side (ME) */
     void prepareRecvStructs(const MemoryBuffer& buf);
+    /** @brief Read current recv (ME) counter value */
+    ptl_size_t getRecvCounter();
+    /** @brief Free portals data structures on receiver side (ME) */
     void releaseRecvStructs();
 
-    void sendMessages(const int to, MemoryBuffer& buf, const int num_msg);
-    void recvMessages(const int num_msg);
-
-    bool first = true;
-    int testPut();
-
+    void sendMessages(const int to, MemoryBuffer& buf, const unsigned long num_msg, const unsigned long counter_start);
+    void recvMessages(const unsigned long num_msg, const unsigned long counter_start);
 
     std::vector<ptl_process_t> getPhysicalFromRank();
 };
diff --git a/run.sh b/run.sh
index bb7818c..e8f908f 100755
--- a/run.sh
+++ b/run.sh
@@ -9,6 +9,26 @@ ml GCC ParaStationMPI SIONlib
 #export PORTALS4_DEBUG=3
 
 srun install/linktest \
+	--mode portals \
+	--num-warmup-messages 3 \
+	--num-messages 100 \
+	--size-messages 16777216 \
+	--serial-tests \
+	--num-slowest 1 \
+	--no-sion-file;
+
+srun install/linktest \
+	--unidirectional \
+	--mode portals \
+	--num-warmup-messages 3 \
+	--num-messages 100 \
+	--size-messages 16777216 \
+	--serial-tests \
+	--num-slowest 1 \
+	--no-sion-file;
+
+srun install/linktest \
+	--bidirectional \
 	--mode portals \
 	--num-warmup-messages 3 \
 	--num-messages 100 \
-- 
GitLab


From 3e0907d28bad2f59d2aa982defd992057ca43b98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Thu, 27 Jul 2023 16:59:21 +0200
Subject: [PATCH 31/47] Add missing ifdefs for portals code

---
 benchmark/benchmark.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index 8f9d85f..17a1d2f 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -19,7 +19,9 @@
 #include "environ.h"
 #include "format_units.h"
 #include "format_print.h"
+#if HAVE_VCLUSTER_PORTALS == 1
 #include "vcluster_portals.h"
+#endif
 #include <cstdlib>
 #include <cstdio>
 #include <cstring>
@@ -496,6 +498,7 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms
         } else if (i == partner) {
             EXEC_NOFAIL(kernel(partner, rank(), nullptr, false));
         } else {
+            #if HAVE_VCLUSTER_PORTALS == 1
             if(cl->nameRef() == VirtualClusterPortals::NAME) {
                 // 2 per kernel, warmup kernel and measuring kernel
                 debug("Benchmark::work_pingpong_serial 2->barrier()");
@@ -507,6 +510,7 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms
                 debug("Benchmark::work_pingpong_serial 5->barrier()");
                 barrier();
             }
+            #endif
         }
     }
     debug("Benchmark::work_pingpong_serial 6->barrier()");
@@ -658,6 +662,7 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
     if ((from == rank()) || (to == rank())) {
         EXEC_NOFAIL(kernel(from, to, &tv, false));
     } else { //TODO remove this hack. Seperating MemoryBuffer preparation from kernels. Bann all barriers from kernels
+        #if HAVE_VCLUSTER_PORTALS == 1
         if(cl->nameRef() == VirtualClusterPortals::NAME) {
             // 2 per kernel, warmup kernel and measuring kernel
             debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()");
@@ -666,6 +671,7 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
             barrier();
             barrier();
         }
+        #endif
     }
     debug("Benchmark::retest_one_slow_pair 5->barrier()");
     barrier();
-- 
GitLab


From 4720cfb033b5c0e27320713be71f425f8b390b73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 2 Aug 2023 17:10:13 +0200
Subject: [PATCH 32/47] Added compile failure table

---
 test/LinktestMain.xml | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml
index a55d84c..74c8d51 100644
--- a/test/LinktestMain.xml
+++ b/test/LinktestMain.xml
@@ -60,7 +60,7 @@
 				if [ $? -eq 0 ]; then
 					touch ../ready;
 				else
-					echo "Linktest compile failed" >> ../error;
+					echo "linktest compile failed" >> ../error;
 				fi
 				set +x
 			</do>
@@ -99,7 +99,7 @@
 				if [ $? -eq 0 ]; then
 					touch ready;
 				else
-					echo "linktest-report compile failed" >> error;
+					echo "linktest-report compile failed" >> error;
 				fi
 				deactivate
 				set +x
@@ -123,7 +123,7 @@
 				if [ $? -eq 0 ]; then
 					touch ready;
 				else
-					echo "python-report run failed" >> error;
+					echo "linktest-report run failed" >> error;
 				fi
 				deactivate
 				set +x
@@ -148,6 +148,12 @@
 		</patternset>
 
 		<!-- Analyse -->
+		<analyser name="analyseCompiles">
+			<analyse step="Compile">
+				<file use="errorFilePatterns">error</file>
+			</analyse>
+		</analyser>
+
 		<analyser name="analyseRuns">
 			<analyse step="LayerTest" tag="!noLayerTest">
 				<file use="LinktestOutPatterns">linktest.log</file>
@@ -202,6 +208,19 @@
 				<column title="Errors">error_msg</column>
 			</table>
 		</result>
+		<result>
+			<use>analyseCompiles</use>
+			<table name="CompileErrors" style="pretty" sort="jube_step_name">
+				<column title="Test">jube_step_name</column>
+				<column title="Compiler">Compiler</column>
+				<column title="MPI">MPI</column>
+				<column title="Setting">Transport_Layer_Settings</column>
+				<column title="Layer">Messaging_Layer</column>
+				<column title="Srun Args">SRUN_Arguments</column>
+				<column title="Options">Options</column>
+				<column title="Errors">error_msg</column>
+			</table>
+		</result>
 
 	</benchmark>
 </jube>
-- 
GitLab


From de721ba9ffca9527a43564be18ecaf1d8d0499c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 4 Aug 2023 12:41:53 +0200
Subject: [PATCH 33/47] Ported JUBE tests to deep. Added Portals Layer Test

---
 test/Default.xml      | 25 +++++++++++++++++++------
 test/LayerTest.xml    | 25 +++++++++++++++++++++++--
 test/LinktestMain.xml |  4 ++--
 3 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/test/Default.xml b/test/Default.xml
index c0abbfe..6e5a458 100644
--- a/test/Default.xml
+++ b/test/Default.xml
@@ -29,9 +29,9 @@
     <parameter name="DefaultCompiler">GCC</parameter>
     <parameter name="Compiler" tag="!noCompileRunTest">GCC,Intel,NVHPC</parameter>
     <parameter name="Compiler" tag="noCompileRunTest">${DefaultCompiler}</parameter>
-    <parameter name="DefaultMPI">OpenMPI</parameter>
+    <parameter name="DefaultMPI">ParaStationMPI</parameter>
     <parameter name="MPI" mode="python" tag="!noCompileRunTest">
-        {
+        "ParaStationMPI" if "${System_Name}" == "deep" else {
         "GCC":   "ParaStationMPI,OpenMPI",
         "Intel": "ParaStationMPI,OpenMPI,IntelMPI",
         "NVHPC": "ParaStationMPI,OpenMPI"
@@ -68,7 +68,13 @@
     </parameter>
 </parameterset>
 <parameterset name="Slurm"> <!-- depends on Linktest_Args, System and Environment parameters -->
-    <parameter name="Account">cstao</parameter>
+    <parameter name="Account" mode="python">
+        {
+            "juwels": "cstao",
+            "jurecadc": "cstao",
+            "deep": "deepsea"
+        }["${System_Name}"]
+    </parameter>
     <parameter name="Partition" mode="python">
         {
             "juwels": {
@@ -78,14 +84,20 @@
             "jurecadc": {
                 False: "dc-cpu-devel",
                 True : "dc-gpu-devel"
+            },
+            "deep": {
+                False: "dp-cn",
+                True : "dp-esb"
             }
         }["${System_Name}"][ ${WithGPUs} ]
     </parameter>
     <parameter name="Max_WallClock_Time">00:01:00</parameter>
     <parameter name="Number_Of_Nodes" mode="python">1 if "${Messaging_Layer}" == "cuda" else 2</parameter>
-    <parameter name="Number_Of_Tasks_Per_Node">4</parameter>
+    <parameter name="Number_Of_Tasks_Per_Node" mode="python">
+        "1" if (${WithGPUs} and "${System_Name}" == "deep") else "4"
+    </parameter>
     <parameter name="Number_Of_Cores_Per_Task">1</parameter>
-    <parameter name="Gres" mode="python">"#SBATCH --gres=gpu:4" if ${WithGPUs} else ""</parameter>
+    <parameter name="Gres" mode="python">"#SBATCH --gres=gpu:${Number_Of_Tasks_Per_Node}" if ${WithGPUs} else ""</parameter>
     <parameter name="SRUN_Arguments" mode="python">
         "" if "${Messaging_Layer}" == "mpi" else {
             "ParaStationMPI": "--mpi=pspmi",
@@ -98,7 +110,8 @@
     <parameter name="CuArch" mode="python">
         {
             "juwels":   "sm_70",
-            "jurecadc": "sm_80"
+            "jurecadc": "sm_80",
+            "deep":     "sm_70",
         }[ "${System_Name}" ]
     </parameter>
     <parameter name="Enable_Layer" mode="python">
diff --git a/test/LayerTest.xml b/test/LayerTest.xml
index 5ca1410..4594ede 100644
--- a/test/LayerTest.xml
+++ b/test/LayerTest.xml
@@ -4,8 +4,29 @@
     <parameter name="Messaging_Layer" mode="python">
     {
     "juwels": "ibverbs,ucp,tcp,cuda",
-    "jurecadc": "ibverbs,ucp,tcp,cuda"  <!-- TODO add psm2 which is available only on jureca booster which shares login node -->
+    "jurecadc": "ibverbs,ucp,tcp,cuda",
+    "deep": "ibverbs,portals"
     }[ "${System_Name}" ]
-    </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,tcp -->
+    </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,portals,tcp -->
+    
+</parameterset>
+<parameterset name="Slurm" init_with="Default.xml"> 	
+    <parameter name="Partition" mode="python">
+    "dp-bxi" if "${Messaging_Layer}" == "portals" else {
+        "juwels": {
+            False: "devel",
+            True : "develgpus"
+        },
+        "jurecadc": {
+            False: "dc-cpu-devel",
+            True : "dc-gpu-devel"
+        },
+        "deep": {
+            False: "dp-cn",
+            True : "dp-esb"
+        }
+    }["${System_Name}"][ ${WithGPUs} ]
+    </parameter> <!-- Portals runs are pinned to the BXI partition (dp-bxi); all other layers use the per-system CPU/GPU partition -->
+    
 </parameterset>
 </jube>
\ No newline at end of file
diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml
index 74c8d51..50b23c2 100644
--- a/test/LinktestMain.xml
+++ b/test/LinktestMain.xml
@@ -71,8 +71,8 @@
 		</step>
 
 		<step name="LayerTest" depend="Compile" active="'$Stack' == '$Default_Stack' and ${WithCUDA} == ${WithGPUs}" suffix="${Messaging_Layer}" tag="!noLayerTest">
-			<use from="LayerTest.xml">Linktest_Args</use>
-			<use from="Default.xml">System, Environment, Slurm, Misc</use>
+			<use from="LayerTest.xml">Linktest_Args, Slurm</use>
+			<use from="Default.xml">System, Environment, Misc</use>
 			<use>ExecutionScript</use>
 			<use>SubstituteInputParameters</use>
 			<do done_file="ready" error_file="error" tag="!dryRun">sbatch execute.sbatch</do>
-- 
GitLab


From f14e93c0f7336860db0a3ce4c39a3828bcb3ad01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Mon, 7 Aug 2023 16:50:28 +0200
Subject: [PATCH 34/47] Fixed HAVE_CUDA -> HAVE_VCLUSTER_CUDA

---
 benchmark/Makefile           |  2 +-
 benchmark/benchmark.cc       |  2 +-
 benchmark/benchmark.h        |  4 ++--
 benchmark/cmdline.cc         |  6 +++---
 benchmark/gpu_nvidia.h       |  4 ++--
 benchmark/memory.cc          |  8 ++++----
 benchmark/memory.h           |  4 ++--
 benchmark/memory_multi.cc    | 10 +++++-----
 benchmark/vcluster.cc        | 24 +++++++++++++++++++++++-
 benchmark/vcluster_cuda.cc   |  2 +-
 benchmark/vcluster_portals.h |  2 +-
 11 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index df3b16d..8d3404a 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -182,7 +182,7 @@ ifeq (1, $(HAVE_MINIPMI))
 $(error CUARCH is not set)
 		endif
 		CUFLAGS       = --gpu-architecture $(CUARCH) -DHAVE_VCLUSTER_CUDA=1
-		CPPFLAGS     += -I$(CUDA)/include -DHAVE_CUDA=1
+		CPPFLAGS     += -I$(CUDA)/include -DHAVE_VCLUSTER_CUDA=1
 		LDFLAGS      += -L$(CUDA)/lib
 		LIBS         += -lcuda -lcudart
 	endif
diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index 17a1d2f..7b22af7 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -844,7 +844,7 @@ int Benchmark::init() {
             alloc.reset(new PosixMemAlignedAllocator());
             break;
         case(AllocatorCUDA):
-            #if HAVE_CUDA == 1
+            #if HAVE_VCLUSTER_CUDA == 1
                 if(cl->rank()==0){info("Using CUDA memory allocator"); std::fflush(stdout);}
                 gpudev.reset(new cuda::GpuDevice(System::singleton()->closest_gpu_device()));
                 gpuctx.reset(new cuda::GpuContext(gpudev.get()));
diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h
index feaabc1..22bfe18 100644
--- a/benchmark/benchmark.h
+++ b/benchmark/benchmark.h
@@ -17,7 +17,7 @@
 #include "slow_pairs.h"
 #include "error.h"
 
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
     #include "gpu_nvidia.h"
 #endif
 
@@ -52,7 +52,7 @@ namespace linktest{
             void barrier() const;
             const struct linktest_args* args;
             std::unique_ptr<VirtualCluster> cl;
-            #if HAVE_CUDA == 1
+            #if HAVE_VCLUSTER_CUDA == 1
                 std::unique_ptr<cuda::GpuDevice>  gpudev;
                 std::unique_ptr<cuda::GpuContext> gpuctx; // Declaration order important! MemoryBuffer~ needs to be called before before GpuContext~
             #endif
diff --git a/benchmark/cmdline.cc b/benchmark/cmdline.cc
index 9b51db3..6c3ec9b 100644
--- a/benchmark/cmdline.cc
+++ b/benchmark/cmdline.cc
@@ -661,13 +661,13 @@ const struct linktest_args* parse_cmdline_args(int argc, char **argv){
     }
 
     if(cmdline_args.alloc_typ==AllocatorCUDA){
-        #if HAVE_CUDA == 1
+        #if HAVE_VCLUSTER_CUDA == 1
         #else
             fatal("Requested CUDA memory-allocator type, but compiled without CUDA support.");
         #endif
     }else{
         if(cmdline_args.do_use_gpus||cmdline_args.virtual_cluster_implementation=="cuda"){
-            #if HAVE_CUDA == 1
+            #if HAVE_VCLUSTER_CUDA == 1
                 if(cmdline_args.alloc_typ==AllocatorDefault){
                     cmdline_args.alloc_typ=AllocatorCUDA;
                 } else {
@@ -821,7 +821,7 @@ void print_cmdline_args(const struct linktest_args* args){
             case(AllocatorPOSIXAlignedMalloc):
                 return "posix_memalign";
             case(AllocatorCUDA):
-                #if HAVE_CUDA == 1
+                #if HAVE_VCLUSTER_CUDA == 1
                     return "CUDA";
                 #else
                     return "No CUDA";
diff --git a/benchmark/gpu_nvidia.h b/benchmark/gpu_nvidia.h
index fbf77a9..0d3b386 100644
--- a/benchmark/gpu_nvidia.h
+++ b/benchmark/gpu_nvidia.h
@@ -9,8 +9,8 @@
 #ifndef LINKTEST_GPU_NVIDIA_H
 #define LINKTEST_GPU_NVIDIA_H
 
-#if 1 != HAVE_CUDA
-#error gpu_nvidia can only compile with HAVE_CUDA=1
+#if 1 != HAVE_VCLUSTER_CUDA
+#error gpu_nvidia can only compile with HAVE_VCLUSTER_CUDA=1
 #endif
 
 #include "config.h"
diff --git a/benchmark/memory.cc b/benchmark/memory.cc
index b76663e..a9245b5 100644
--- a/benchmark/memory.cc
+++ b/benchmark/memory.cc
@@ -8,7 +8,7 @@
 ****************************************************************************/
 #include "memory.h"
 #include "compiler.h"
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
     #include "memory_cuda.h"
     #include "gpu_nvidia.h"
 #endif
@@ -109,7 +109,7 @@ void MemoryBuffer::fill(){
             if(use_mt()){
                 throw std::runtime_error("Not Implemented!");
             }else{
-                #if HAVE_CUDA == 1
+                #if HAVE_VCLUSTER_CUDA == 1
                     linktest::cuda::fill<char>(linktest::cuda::GpuContext::singleton(),
                                                pointer<char>(),
                                                pointer<char>() + len(), (char )0xff);
@@ -147,7 +147,7 @@ int MemoryBuffer::check(){
             }
             break;
         case AddressSpace::ID::CudaDeviceLocal:
-            #if HAVE_CUDA == 1
+            #if HAVE_VCLUSTER_CUDA == 1
                 throw std::runtime_error("check() called for local CUDA address space");
             #else
                 throw std::runtime_error("check() called on a CUDA address space but LinkTest was compiled without CUDA support");
@@ -231,7 +231,7 @@ int PosixMemAlignedAllocator::free(void* p, std::size_t len){
     return SUCCESS;
 }
 
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
     CudaDeviceAllocator::CudaDeviceAllocator(linktest::cuda::GpuContext* ctx):ctx_(ctx){}
     AddressSpace::ID CudaDeviceAllocator::address_space_id() const{
         return AddressSpace::ID::CudaDeviceLocal;
diff --git a/benchmark/memory.h b/benchmark/memory.h
index 6774f1a..1e15a67 100644
--- a/benchmark/memory.h
+++ b/benchmark/memory.h
@@ -14,7 +14,7 @@
 #include <cstdint>
 #include <unistd.h>
 
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
 	namespace linktest{
 		namespace cuda{
 			class Allocator;
@@ -129,7 +129,7 @@ class PosixMemAlignedAllocator : public Allocator {
 		size_t       pgsize_ = sysconf(_SC_PAGESIZE);
 };
 
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
 // A memory allocation on a GPU
 class CudaDeviceAllocator : public Allocator{
 	public:
diff --git a/benchmark/memory_multi.cc b/benchmark/memory_multi.cc
index ab85a0f..488486e 100644
--- a/benchmark/memory_multi.cc
+++ b/benchmark/memory_multi.cc
@@ -8,7 +8,7 @@
 ****************************************************************************/
 #include "memory_multi.h"
 #include "compiler.h"
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
 	#include "memory_cuda.h"
 	#include "gpu_nvidia.h"
 #endif
@@ -91,13 +91,13 @@ void MemoryBufferMulti::fill(){
 			break;
 		}case AddressSpace::ID::CudaDeviceLocal:{
 			if(use_mt()){
-				#if HAVE_CUDA == 1
+				#if HAVE_VCLUSTER_CUDA == 1
 					throw std::runtime_error("fill() for multiple buffers in CUDA address spaces not yet implemented");
 				#else
 					throw std::runtime_error("fill() called on CUDA address space but linktest was compiled without CUDA");
 				#endif
 			}else{
-				#if HAVE_CUDA == 1
+				#if HAVE_VCLUSTER_CUDA == 1
 					throw std::runtime_error("fill() for multiple buffers in CUDA address spaces not yet implemented");
 				#else
 					throw std::runtime_error("fill() called on CUDA address space but linktest was compiled without CUDA");
@@ -146,13 +146,13 @@ int MemoryBufferMulti::check(std::size_t* buffer, std::size_t* byte){
 			break;
 		}case AddressSpace::ID::CudaDeviceLocal:{
 			if(use_mt()){
-					#if HAVE_CUDA == 1
+					#if HAVE_VCLUSTER_CUDA == 1
 					throw std::runtime_error("check(buffer,byte) for multiple buffers in CUDA address spaces not yet implemented");
 				#else
 					throw std::runtime_error("check(buffer,byte) called on CUDA address space but linktest was compiled without CUDA");
 				#endif
 			}else{
-				#if HAVE_CUDA == 1
+				#if HAVE_VCLUSTER_CUDA == 1
 					throw std::runtime_error("check(buffer,byte) for multiple buffers in CUDA address spaces not yet implemented");
 				#else
 					throw std::runtime_error("check(buffer,byte) called on CUDA address space but linktest was compiled without CUDA");
diff --git a/benchmark/vcluster.cc b/benchmark/vcluster.cc
index b334bde..90d40b3 100644
--- a/benchmark/vcluster.cc
+++ b/benchmark/vcluster.cc
@@ -502,7 +502,6 @@ int VirtualClusterWithHelper::recv(int src, MemoryBuffer& buf)
 
 const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std::string& name)
 {
-
     std::string requestedImpl;
 
     // check 'name'
@@ -524,6 +523,29 @@ const std::string& VirtualCluster::get_vcluster_impl_name(char** argv, const std
         requestedImpl = {envName};
     }
 
+    #if 1 == HAVE_VCLUSTER_TCP
+    debug("HAVE_VCLUSTER_TCP == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_MPI
+    debug("HAVE_VCLUSTER_MPI == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_IBVERBS
+    debug("HAVE_VCLUSTER_IBVERBS == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_PSM2
+    debug("HAVE_VCLUSTER_PSM2 == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_UCP
+    debug("HAVE_VCLUSTER_UCP == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_PORTALS
+    debug("HAVE_VCLUSTER_PORTALS == 1");
+    #endif
+    #if 1 == HAVE_VCLUSTER_CUDA
+    debug("HAVE_VCLUSTER_CUDA == 1");
+    #endif
+    debug("requestedImpl = %s", requestedImpl.c_str());
+
     for(const auto& impl : VirtualCluster::impls) {
         if(impl == requestedImpl) {
             return impl;
diff --git a/benchmark/vcluster_cuda.cc b/benchmark/vcluster_cuda.cc
index 57ccc21..970d892 100644
--- a/benchmark/vcluster_cuda.cc
+++ b/benchmark/vcluster_cuda.cc
@@ -16,7 +16,7 @@
 #include "error.h"
 #include "output_sion.h"
 #include "pmi.h"
-#if HAVE_CUDA == 1
+#if HAVE_VCLUSTER_CUDA == 1
 #include "gpu_nvidia.h"
 #endif
 #include <cassert>
diff --git a/benchmark/vcluster_portals.h b/benchmark/vcluster_portals.h
index 0002629..0a00c3d 100644
--- a/benchmark/vcluster_portals.h
+++ b/benchmark/vcluster_portals.h
@@ -20,7 +20,7 @@ class VirtualClusterPortals : public VirtualClusterWithHelper
 {
 
 public:
-    inline static const char * NAME = "portals";
+    static constexpr char NAME[] = "portals";
     VirtualClusterPortals();
     int init() override;
     int finalize() override;
-- 
GitLab


From 8bf55419f8f91d022437b8728a20014b00d50cf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 16 Aug 2023 14:39:53 +0200
Subject: [PATCH 35/47] Removed temporary helper files

---
 build.sh    |  6 ------
 loadEnv.sh  | 10 ----------
 loadPath.sh |  7 -------
 run.sh      | 38 --------------------------------------
 4 files changed, 61 deletions(-)
 delete mode 100755 build.sh
 delete mode 100644 loadEnv.sh
 delete mode 100644 loadPath.sh
 delete mode 100755 run.sh

diff --git a/build.sh b/build.sh
deleted file mode 100755
index f1726d1..0000000
--- a/build.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-# 3. Install linktest in folder install
-ml GCC ParaStationMPI SIONlib
-mkdir -p install;
-cd benchmark;
-make HAVE_MPI=1 HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install clean install;
-cd ..;
\ No newline at end of file
diff --git a/loadEnv.sh b/loadEnv.sh
deleted file mode 100644
index 0e3a2e1..0000000
--- a/loadEnv.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash -x
-
-
-## MPI
-#source loadPath.sh /opt/mpi/openmpi/4.1.1.2/
-source /opt/mpi/openmpi/4.1.1.2/bin/mpivars.sh
-## BXI profile
-export OMPI_MCA_mca_base_envar_file_prefix=/opt/mpi/openmpi/4.1.1.2/etc/profile/bxi_optimized.conf
-## SIONlib and linktest
-source ./loadPath.sh install
\ No newline at end of file
diff --git a/loadPath.sh b/loadPath.sh
deleted file mode 100644
index 332eec7..0000000
--- a/loadPath.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-INSTALL_DIR=$(readlink -mn $1)
-echo Loading $INSTALL_DIR
-export LIBRARY_PATH=$LIBRARY_PATH:$INSTALL_DIR/lib/;
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$INSTALL_DIR/lib/;
-export CPATH=$CPATH:$INSTALL_DIR/include/;
-export PATH=$PATH:$INSTALL_DIR/bin;
\ No newline at end of file
diff --git a/run.sh b/run.sh
deleted file mode 100755
index e8f908f..0000000
--- a/run.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-#SBATCH --partition dp-bxi
-#SBATCH --account deepsea
-#SBATCH --nodes 4
-#SBATCH --tasks-per-node 1
-
-ml GCC ParaStationMPI SIONlib
-
-#export PORTALS4_DEBUG=3
-
-srun install/linktest \
-	--mode portals \
-	--num-warmup-messages 3 \
-	--num-messages 100 \
-	--size-messages 16777216 \
-	--serial-tests \
-	--num-slowest 1 \
-	--no-sion-file;
-
-srun install/linktest \
-	--unidirectional \
-	--mode portals \
-	--num-warmup-messages 3 \
-	--num-messages 100 \
-	--size-messages 16777216 \
-	--serial-tests \
-	--num-slowest 1 \
-	--no-sion-file;
-
-srun install/linktest \
-	--bidirectional \
-	--mode portals \
-	--num-warmup-messages 3 \
-	--num-messages 100 \
-	--size-messages 16777216 \
-	--serial-tests \
-	--num-slowest 1 \
-	--no-sion-file;
\ No newline at end of file
-- 
GitLab


From af8a02921adda2488352728d050afaccafebdbae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 16 Aug 2023 14:40:29 +0200
Subject: [PATCH 36/47] fixed --group-processes-by-hostname used too often

---
 test/execute_base.sbatch | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/execute_base.sbatch b/test/execute_base.sbatch
index e89c7dd..02dbf38 100644
--- a/test/execute_base.sbatch
+++ b/test/execute_base.sbatch
@@ -50,7 +50,7 @@ fi
 if [ §NUM_RANDOMIZE_TASKS§ -ne 0 ]; then
 	args+=" --num-randomize-tasks §NUM_RANDOMIZE_TASKS§"
 fi
-if [ §HOSTNAME_GROUPING§ ]; then
+if [ §HOSTNAME_GROUPING§ -ne 0 ]; then
 	args+=" --group-processes-by-hostname"
 fi
 set -x # echos commands before executing
-- 
GitLab


From 0d601ea7295091c673271111aba7d5281fbd1526 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 16 Aug 2023 16:11:07 +0200
Subject: [PATCH 37/47] Revert exampleBuild and exampleRun

---
 exampleBuild.sh | 16 ++--------------
 exampleRun.sh   |  9 ++++-----
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/exampleBuild.sh b/exampleBuild.sh
index 492d10b..fd88411 100755
--- a/exampleBuild.sh
+++ b/exampleBuild.sh
@@ -23,19 +23,7 @@ export CPATH=$CPATH:~/.local/include/;
 mkdir -p install;
 cd benchmark;
 make clean
-make -j HAVE_TCP=1 HAVE_PORTALS=1 PREFIX=../install install;
+make -j 12 HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install;
 make clean
 cd ..;
-# Install linktest-report
-# FIX for JSC Systems 
-#export CPATH=/p/software/<SYSTEM>/stages/2022/software/SciPy-bundle/2021.10-gcccoremkl-11.2.0-2021.4.0/lib/python3.9/site-packages/numpy/core/include:$CPATH
-cd install;
-python3 -m venv linktest-report-venv;
-source linktest-report-venv/bin/activate
-cd ../python;
-python3 -m pip install .; #TODO: Add --use-feature=in-tree-build if using pip 21.0.X to 21.2.X (default from 21.3 onwards)
-deactivate;
-cd ..;
-# Notice that we close the virtual environment, since this script is likely not sourced
-# To use python-report one has to source linktest-report-venv/bin/activate again
-# To uninstall: pip uninstall linktest, or remove the virtual environment completly
+
diff --git a/exampleRun.sh b/exampleRun.sh
index fc15465..0600c26 100755
--- a/exampleRun.sh
+++ b/exampleRun.sh
@@ -10,14 +10,13 @@
 ml GCC ParaStationMPI SIONlib
 
 salloc \
---partition dp-bxi \
---reservation maint-bxi \
---account deepsea \
+--partition devel \
+--account cstao \
 --nodes 2 \
 	srun \
 	--ntasks 4 \
 		install/linktest \
-		--mode portals \
+		--mode mpi \
 		--num-warmup-messages 10 \
 		--num-messages 100 \
-		--size-messages $((16));
\ No newline at end of file
+		--size-messages $((16*1024*1024));
\ No newline at end of file
-- 
GitLab


From c702e66ae0797027337ffe01e8558efb6c05339b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Wed, 16 Aug 2023 16:26:00 +0200
Subject: [PATCH 38/47] Added back linktest-report-venv in exampleBuild

---
 exampleBuild.sh | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/exampleBuild.sh b/exampleBuild.sh
index fd88411..07eac17 100755
--- a/exampleBuild.sh
+++ b/exampleBuild.sh
@@ -26,4 +26,16 @@ make clean
 make -j 12 HAVE_TCP=1 HAVE_IBVERBS=1 HAVE_UCP=1 PREFIX=../install install;
 make clean
 cd ..;
-
+# Install linktest-report
+# FIX for JSC Systems 
+#export CPATH=/p/software/<SYSTEM>/stages/2022/software/SciPy-bundle/2021.10-gcccoremkl-11.2.0-2021.4.0/lib/python3.9/site-packages/numpy/core/include:$CPATH
+cd install;
+python3 -m venv linktest-report-venv;
+source linktest-report-venv/bin/activate
+cd ../python;
+python3 -m pip install .; #TODO: Add --use-feature=in-tree-build if using pip 21.0.X to 21.2.X (default from 21.3 onwards)
+deactivate;
+cd ..;
+# Notice that we close the virtual environment, since this script is likely not sourced
+# To use python-report one has to source linktest-report-venv/bin/activate again
+# To uninstall: pip uninstall linktest, or remove the virtual environment completly
-- 
GitLab


From 135bd4eb9164a57761341dd6fc9617880d794399 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 29 Aug 2023 16:12:52 +0200
Subject: [PATCH 39/47] Fixed mpi-settings no longer exists on deep

---
 test/Default.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/Default.xml b/test/Default.xml
index 6e5a458..a65c93e 100644
--- a/test/Default.xml
+++ b/test/Default.xml
@@ -50,7 +50,7 @@
     <parameter name="Default_Stack">$DefaultCompiler $DefaultMPI</parameter>
     <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else ""</parameter>
     <parameter name="Transport_Layer_Settings"  mode="python">
-        "" if not ${WithCUDA} else {
+        "" if not ${WithCUDA} or "${System_Name}" == "deep" else {
             "ParaStationMPI": "mpi-settings/CUDA",
             "OpenMPI":        "UCX-settings/RC-CUDA",
             "IntelMPI":       ""
@@ -126,7 +126,7 @@
             "":     ""
         }[ "${CUDA}" ]
     </parameter>
-    <parameter name="Make">make -j ${Enable_Layer} ${DefineCuArch}</parameter>
+    <parameter name="Make">make -j24 ${Enable_Layer} ${DefineCuArch}</parameter>
 </parameterset>
 <parameterset name="Misc"> <!-- depends on Linktest_Args parameters -->
     <parameter name="Report_Name">linktest_${Messaging_Layer}_${Number_Of_Nodes}nx${Number_Of_Tasks_Per_Node}c</parameter>
-- 
GitLab


From c9dd5ae17851f972c48ec6c2fac41d10ed872396 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 29 Aug 2023 16:14:09 +0200
Subject: [PATCH 40/47] Improved compile step suffix Renamed Makefile variables
 to reflect C++ usage

---
 benchmark/Makefile    | 32 ++++++++++++++++----------------
 test/LinktestMain.xml |  2 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/benchmark/Makefile b/benchmark/Makefile
index 8d3404a..7a27cfc 100644
--- a/benchmark/Makefile
+++ b/benchmark/Makefile
@@ -27,14 +27,14 @@ FSANITIZE = address
 SYSTEM   = generic
 GIT_HASH = $(shell git rev-parse --verify HEAD)
 GIT_HASH_SHORT= $(shell git rev-parse --verify --short HEAD)
-CC       = mpicxx
-CFLAGS   = -std=c++17 -Wall -g -rdynamic
+CXX       = mpicxx
+CXXFLAGS = -std=c++17 -Wall -g -rdynamic
 CPPFLAGS =	-D_GNU_SOURCE \
 			-DLINKTEST_LINUX=1 \
 			-DLINKTEST_SYSTEM="\"$(SYSTEM)\"" \
 			-DGIT_HASH=\"$(GIT_HASH)\" 
 			-DGIT_HASH_SHORT=\"$(GIT_HASH_SHORT)\"
-LD       = $(CC)
+LD       = $(CXX)
 LDFLAGS  =
 LIBS     =
 # =========================================
@@ -190,7 +190,7 @@ endif
 
 ifeq (1, $(HAVE_SION))
 	linktest-obj += vcluster_sion_generic_adapter.o
-	CPPFLAGS     += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi)
+	CPPFLAGS     += -D_FILE_OFFSET_BITS=64 -DUSE_SION=1 $(shell sionconfig --64 --gcc --cflags --mpi)
 	LIBS         += $(shell sionconfig --64 --gcc --libs --mpi)
 endif
 
@@ -200,7 +200,7 @@ endif
 
 ifdef VERBOSE
 $(info linktest-obj = $(linktest-obj))
-$(info CFLAGS = $(CFLAGS))
+$(info CXXFLAGS = $(CXXFLAGS))
 $(info CPPFLAGS = $(CPPFLAGS))
 $(info LDFLAGS = $(LDFLAGS))
 $(info LIBS = $(LIBS))
@@ -210,12 +210,12 @@ endif
 # DEFINE MAKE RULES
 # =========================================
 ifdef VERBOSE
-	Q =
+	QUIET =
 else
-	Q = @
+	QUIET = @
 endif
 
-link = $(Q)ln -s linktest linktest.$(1)
+link = $(QUIET)ln -s linktest linktest.$(1)
 
 SYMB_EXE := $(shell find . -type l -iname "linktest.*")
 
@@ -226,36 +226,36 @@ all: optimized
 compile: linktest $(linktest-versions)
 
 .PHONY: optimized
-optimized: CFLAGS += -O3
+optimized: CXXFLAGS += -O3
 optimized: compile
 
 .PHONY: debug
-debug: CFLAGS += -O0 -g
+debug: CXXFLAGS += -O0 -g
 debug: compile
 
 .PHONY: sanitized
 sanitized: debug
-sanitized: CFLAGS   += -fsanitize=$(FSANITIZE) -static-libasan -fno-omit-frame-pointer
+sanitized: CXXFLAGS += -fsanitize=$(FSANITIZE) -static-libasan -fno-omit-frame-pointer
 sanitized: LDFLAGS  += -fsanitize=$(FSANITIZE) -static-libasan
 sanitized: compile
 
 memory_cuda.cc: cuda_kernels.cc
 
 %.o: %.cc
-	@echo " "CC $@
-	$(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@	
+	@echo " "CXX $@
+	$(QUIET)$(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@	
 
 %.cubin: %.cu
 	@echo " "CU $@
-	$(Q)$(CU) $(CUFLAGS) --cubin $< -o $@
+	$(QUIET)$(CU) $(CUFLAGS) --cubin $< -o $@
 
 %.cc: %.cubin convert.py
 	@echo " "CONVERT $@
-	$(Q)python3 convert.py $< $@ $(basename $@)
+	$(QUIET)python3 convert.py $< $@ $(basename $@)
 
 linktest: $(linktest-obj)
 	@echo " "LD $@
-	$(Q)$(LD) $(LDFLAGS) $^ $(LIBS) -o $@
+	$(QUIET)$(LD) $(LDFLAGS) $^ $(LIBS) -o $@
 
 linktest.tcp: linktest
 	@echo " "LN $@
diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml
index 50b23c2..54a1a69 100644
--- a/test/LinktestMain.xml
+++ b/test/LinktestMain.xml
@@ -49,7 +49,7 @@
 			<sub source="§SRUN_ARGS§"                dest="${SRUN_Arguments}" />
 		</substituteset>
 
-		<step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}">
+		<step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack} ${CUDA}">
 			<use>Sources</use>
 			<use from="Default.xml">System, Environment, Build</use>
 			<do done_file="ready" error_file="error" tag="!dryRun">
-- 
GitLab


From b82056f72796cdc52a7fd200bc92ed0a258247e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 29 Aug 2023 16:59:54 +0200
Subject: [PATCH 41/47] Removed cluttering debug code

---
 benchmark/benchmark.cc | 42 ------------------------------------------
 1 file changed, 42 deletions(-)

diff --git a/benchmark/benchmark.cc b/benchmark/benchmark.cc
index 7b22af7..444c346 100644
--- a/benchmark/benchmark.cc
+++ b/benchmark/benchmark.cc
@@ -400,7 +400,6 @@ int Benchmark::printIterationResults(const int iter){
         }
     }
 
-    debug("Benchmark::printIterationResults->barrier()");
     EXEC_NOFAIL(cl->barrier());
 
     return SUCCESS;
@@ -441,39 +440,11 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
     auto from = (sign < 0) ? partner : rank();
     auto to   = (sign < 0) ? rank() : partner;
 
-    debug("Benchmark::work_pingpong_parallel 1->barrier()");
     barrier();
-#ifdef DEBUG_KERNEL_SYNCHRONIZATION
-    std::unique_ptr<StopwatchI> rootWatch = Stopwatchfactory::getRootWatch(rank());
-    duration_t tBeforeBarrier;
-    rootWatch->start();
-#endif
     EXEC_NOFAIL(kernel(from, to, &tmp1, true));
-#ifdef DEBUG_KERNEL_SYNCHRONIZATION
-    rootWatch->stop();
-    tBeforeBarrier=rootWatch->getDuration();
-#endif
-    debug("Benchmark::work_pingpong_parallel 2->barrier()");
     barrier();
-#ifdef DEBUG_KERNEL_SYNCHRONIZATION
-    rootWatch->stop();
-    printTimingIfRoot(rank(), "[Kernel A->B Before Barrier]", tBeforeBarrier          );
-    printTimingIfRoot(rank(), "[Kernel A->B After  Barrier]", rootWatch->getDuration());
-    barrier(); //Additional barrier to reduce desynchronization due to printing
-    rootWatch->start();
-#endif
     EXEC_NOFAIL(kernel(to, from, &tmp2, true));
-#ifdef DEBUG_KERNEL_SYNCHRONIZATION
-    rootWatch->stop();
-    tBeforeBarrier=rootWatch->getDuration();
-#endif
-    debug("Benchmark::work_pingpong_parallel 3->barrier()");
     barrier();
-#ifdef DEBUG_KERNEL_SYNCHRONIZATION
-    rootWatch->stop();
-    printTimingIfRoot(rank(), "[Kernel B->A Before Barrier]", tBeforeBarrier          );
-    printTimingIfRoot(rank(), "[Kernel B->A After  Barrier]", rootWatch->getDuration());
-#endif
 
     *time_per_msg = (sign > 0) ? tmp1 : tmp2;
 
@@ -485,7 +456,6 @@ int Benchmark::work_pingpong_parallel(const int partner,const int sign, double*
  */
 int Benchmark::work_pingpong_serial(const int partner, double* const time_per_msg){
     for (auto i = 0; i < size(); ++i) {
-        debug("Benchmark::work_pingpong_serial 1->barrier()");
         barrier();
         if (i == rank()) {
             EXEC_NOFAIL(kernel(rank(), partner, time_per_msg, false));
@@ -501,19 +471,14 @@ int Benchmark::work_pingpong_serial(const int partner, double* const time_per_ms
             #if HAVE_VCLUSTER_PORTALS == 1
             if(cl->nameRef() == VirtualClusterPortals::NAME) {
                 // 2 per kernel, warmup kernel and measuring kernel
-                debug("Benchmark::work_pingpong_serial 2->barrier()");
                 barrier();
-                debug("Benchmark::work_pingpong_serial 3->barrier()");
                 barrier();
-                debug("Benchmark::work_pingpong_serial 4->barrier()");
                 barrier();
-                debug("Benchmark::work_pingpong_serial 5->barrier()");
                 barrier();
             }
             #endif
         }
     }
-    debug("Benchmark::work_pingpong_serial 6->barrier()");
     barrier();
 
     return SUCCESS;
@@ -571,7 +536,6 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
     double runningSumBandwidth=0.0;
 
     /* Execute all-to-all test if desired */
-    debug("Benchmark::run_iteration 1->barrier()");
     barrier();
     if (args->do_alltoall){
         EXEC_NOFAIL(work_alltoall());
@@ -593,7 +557,6 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
             if(!args->do_serial) std::printf(fmt[!!args->do_serial], nDigits, step+1); //Start print out early so user knows step has started
             std::fflush(stdout);
             const double stepStartTime = walltime();
-            debug("Benchmark::run_iteration 2->barrier()");
             barrier();
             EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth));
             *tWork += (walltime() - stepStartTime);
@@ -603,14 +566,12 @@ int Benchmark::run_iteration(const std::size_t iter, const std::size_t numRemain
         }
     }else{
         for (unsigned int step: stepPermutation) {
-            debug("Benchmark::run_iteration 3->barrier()");
             barrier();
             EXEC_NOFAIL(work_pingpong(step, &minTimeForStep, &avgTimeForStep, &maxTimeForStep, &sumBandwidth));
         }
     }
 
     /* Execute all-to-all test if desired */
-    debug("Benchmark::run_iteration 4->barrier()");
     barrier();
     if (args->do_alltoall){
         EXEC_NOFAIL(work_alltoall());
@@ -665,7 +626,6 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
         #if HAVE_VCLUSTER_PORTALS == 1
         if(cl->nameRef() == VirtualClusterPortals::NAME) {
             // 2 per kernel, warmup kernel and measuring kernel
-            debug("Benchmark::retest_one_slow_pair 1,2,3,4->barrier()");
             barrier();
             barrier();
             barrier();
@@ -673,7 +633,6 @@ int Benchmark::retest_one_slow_pair(const int from,const int to, double* const t
         }
         #endif
     }
-    debug("Benchmark::retest_one_slow_pair 5->barrier()");
     barrier();
 
     if (0 != from) {
@@ -999,7 +958,6 @@ int Benchmark::benchmark(){
     prepareBuffers();
 
     rootWatch->start();
-    debug("Benchmark::benchmark->barrier()");
     cl->barrier();
 
     rootWatch->stop();
-- 
GitLab


From f7aecaec75940acb66a92dc080dc3ef719d58119 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Sep 2023 10:45:47 +0200
Subject: [PATCH 42/47] Added NOLINT for macros

---
 benchmark/error.h           | 3 ++-
 benchmark/portals4_macros.h | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/benchmark/error.h b/benchmark/error.h
index 2807e34..e1db667 100644
--- a/benchmark/error.h
+++ b/benchmark/error.h
@@ -35,10 +35,11 @@ void linktest_debug(const char* file, const char* func, long line, const char* f
  * The names are pretty generic so we have to be careful to avoid naming conflicts
  * that result in hard to understand compiler errors.
  */
+// NOLINTBEGIN
 #define fatal(fmt, ...) linktest_fatal(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__)
 #define error(fmt, ...) linktest_error(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__)
 #define warn(fmt, ...) linktest_warn(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__)
 #define info(fmt, ...) linktest_info(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__)
 #define debug(fmt, ...) linktest_debug(__FILE__, __func__, __LINE__, fmt, ## __VA_ARGS__)
-
+// NOLINTEND
 #endif
\ No newline at end of file
diff --git a/benchmark/portals4_macros.h b/benchmark/portals4_macros.h
index 6175265..91fb342 100644
--- a/benchmark/portals4_macros.h
+++ b/benchmark/portals4_macros.h
@@ -1,6 +1,6 @@
 #ifndef LINKTEST_PORTALS4MACROS_H
 #define LINKTEST_PORTALS4MACROS_H
-
+// NOLINTBEGIN
 #define CHECK_RETURNVAL(x) do { int ret; \
     switch (ret = x) { \
         case PTL_IGNORED: \
@@ -12,6 +12,7 @@
         case PTL_PT_IN_USE: fprintf(stderr, "=> %s returned PTL_PT_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
         case PTL_IN_USE: fprintf(stderr, "=> %s returned PTL_IN_USE (line %u)\n", #x, (unsigned int)__LINE__); abort(); break; \
         default: fprintf(stderr, "=> %s returned failcode %i (line %u)\n", #x, ret, (unsigned int)__LINE__); abort(); break; \
-    } } while (0)
+    } } while (0) 
+// NOLINTEND
 
 #endif //PORTALS4MACROS
\ No newline at end of file
-- 
GitLab


From c6f7f8cf5273acf3f9350075497c594941ae87da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Sep 2023 10:57:59 +0200
Subject: [PATCH 43/47] Tidied up implicit conversion and unused arguments

---
 benchmark/vcluster_portals.cc | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/benchmark/vcluster_portals.cc b/benchmark/vcluster_portals.cc
index d5f494f..f408683 100644
--- a/benchmark/vcluster_portals.cc
+++ b/benchmark/vcluster_portals.cc
@@ -181,18 +181,19 @@ int VirtualClusterPortals::kpingpong(const int from, const int to, MemoryBuffer&
     getRecvCounter();
     releaseSendStructs();
     releaseRecvStructs();
-    if(timing) { 
+    if (timing != nullptr)
+    {
         *timing = watch->getDuration().count() / (2*num_msg);
     }
-    
+
     return SUCCESS;
 }
 
 int VirtualClusterPortals::kUniDir(
     const int from, const int to,
-    MemoryBuffer& buf1, MemoryBuffer& buf2,
-    const int num_msg, double* const timing,
-    const bool doBarrier)
+    MemoryBuffer &buf1, MemoryBuffer &buf2,
+    const int num_msg, double *const timing,
+    const bool /*doBarrier*/)
 {
     bool isSender = rank() == from;
     bool isReceiver = rank() == to;
@@ -223,10 +224,11 @@ int VirtualClusterPortals::kUniDir(
     getRecvCounter();
     releaseSendStructs();
     releaseRecvStructs();
-    if(timing) { 
+    if (timing != nullptr)
+    {
         *timing = watch->getDuration().count() / num_msg;
     }
-    
+
     return SUCCESS;
 
 }
@@ -262,10 +264,11 @@ int VirtualClusterPortals::kbipingpong(
     getRecvCounter();
     releaseSendStructs();
     releaseRecvStructs();
-    if(timing) { 
+    if (timing != nullptr)
+    {
         *timing = watch->getDuration().count() / (2.0 * num_msg);
     }
-    
+
     return SUCCESS;
 
 }
-- 
GitLab


From 59e38a7c9d7ab78025c073eef9445fc860556df3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Fri, 4 Aug 2023 15:39:12 +0200
Subject: [PATCH 44/47] Fix for Stage 2023: Intel only has IntelMPI

---
 test/Default.xml   | 6 +++---
 test/LayerTest.xml | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/test/Default.xml b/test/Default.xml
index a65c93e..68c12e9 100644
--- a/test/Default.xml
+++ b/test/Default.xml
@@ -33,7 +33,7 @@
     <parameter name="MPI" mode="python" tag="!noCompileRunTest">
         "ParaStationMPI" if "${System_Name}" == "deep" else {
         "GCC":   "ParaStationMPI,OpenMPI",
-        "Intel": "ParaStationMPI,OpenMPI,IntelMPI",
+        "Intel": "IntelMPI",
         "NVHPC": "ParaStationMPI,OpenMPI"
         }[ "${Compiler}" ]
     </parameter>
@@ -46,8 +46,8 @@
         }[ "${Compiler}" ]
     </parameter>
     <parameter name="WithCUDA">("${CUDA}" == "CUDA")</parameter>
-    <parameter name="Stack">$Compiler $MPI</parameter>
-    <parameter name="Default_Stack">$DefaultCompiler $DefaultMPI</parameter>
+    <parameter name="Stack">${Compiler}_${MPI}</parameter>
+    <parameter name="Default_Stack">${DefaultCompiler}_${DefaultMPI}</parameter>
     <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else ""</parameter>
     <parameter name="Transport_Layer_Settings"  mode="python">
         "" if not ${WithCUDA} or "${System_Name}" == "deep" else {
diff --git a/test/LayerTest.xml b/test/LayerTest.xml
index 4594ede..4ab7775 100644
--- a/test/LayerTest.xml
+++ b/test/LayerTest.xml
@@ -5,14 +5,14 @@
     {
     "juwels": "ibverbs,ucp,tcp,cuda",
     "jurecadc": "ibverbs,ucp,tcp,cuda",
-    "deep": "ibverbs,portals"
+    "deep": "ibverbs,ucp,tcp,cuda,portals"
     }[ "${System_Name}" ]
     </parameter> <!-- Options: mpi,ibverbs,psm2,cuda,ucp,portals,tcp -->
     
 </parameterset>
 <parameterset name="Slurm" init_with="Default.xml"> 	
     <parameter name="Partition" mode="python">
-    "dp-bxi" if "${System_Name}" == "portals" else {
+    "dp-bxi" if "${Messaging_Layer}" == "portals" else {
         "juwels": {
             False: "devel",
             True : "develgpus"
-- 
GitLab


From 591b7d0f0add4f3119b365054c734e20960e774d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Sep 2023 11:18:14 +0200
Subject: [PATCH 45/47] Replaced space with underscore in suffix

---
 test/LinktestMain.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml
index 54a1a69..5f1982c 100644
--- a/test/LinktestMain.xml
+++ b/test/LinktestMain.xml
@@ -49,7 +49,7 @@
 			<sub source="§SRUN_ARGS§"                dest="${SRUN_Arguments}" />
 		</substituteset>
 
-		<step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack} ${CUDA}">
+		<step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}_${CUDA}">
 			<use>Sources</use>
 			<use from="Default.xml">System, Environment, Build</use>
 			<do done_file="ready" error_file="error" tag="!dryRun">
-- 
GitLab


From fb09bdf17d661f7e9432479f2b4a6dd82e24cdf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Sep 2023 16:52:15 +0200
Subject: [PATCH 46/47] Cleaned up error table generation

---
 test/Default.xml         |  2 +
 test/LinktestMain.xml    | 84 ++++++++++++++++++++++------------------
 test/execute_base.sbatch |  2 +-
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/test/Default.xml b/test/Default.xml
index 68c12e9..734aaad 100644
--- a/test/Default.xml
+++ b/test/Default.xml
@@ -46,7 +46,9 @@
         }[ "${Compiler}" ]
     </parameter>
     <parameter name="WithCUDA">("${CUDA}" == "CUDA")</parameter>
+    <parameter name="WithCUDATxt" mode="python">"Yes" if ${WithCUDA} else "No"</parameter>
     <parameter name="Stack">${Compiler}_${MPI}</parameter>
+    <parameter name="StackWithCuda">${Stack}_${CUDA}</parameter>
     <parameter name="Default_Stack">${DefaultCompiler}_${DefaultMPI}</parameter>
     <parameter name="Unload_CUDA" mode="python">"CUDA" if "${Compiler} ${MPI} ${CUDA} " == "Intel IntelMPI " else ""</parameter>
     <parameter name="Transport_Layer_Settings"  mode="python">
diff --git a/test/LinktestMain.xml b/test/LinktestMain.xml
index 5f1982c..cf67976 100644
--- a/test/LinktestMain.xml
+++ b/test/LinktestMain.xml
@@ -3,6 +3,18 @@
 	<benchmark name="JSC Linktest Test Suite" outpath="runs">
 		<comment>Testing compilation and common usages of JSC Linktest</comment>
 
+		<parameterset name="JUBE_Extra">
+			<parameter name="JUBE_REPORT_LAST_CMD" update_mode="step">
+				if [ $? -eq 0 ]; then
+					touch "${jube_wp_abspath}/ready";
+				else
+					echo "${jube_step_name} failed" >> "${jube_wp_abspath}/error";
+				fi
+			</parameter>
+		</parameterset>
+
+		
+
 		<fileset name="Sources">
 			<copy>../benchmark</copy>
 		</fileset>
@@ -50,23 +62,19 @@
 		</substituteset>
 
 		<step name="Compile" procs="9" tag="!(noLayerTest+noModeTest+noCompileTest)" suffix="${Stack}_${CUDA}">
-			<use>Sources</use>
+			<use>JUBE_Extra,Sources</use>
 			<use from="Default.xml">System, Environment, Build</use>
 			<do done_file="ready" error_file="error" tag="!dryRun">
 				set -x
 				$Load_Modules
 				cd benchmark
 				$Make
-				if [ $? -eq 0 ]; then
-					touch ../ready;
-				else
-					echo "linktest compile failed" >> ../error;
-				fi
+				$JUBE_REPORT_LAST_CMD
 				set +x
 			</do>
 			<do done_file="ready" error_file="error" tag="dryRun">
 				echo "Assume succesful compile"
-				touch ready
+				$JUBE_REPORT_LAST_CMD
 			</do>
 		</step>
 
@@ -88,7 +96,7 @@
 
 		<step name="CompileLinktestReport" active="'$Stack' == '$Default_Stack'" tag="!noLinktestReportTest">
 			<use from="Default.xml">Environment</use>
-			<use>ReportSources</use>
+			<use>JUBE_Extra,ReportSources</use>
 			<do done_file="ready" error_file="error">
 				set -x
 				$Load_Modules
@@ -96,17 +104,13 @@
 				python3 -m venv venvLinktest
 				source venvLinktest/bin/activate
 				pip install ./python
-				if [ $? -eq 0 ]; then
-					touch ready;
-				else
-					echo "inktest-report compile failed" >> error;
-				fi
+				$JUBE_REPORT_LAST_CMD
 				deactivate
 				set +x
 			</do>
 		</step>
 
-		<step name="CompileRunTest" procs="9" depend="Compile" active="${WithCUDA} == ${WithGPUs}" suffix="${Stack}_${CUDA}" tag="!noCompileRunTest">
+		<step name="CompileRunTest" procs="9" depend="Compile" active="${WithCUDA} == ${WithGPUs}" suffix="${StackWithCuda}" tag="!noCompileRunTest">
 			<use from="CompileRunTest.xml">Linktest_Args</use>
 			<use from="Default.xml">System, Environment, Slurm, Misc</use>
 			<use>ExecutionScript</use>
@@ -115,16 +119,13 @@
 		</step>
 
 		<step name="LinktestReportTest" procs="7" depend="ModeTest,CompileLinktestReport" active="$No_Sion_File == 0" suffix="${Mode}" tag="!(noLinktestReportTest|noModeTest)">
+			<use>JUBE_Extra</use>
 			<do done_file="ready" error_file="error" tag="!dryRun">
 				set -x
 				$Load_Modules
 				source CompileLinktestReport/venvLinktest/bin/activate
 				linktest-report -i ModeTest/${Report_Name}.sion -o report.pdf
-				if [ $? -eq 0 ]; then
-					touch ready;
-				else
-					echo "linktest-report run failed" >> error;
-				fi
+				$JUBE_REPORT_LAST_CMD
 				deactivate
 				set +x
 			</do>
@@ -143,38 +144,46 @@
 			<pattern name="Options">\+ srun .*?\.sion (.*?)\n</pattern>
 		</patternset>
 
-		<patternset name="errorFilePatterns">
-			<pattern name="error_msg">.*</pattern>
+		<patternset name="genericPatterns">
+			<pattern name="all">.*</pattern>
 		</patternset>
 
 		<!-- Analyse -->
 		<analyser name="analyseCompiles">
 			<analyse step="Compile">
-				<file use="errorFilePatterns">error</file>
+				<file use="genericPatterns">error</file>
+				<file use="genericPatterns">ready</file>
 			</analyse>
 		</analyser>
 
 		<analyser name="analyseRuns">
-			<analyse step="LayerTest" tag="!noLayerTest">
+			<analyse step="Compile">
+				<file use="genericPatterns">error</file>
+				<file use="genericPatterns">ready</file>
+			</analyse>
+			<analyse step="CompileRunTest" tag="!noCompileRunTest">
 				<file use="LinktestOutPatterns">linktest.log</file>
 				<file use="LinktestErrPatterns">linktest.error</file>
-				<file use="errorFilePatterns">error</file>
+				<file use="genericPatterns">error</file>
+				<file use="genericPatterns">ready</file>
 			</analyse>
-			<analyse step="ModeTest" tag="!noModeTest">
+			<analyse step="LayerTest" tag="!noLayerTest">
 				<file use="LinktestOutPatterns">linktest.log</file>
 				<file use="LinktestErrPatterns">linktest.error</file>
-				<file use="errorFilePatterns">error</file>
+				<file use="genericPatterns">error</file>
+				<file use="genericPatterns">ready</file>
 			</analyse>
-			<analyse step="CompileRunTest" tag="!noCompileRunTest">
+			<analyse step="ModeTest" tag="!noModeTest">
 				<file use="LinktestOutPatterns">linktest.log</file>
 				<file use="LinktestErrPatterns">linktest.error</file>
-				<file use="errorFilePatterns">error</file>
+				<file use="genericPatterns">error</file>
+				<file use="genericPatterns">ready</file>
 			</analyse>
 		</analyser>
 
 		<analyser name="analyseReports">
 			<analyse step="LinktestReportTest" tag="!(noLinktestReportTest|noModeTest)">
-				<file use="errorFilePatterns">error</file>
+				<file use="genericPatterns">error</file>
 			</analyse>
 		</analyser>
 
@@ -197,28 +206,27 @@
 		</result>
 		<result>
 			<use>analyseRuns,analyseReports</use>
-			<table name="ErrorResult" style="pretty" sort="jube_step_name">
+			<table name="RunErrors" style="pretty" sort="jube_step_name,Compiler,MPI,Transport_Layer_Settings,WithCUDATxt,Messaging_Layer,SRUN_Arguments,Options">
 				<column title="Test">jube_step_name</column>
 				<column title="Compiler">Compiler</column>
 				<column title="MPI">MPI</column>
-				<column title="Setting">Transport_Layer_Settings</column>
+				<column title="MPI Settings">Transport_Layer_Settings</column>
+				<column title="CUDA">WithCUDATxt</column>
 				<column title="Layer">Messaging_Layer</column>
 				<column title="Srun Args">SRUN_Arguments</column>
 				<column title="Options">Options</column>
-				<column title="Errors">error_msg</column>
+				<column title="Errors">all</column>
 			</table>
 		</result>
 		<result>
 			<use>analyseCompiles</use>
-			<table name="CompileErrors" style="pretty" sort="jube_step_name">
+			<table name="CompileErrors" style="pretty" sort="jube_step_name,Compiler,MPI,CUDA">
 				<column title="Test">jube_step_name</column>
 				<column title="Compiler">Compiler</column>
 				<column title="MPI">MPI</column>
-				<column title="Setting">Transport_Layer_Settings</column>
-				<column title="Layer">Messaging_Layer</column>
-				<column title="Srun Args">SRUN_Arguments</column>
-				<column title="Options">Options</column>
-				<column title="Errors">error_msg</column>
+				<column title="MPI Settings">Transport_Layer_Settings</column>
+				<column title="CUDA">WithCUDATxt</column>
+				<column title="Errors">all</column>
 			</table>
 		</result>
 
diff --git a/test/execute_base.sbatch b/test/execute_base.sbatch
index 02dbf38..21cfb76 100644
--- a/test/execute_base.sbatch
+++ b/test/execute_base.sbatch
@@ -61,7 +61,7 @@ srun --ntasks=${SLURM_NTASKS} \
 
 # Indicate Success to jube
 if [ $? -ne 0 ]; then
-	echo "linktest run failed" >> error;
+	echo "LinkTest run failed" >> error;
 else
 	touch ready;
 fi
-- 
GitLab


From 3c63dce8deb5089673a9461596caa06c5b6977c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Yannik=20M=C3=BCller?= <y.mueller@fz-juelich.de>
Date: Tue, 12 Sep 2023 16:56:45 +0200
Subject: [PATCH 47/47] Removed example install script for SIONlib

---
 benchmark/installSIONlib.sh | 8 --------
 1 file changed, 8 deletions(-)
 delete mode 100644 benchmark/installSIONlib.sh

diff --git a/benchmark/installSIONlib.sh b/benchmark/installSIONlib.sh
deleted file mode 100644
index 01e0cb9..0000000
--- a/benchmark/installSIONlib.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-wget http://apps.fz-juelich.de/jsc/sionlib/download.php?version=1.7.7
-tar -axf 'download.php?version=1.7.7'
-mkdir install
-cd sionlib
-./configure --prefix=/p/project/deepsea/mueller24/linktest/install --disable-fortran # Check/Change Path
-cd build-linux-gomp10-openmpi
-make
-make install
\ No newline at end of file
-- 
GitLab