From e1119b2ab8e6e754810dcf284546fc886e7c0f65 Mon Sep 17 00:00:00 2001 From: Utz-Uwe Haus <uhaus@cray.com> Date: Thu, 18 Nov 2021 13:34:50 +0100 Subject: [PATCH] Add support for fragments in rdma completion context --- transport/rdma.c | 15 ++++++++++++--- transport/transport_rdma.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/transport/rdma.c b/transport/rdma.c index ef012f4a..5405e60c 100644 --- a/transport/rdma.c +++ b/transport/rdma.c @@ -450,6 +450,7 @@ mstro_transport_rdma_dst_execute(mstro_cdo cdo_dst, Mstro__Pool__TransferTicket* ERR("Failed to alloc RDMA transport closure\n"); return MSTRO_NOMEM; } + closure->num_fragments = 1; closure->mr = mr; closure->fresh_alloc_route = fresh_alloc_route; closure->raw_ptr = cdo_dst->raw_ptr; @@ -468,7 +469,7 @@ mstro_transport_rdma_dst_execute(mstro_cdo cdo_dst, Mstro__Pool__TransferTicket* DEBUG("Doing event creation\n"); status = mstro_event_create(g_transport_rdma_edom, mstro_transport_rdma_cb, - (void*)closure, mstro_transport_rdma_dtor, 0, &(ctx->ev)); + (void*)closure, mstro_transport_rdma_dtor, false, &(ctx->ev)); if (status != MSTRO_OK) { ERR("Couldn't create event (%s)\n", mstro_status_description(status)); return MSTRO_FAIL; @@ -545,12 +546,20 @@ mstro_transport_rdma_cb(mstro_event ev, void* closure) srcidstr, &(args->srccdoid), WITH_CDO_ID_STR( dstidstr, &(args->dstcdoid), - DEBUG("Callback is finishing completion msg of CDO `%s`-> `%s`\n", - srcidstr, dstidstr););); + DEBUG("Callback is finishing completion msg of CDO `%s`-> `%s`, %zu fragments left\n", + srcidstr, dstidstr, args->num_fragments););); mstro_status status; args->status = MSTRO_OK; + size_t num_fragments_left = atomic_load(&(args->num_fragments)); + assert(num_fragments_left>0); + if(num_fragments_left>1) { + DEBUG("More fragments in flight, delaying rdma cleanup\n"); + args->status=MSTRO_OK; + return; + } + // unregister int err = fi_close((struct fid*)args->mr); if (err) { diff --git a/transport/transport_rdma.h 
b/transport/transport_rdma.h index 37f3d858..7ddfb23c 100644 --- a/transport/transport_rdma.h +++ b/transport/transport_rdma.h @@ -52,6 +52,7 @@ struct mstro_transport_rdma_cb_args { int fresh_alloc_route; void* raw_ptr; size_t len; + _Atomic(size_t) num_fragments; /**< number of operations (fragments) still in flight; more than 1 only if the transfer had to be chunked */ const char* name; struct mstro_cdo_id srccdoid; struct mstro_cdo_id dstcdoid; -- GitLab