From c1c526bffcbb2417a01683484b627a7d0e50a6b4 Mon Sep 17 00:00:00 2001 From: Utz-Uwe Haus <uhaus@cray.com> Date: Thu, 18 Nov 2021 12:51:04 +0100 Subject: [PATCH] Check maximum supported transfer size of endpoints, raise error Better than having the error manifest in unclear issues from the provider. --- maestro/ofi.c | 5 +++++ transport/rdma.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/maestro/ofi.c b/maestro/ofi.c index f72d0ec2..b6d387b9 100644 --- a/maestro/ofi.c +++ b/maestro/ofi.c @@ -2522,6 +2522,11 @@ mstro_ofi__submit_component_descriptor_read(struct mstro_endpoint *my_ep, DEBUG("Checking for PM config block MR at (remote addr) 0x%" PRIx64 ", key of len %zu value %" PRIx64 "\n", mr_addr, inforeg->raw_key.len, mr_key); + if(sizeof(g_pm_component_descriptor) > my_ep->fi->ep_attr->max_msg_size) { + ERR("component descriptor size exceeds endpoint's max_msg_size. FIXME: should split up operation\n"); + + } + assert(ctx->msg==NULL); assert(my_ep->peer_info_mr!=NULL); /* incoming buffer has been registered at local endpoint set creation */ void * local_buf_mr_desc = fi_mr_desc(my_ep->peer_info_mr); diff --git a/transport/rdma.c b/transport/rdma.c index 1c2e244f..dd1ad574 100644 --- a/transport/rdma.c +++ b/transport/rdma.c @@ -543,6 +543,12 @@ mstro_transport_rdma_dst_execute(mstro_cdo cdo_dst, Mstro__Pool__TransferTicket* ctx->ep = app_entry->ep; mstro_ofi__remember_ctx(app_entry->ep, ctx); + if(len> app_entry->ep->fi->ep_attr->max_msg_size) { + ERR("Transfer size exceeds endpoint's maximum operation size. FIXME: should split up operation\n"); + status=MSTRO_UNIMPL; + goto BAILOUT; + } + int num_retries = 3; RETRY_RDMA_TRANSPORT_READ: ; -- GitLab