Patch summary (commentary only; patch tools skip everything that precedes the
first "diff --git" line).

Layout: one source line per diff line.  Line breaks and indentation inside the
hunks are reconstructed; the hunk header line counts are unchanged.

maestro/drc.c
  mstro_drc_insert_ofi() asserts fi->addr_format==FI_ADDR_GNI before
  duplicating the auth key, and no longer assigns
  fi->domain_attr->auth_key_size.
  NOTE(review): confirm that auth_key_size is populated somewhere else before
  the domain is opened; this patch does not show where.

maestro/ofi.c
  * mstro_ep__create_cred(): the "no credential support" debug message prints
    ep->pbep->ofiproto together with the protobuf enum value name.
  * mstro_mr_key_get(): the key is logged as its first 8 bytes (zero padded
    when keysize is smaller) instead of being memcpy()ed into a uint64_t.
  * mstro_ep_build_from_ofi(): auth key selection is a switch on
    fi->addr_format.  FI_ADDR_GNI inserts the DRC credential, FI_ADDR_CXI
    warns and falls through to the default, which sets auth_key to NULL.
  * mstro_ep_build_from_ofi(): the requested MR key is checked against the
    domain's mr_key_size.  The check shifts the key right by the key width so
    that every bit above the width is tested, and is skipped when the width
    covers the whole uint64_t so the shift count stays below 64.
    mr_key_size is printed with %zu (presumably size_t, as in libfabric's
    struct fi_domain_attr -- verify against the installed headers).

protocols/maestro-endpoints.c
  mstro_ep__addr_describe(): the CXI description prints tmp.pid as its second
  field instead of printing tmp.nic twice.

transport/rdma.c
  When the domain's mr_mode has FI_MR_ENDPOINT set, a fresh registration is
  bound to the endpoint with fi_mr_bind() and enabled with fi_mr_enable().
  Either failure sets status=MSTRO_FAIL and jumps to BAILOUT_UNLOCK.  All
  libfabric error codes in this hunk are rendered with fi_strerror().
  NOTE(review): the fi_mr_enable() failure path does not close the MR or call
  mstro_memunlock(), unlike the fi_mr_bind() failure path -- confirm that
  BAILOUT_UNLOCK covers this.

diff --git a/maestro/drc.c b/maestro/drc.c
index 1a9fbd43286d7c274451ab14054d9b35d1d0d959..ade5f135595756c528c03a5c5a8276ca7d928efa 100644
--- a/maestro/drc.c
+++ b/maestro/drc.c
@@ -272,17 +272,16 @@ mstro_drc_insert_ofi(struct fi_info *fi, const mstro_drc_info info)
 {
   if(fi==NULL||info==NULL)
     return MSTRO_INVARG;
-  
+
+  assert(fi->addr_format==FI_ADDR_GNI);
 #if FI_VERSION_GE(FI_VERSION(FI_MAJOR_VERSION,FI_MINOR_VERSION), FI_VERSION(1,5))
   // auth key is free()ed by fabric.c on teardown, so we need to duplicate it
   fi->domain_attr->auth_key = (uint8_t*) dup_authkey(&info->auth_key);
   if(fi->domain_attr->auth_key==NULL) {
     return MSTRO_NOMEM;
   }
-
-  fi->domain_attr->auth_key_size = sizeof(info->auth_key);
 #else
-#warning Open Fabric version too old for auth_key suppport
+  #warning Open Fabric version too old for auth_key support
 #endif
 
   return MSTRO_OK;
diff --git a/maestro/ofi.c b/maestro/ofi.c
index 3e636d5e360e0d192aa63b136615b61b8619c536..07ba33064c61f2aead39226b0b283939ed9dafb1 100644
--- a/maestro/ofi.c
+++ b/maestro/ofi.c
@@ -285,8 +285,10 @@ mstro_ep__create_cred(const struct mstro_endpoint *ep,
       }
       break;
     default:
-      DEBUG("No credential support for endpoint type %d\n",
-            ep->pbep->proto_case);
+      DEBUG("No credential support for endpoint type %d (%s)\n",
+            ep->pbep->ofiproto,
+            protobuf_c_enum_descriptor_get_value(&mstro__ofi_endpoint_kind__descriptor,
+                                                 ep->pbep->ofiproto)->name);
     }
   }
   stat=MSTRO_OK;
@@ -861,12 +863,21 @@ mstro_mr_key_get(struct fi_info* fi, struct fid_mr* mr,
   *mkey = mr_key;
 
   {
-    uint64_t k = 0;
-    memcpy(&k, mr_key, keysize); /* just for printing */
+    /* copy up to first 8 bytes for printing */
+    uint8_t k[8];
+    for(size_t i=0; i<8; i++) {
+      if(keysize>i) {
+        k[i] = mr_key[i];
+      } else {
+        k[i] = 0;
+      }
+    }
 
-    DEBUG("Component info RDMA block registered%s: local %p, MR %p, addr %" PRIx64 ", desc %p, keysize %zu (key uint64 start: 0x%" PRIx64 ")\n",
-          fi->domain_attr->mr_mode & FI_MR_RAW ? " (raw keys)" : "",
-          maddr, mr, addr, fi_mr_desc(mr), keysize, k);
+    DEBUG("Component info RDMA block registered%s: local %p, MR %p, addr %" PRIx64 ", desc %p, keysize %zu (key start: %"
+          PRIx8 " %" PRIx8 " %" PRIx8 " %" PRIx8 " %" PRIx8 " %" PRIx8 " %" PRIx8 " %" PRIx8 ")\n",
+          fi->domain_attr->mr_mode & FI_MR_RAW ? " (raw keys)" : "",
+          maddr, mr, addr, fi_mr_desc(mr), keysize,
+          k[0], k[1], k[2], k[3], k[4], k[5], k[6], k[7]);
   }
 
   return MSTRO_OK;
@@ -904,22 +915,32 @@ mstro_ep_build_from_ofi(struct mstro_endpoint *dst,
       retstat = MSTRO_FAIL;
       goto BAILOUT_FAIL;
     }
-    if(fi->addr_format==FI_ADDR_GNI) {
-      /* put cookie into
-       *  fi->domain_attr->auth_key = (void *) &auth_key;
-       *  fi->domain_attr->auth_key_size = sizeof(auth_key);
-       */
-      /* we only support one cookie per workflow currently. It is
-       * allocated early on in ofi_init (on pool manager) or obtained
-       * from PM_INFO (on clients) */
-      assert(g_drc_info!=NULL);
+    switch(fi->addr_format) {
+      case FI_ADDR_GNI: {
+        /* put cookie into
+         *  fi->domain_attr->auth_key = (void *) &auth_key;
+         *  fi->domain_attr->auth_key_size = sizeof(auth_key);
+         */
+        /* we only support one cookie per workflow currently. It is
+         * allocated early on in ofi_init (on pool manager) or obtained
+         * from PM_INFO (on clients) */
+        assert(g_drc_info!=NULL);
 
-      stat = mstro_drc_insert_ofi(fi, g_drc_info);
-      if(stat!=MSTRO_OK) {
-        ERR("Failed to insert DRC credential into fabric info\n");
-        retstat=MSTRO_FAIL;
-        goto BAILOUT_FAIL;
+        stat = mstro_drc_insert_ofi(fi, g_drc_info);
+        if(stat!=MSTRO_OK) {
+          ERR("Failed to insert DRC credential into fabric info\n");
+          retstat=MSTRO_FAIL;
+          goto BAILOUT_FAIL;
+        }
+        break;
       }
+      case FI_ADDR_CXI:
+        WARN("Missing VNI support\n");
+        /* fallthrough */
+      default:
+        DEBUG("No auth_key for address format %d, setting to NULL\n", fi->addr_format);
+        fi->domain_attr->auth_key = NULL;
+        break;
     }
 
     /* create domain */
@@ -1078,6 +1099,10 @@ mstro_ep_build_from_ofi(struct mstro_endpoint *dst,
     struct fid_mr *mr;
     uint64_t requested_key = fi->domain_attr->mr_mode & FI_MR_PROV_KEY
                              ? 0 : mstro_memory_new_key();
+    if(fi->domain_attr->mr_key_size < sizeof(requested_key) && (requested_key >> (8*fi->domain_attr->mr_key_size)) != 0) {
+      ERR("memory registration key out of bound for provider's domain attribute: requested %" PRIx64", domain keylen %zu bytes\n",
+          requested_key, fi->domain_attr->mr_key_size);
+    }
     stat = fi_mr_reg(domain, &g_component_descriptor,
                      sizeof(g_component_descriptor),
                      FI_REMOTE_READ, 0, requested_key, 0, &mr, NULL);
diff --git a/protocols/maestro-endpoints.c b/protocols/maestro-endpoints.c
index 6ecdbfb4e2f69f974d52a9affbe12b4112e37316..2b041699166a7e322ade2a92190d1e89ed03814d 100644
--- a/protocols/maestro-endpoints.c
+++ b/protocols/maestro-endpoints.c
@@ -469,7 +469,7 @@ mstro_ep__addr_describe(const Mstro__Endpoint *ep, char *buf, size_t buflen)
         };
 
         union cxi_addr_ tmp = { .raw = ep->ofiaddr->cxi->raw };
-        num_written = snprintf(buf,unabbrev_len,"%d:%d (%d)", tmp.nic, tmp.nic, tmp.valid);
+        num_written = snprintf(buf,unabbrev_len,"%d:%d (%d)", tmp.nic, tmp.pid, tmp.valid);
         break;
       }
 
diff --git a/transport/rdma.c b/transport/rdma.c
index 08cf8fa763718fae9d15eb3d6a3563e1db18d1f5..1ee32e0b8c7608a00299113d9e509011d27d3eca 100644
--- a/transport/rdma.c
+++ b/transport/rdma.c
@@ -244,6 +244,31 @@ FRESH_REGISTRATION:
       goto BAILOUT_UNLOCK;
     }
     regentry->mr_reg = mr;
+
+    if((e->ep->fi->domain_attr->mr_mode & FI_MR_ENDPOINT)) {
+      /* bind and enable mreg */
+      err = fi_mr_bind(mr, (struct fid *)regentry->key.ep, 0);
+      if(err<0) {
+        ERR("Failed to bind transport mreg to endpoint: %d (%s)\n",
+            err, fi_strerror(-err));
+        err = fi_close((struct fid *)mr);
+        if(err<0) {
+          ERR("Failed to close transport mreg: %d (%s)\n", err, fi_strerror(-err));
+        }
+        status = mstro_memunlock(dl.data, dl.len);
+        status=MSTRO_FAIL;
+        goto BAILOUT_UNLOCK;
+      }
+
+      err = fi_mr_enable(mr);
+      if(err<0) {
+        ERR("Failed to enable transport mreg on endpoint: %d (%s)\n",
+            err, fi_strerror(-err));
+        status=MSTRO_FAIL;
+        goto BAILOUT_UNLOCK;
+      }
+    }
+
     WITH_CDO_ID_STR(
         idstr, &(regentry->key.id),
         DEBUG("Adding CDO %s (id %s) to mreg table\n",