diff --git a/include/maestro/env.h b/include/maestro/env.h
index 4a21202d2338dffd16f79651ae171146197b1790..542236799ce06a4f8626c6dfbb78409ceb57132e 100644
--- a/include/maestro/env.h
+++ b/include/maestro/env.h
@@ -239,6 +239,31 @@
  **/
 #define MSTRO_ENV_MIO_CONFIG "MSTRO_MIO_CONFIG"
 
+
+/**
+ ** @brief Flag to enable higher network security on Cray GNI interfaces
+ **
+ ** By default, Cray GNI (Aries) networks allow only jobs of the same
+ ** job allocation to use the HSN between each other. Despite using
+ ** user-id based DRC credentials (which allows cross-talk for jobs on
+ ** different nodes if the user's UID matches), jobs running on the
+ ** same node of an allocation can not talk to each other unless we
+ ** use DRC_FLAGS_FLEX_CREDENTIAL.
+ **
+ ** By default we do set DRC_FLAGS_FLEX_CREDENTIAL, as that allows
+ ** users to schedule jobs of the same workflow on the same or
+ ** different nodes without worrying about this. If you are sure you
+ ** will only run one job per compute node, consider enabling @ref
+ ** MSTRO_ENV_DRC_NON_FLEX to disable flex-credential usage.
+ **
+ ** The flag counts as enabled when it is set to a non-zero number, or
+ ** to any word that does not start with 'f'/'F' (false) or 'd'/'D'
+ ** (disabled). Unset, empty, or a value of 0 keeps the default.
+ **
+ **/
+#define MSTRO_ENV_DRC_NON_FLEX "MSTRO_DRC_NON_FLEX"
+
+
 /**@} (end of group MSTRO_ENV) */
 
 #endif
diff --git a/maestro/drc.c b/maestro/drc.c
index 8f95dca52b57b0f3cf8d67c51acdf85bdb13aabb..9eed800538d62d5043b1d8d8f2ccfa06c9b630e7 100644
--- a/maestro/drc.c
+++ b/maestro/drc.c
@@ -69,12 +69,19 @@ struct fi_gni_auth_key {
 };
 
 #define DRC_SUCCESS 0
-#define DRC_FLAGS_TARGET_UID 0
-#define DRC_FLAGS_TARGET_UID 0
 #define GNIX_AKT_RAW 4711
 
 typedef void * drc_info_handle_t;
 
+enum {
+  DRC_FLAGS_FLEX_CREDENTIAL = 1 << 0, /* acquire flag, flexible credential mode */
+  DRC_FLAGS_PERSISTENT      = 1 << 1, /* acquire flag, persistent credential */
+  DRC_FLAGS_TARGET_WLM      = 1 << 2, /* grant/revoke flag, value is WLM ID */
+  DRC_FLAGS_TARGET_UID      = 1 << 3, /* grant/revoke flag, value is UID */
+  DRC_FLAGS_TARGET_GID      = 1 << 4, /* grant/revoke flag, value is GID */
+  DRC_MAX_FLAGS
+};
+
 static inline int
 drc_acquire(uint32_t *credential, int flags)
 {
@@ -149,7 +156,26 @@ mstro_drc_init(mstro_drc_info *result_p)
   if(*result_p) {
     int ret;
     drc_info_handle_t info;
-    ret = drc_acquire(&(*result_p)->drc_id, 0);
+    // default: flex credentials, to allow multiple jobs on the same node ("DRC node insecure mode")
+    int acquire_flags = DRC_FLAGS_FLEX_CREDENTIAL;
+    const char *do_nonflex = getenv(MSTRO_ENV_DRC_NON_FLEX);
+    if(do_nonflex!=NULL && do_nonflex[0]!='\0') {
+      char *end = NULL;
+      long val = strtol(do_nonflex, &end, 10);
+      if(end!=do_nonflex) {
+        // value starts with a number: non-zero requests non-flex credentials
+        if(val!=0) {
+          acquire_flags = 0;
+        }
+      } else if(do_nonflex[0]!='f' && do_nonflex[0]!='F'    // fAlSe
+                && do_nonflex[0]!='d' && do_nonflex[0]!='D' // DiSabled
+                ) {
+        // any other word (true, yes, enabled, ...): user requests non-flex credentials
+        acquire_flags = 0;
+      }
+    }
+    ret = drc_acquire(&(*result_p)->drc_id, acquire_flags);
+
     if(ret!=DRC_SUCCESS) {
       ERR("Failed to drc_acquire a new credential: %d\n", ret);
       stat=MSTRO_FAIL; goto BAILOUT_FREE;