diff --git a/src/tools/perf/api/libperf.h b/src/tools/perf/api/libperf.h
index f494983e989..984df703cd3 100644
--- a/src/tools/perf/api/libperf.h
+++ b/src/tools/perf/api/libperf.h
@@ -168,7 +168,8 @@ typedef struct ucx_perf_params {
     unsigned thread_count; /* Number of threads in the test program */
     ucs_async_mode_t async_mode; /* how async progress and locking is done */
     ucx_perf_wait_mode_t wait_mode; /* How to wait */
-    ucs_memory_type_t mem_type; /* memory type */
+    ucs_memory_type_t send_mem_type; /* Send memory type */
+    ucs_memory_type_t recv_mem_type; /* Recv memory type */
     unsigned flags; /* See ucx_perf_test_flags. */
 
     size_t *msg_size_list; /* Test message sizes list. The size
diff --git a/src/tools/perf/lib/libperf.c b/src/tools/perf/lib/libperf.c
index 0e4168cf0eb..4cbccc4e783 100644
--- a/src/tools/perf/lib/libperf.c
+++ b/src/tools/perf/lib/libperf.c
@@ -308,9 +308,18 @@ static void ucx_perf_test_prepare_new_run(ucx_perf_context_t *perf,
 static void ucx_perf_test_init(ucx_perf_context_t *perf,
                                ucx_perf_params_t *params)
 {
-    perf->params = *params;
-    perf->offset = 0;
-    perf->allocator = ucx_perf_mem_type_allocators[params->mem_type];
+    unsigned group_index;
+
+    perf->params = *params;
+    perf->offset = 0;
+    group_index = rte_call(perf, group_index);
+
+    /* Group index 0 uses the send memory type, any other index the recv one */
+    if (0 == group_index) {
+        perf->allocator = ucx_perf_mem_type_allocators[params->send_mem_type];
+    } else {
+        perf->allocator = ucx_perf_mem_type_allocators[params->recv_mem_type];
+    }
 
     ucx_perf_test_prepare_new_run(perf, params);
 }
@@ -457,6 +466,21 @@ static inline size_t __get_max_size(uct_perf_data_layout_t layout, size_t short_
            0;
 }
 
+/* Fail unless the MD can access or register memory of type 'mem_type' */
+static ucs_status_t uct_perf_test_check_md_support(ucx_perf_params_t *params,
+                                                   ucs_memory_type_t mem_type,
+                                                   uct_md_attr_t *md_attr)
+{
+    if (!(md_attr->cap.access_mem_type == mem_type) &&
+        !(md_attr->cap.reg_mem_types & UCS_BIT(mem_type))) {
+        ucs_error("Unsupported memory type %s by %s/%s",
+                  ucs_memory_type_names[mem_type],
+                  params->uct.tl_name, params->uct.dev_name);
+        return UCS_ERR_INVALID_PARAM;
+    }
+    return UCS_OK;
+}
+
 static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
                                                      uct_iface_h iface, uct_md_h md)
 {
@@ -648,12 +672,14 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
         }
     }
 
-    if (!(md_attr.cap.access_mem_type == params->mem_type) &&
-        !(md_attr.cap.reg_mem_types & UCS_BIT(params->mem_type))) {
-        ucs_error("Unsupported memory type %s by %s/%s",
-                  ucs_memory_type_names[params->mem_type],
-                  params->uct.tl_name, params->uct.dev_name);
-        return UCS_ERR_INVALID_PARAM;
+    status = uct_perf_test_check_md_support(params, params->send_mem_type, &md_attr);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    status = uct_perf_test_check_md_support(params, params->recv_mem_type, &md_attr);
+    if (status != UCS_OK) {
+        return status;
     }
 
     return UCS_OK;
@@ -1573,8 +1599,9 @@ ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result)
     ucx_perf_test_init(perf, params);
 
     if (perf->allocator == NULL) {
-        ucs_error("Unsupported memory type %s",
-                  ucs_memory_type_names[params->mem_type]);
+        ucs_error("Unsupported memory types %s<->%s",
+                  ucs_memory_type_names[params->send_mem_type],
+                  ucs_memory_type_names[params->recv_mem_type]);
         status = UCS_ERR_UNSUPPORTED;
         goto out_free;
     }
diff --git a/src/tools/perf/lib/libperf_int.h b/src/tools/perf/lib/libperf_int.h
index 515212c1c2b..a361371480e 100644
--- a/src/tools/perf/lib/libperf_int.h
+++ b/src/tools/perf/lib/libperf_int.h
@@ -221,7 +221,6 @@ size_t ucx_perf_get_message_size(const ucx_perf_params_t *params)
     return length;
 }
 
-
 END_C_DECLS
 
 #endif
diff --git a/src/tools/perf/perftest.c b/src/tools/perf/perftest.c
index 7b52e022410..2f1a9eedc56 100644
--- a/src/tools/perf/perftest.c
+++ b/src/tools/perf/perftest.c
@@ -282,6 +282,8 @@ static void print_header(struct perftest_context *ctx)
         printf("| API: %-60s |\n", test_api_str);
         printf("| Test: %-60s |\n", test->desc);
         printf("| Data layout: %-60s |\n", test_data_str);
+        printf("| Send memory: %-60s |\n", ucs_memory_type_names[ctx->params.send_mem_type]);
+        printf("| Recv memory: %-60s |\n", ucs_memory_type_names[ctx->params.recv_mem_type]);
         printf("| Message size: %-60zu |\n", ucx_perf_get_message_size(&ctx->params));
     }
 }
@@ -330,6 +332,20 @@ static void print_test_name(struct perftest_context *ctx)
     }
 }
 
+/* Print the name and description of every memory type that has an allocator */
+static void print_memory_type_usage(void)
+{
+    ucs_memory_type_t it;
+
+    for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) {
+        if (ucx_perf_mem_type_allocators[it] != NULL) {
+            printf(" %s - %s\n",
+                   ucs_memory_type_names[it],
+                   ucs_memory_type_descs[it]);
+        }
+    }
+}
+
 static void usage(const struct perftest_context *ctx, const char *program)
 {
     static const char* api_names[] = {
@@ -365,20 +381,9 @@ static void usage(const struct perftest_context *ctx, const char *program)
     printf(" -s list of scatter-gather sizes for single message (%zu)\n",
            ctx->params.msg_size_list[0]);
     printf(" for example: \"-s 16,48,8192,8192,14\"\n");
-    printf(" -m memory type of messages\n");
-    printf(" host - system memory(default)\n");
-    if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL) {
-        printf(" cuda - NVIDIA GPU memory\n");
-    }
-    if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
-        printf(" cuda-managed - NVIDIA GPU managed/unified memory\n");
-    }
-    if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
-        printf(" rocm - AMD/ROCm GPU memory\n");
-    }
-    if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
-        printf(" rocm-managed - AMD/ROCm GPU managed memory\n");
-    }
+    printf(" -m <send mem type>[,<recv mem type>]\n");
+    printf(" memory type of message for sender and receiver (host)\n");
+    print_memory_type_usage();
     printf(" -n number of iterations to run (%ld)\n", ctx->params.max_iter);
     printf(" -w number of warm-up iterations (%zu)\n",
            ctx->params.warmup_iter);
@@ -460,6 +465,58 @@ static ucs_status_t parse_ucp_datatype_params(const char *optarg,
     return UCS_OK;
 }
 
+/* Match the first 'length' characters of 'str' against the names of the memory
+ * types that have an allocator; 'str' need not be terminated at 'length' */
+static ucs_status_t parse_mem_type(const char *str, size_t length,
+                                   ucs_memory_type_t *mem_type)
+{
+    ucs_memory_type_t it;
+
+    for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) {
+        if ((strlen(ucs_memory_type_names[it]) == length) &&
+            !strncmp(str, ucs_memory_type_names[it], length) &&
+            (ucx_perf_mem_type_allocators[it] != NULL)) {
+            *mem_type = it;
+            return UCS_OK;
+        }
+    }
+
+    ucs_error("Unsupported memory type: \"%.*s\"", (int)length, str);
+    return UCS_ERR_INVALID_PARAM;
+}
+
+/* Parse "<send mem type>[,<recv mem type>]"; when only one type is given it is
+ * used for both sides. 'optarg' is scanned in place and never modified. */
+static ucs_status_t parse_mem_type_params(const char *optarg,
+                                          ucs_memory_type_t *send_mem_type,
+                                          ucs_memory_type_t *recv_mem_type)
+{
+    const char *delim = ",";
+    const char *token;
+    ucs_status_t status;
+    size_t length;
+
+    /* Same tokenization as strtok(): skip delimiters, then take the run of
+     * non-delimiter characters. An empty first token is rejected by
+     * parse_mem_type() instead of being passed to strcmp() as NULL. */
+    token  = optarg + strspn(optarg, delim);
+    length = strcspn(token, delim);
+    status = parse_mem_type(token, length, send_mem_type);
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    token += length;
+    token += strspn(token, delim);
+    length = strcspn(token, delim);
+    if (length == 0) {
+        *recv_mem_type = *send_mem_type;
+        return UCS_OK;
+    }
+
+    return parse_mem_type(token, length, recv_mem_type);
+}
+
 static ucs_status_t parse_message_sizes_params(const char *optarg,
                                                ucx_perf_params_t *params)
 {
@@ -523,7 +580,8 @@ static ucs_status_t init_test_params(ucx_perf_params_t *params)
     params->flags = UCX_PERF_TEST_FLAG_VERBOSE;
     params->uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW;
     params->uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT;
-    params->mem_type = UCS_MEMORY_TYPE_HOST;
+    params->send_mem_type = UCS_MEMORY_TYPE_HOST;
+    params->recv_mem_type = UCS_MEMORY_TYPE_HOST;
     params->msg_size_cnt = 1;
     params->iov_stride = 0;
     params->ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG;
@@ -668,29 +726,12 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
         }
         return UCS_ERR_INVALID_PARAM;
     case 'm':
-        if (!strcmp(optarg, "host")) {
-            params->mem_type = UCS_MEMORY_TYPE_HOST;
-            return UCS_OK;
-        } else if (!strcmp(optarg, "cuda") &&
-                   (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL)) {
-            params->mem_type = UCS_MEMORY_TYPE_CUDA;
-            return UCS_OK;
-        } else if (!strcmp(optarg, "cuda-managed") &&
-                   (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
-            params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
-            return UCS_OK;
-        } else if (!strcmp(optarg, "rocm") &&
-                   (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
-            params->mem_type = UCS_MEMORY_TYPE_ROCM;
-            return UCS_OK;
-        } else if (!strcmp(optarg, "rocm-managed") &&
-                   (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
-            params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
-            return UCS_OK;
+        if (UCS_OK != parse_mem_type_params(optarg,
+                                            &params->send_mem_type,
+                                            &params->recv_mem_type)) {
+            return UCS_ERR_INVALID_PARAM;
         }
-
-        ucs_error("Unsupported memory type: \"%s\"", optarg);
-        return UCS_ERR_INVALID_PARAM;
+        return UCS_OK;
     default:
         return UCS_ERR_INVALID_PARAM;
     }
diff --git a/src/ucs/memory/memory_type.c b/src/ucs/memory/memory_type.c
index 0ba6602270a..cbd98f23927 100644
--- a/src/ucs/memory/memory_type.c
+++ b/src/ucs/memory/memory_type.c
@@ -18,3 +18,12 @@ const char *ucs_memory_type_names[] = {
     [UCS_MEMORY_TYPE_LAST] = "unknown"
 };
 
+const char *ucs_memory_type_descs[] = {
+    [UCS_MEMORY_TYPE_HOST] = "System memory",
+    [UCS_MEMORY_TYPE_CUDA] = "NVIDIA GPU memory",
+    [UCS_MEMORY_TYPE_CUDA_MANAGED] = "NVIDIA GPU managed/unified memory",
+    [UCS_MEMORY_TYPE_ROCM] = "AMD/ROCm GPU memory",
+    [UCS_MEMORY_TYPE_ROCM_MANAGED] = "AMD/ROCm GPU managed memory",
+    [UCS_MEMORY_TYPE_LAST] = "unknown"
+};
+
diff --git a/src/ucs/memory/memory_type.h b/src/ucs/memory/memory_type.h
index 9c54326278c..dec2a2e59b0 100644
--- a/src/ucs/memory/memory_type.h
+++ b/src/ucs/memory/memory_type.h
@@ -38,6 +38,11 @@ typedef enum ucs_memory_type {
  */
 extern const char *ucs_memory_type_names[];
 
+/**
+ * Array of string descriptions for each memory type
+ */
+extern const char *ucs_memory_type_descs[];
+
 END_C_DECLS