Skip to content

Commit

Permalink
Merge pull request #4607 from ROCmSoftwarePlatform/topic/sourav/add-p…
Browse files Browse the repository at this point in the history
…erftest-src-dst

TOOLS/PERF: Enable specifying different memory types for sender and receiver
  • Loading branch information
brminich authored Jan 9, 2020
2 parents 230daff + 72e53c4 commit ff99594
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 50 deletions.
3 changes: 2 additions & 1 deletion src/tools/perf/api/libperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ typedef struct ucx_perf_params {
unsigned thread_count; /* Number of threads in the test program */
ucs_async_mode_t async_mode; /* how async progress and locking is done */
ucx_perf_wait_mode_t wait_mode; /* How to wait */
ucs_memory_type_t mem_type; /* memory type */
ucs_memory_type_t send_mem_type; /* Send memory type */
ucs_memory_type_t recv_mem_type; /* Recv memory type */
unsigned flags; /* See ucx_perf_test_flags. */

size_t *msg_size_list; /* Test message sizes list. The size
Expand Down
47 changes: 36 additions & 11 deletions src/tools/perf/lib/libperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,17 @@ static void ucx_perf_test_prepare_new_run(ucx_perf_context_t *perf,
/*
 * Initialize a perf context for a test run: store a copy of the test
 * parameters, reset the buffer offset, select the memory allocator and then
 * prepare the context for a new run.
 *
 * NOTE(review): this listing appears to interleave the pre-change statements
 * (the first three, which use the single params->mem_type) with their
 * replacement (everything from the group_index declaration on) - confirm
 * against the actual file before relying on the first three statements.
 */
static void ucx_perf_test_init(ucx_perf_context_t *perf,
ucx_perf_params_t *params)
{
perf->params = *params;
perf->offset = 0;
perf->allocator = ucx_perf_mem_type_allocators[params->mem_type];
unsigned group_index;

perf->params = *params;
perf->offset = 0;
/* Queried after perf->params is assigned.
 * NOTE(review): presumably rte_call() reads the RTE from perf->params -
 * confirm against the macro definition. */
group_index = rte_call(perf, group_index);

/* Group index 0 allocates with the send memory type's allocator; every other
 * index uses the receive memory type's allocator. */
if (0 == group_index) {
perf->allocator = ucx_perf_mem_type_allocators[params->send_mem_type];
} else {
perf->allocator = ucx_perf_mem_type_allocators[params->recv_mem_type];
}

ucx_perf_test_prepare_new_run(perf, params);
}
Expand Down Expand Up @@ -457,6 +465,20 @@ static inline size_t __get_max_size(uct_perf_data_layout_t layout, size_t short_
0;
}

/**
 * Check whether a memory domain can be used with the given memory type.
 *
 * The memory type is accepted when it equals the domain's
 * cap.access_mem_type, or when its bit is set in cap.reg_mem_types.
 *
 * @param params    Test parameters; only the UCT transport and device names
 *                  are read, for the error message.
 * @param mem_type  Memory type to check.
 * @param md_attr   Attributes of the memory domain to check against.
 *
 * @return UCS_OK if the memory type is accepted, otherwise
 *         UCS_ERR_INVALID_PARAM after logging an error.
 */
static ucs_status_t uct_perf_test_check_md_support(ucx_perf_params_t *params,
                                                   ucs_memory_type_t mem_type,
                                                   uct_md_attr_t *md_attr)
{
    /* Direct access to this memory type is enough */
    if (md_attr->cap.access_mem_type == mem_type) {
        return UCS_OK;
    }

    /* Otherwise the type must be present in the reg_mem_types mask */
    if (md_attr->cap.reg_mem_types & UCS_BIT(mem_type)) {
        return UCS_OK;
    }

    ucs_error("Unsupported memory type %s by %s/%s",
              ucs_memory_type_names[mem_type],
              params->uct.tl_name, params->uct.dev_name);
    return UCS_ERR_INVALID_PARAM;
}

static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
uct_iface_h iface, uct_md_h md)
{
Expand Down Expand Up @@ -648,12 +670,14 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
}
}

if (!(md_attr.cap.access_mem_type == params->mem_type) &&
!(md_attr.cap.reg_mem_types & UCS_BIT(params->mem_type))) {
ucs_error("Unsupported memory type %s by %s/%s",
ucs_memory_type_names[params->mem_type],
params->uct.tl_name, params->uct.dev_name);
return UCS_ERR_INVALID_PARAM;
status = uct_perf_test_check_md_support(params, params->send_mem_type, &md_attr);
if (status != UCS_OK) {
return status;
}

status = uct_perf_test_check_md_support(params, params->recv_mem_type, &md_attr);
if (status != UCS_OK) {
return status;
}

return UCS_OK;
Expand Down Expand Up @@ -1573,8 +1597,9 @@ ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result)
ucx_perf_test_init(perf, params);

if (perf->allocator == NULL) {
ucs_error("Unsupported memory type %s",
ucs_memory_type_names[params->mem_type]);
ucs_error("Unsupported memory types %s<->%s",
ucs_memory_type_names[params->send_mem_type],
ucs_memory_type_names[params->recv_mem_type]);
status = UCS_ERR_UNSUPPORTED;
goto out_free;
}
Expand Down
1 change: 0 additions & 1 deletion src/tools/perf/lib/libperf_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ size_t ucx_perf_get_message_size(const ucx_perf_params_t *params)
return length;
}


END_C_DECLS

#endif
96 changes: 59 additions & 37 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ static void print_header(struct perftest_context *ctx)
printf("| API: %-60s |\n", test_api_str);
printf("| Test: %-60s |\n", test->desc);
printf("| Data layout: %-60s |\n", test_data_str);
printf("| Send memory: %-60s |\n", ucs_memory_type_names[ctx->params.send_mem_type]);
printf("| Recv memory: %-60s |\n", ucs_memory_type_names[ctx->params.recv_mem_type]);
printf("| Message size: %-60zu |\n", ucx_perf_get_message_size(&ctx->params));
}
}
Expand Down Expand Up @@ -330,6 +332,18 @@ static void print_test_name(struct perftest_context *ctx)
}
}

/**
 * Print one "<name> - <description>" usage line for every memory type, from
 * UCS_MEMORY_TYPE_HOST up to (not including) UCS_MEMORY_TYPE_LAST, whose
 * entry in ucx_perf_mem_type_allocators is non-NULL.
 */
static void print_memory_type_usage(void)
{
    ucs_memory_type_t mem_type;

    for (mem_type = UCS_MEMORY_TYPE_HOST; mem_type < UCS_MEMORY_TYPE_LAST;
         mem_type++) {
        /* Memory types without an allocator are not listed */
        if (ucx_perf_mem_type_allocators[mem_type] == NULL) {
            continue;
        }

        printf(" %s - %s\n", ucs_memory_type_names[mem_type],
               ucs_memory_type_descs[mem_type]);
    }
}

static void usage(const struct perftest_context *ctx, const char *program)
{
static const char* api_names[] = {
Expand Down Expand Up @@ -365,20 +379,9 @@ static void usage(const struct perftest_context *ctx, const char *program)
printf(" -s <size> list of scatter-gather sizes for single message (%zu)\n",
ctx->params.msg_size_list[0]);
printf(" for example: \"-s 16,48,8192,8192,14\"\n");
printf(" -m <mem type> memory type of messages\n");
printf(" host - system memory(default)\n");
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL) {
printf(" cuda - NVIDIA GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
printf(" cuda-managed - NVIDIA GPU managed/unified memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
printf(" rocm - AMD/ROCm GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
printf(" rocm-managed - AMD/ROCm GPU managed memory\n");
}
printf(" -m <send mem type>[,<recv mem type>]\n");
printf(" memory type of message for sender and receiver (host)\n");
print_memory_type_usage();
printf(" -n <iters> number of iterations to run (%ld)\n", ctx->params.max_iter);
printf(" -w <iters> number of warm-up iterations (%zu)\n",
ctx->params.warmup_iter);
Expand Down Expand Up @@ -460,6 +463,41 @@ static ucs_status_t parse_ucp_datatype_params(const char *optarg,
return UCS_OK;
}

/**
 * Translate a memory type name into its ucs_memory_type_t value.
 *
 * A name is accepted only when it equals an entry of ucs_memory_type_names
 * (below UCS_MEMORY_TYPE_LAST) and the matching entry of
 * ucx_perf_mem_type_allocators is non-NULL.
 *
 * @param optarg    Memory type name to look up. May be NULL, which is
 *                  reported as an error instead of being dereferenced.
 * @param mem_type  Set to the matching memory type on success; left untouched
 *                  on failure.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM (after logging an error)
 *         when the name is missing, unknown, or has no allocator.
 */
static ucs_status_t parse_mem_type(const char *optarg,
                                   ucs_memory_type_t *mem_type)
{
    ucs_memory_type_t it;

    /* A tokenizer can hand over NULL for an empty or delimiter-only argument;
     * passing NULL to strcmp() would be undefined behavior. */
    if (optarg == NULL) {
        ucs_error("Missing memory type name");
        return UCS_ERR_INVALID_PARAM;
    }

    for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) {
        if (!strcmp(optarg, ucs_memory_type_names[it]) &&
            (ucx_perf_mem_type_allocators[it] != NULL)) {
            *mem_type = it;
            return UCS_OK;
        }
    }

    ucs_error("Unsupported memory type: \"%s\"", optarg);
    return UCS_ERR_INVALID_PARAM;
}

/**
 * Parse the "-m" argument: "<send mem type>[,<recv mem type>]".
 *
 * With a single name, the same memory type is used for sender and receiver.
 * With two names separated by a comma, the first selects the send type and
 * the second the receive type. The argument string is not modified.
 *
 * Malformed input is rejected rather than silently repaired: an empty field
 * (",cuda" or "host,") or extra fields ("host,cuda,rocm") yield an error.
 *
 * @param optarg         Argument string to parse.
 * @param send_mem_type  Set to the sender memory type once the first name has
 *                       been parsed successfully.
 * @param recv_mem_type  Set to the receiver memory type on success.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM otherwise.
 */
static ucs_status_t parse_mem_type_params(const char *optarg,
                                          ucs_memory_type_t *send_mem_type,
                                          ucs_memory_type_t *recv_mem_type)
{
    /* Scratch copy of the first field, so the caller's string stays intact.
     * NOTE(review): assumes every memory type name is shorter than this
     * buffer, so a clamped over-long field can never match - confirm against
     * ucs_memory_type_names. */
    char send_name[64];
    const char *comma;
    size_t send_len;

    if (optarg == NULL) {
        return UCS_ERR_INVALID_PARAM;
    }

    comma = strchr(optarg, ',');
    if (comma == NULL) {
        /* Single name: it applies to both sides */
        if (UCS_OK != parse_mem_type(optarg, send_mem_type)) {
            return UCS_ERR_INVALID_PARAM;
        }

        *recv_mem_type = *send_mem_type;
        return UCS_OK;
    }

    send_len = (size_t)(comma - optarg);
    if (send_len >= sizeof(send_name)) {
        send_len = sizeof(send_name) - 1;
    }
    memcpy(send_name, optarg, send_len);
    send_name[send_len] = '\0';

    /* An empty first field reaches parse_mem_type() as "" (never NULL) and is
     * reported there as an unsupported memory type. */
    if (UCS_OK != parse_mem_type(send_name, send_mem_type)) {
        return UCS_ERR_INVALID_PARAM;
    }

    /* Everything after the first comma must be exactly one valid name */
    return parse_mem_type(comma + 1, recv_mem_type);
}

static ucs_status_t parse_message_sizes_params(const char *optarg,
ucx_perf_params_t *params)
{
Expand Down Expand Up @@ -523,7 +561,8 @@ static ucs_status_t init_test_params(ucx_perf_params_t *params)
params->flags = UCX_PERF_TEST_FLAG_VERBOSE;
params->uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW;
params->uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT;
params->mem_type = UCS_MEMORY_TYPE_HOST;
params->send_mem_type = UCS_MEMORY_TYPE_HOST;
params->recv_mem_type = UCS_MEMORY_TYPE_HOST;
params->msg_size_cnt = 1;
params->iov_stride = 0;
params->ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG;
Expand Down Expand Up @@ -668,29 +707,12 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
}
return UCS_ERR_INVALID_PARAM;
case 'm':
if (!strcmp(optarg, "host")) {
params->mem_type = UCS_MEMORY_TYPE_HOST;
return UCS_OK;
} else if (!strcmp(optarg, "cuda") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA;
return UCS_OK;
} else if (!strcmp(optarg, "cuda-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
return UCS_OK;
} else if (!strcmp(optarg, "rocm") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM;
return UCS_OK;
} else if (!strcmp(optarg, "rocm-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
return UCS_OK;
if (UCS_OK != parse_mem_type_params(optarg,
&params->send_mem_type,
&params->recv_mem_type)) {
return UCS_ERR_INVALID_PARAM;
}

ucs_error("Unsupported memory type: \"%s\"", optarg);
return UCS_ERR_INVALID_PARAM;
return UCS_OK;
default:
return UCS_ERR_INVALID_PARAM;
}
Expand Down
9 changes: 9 additions & 0 deletions src/ucs/memory/memory_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,12 @@ const char *ucs_memory_type_names[] = {
[UCS_MEMORY_TYPE_LAST] = "unknown"
};

/* Human-readable description of each memory type, indexed by the
 * ucs_memory_type_t value; the UCS_MEMORY_TYPE_LAST slot holds "unknown". */
const char *ucs_memory_type_descs[] = {
    [UCS_MEMORY_TYPE_HOST]         = "System memory",
    [UCS_MEMORY_TYPE_CUDA]         = "NVIDIA GPU memory",
    [UCS_MEMORY_TYPE_CUDA_MANAGED] = "NVIDIA GPU managed/unified memory",
    [UCS_MEMORY_TYPE_ROCM]         = "AMD/ROCm GPU memory",
    [UCS_MEMORY_TYPE_ROCM_MANAGED] = "AMD/ROCm GPU managed memory",
    [UCS_MEMORY_TYPE_LAST]         = "unknown"
};

5 changes: 5 additions & 0 deletions src/ucs/memory/memory_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ typedef enum ucs_memory_type {
*/
extern const char *ucs_memory_type_names[];

/**
 * Array of human-readable description strings, one per memory type, indexed
 * by the ucs_memory_type_t value. The entry at UCS_MEMORY_TYPE_LAST is the
 * placeholder string "unknown".
 */
extern const char *ucs_memory_type_descs[];


END_C_DECLS

Expand Down

0 comments on commit ff99594

Please sign in to comment.