Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOOLS/PERF: Enable specifying different memory types for sender and receiver #4607

Merged
merged 5 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/tools/perf/api/libperf.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ typedef struct ucx_perf_params {
unsigned thread_count; /* Number of threads in the test program */
ucs_async_mode_t async_mode; /* how async progress and locking is done */
ucx_perf_wait_mode_t wait_mode; /* How to wait */
ucs_memory_type_t mem_type; /* memory type */
ucs_memory_type_t send_mem_type; /* Send memory type */
ucs_memory_type_t recv_mem_type; /* Recv memory type */
unsigned flags; /* See ucx_perf_test_flags. */

size_t *msg_size_list; /* Test message sizes list. The size
Expand Down
47 changes: 36 additions & 11 deletions src/tools/perf/lib/libperf.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,17 @@ static void ucx_perf_test_prepare_new_run(ucx_perf_context_t *perf,
/**
 * Initialize a perf context from the user-supplied test parameters.
 *
 * Copies the parameters into the context, resets the buffer offset, picks the
 * memory allocator for this peer and prepares the first run.
 *
 * @param perf    Perf context to initialize.
 * @param params  Test parameters; copied by value into perf->params.
 */
static void ucx_perf_test_init(ucx_perf_context_t *perf,
                               ucx_perf_params_t *params)
{
    unsigned group_index;

    perf->params = *params;
    perf->offset = 0;
    /* NOTE(review): rte_call() presumably reads perf->params, so it must stay
     * after the assignment above - confirm against the macro definition */
    group_index  = rte_call(perf, group_index);

    /* The peer with group index 0 allocates with the send memory type, every
     * other peer with the receive memory type. The entry may be NULL when no
     * allocator is available for that type; ucx_perf_run() checks for it. */
    perf->allocator = (0 == group_index) ?
                      ucx_perf_mem_type_allocators[params->send_mem_type] :
                      ucx_perf_mem_type_allocators[params->recv_mem_type];

    ucx_perf_test_prepare_new_run(perf, params);
}
Expand Down Expand Up @@ -457,6 +465,20 @@ static inline size_t __get_max_size(uct_perf_data_layout_t layout, size_t short_
0;
}

/**
 * Check that a memory domain can be used with the given memory type.
 *
 * The memory type is accepted when it is the one the memory domain accesses,
 * or when its bit is set in the memory domain's registrable memory types.
 *
 * @param params    Test parameters; the transport and device names are used
 *                  only for the error message.
 * @param mem_type  Memory type to check.
 * @param md_attr   Attributes of the memory domain.
 *
 * @return UCS_OK if the memory type is supported, UCS_ERR_INVALID_PARAM
 *         otherwise (an error is logged).
 */
static ucs_status_t uct_perf_test_check_md_support(ucx_perf_params_t *params,
                                                   ucs_memory_type_t mem_type,
                                                   uct_md_attr_t *md_attr)
{
    if ((md_attr->cap.access_mem_type == mem_type) ||
        (md_attr->cap.reg_mem_types & UCS_BIT(mem_type))) {
        return UCS_OK;
    }

    ucs_error("Unsupported memory type %s by %s/%s",
              ucs_memory_type_names[mem_type],
              params->uct.tl_name, params->uct.dev_name);
    return UCS_ERR_INVALID_PARAM;
}

static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
uct_iface_h iface, uct_md_h md)
{
Expand Down Expand Up @@ -648,12 +670,14 @@ static ucs_status_t uct_perf_test_check_capabilities(ucx_perf_params_t *params,
}
}

if (!(md_attr.cap.access_mem_type == params->mem_type) &&
!(md_attr.cap.reg_mem_types & UCS_BIT(params->mem_type))) {
ucs_error("Unsupported memory type %s by %s/%s",
ucs_memory_type_names[params->mem_type],
params->uct.tl_name, params->uct.dev_name);
return UCS_ERR_INVALID_PARAM;
status = uct_perf_test_check_md_support(params, params->send_mem_type, &md_attr);
if (status != UCS_OK) {
return status;
}

status = uct_perf_test_check_md_support(params, params->recv_mem_type, &md_attr);
if (status != UCS_OK) {
return status;
}

return UCS_OK;
Expand Down Expand Up @@ -1573,8 +1597,9 @@ ucs_status_t ucx_perf_run(ucx_perf_params_t *params, ucx_perf_result_t *result)
ucx_perf_test_init(perf, params);

if (perf->allocator == NULL) {
ucs_error("Unsupported memory type %s",
ucs_memory_type_names[params->mem_type]);
ucs_error("Unsupported memory types %s<->%s",
ucs_memory_type_names[params->send_mem_type],
ucs_memory_type_names[params->recv_mem_type]);
status = UCS_ERR_UNSUPPORTED;
goto out_free;
}
Expand Down
1 change: 0 additions & 1 deletion src/tools/perf/lib/libperf_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ size_t ucx_perf_get_message_size(const ucx_perf_params_t *params)
return length;
}


END_C_DECLS

#endif
96 changes: 59 additions & 37 deletions src/tools/perf/perftest.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ static void print_header(struct perftest_context *ctx)
printf("| API: %-60s |\n", test_api_str);
printf("| Test: %-60s |\n", test->desc);
printf("| Data layout: %-60s |\n", test_data_str);
printf("| Send memory: %-60s |\n", ucs_memory_type_names[ctx->params.send_mem_type]);
printf("| Recv memory: %-60s |\n", ucs_memory_type_names[ctx->params.recv_mem_type]);
printf("| Message size: %-60zu |\n", ucx_perf_get_message_size(&ctx->params));
}
}
Expand Down Expand Up @@ -330,6 +332,18 @@ static void print_test_name(struct perftest_context *ctx)
}
}

/*
 * Print one "<name> - <description>" line to stdout for every memory type
 * that has a non-NULL entry in ucx_perf_mem_type_allocators.
 */
static void print_memory_type_usage(void)
{
    ucs_memory_type_t mem_type;

    for (mem_type = UCS_MEMORY_TYPE_HOST; mem_type < UCS_MEMORY_TYPE_LAST;
         mem_type++) {
        /* Skip memory types that have no allocator */
        if (ucx_perf_mem_type_allocators[mem_type] == NULL) {
            continue;
        }

        printf(" %s - %s\n", ucs_memory_type_names[mem_type],
               ucs_memory_type_descs[mem_type]);
    }
}

static void usage(const struct perftest_context *ctx, const char *program)
{
static const char* api_names[] = {
Expand Down Expand Up @@ -365,20 +379,9 @@ static void usage(const struct perftest_context *ctx, const char *program)
printf(" -s <size> list of scatter-gather sizes for single message (%zu)\n",
ctx->params.msg_size_list[0]);
printf(" for example: \"-s 16,48,8192,8192,14\"\n");
printf(" -m <mem type> memory type of messages\n");
printf(" host - system memory(default)\n");
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL) {
printf(" cuda - NVIDIA GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL) {
printf(" cuda-managed - NVIDIA GPU managed/unified memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL) {
printf(" rocm - AMD/ROCm GPU memory\n");
}
if (ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL) {
printf(" rocm-managed - AMD/ROCm GPU managed memory\n");
}
printf(" -m <send mem type>[,<recv mem type>]\n");
printf(" memory type of message for sender and receiver (host)\n");
dmitrygx marked this conversation as resolved.
Show resolved Hide resolved
print_memory_type_usage();
printf(" -n <iters> number of iterations to run (%ld)\n", ctx->params.max_iter);
printf(" -w <iters> number of warm-up iterations (%zu)\n",
ctx->params.warmup_iter);
Expand Down Expand Up @@ -460,6 +463,41 @@ static ucs_status_t parse_ucp_datatype_params(const char *optarg,
return UCS_OK;
}

/**
 * Translate a memory type name into its ucs_memory_type_t value.
 *
 * A name is accepted only if it matches an entry of ucs_memory_type_names
 * and an allocator is registered for that type in
 * ucx_perf_mem_type_allocators.
 *
 * @param optarg    NUL-terminated memory type name; NULL is rejected.
 * @param mem_type  Filled with the matching memory type on success; left
 *                  untouched on failure.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM if the name is missing,
 *         unknown or has no allocator (an error is logged).
 */
static ucs_status_t parse_mem_type(const char *optarg,
                                   ucs_memory_type_t *mem_type)
{
    ucs_memory_type_t it;

    /* strcmp() on a NULL pointer is undefined behaviour; a tokenizer that
     * finds no token hands us NULL, so reject it explicitly */
    if (optarg == NULL) {
        ucs_error("Missing memory type name");
        return UCS_ERR_INVALID_PARAM;
    }

    for (it = UCS_MEMORY_TYPE_HOST; it < UCS_MEMORY_TYPE_LAST; it++) {
        if (!strcmp(optarg, ucs_memory_type_names[it]) &&
            (ucx_perf_mem_type_allocators[it] != NULL)) {
            *mem_type = it;
            return UCS_OK;
        }
    }

    ucs_error("Unsupported memory type: \"%s\"", optarg);
    return UCS_ERR_INVALID_PARAM;
}

/*
 * Parse one memory type name given as a (pointer, length) slice of a larger
 * string. The slice is copied into a local NUL-terminated buffer so that the
 * caller's string is never modified.
 */
static ucs_status_t parse_mem_type_token(const char *token, size_t length,
                                         ucs_memory_type_t *mem_type)
{
    /* NOTE(review): assumes every entry of ucs_memory_type_names is shorter
     * than this buffer; a longer token cannot be copied and is rejected */
    char name[64];

    if ((length == 0) || (length >= sizeof(name))) {
        ucs_error("Unsupported memory type: \"%.*s\"", (int)length, token);
        return UCS_ERR_INVALID_PARAM;
    }

    memcpy(name, token, length);
    name[length] = '\0';
    return parse_mem_type(name, mem_type);
}

/**
 * Parse the "-m <send mem type>[,<recv mem type>]" argument.
 *
 * Tokens are separated by ',' with the same rules strtok() applies: leading
 * and repeated delimiters are skipped, and anything after the second token
 * is ignored. Unlike strtok(), the input string is not modified and no
 * hidden static state is used.
 *
 * @param optarg         Argument string; NULL, empty and delimiter-only
 *                       strings are rejected.
 * @param send_mem_type  Filled with the sender memory type on success.
 * @param recv_mem_type  Filled with the receiver memory type on success; equal
 *                       to the sender type when no second token is given.
 *
 * @return UCS_OK on success, UCS_ERR_INVALID_PARAM otherwise. On failure
 *         neither output is modified.
 */
static ucs_status_t parse_mem_type_params(const char *optarg,
                                          ucs_memory_type_t *send_mem_type,
                                          ucs_memory_type_t *recv_mem_type)
{
    const char *delim = ",";
    ucs_memory_type_t send_type, recv_type;
    const char *token;
    size_t length;

    if (optarg == NULL) {
        ucs_error("Missing memory type argument");
        return UCS_ERR_INVALID_PARAM;
    }

    /* First token: sender memory type */
    token  = optarg + strspn(optarg, delim);
    length = strcspn(token, delim);
    if (UCS_OK != parse_mem_type_token(token, length, &send_type)) {
        return UCS_ERR_INVALID_PARAM;
    }

    /* Optional second token: receiver memory type */
    token += length;
    token += strspn(token, delim);
    if (*token == '\0') {
        recv_type = send_type;
    } else {
        length = strcspn(token, delim);
        if (UCS_OK != parse_mem_type_token(token, length, &recv_type)) {
            return UCS_ERR_INVALID_PARAM;
        }
    }

    /* Commit only after both types were validated */
    *send_mem_type = send_type;
    *recv_mem_type = recv_type;
    return UCS_OK;
}

static ucs_status_t parse_message_sizes_params(const char *optarg,
ucx_perf_params_t *params)
{
Expand Down Expand Up @@ -523,7 +561,8 @@ static ucs_status_t init_test_params(ucx_perf_params_t *params)
params->flags = UCX_PERF_TEST_FLAG_VERBOSE;
params->uct.fc_window = UCT_PERF_TEST_MAX_FC_WINDOW;
params->uct.data_layout = UCT_PERF_DATA_LAYOUT_SHORT;
params->mem_type = UCS_MEMORY_TYPE_HOST;
params->send_mem_type = UCS_MEMORY_TYPE_HOST;
params->recv_mem_type = UCS_MEMORY_TYPE_HOST;
params->msg_size_cnt = 1;
params->iov_stride = 0;
params->ucp.send_datatype = UCP_PERF_DATATYPE_CONTIG;
Expand Down Expand Up @@ -668,29 +707,12 @@ static ucs_status_t parse_test_params(ucx_perf_params_t *params, char opt, const
}
return UCS_ERR_INVALID_PARAM;
case 'm':
if (!strcmp(optarg, "host")) {
params->mem_type = UCS_MEMORY_TYPE_HOST;
return UCS_OK;
} else if (!strcmp(optarg, "cuda") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA;
return UCS_OK;
} else if (!strcmp(optarg, "cuda-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_CUDA_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_CUDA_MANAGED;
return UCS_OK;
} else if (!strcmp(optarg, "rocm") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM;
return UCS_OK;
} else if (!strcmp(optarg, "rocm-managed") &&
(ucx_perf_mem_type_allocators[UCS_MEMORY_TYPE_ROCM_MANAGED] != NULL)) {
params->mem_type = UCS_MEMORY_TYPE_ROCM_MANAGED;
return UCS_OK;
if (UCS_OK != parse_mem_type_params(optarg,
&params->send_mem_type,
&params->recv_mem_type)) {
return UCS_ERR_INVALID_PARAM;
}

ucs_error("Unsupported memory type: \"%s\"", optarg);
return UCS_ERR_INVALID_PARAM;
return UCS_OK;
default:
return UCS_ERR_INVALID_PARAM;
}
Expand Down
9 changes: 9 additions & 0 deletions src/ucs/memory/memory_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,12 @@ const char *ucs_memory_type_names[] = {
[UCS_MEMORY_TYPE_LAST] = "unknown"
};

/*
 * Human-readable description of each memory type, indexed by
 * ucs_memory_type_t. The extra UCS_MEMORY_TYPE_LAST entry holds "unknown".
 */
const char *ucs_memory_type_descs[] = {
[UCS_MEMORY_TYPE_HOST] = "System memory",
[UCS_MEMORY_TYPE_CUDA] = "NVIDIA GPU memory" ,
[UCS_MEMORY_TYPE_CUDA_MANAGED] = "NVIDIA GPU managed/unified memory",
[UCS_MEMORY_TYPE_ROCM] = "AMD/ROCm GPU memory",
[UCS_MEMORY_TYPE_ROCM_MANAGED] = "AMD/ROCm GPU managed memory",
[UCS_MEMORY_TYPE_LAST] = "unknown"
};

5 changes: 5 additions & 0 deletions src/ucs/memory/memory_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ typedef enum ucs_memory_type {
*/
extern const char *ucs_memory_type_names[];

/**
* Array of string descriptions for each memory type
*/
extern const char *ucs_memory_type_descs[];


END_C_DECLS

Expand Down