Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

validate rma lane if rkey is not needed for mem type #2995

Merged
merged 6 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,7 @@ run_ucx_perftest_mpi() {
$MPIRUN -np 2 -x UCX_TLS=rc,cuda_copy,gdr_copy -x UCX_MEMTYPE_CACHE=y $AFFINITY $UCX_PERFTEST
$MPIRUN -np 2 -x UCX_TLS=rc,cuda_copy,gdr_copy -x UCX_MEMTYPE_CACHE=n $AFFINITY $UCX_PERFTEST
$MPIRUN -np 2 -x UCX_TLS=rc,cuda_copy $AFFINITY $UCX_PERFTEST
$MPIRUN -np 2 -x UCX_TLS=self,mm,cma,cuda_copy $AFFINITY $UCX_PERFTEST
$MPIRUN -np 2 $AFFINITY $UCX_PERFTEST
unset CUDA_VISIBLE_DEVICES
fi
Expand Down
7 changes: 5 additions & 2 deletions src/ucp/core/ucp_mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ typedef struct ucp_rkey {
ucp_rma_proto_t *rma_proto; /* Protocol for RMAs */
} cache;
ucp_md_map_t md_map; /* Which *remote* MDs have valid memory handles */
uct_memory_type_t mem_type;/* Memory type of remote key memory */
uct_rkey_bundle_t uct[0]; /* Remote key for every MD */
} ucp_rkey_t;

Expand Down Expand Up @@ -122,10 +123,12 @@ ucs_status_t ucp_mem_rereg_mds(ucp_context_h context, ucp_md_map_t reg_md_map,
size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map);

void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map,
void *rkey_buffer, const void* uct_rkeys[]);
uct_memory_type_t mem_type, void *rkey_buffer,
const void* uct_rkeys[]);

ssize_t ucp_rkey_pack_uct(ucp_context_h context, ucp_md_map_t md_map,
const uct_mem_h *memh, void *rkey_buffer);
const uct_mem_h *memh, uct_memory_type_t mem_type,
void *rkey_buffer);

void ucp_rkey_dump_packed(const void *rkey_buffer, char *buffer, size_t max);

Expand Down
37 changes: 31 additions & 6 deletions src/ucp/core/ucp_rkey.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
#include <inttypes.h>


static ucp_md_map_t ucp_mem_dummy_buffer = 0;
static struct {
ucp_md_map_t md_map;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please align the member names on the same column.

uint8_t mem_type;
} UCS_S_PACKED ucp_mem_dummy_buffer = {0, 0};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use a UCT_MD_MEM_TYPE_xx constant (e.g. UCT_MD_MEM_TYPE_HOST) instead of a literal 0?



size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map)
Expand All @@ -22,6 +25,7 @@ size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map)
unsigned md_index;

size = sizeof(ucp_md_map_t);
size += sizeof(uint8_t);
ucs_for_each_bit (md_index, md_map) {
md_size = context->tl_mds[md_index].attr.rkey_packed_size;
ucs_assert_always(md_size <= UINT8_MAX);
Expand All @@ -31,7 +35,8 @@ size_t ucp_rkey_packed_size(ucp_context_h context, ucp_md_map_t md_map)
}

void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map,
void *rkey_buffer, const void* uct_rkeys[])
uct_memory_type_t mem_type, void *rkey_buffer,
const void* uct_rkeys[])
{
void *p = rkey_buffer;
unsigned md_index;
Expand All @@ -40,6 +45,8 @@ void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map,
*(ucp_md_map_t*)p = md_map;
p += sizeof(ucp_md_map_t);

*((uint8_t *)p++) = mem_type;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use uct_memory_type_t instead of uint8_t for the cast and for sizeof, where relevant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@evgeny-leksikov uct_memory_type_t is defined as an enum. I wanted to save on the packed rkey size by casting it to uint8_t.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, then please check whether we have a static assert for UCT_MD_MEM_TYPE_LAST <= 255, or something similar.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a static assert where the rkey is packed.


ucs_for_each_bit(md_index, md_map) {
md_size = context->tl_mds[md_index].attr.rkey_packed_size;
ucs_assert_always(md_size <= UINT8_MAX);
Expand All @@ -51,7 +58,8 @@ void ucp_rkey_packed_copy(ucp_context_h context, ucp_md_map_t md_map,
}

ssize_t ucp_rkey_pack_uct(ucp_context_h context, ucp_md_map_t md_map,
const uct_mem_h *memh, void *rkey_buffer)
const uct_mem_h *memh, uct_memory_type_t mem_type,
void *rkey_buffer)
{
void *p = rkey_buffer;
ucs_status_t status = UCS_OK;
Expand All @@ -66,6 +74,10 @@ ssize_t ucp_rkey_pack_uct(ucp_context_h context, ucp_md_map_t md_map,
*(ucp_md_map_t*)p = md_map;
p += sizeof(ucp_md_map_t);

/* Write memory type */
UCS_STATIC_ASSERT(UCT_MD_MEM_TYPE_LAST <= 255);
*((uint8_t*)p++) = mem_type;

/* Write both size and rkey_buffer for each UCT rkey */
uct_memh_index = 0;
ucs_for_each_bit (md_index, md_map) {
Expand Down Expand Up @@ -122,7 +134,8 @@ ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,

p = rkey_buffer;

packed_size = ucp_rkey_pack_uct(context, memh->md_map, memh->uct, p);
packed_size = ucp_rkey_pack_uct(context, memh->md_map, memh->uct,
memh->mem_type, p);
if (packed_size < 0) {
status = (ucs_status_t)packed_size;
goto err_destroy;
Expand Down Expand Up @@ -161,6 +174,7 @@ ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, const void *rkey_buffer,
unsigned md_count;
ucs_status_t status;
ucp_rkey_h rkey;
uct_memory_type_t mem_type;
uint8_t md_size;
const void *p;

Expand Down Expand Up @@ -193,7 +207,11 @@ ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, const void *rkey_buffer,
goto err;
}

/* Read memory type */
mem_type = *((uint8_t*)p++);

rkey->md_map = md_map;
rkey->mem_type = mem_type;

/* Unpack rkey of each UCT MD */
remote_md_index = 0; /* Index of remote MD */
Expand Down Expand Up @@ -259,6 +277,8 @@ void ucp_rkey_dump_packed(const void *rkey_buffer, char *buffer, size_t max)
md_map = *(ucp_md_map_t*)(rkey_buffer);
rkey_buffer += sizeof(ucp_md_map_t);

rkey_buffer += sizeof(uint8_t);

first = 1;
ucs_for_each_bit(md_index, md_map) {
md_size = *((uint8_t*)rkey_buffer);
Expand Down Expand Up @@ -337,6 +357,7 @@ static ucp_lane_index_t ucp_config_find_rma_lane(ucp_context_h context,
ucp_lane_index_t lane;
ucp_md_map_t dst_md_mask;
ucp_md_index_t md_index;
uct_md_attr_t *md_attr;
uint8_t rkey_index;
int prio;

Expand All @@ -349,16 +370,20 @@ static ucp_lane_index_t ucp_config_find_rma_lane(ucp_context_h context,
}

md_index = config->md_index[lane];
md_attr = &context->tl_mds[md_index].attr;

if ((md_index != UCP_NULL_RESOURCE) &&
(!(context->tl_mds[md_index].attr.cap.flags & UCT_MD_FLAG_NEED_RKEY)))
(!(md_attr->cap.flags & UCT_MD_FLAG_NEED_RKEY)) &&
(!rkey || (md_attr->cap.mem_type == mem_type &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please put ( ) around the inner conditions.
Also, maybe use a helper bool variable? This is hard to parse.

md_attr->cap.mem_type == rkey->mem_type)))
{
/* Lane does not need rkey, can use the lane with invalid rkey */
*uct_rkey_p = UCT_INVALID_RKEY;
return lane;
}

if ((md_index != UCP_NULL_RESOURCE) &&
(!(context->tl_mds[md_index].attr.cap.reg_mem_types & UCS_BIT(mem_type)))) {
(!(md_attr->cap.reg_mem_types & UCS_BIT(mem_type)))) {
continue;
}

Expand Down
4 changes: 2 additions & 2 deletions src/ucp/tag/offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_tag_offload_unexp_rndv,
dummy_rts->address = remote_addr;
dummy_rts->size = length;

ucp_rkey_packed_copy(worker->context, UCS_BIT(md_index), dummy_rts + 1,
uct_rkeys);
ucp_rkey_packed_copy(worker->context, UCS_BIT(md_index),
UCT_MD_MEM_TYPE_HOST, dummy_rts + 1, uct_rkeys);

UCP_WORKER_STAT_TAG_OFFLOAD(worker, RX_UNEXP_RNDV);
ucp_rndv_process_rts(worker, dummy_rts, dummy_rts_size, 0);
Expand Down
4 changes: 4 additions & 0 deletions src/ucp/tag/rndv.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ size_t ucp_tag_rndv_rts_pack(void *dest, void *arg)
packed_rkey_size = ucp_rkey_pack_uct(worker->context,
sreq->send.state.dt.dt.contig.md_map,
sreq->send.state.dt.dt.contig.memh,
sreq->send.mem_type,
rndv_rts_hdr + 1);
if (packed_rkey_size < 0) {
ucs_fatal("failed to pack rendezvous remote key: %s",
Expand Down Expand Up @@ -95,6 +96,7 @@ static size_t ucp_tag_rndv_rtr_pack(void *dest, void *arg)
packed_rkey_size = ucp_rkey_pack_uct(rndv_req->send.ep->worker->context,
rreq->recv.state.dt.contig.md_map,
rreq->recv.state.dt.contig.memh,
rreq->recv.mem_type,
rndv_rtr_hdr + 1);
if (packed_rkey_size < 0) {
return packed_rkey_size;
Expand Down Expand Up @@ -870,10 +872,12 @@ static ucs_status_t ucp_rndv_pipeline(ucp_request_t *sreq, ucp_rndv_rtr_hdr_t *r
frag_req->send.ep = pipeline_ep;
frag_req->send.buffer = mdesc + 1;
frag_req->send.datatype = ucp_dt_make_contig(1);
frag_req->send.mem_type = sreq->send.mem_type;
frag_req->send.state.dt.dt.contig.memh[0]= ucp_memh2uct(mdesc->memh, md_index);
frag_req->send.state.dt.dt.contig.md_map = UCS_BIT(md_index);
frag_req->send.length = length;
frag_req->send.uct.func = ucp_rndv_progress_rma_get_zcopy;
frag_req->send.rndv_get.rkey = NULL;
frag_req->send.rndv_get.remote_address = (uint64_t)(sreq->send.buffer + offset);
frag_req->send.rndv_get.lanes_map = 0;
frag_req->send.rndv_get.lane_count = 0;
Expand Down