Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCP: Add the latency.overhead to the passed address. #1544

Merged
merged 1 commit into from
May 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/ucp/core/ucp_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ static void ucp_worker_init_device_atomics(ucp_worker_h worker)
dummy_iface_attr.cap_flags = -1;
dummy_iface_attr.overhead = 0;
dummy_iface_attr.priority = 0;
dummy_iface_attr.lat_ovh = 0;

supp_tls = 0;
best_score = -1;
Expand Down
2 changes: 1 addition & 1 deletion src/ucp/tag/tag_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static ucs_status_t ucp_tag_req_start(ucp_request_t *req, size_t count,
flag_iov_single = (count <= config->tag.eager.max_iov);

if (!flag_iov_single && ucp_ep_is_tag_offload_enabled(config)) {
/* Make sure SW RNDV will be used, becasuse tag offload does
/* Make sure SW RNDV will be used, because tag offload does
* not support multi-packet eager protocols. */
force_sw_rndv = 1;
}
Expand Down
17 changes: 13 additions & 4 deletions src/ucp/wireup/address.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct {
/*
 * Compact form of the interface attributes, as filled in by
 * ucp_address_pack_iface_attr() and read back by
 * ucp_address_unpack_iface_attr().
 *
 * NOTE(review): no packing attribute is visible on this struct, so on ABIs
 * where double is 8-byte aligned the double member presumably causes trailing
 * padding after prio_cap_flags - confirm whether that affects the size of the
 * packed address.
 */
typedef struct {
/* Copied from the interface's 'overhead' attribute */
float overhead;
/* Copied from the interface's 'bandwidth' attribute */
float bandwidth;
/* Copied from the interface's 'latency.overhead' attribute */
double lat_ovh;
uint32_t prio_cap_flags; /* 8 lsb: prio, 24 msb - cap flags */
} ucp_address_packed_iface_attr_t;

Expand Down Expand Up @@ -234,6 +235,7 @@ static void ucp_address_pack_iface_attr(ucp_address_packed_iface_attr_t *packed,
packed->prio_cap_flags = ((uint8_t)iface_attr->priority);
packed->overhead = iface_attr->overhead;
packed->bandwidth = iface_attr->bandwidth;
packed->lat_ovh = iface_attr->latency.overhead;

/* Keep only the bits defined by UCP_ADDRESS_IFACE_FLAGS, to shrink address. */
packed_flag = UCS_BIT(8);
Expand All @@ -260,6 +262,7 @@ ucp_address_unpack_iface_attr(ucp_address_iface_attr_t *iface_attr,
iface_attr->priority = packed->prio_cap_flags & UCS_MASK(8);
iface_attr->overhead = packed->overhead;
iface_attr->bandwidth = packed->bandwidth;
iface_attr->lat_ovh = packed->lat_ovh;

packed_flag = UCS_BIT(8);
bit = 1;
Expand Down Expand Up @@ -387,12 +390,15 @@ static ucs_status_t ucp_address_do_pack(ucp_worker_h worker, ucp_ep_h ep,


ucs_trace("pack addr[%d] : "UCT_TL_RESOURCE_DESC_FMT
" md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e ",
" md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e "
"lat_ovh: %e dev_priority %d",
index,
UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[i].tl_rsc),
md_flags, worker->ifaces[i].attr.cap.flags,
worker->ifaces[i].attr.bandwidth,
worker->ifaces[i].attr.overhead);
worker->ifaces[i].attr.overhead,
worker->ifaces[i].attr.latency.overhead,
worker->ifaces[i].attr.priority);
++index;
}
}
Expand Down Expand Up @@ -568,10 +574,13 @@ ucs_status_t ucp_address_unpack(const void *buffer, uint64_t *remote_uuid_p,
address->tl_addr = (tl_addr_len > 0) ? ptr : NULL;
address->tl_addr_len = tl_addr_len;

ucs_trace("unpack addr[%d] : md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e ",
ucs_trace("unpack addr[%d] : md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e "
"lat_ovh %e dev_priority %d",
(int)(address - address_list),
address->md_flags, address->iface_attr.cap_flags,
address->iface_attr.bandwidth, address->iface_attr.overhead);
address->iface_attr.bandwidth, address->iface_attr.overhead,
address->iface_attr.lat_ovh,
address->iface_attr.priority);
++address;

ptr += tl_addr_len;
Expand Down
1 change: 1 addition & 0 deletions src/ucp/wireup/address.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct ucp_address_iface_attr {
double overhead; /* Interface performance - overhead */
double bandwidth; /* Interface performance - bandwidth */
int priority; /* Priority of device */
double lat_ovh; /* latency overhead */
};


Expand Down
25 changes: 18 additions & 7 deletions src/ucp/wireup/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,14 @@ ucp_wireup_select_transport(ucp_ep_h ep, const ucp_address_entry_t *address_list
return UCS_OK;
}

/*
 * Latency estimate of a local interface with respect to a given remote
 * interface.
 *
 * The result is the sum of two terms:
 *  - the larger of the local latency overhead (iface_attr->latency.overhead)
 *    and the latency overhead taken from the remote interface attributes
 *    (remote_iface_attr->lat_ovh);
 *  - the local latency growth (iface_attr->latency.growth) multiplied by
 *    context->config.est_num_eps.
 */
static inline double ucp_wireup_tl_iface_latency(ucp_context_h context,
                                                 const uct_iface_attr_t *iface_attr,
                                                 const ucp_address_iface_attr_t *remote_iface_attr)
{
    double scaled_growth;
    double fixed_latency;

    /* Part of the latency which depends on the configured endpoint count */
    scaled_growth = iface_attr->latency.growth * context->config.est_num_eps;

    /* The side with the higher latency overhead determines the fixed part */
    fixed_latency = ucs_max(iface_attr->latency.overhead,
                            remote_iface_attr->lat_ovh);

    return fixed_latency + scaled_growth;
}

static UCS_F_NOINLINE void
ucp_wireup_add_lane_desc(ucp_wireup_lane_desc_t *lane_descs,
ucp_lane_index_t *num_lanes_p, ucp_rsc_index_t rsc_index,
Expand Down Expand Up @@ -459,7 +467,8 @@ static double ucp_wireup_rma_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best for 4k messages */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead +
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead +
(4096.0 / ucs_min(iface_attr->bandwidth, remote_iface_attr->bandwidth)));
}

Expand Down Expand Up @@ -495,7 +504,8 @@ double ucp_wireup_amo_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best one-sided latency */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead);
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead);
}

static ucs_status_t ucp_wireup_add_amo_lanes(ucp_ep_h ep, unsigned address_count,
Expand Down Expand Up @@ -543,7 +553,7 @@ static double ucp_wireup_am_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best end-to-end latency */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) +
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + remote_iface_attr->overhead);
}

Expand All @@ -556,8 +566,9 @@ static double ucp_wireup_rndv_score_func(ucp_context_h context,
* a size which is likely to be used with the Rendezvous protocol, for
* how long it would take to transfer it with a certain transport. */

return 1 / ((UCP_WIREUP_RNDV_TEST_MSG_SIZE / iface_attr->bandwidth) +
ucp_tl_iface_latency(context, iface_attr) +
return 1 / ((UCP_WIREUP_RNDV_TEST_MSG_SIZE /
ucs_min(iface_attr->bandwidth, remote_iface_attr->bandwidth)) +
ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + md_attr->reg_cost.overhead +
(UCP_WIREUP_RNDV_TEST_MSG_SIZE * md_attr->reg_cost.growth));
}
Expand Down Expand Up @@ -861,8 +872,8 @@ static double ucp_wireup_aux_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best end-to-end latency and larger bcopy size */
return (1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead +
remote_iface_attr->overhead)) +
return (1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + remote_iface_attr->overhead)) +
(1e3 * ucs_max(iface_attr->cap.am.max_bcopy, iface_attr->cap.am.max_short));
}

Expand Down