Skip to content

Commit

Permalink
Merge pull request #1552 from alinask/topic/v1-2-ucp-add-lat-ovh-to-addr
Browse files Browse the repository at this point in the history
UCP: Add the latency.overhead to the passed address.
  • Loading branch information
yosefe authored May 30, 2017
2 parents e4d7f63 + e211f78 commit d242fb8
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/ucp/core/ucp_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ static void ucp_worker_init_device_atomics(ucp_worker_h worker)
dummy_iface_attr.cap_flags = -1;
dummy_iface_attr.overhead = 0;
dummy_iface_attr.priority = 0;
dummy_iface_attr.lat_ovh = 0;

supp_tls = 0;
best_score = -1;
Expand Down
17 changes: 13 additions & 4 deletions src/ucp/wireup/address.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ typedef struct {
/*
 * Compact representation of an interface's attributes as carried inside a
 * packed address. It is filled in by ucp_address_pack_iface_attr() and read
 * back by ucp_address_unpack_iface_attr() into a ucp_address_iface_attr
 * structure (whose overhead/bandwidth/lat_ovh fields are doubles).
 */
typedef struct {
float overhead; /* set from iface_attr->overhead when packing */
float bandwidth; /* set from iface_attr->bandwidth when packing */
double lat_ovh; /* set from iface_attr->latency.overhead when packing */
uint32_t prio_cap_flags; /* 8 lsb: prio, 24 msb - cap flags */
} ucp_address_packed_iface_attr_t;

Expand Down Expand Up @@ -234,6 +235,7 @@ static void ucp_address_pack_iface_attr(ucp_address_packed_iface_attr_t *packed,
packed->prio_cap_flags = ((uint8_t)iface_attr->priority);
packed->overhead = iface_attr->overhead;
packed->bandwidth = iface_attr->bandwidth;
packed->lat_ovh = iface_attr->latency.overhead;

/* Keep only the bits defined by UCP_ADDRESS_IFACE_FLAGS, to shrink address. */
packed_flag = UCS_BIT(8);
Expand All @@ -260,6 +262,7 @@ ucp_address_unpack_iface_attr(ucp_address_iface_attr_t *iface_attr,
iface_attr->priority = packed->prio_cap_flags & UCS_MASK(8);
iface_attr->overhead = packed->overhead;
iface_attr->bandwidth = packed->bandwidth;
iface_attr->lat_ovh = packed->lat_ovh;

packed_flag = UCS_BIT(8);
bit = 1;
Expand Down Expand Up @@ -387,12 +390,15 @@ static ucs_status_t ucp_address_do_pack(ucp_worker_h worker, ucp_ep_h ep,


ucs_trace("pack addr[%d] : "UCT_TL_RESOURCE_DESC_FMT
" md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e ",
" md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e "
"lat_ovh: %e dev_priority %d",
index,
UCT_TL_RESOURCE_DESC_ARG(&context->tl_rscs[i].tl_rsc),
md_flags, worker->iface_attrs[i].cap.flags,
worker->iface_attrs[i].bandwidth,
worker->iface_attrs[i].overhead);
worker->iface_attrs[i].overhead,
worker->iface_attrs[i].latency.overhead,
worker->iface_attrs[i].priority);
++index;
}
}
Expand Down Expand Up @@ -568,10 +574,13 @@ ucs_status_t ucp_address_unpack(const void *buffer, uint64_t *remote_uuid_p,
address->tl_addr = (tl_addr_len > 0) ? ptr : NULL;
address->tl_addr_len = tl_addr_len;

ucs_trace("unpack addr[%d] : md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e ",
ucs_trace("unpack addr[%d] : md_flags 0x%"PRIx64" tl_flags 0x%"PRIx64" bw %e ovh %e "
"lat_ovh %e dev_priority %d",
(int)(address - address_list),
address->md_flags, address->iface_attr.cap_flags,
address->iface_attr.bandwidth, address->iface_attr.overhead);
address->iface_attr.bandwidth, address->iface_attr.overhead,
address->iface_attr.lat_ovh,
address->iface_attr.priority);
++address;

ptr += tl_addr_len;
Expand Down
1 change: 1 addition & 0 deletions src/ucp/wireup/address.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct ucp_address_iface_attr {
double overhead; /* Interface performance - overhead */
double bandwidth; /* Interface performance - bandwidth */
int priority; /* Priority of device */
double lat_ovh; /* latency overhead */
};


Expand Down
25 changes: 18 additions & 7 deletions src/ucp/wireup/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,14 @@ ucp_wireup_select_transport(ucp_ep_h ep, const ucp_address_entry_t *address_list
return UCS_OK;
}

/*
 * Estimate the latency of a transport interface, taking the remote side into
 * account.
 *
 * context           - UCP context; config.est_num_eps is read from it
 *                     (presumably the estimated number of endpoints).
 * iface_attr        - attributes of the local interface; latency.overhead and
 *                     latency.growth are used.
 * remote_iface_attr - attributes unpacked from the remote address; lat_ovh is
 *                     used.
 *
 * Returns the larger of the local and remote latency overheads, plus the local
 * latency growth multiplied by context->config.est_num_eps.
 */
static inline double ucp_wireup_tl_iface_latency(ucp_context_h context,
                                                 const uct_iface_attr_t *iface_attr,
                                                 const ucp_address_iface_attr_t *remote_iface_attr)
{
    double fixed_latency;
    double scaled_latency;

    /* The fixed part is bounded by whichever side is slower. */
    fixed_latency  = ucs_max(iface_attr->latency.overhead,
                             remote_iface_attr->lat_ovh);

    /* The variable part grows with context->config.est_num_eps. */
    scaled_latency = iface_attr->latency.growth * context->config.est_num_eps;

    return fixed_latency + scaled_latency;
}

static UCS_F_NOINLINE void
ucp_wireup_add_lane_desc(ucp_wireup_lane_desc_t *lane_descs,
ucp_lane_index_t *num_lanes_p, ucp_rsc_index_t rsc_index,
Expand Down Expand Up @@ -452,7 +460,8 @@ static double ucp_wireup_rma_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best for 4k messages */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead +
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead +
(4096.0 / ucs_min(iface_attr->bandwidth, remote_iface_attr->bandwidth)));
}

Expand Down Expand Up @@ -488,7 +497,8 @@ double ucp_wireup_amo_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best one-sided latency */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead);
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead);
}

static ucs_status_t ucp_wireup_add_amo_lanes(ucp_ep_h ep, unsigned address_count,
Expand Down Expand Up @@ -536,7 +546,7 @@ static double ucp_wireup_am_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best end-to-end latency */
return 1e-3 / (ucp_tl_iface_latency(context, iface_attr) +
return 1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + remote_iface_attr->overhead);
}

Expand All @@ -549,8 +559,9 @@ static double ucp_wireup_rndv_score_func(ucp_context_h context,
* a size which is likely to be used with the Rendezvous protocol, for
* how long it would take to transfer it with a certain transport. */

return 1 / ((UCP_WIREUP_RNDV_TEST_MSG_SIZE / iface_attr->bandwidth) +
ucp_tl_iface_latency(context, iface_attr) +
return 1 / ((UCP_WIREUP_RNDV_TEST_MSG_SIZE /
ucs_min(iface_attr->bandwidth, remote_iface_attr->bandwidth)) +
ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + md_attr->reg_cost.overhead +
(UCP_WIREUP_RNDV_TEST_MSG_SIZE * md_attr->reg_cost.growth));
}
Expand Down Expand Up @@ -801,8 +812,8 @@ static double ucp_wireup_aux_score_func(ucp_context_h context,
const ucp_address_iface_attr_t *remote_iface_attr)
{
/* best end-to-end latency and larger bcopy size */
return (1e-3 / (ucp_tl_iface_latency(context, iface_attr) + iface_attr->overhead +
remote_iface_attr->overhead)) +
return (1e-3 / (ucp_wireup_tl_iface_latency(context, iface_attr, remote_iface_attr) +
iface_attr->overhead + remote_iface_attr->overhead)) +
(1e3 * ucs_max(iface_attr->cap.am.max_bcopy, iface_attr->cap.am.max_short));
}

Expand Down

0 comments on commit d242fb8

Please sign in to comment.