Skip to content

Commit

Permalink
Merge pull request #1850 from brminich/topic/ud_filtering
Browse files Browse the repository at this point in the history
UCT/UD: Filter incoming packets by DGID
  • Loading branch information
yosefe authored Sep 21, 2017
2 parents 5f00bb1 + 5e00645 commit e68a822
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/uct/ib/ud/accel/ud_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,17 +400,22 @@ uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface, int is_async)

iface->super.rx.available++;
iface->rx.wq.cq_wqe_counter++;

len = ntohl(cqe->byte_cnt);
count = 1;
len = ntohl(cqe->byte_cnt);
VALGRIND_MAKE_MEM_DEFINED(packet, len);

if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
(ntohl(cqe->flags_rqpn) >> 28) & 3)) {
ucs_mpool_put_inline(desc);
goto out;
}

uct_ib_mlx5_log_rx(&iface->super.super, IBV_QPT_UD, cqe, packet,
uct_ud_dump_packet);
uct_ud_ep_process_rx(&iface->super,
(uct_ud_neth_t *)(packet + UCT_IB_GRH_LEN),
len - UCT_IB_GRH_LEN,
(uct_ud_recv_skb_t *)desc, is_async);
count = 1;

out:
if (iface->super.rx.available >= iface->super.super.config.rx_max_batch) {
/* we need to try to post buffers always. Otherwise it is possible
Expand Down
42 changes: 42 additions & 0 deletions src/uct/ib/ud/base/ud_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <ucs/type/class.h>
#include <ucs/datastruct/queue.h>
#include <sys/poll.h>
#include <linux/ip.h>


SGLIB_DEFINE_LIST_FUNCTIONS(uct_ud_iface_peer_t, uct_ud_iface_peer_cmp, next)
Expand Down Expand Up @@ -371,6 +372,39 @@ void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface)
ucs_async_remove_handler(iface->async.timer_id, 1);
}

/* Calculate the effective length of the local GID, which is later used to
 * filter incoming packets by their DGID. The length is either 16 (RoCEv1 or
 * RoCEv2/IPv6) or 4 (RoCEv2/IPv4), and is stored in iface->config.gid_len.
 *
 * According to Annex17_RoCEv2 (A17.4.5.2):
 * "The first 40 bytes of user posted UD Receive Buffers are reserved for the L3
 * header of the incoming packet (as per the InfiniBand Spec Section 11.4.1.2).
 * In RoCEv2, this area is filled up with the IP header. IPv6 header uses the
 * entire 40 bytes. IPv4 headers use the 20 bytes in the second half of the
 * reserved 40 bytes area (i.e. offset 20 from the beginning of the receive
 * buffer). In this case, the content of the first 20 bytes is undefined." */
static void uct_ud_iface_calc_gid_len(uct_ud_iface_t *iface)
{
    /* Enum constants are integer constant expressions (unlike 'const int'
     * locals), so they stay valid if the static-assert macro below requires
     * a compile-time constant. */
    enum {
        ipv4_len = sizeof(struct in_addr),
        ipv6_len = sizeof(struct in6_addr)
    };
    /* Inspect the GID one byte at a time: accessing it through a wider
     * integer pointer would depend on alignment and violate strict aliasing. */
    const uint8_t *local_gid = (const uint8_t*)iface->super.gid.raw;

    /* Make sure that daddr in IPv4 resides in the last 4 bytes in GRH */
    UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - (20 + offsetof(struct iphdr, daddr))) == ipv4_len);

    /* Make sure that dgid resides in the last 16 bytes in GRH */
    UCS_STATIC_ASSERT(UCT_IB_GRH_LEN - offsetof(struct ibv_grh, dgid) == ipv6_len);

    /* IPv4 mapped to IPv6 looks like: 0000:0000:0000:0000:0000:ffff:????:????,
     * so check that the first two bytes are zero and verify that bytes 11-12
     * (1-based; offsets 10 and 11) are both 0xff.
     * Otherwise either RoCEv1 or RoCEv2/IPv6 are used. */
    if ((local_gid[0] == 0x00) && (local_gid[1] == 0x00)) {
        ucs_assert_always((local_gid[10] == 0xff) && (local_gid[11] == 0xff));
        iface->config.gid_len = ipv4_len;
    } else {
        iface->config.gid_len = ipv6_len;
    }
}

UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
uct_worker_h worker, const uct_iface_params_t *params,
unsigned ud_rx_priv_len,
Expand Down Expand Up @@ -418,6 +452,8 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
self->rx.available = config->super.rx.queue_len;
self->config.tx_qp_len = config->super.tx.queue_len;
self->config.peer_timeout = ucs_time_from_sec(config->peer_timeout);
self->config.check_grh_dgid = (config->dgid_check &&
(self->super.addr_type == UCT_IB_ADDRESS_TYPE_ETH));

if (config->slow_timer_backoff <= 0.) {
ucs_error("The slow timer back off should be > 0 (%lf)",
Expand Down Expand Up @@ -469,6 +505,8 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,

ucs_queue_head_init(&self->rx.pending_q);

uct_ud_iface_calc_gid_len(self);

return UCS_OK;

err_mpool:
Expand Down Expand Up @@ -511,6 +549,10 @@ ucs_config_field_t uct_ud_iface_config_table[] = {
{"SLOW_TIMER_BACKOFF", "2.0", "Timeout multiplier for resending trigger",
ucs_offsetof(uct_ud_iface_config_t, slow_timer_backoff),
UCS_CONFIG_TYPE_DOUBLE},
{"ETH_DGID_CHECK", "y",
"Enable checking destination GID for incoming packets of Ethernet network\n"
"Mismatched packets are silently dropped.",
ucs_offsetof(uct_ud_iface_config_t, dgid_check), UCS_CONFIG_TYPE_BOOL},
{NULL}
};

Expand Down
28 changes: 28 additions & 0 deletions src/uct/ib/ud/base/ud_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ typedef struct uct_ud_iface_config {
uct_ib_iface_config_t super;
double peer_timeout;
double slow_timer_backoff;
int dgid_check;
} uct_ud_iface_config_t;

struct uct_ud_iface_peer {
Expand Down Expand Up @@ -123,6 +124,8 @@ struct uct_ud_iface {
double slow_timer_backoff;
unsigned tx_qp_len;
unsigned max_inline;
int check_grh_dgid;
unsigned gid_len;
} config;
ucs_ptr_array_t eps;
uct_ud_iface_peer_t *peers[UCT_UD_HASH_SIZE];
Expand Down Expand Up @@ -215,6 +218,31 @@ static UCS_F_ALWAYS_INLINE void uct_ud_leave(uct_ud_iface_t *iface)
UCS_ASYNC_UNBLOCK(iface->super.super.worker->async);
}

/*
 * Decide whether a received packet should be accepted, based on the
 * destination GID stored at the end of its GRH area.
 *
 * iface           - interface whose local GID and filtering config are used.
 * grh_end         - pointer to the first byte past the GRH area of the packet.
 * is_grh_present  - nonzero if the completion reports that a GRH is present.
 *
 * Returns 1 if the packet should be processed, 0 if it should be dropped.
 * Packets are always accepted when filtering is disabled; a packet without
 * a GRH is accepted as well, after emitting a warning.
 */
static UCS_F_ALWAYS_INLINE int
uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *grh_end, int is_grh_present)
{
    unsigned cmp_len;
    char *expected, *received;

    if (iface->config.check_grh_dgid) {
        if (ucs_unlikely(!is_grh_present)) {
            ucs_warn("RoCE packet does not contain GRH");
        } else {
            cmp_len  = iface->config.gid_len;
            /* Compare only the trailing cmp_len bytes of both the 16-byte
             * local GID and the GRH area of the received packet */
            expected = (char*)iface->super.gid.raw + (16 - cmp_len);
            received = (char*)grh_end - cmp_len;

            if (memcmp(expected, received, cmp_len) != 0) {
                ucs_trace_data("Drop packet with wrong dgid");
                return 0;
            }
        }
    }

    return 1;
}

/*
management of connecting endpoints (cep)
Expand Down
5 changes: 5 additions & 0 deletions src/uct/ib/ud/verbs/ud_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,11 @@ uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async)
}

UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, packet, wc, num_wcs) {
if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
wc[i].wc_flags & IBV_WC_GRH)) {
ucs_mpool_put_inline((void*)wc[i].wr_id);
continue;
}
uct_ib_log_recv_completion(&iface->super.super, IBV_QPT_UD, &wc[i],
packet, wc[i].byte_len, uct_ud_dump_packet);
uct_ud_ep_process_rx(&iface->super,
Expand Down

0 comments on commit e68a822

Please sign in to comment.