Skip to content

Commit

Permalink
Merge pull request #1992 from brminich/topic/backport_to_v1.2.x
Browse files Browse the repository at this point in the history
UCT/UD: Backport DGID packet filtering
  • Loading branch information
yosefe authored Nov 14, 2017
2 parents 68620d7 + 0e471cb commit 46bd382
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 2 deletions.
12 changes: 12 additions & 0 deletions contrib/test_jenkins.sh
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,17 @@ build_icc() {
fi
}

#
# Build debug version
#
build_debug() {
echo "==== Build with --enable-debug option ===="
../contrib/configure-devel --prefix=$ucx_inst --enable-debug
$MAKE clean
$MAKE
$MAKE distclean
}

run_hello() {
api=$1
shift
Expand Down Expand Up @@ -483,6 +494,7 @@ run_tests() {
export UCX_ERROR_MAIL_FOOTER=$JOB_URL/$BUILD_NUMBER/console

do_distributed_task 0 4 build_icc
do_distributed_task 1 4 build_debug

# all are running mpi tests
run_mpi_tests
Expand Down
8 changes: 8 additions & 0 deletions src/uct/ib/ud/accel/ud_mlx5.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,14 @@ ucs_status_t uct_ud_mlx5_iface_poll_rx(uct_ud_mlx5_iface_t *iface, int is_async)

len = ntohl(cqe->byte_cnt);
VALGRIND_MAKE_MEM_DEFINED(packet, len);

if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
(ntohl(cqe->flags_rqpn) >> 28) & 3)) {
ucs_mpool_put_inline(desc);
status = UCS_ERR_NO_PROGRESS;
goto out;
}

uct_ib_mlx5_log_rx(&iface->super.super, IBV_QPT_UD, cqe, packet,
uct_ud_dump_packet);
uct_ud_ep_process_rx(&iface->super,
Expand Down
68 changes: 66 additions & 2 deletions src/uct/ib/ud/base/ud_iface.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,22 @@
#include <ucs/type/class.h>
#include <ucs/datastruct/queue.h>
#include <sys/poll.h>
#include <linux/ip.h>


#define UCT_UD_IPV4_ADDR_LEN sizeof(struct in_addr)
#define UCT_UD_IPV6_ADDR_LEN sizeof(struct in6_addr)

#if ENABLE_STATS
static ucs_stats_class_t uct_ud_iface_stats_class = {
.name = "ud_iface",
.num_counters = UCT_UD_IFACE_STAT_LAST,
.counter_names = {
[UCT_UD_IFACE_STAT_RX_DROP] = "rx_drop"
}
};
#endif

SGLIB_DEFINE_LIST_FUNCTIONS(uct_ud_iface_peer_t, uct_ud_iface_peer_cmp, next)
SGLIB_DEFINE_HASHED_CONTAINER_FUNCTIONS(uct_ud_iface_peer_t,
UCT_UD_HASH_SIZE,
Expand Down Expand Up @@ -387,6 +401,39 @@ void uct_ud_iface_remove_async_handlers(uct_ud_iface_t *iface)
ucs_async_remove_handler(iface->async.timer_id, 1);
}

/* Calculate real GIDs len. Can be either 16 (RoCEv1 or RoCEv2/IPv6)
* or 4 (RoCEv2/IPv4). This len is used for packets filtering by DGIDs.
*
* According to Annex17_RoCEv2 (A17.4.5.2):
* "The first 40 bytes of user posted UD Receive Buffers are reserved for the L3
* header of the incoming packet (as per the InfiniBand Spec Section 11.4.1.2).
* In RoCEv2, this area is filled up with the IP header. IPv6 header uses the
* entire 40 bytes. IPv4 headers use the 20 bytes in the second half of the
* reserved 40 bytes area (i.e. offset 20 from the beginning of the receive
* buffer). In this case, the content of the first 20 bytes is undefined." */
static void uct_ud_iface_calc_gid_len(uct_ud_iface_t *iface)
{
uint16_t *local_gid_u16 = (uint16_t*)iface->super.gid.raw;

/* Make sure that daddr in IPv4 resides in the last 4 bytes in GRH */
UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - (20 + offsetof(struct iphdr, daddr))) ==
UCT_UD_IPV4_ADDR_LEN);

/* Make sure that dgid resides in the last 16 bytes in GRH */
UCS_STATIC_ASSERT((UCT_IB_GRH_LEN - offsetof(struct ibv_grh, dgid)) ==
UCT_UD_IPV6_ADDR_LEN);

/* IPv4 mapped to IPv6 looks like: 0000:0000:0000:0000:0000:ffff:????:????,
* so check for leading zeroes and verify that 11-12 bytes are 0xff.
* Otherwise either RoCEv1 or RoCEv2/IPv6 are used. */
if (local_gid_u16[0] == 0x0000) {
ucs_assert_always(local_gid_u16[5] == 0xffff);
iface->config.gid_len = UCT_UD_IPV4_ADDR_LEN;
} else {
iface->config.gid_len = UCT_UD_IPV6_ADDR_LEN;
}
}

UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
uct_worker_h worker, const uct_iface_params_t *params,
unsigned ud_rx_priv_len,
Expand Down Expand Up @@ -434,6 +481,8 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
self->rx.available = config->super.rx.queue_len;
self->config.tx_qp_len = config->super.tx.queue_len;
self->config.peer_timeout = ucs_time_from_sec(config->peer_timeout);
self->config.check_grh_dgid = (config->dgid_check &&
(self->super.addr_type == UCT_IB_ADDRESS_TYPE_ETH));

if (config->slow_timer_backoff <= 0.) {
ucs_error("The slow timer back off should be > 0 (%lf)",
Expand Down Expand Up @@ -472,7 +521,7 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,
&config->super.tx.mp, self->config.tx_qp_len,
uct_ud_iface_send_skb_init, "ud_tx_skb");
if (status != UCS_OK) {
goto err_mpool;
goto err_rx_mpool;
}
self->tx.skb = ucs_mpool_get(&self->tx.mp);
self->tx.skb_inl.super.len = sizeof(uct_ud_neth_t);
Expand All @@ -485,9 +534,19 @@ UCS_CLASS_INIT_FUNC(uct_ud_iface_t, uct_ud_iface_ops_t *ops, uct_md_h md,

ucs_queue_head_init(&self->rx.pending_q);

uct_ud_iface_calc_gid_len(self);

status = UCS_STATS_NODE_ALLOC(&self->stats, &uct_ud_iface_stats_class,
self->super.super.stats);
if (status != UCS_OK) {
goto err_tx_mpool;
}

return UCS_OK;

err_mpool:
err_tx_mpool:
ucs_mpool_cleanup(&self->tx.mp, 1);
err_rx_mpool:
ucs_mpool_cleanup(&self->rx.mp, 1);
err_qp:
ibv_destroy_qp(self->qp);
Expand All @@ -514,6 +573,7 @@ static UCS_CLASS_CLEANUP_FUNC(uct_ud_iface_t)
ucs_ptr_array_cleanup(&self->eps);
ucs_arbiter_cleanup(&self->tx.pending_q);
ucs_assert(self->tx.pending_q_len == 0);
UCS_STATS_NODE_FREE(self->stats);
uct_ud_leave(self);
}

Expand All @@ -527,6 +587,10 @@ ucs_config_field_t uct_ud_iface_config_table[] = {
{"SLOW_TIMER_BACKOFF", "2.0", "Timeout multiplier for resending trigger",
ucs_offsetof(uct_ud_iface_config_t, slow_timer_backoff),
UCS_CONFIG_TYPE_DOUBLE},
{"ETH_DGID_CHECK", "y",
"Enable checking destination GID for incoming packets of Ethernet network\n"
"Mismatched packets are silently dropped.",
ucs_offsetof(uct_ud_iface_config_t, dgid_check), UCS_CONFIG_TYPE_BOOL},
{NULL}
};

Expand Down
37 changes: 37 additions & 0 deletions src/uct/ib/ud/base/ud_iface.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,18 @@

#define UCT_UD_MIN_INLINE 48

enum {
UCT_UD_IFACE_STAT_RX_DROP,
UCT_UD_IFACE_STAT_LAST
};

/* TODO: maybe tx_moderation can be defined at compile-time since tx completions are used only to know how much space is there in tx qp */

typedef struct uct_ud_iface_config {
uct_ib_iface_config_t super;
double peer_timeout;
double slow_timer_backoff;
int dgid_check;
} uct_ud_iface_config_t;

struct uct_ud_iface_peer {
Expand Down Expand Up @@ -123,7 +129,12 @@ struct uct_ud_iface {
double slow_timer_backoff;
unsigned tx_qp_len;
unsigned max_inline;
int check_grh_dgid;
unsigned gid_len;
} config;

UCS_STATS_NODE_DECLARE(stats);

ucs_ptr_array_t eps;
uct_ud_iface_peer_t *peers[UCT_UD_HASH_SIZE];
struct {
Expand Down Expand Up @@ -215,6 +226,32 @@ static UCS_F_ALWAYS_INLINE void uct_ud_leave(uct_ud_iface_t *iface)
UCS_ASYNC_UNBLOCK(iface->super.super.worker->async);
}

static UCS_F_ALWAYS_INLINE int
uct_ud_iface_check_grh(uct_ud_iface_t *iface, void *grh_end, int is_grh_present)
{
void *dest_gid, *local_gid;

if (!iface->config.check_grh_dgid) {
return 1;
}

if (ucs_unlikely(!is_grh_present)) {
ucs_warn("RoCE packet does not contain GRH");
return 1;
}

local_gid = (char*)iface->super.gid.raw + (16 - iface->config.gid_len);
dest_gid = (char*)grh_end - iface->config.gid_len;

if (memcmp(local_gid, dest_gid, iface->config.gid_len)) {
UCS_STATS_UPDATE_COUNTER(iface->stats, UCT_UD_IFACE_STAT_RX_DROP, 1);
ucs_trace_data("Drop packet with wrong dgid");
return 0;
}

return 1;
}

/*
management of connecting endpoints (cep)
Expand Down
5 changes: 5 additions & 0 deletions src/uct/ib/ud/verbs/ud_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,11 @@ uct_ud_verbs_iface_poll_rx(uct_ud_verbs_iface_t *iface, int is_async)
}

UCT_IB_IFACE_VERBS_FOREACH_RXWQE(&iface->super.super, i, packet, wc, num_wcs) {
if (!uct_ud_iface_check_grh(&iface->super, packet + UCT_IB_GRH_LEN,
wc[i].wc_flags & IBV_WC_GRH)) {
ucs_mpool_put_inline((void*)wc[i].wr_id);
continue;
}
uct_ib_log_recv_completion(&iface->super.super, IBV_QPT_UD, &wc[i],
packet, wc[i].byte_len, uct_ud_dump_packet);
uct_ud_ep_process_rx(&iface->super,
Expand Down

0 comments on commit 46bd382

Please sign in to comment.