From 24c0ff04d0d1ab8c18dfb537cbdedf5f9aef3f8e Mon Sep 17 00:00:00 2001
From: Mikhail Brinskii
Date: Fri, 28 May 2021 20:05:00 +0300
Subject: [PATCH] UCT/RC: Print remote QP info for tx error CQEs - v1.11x

When a send completion with error is reported for a QP whose WQE carries
no address vector (i.e. not UD/DCI), query the QP for its remote QP number
and address attributes - through DEVX QUERY_QP when the mlx5 MD uses DEVX,
through ibv_query_qp() otherwise - and append them to the error message.
The query is best-effort: if it fails the peer info is left empty, and its
result does not replace the status returned for the completion.

---
 src/uct/ib/base/ib_log.c             | 16 ++++++
 src/uct/ib/base/ib_log.h             |  4 ++
 src/uct/ib/base/ib_verbs.h           | 22 ++++++++
 src/uct/ib/mlx5/dv/ib_mlx5_dv.c      | 84 ++++++++++++++++++++++++++++
 src/uct/ib/mlx5/dv/ib_mlx5_ifc.h     | 37 +++++++++++-
 src/uct/ib/mlx5/ib_mlx5.c            | 14 +++++
 src/uct/ib/mlx5/ib_mlx5.h            | 17 ++++++
 src/uct/ib/mlx5/ib_mlx5_log.c        | 56 ++++++++++++-------
 src/uct/ib/rc/verbs/rc_verbs_iface.c | 23 +++++---
 9 files changed, 243 insertions(+), 30 deletions(-)

diff --git a/src/uct/ib/base/ib_log.c b/src/uct/ib/base/ib_log.c
index d9620f04951..1959d28888a 100644
--- a/src/uct/ib/base/ib_log.c
+++ b/src/uct/ib/base/ib_log.c
@@ -110,6 +110,22 @@ void uct_ib_log_dump_atomic_masked_cswap(int argsize, uint64_t compare, uint64_t
              argsize * 8, compare, compare_mask, swap, swap_mask);
 }
 
+void uct_ib_log_dump_qp_peer_info(uct_ib_iface_t *iface,
+                                  const struct ibv_ah_attr *ah_attr,
+                                  uint32_t dest_qpn, char *buf, size_t max)
+{
+    char *s    = buf;
+    char *ends = buf + max;
+
+    snprintf(s, ends - s, "[rqpn 0x%x ", dest_qpn);
+    s += strlen(s);
+
+    uct_ib_ah_attr_str(s, ends - s, ah_attr);
+    s += strlen(s);
+
+    snprintf(s, ends - s, "]");
+}
+
 void uct_ib_log_dump_recv_completion(uct_ib_iface_t *iface, uint32_t local_qp,
                                      uint32_t sender_qp, uint16_t sender_lid,
                                      void *data, size_t length,
diff --git a/src/uct/ib/base/ib_log.h b/src/uct/ib/base/ib_log.h
index 5cc5b46c047..9b426782f86 100644
--- a/src/uct/ib/base/ib_log.h
+++ b/src/uct/ib/base/ib_log.h
@@ -53,6 +53,10 @@ void uct_ib_log_dump_atomic_masked_cswap(int argsize, uint64_t compare, uint64_t
                                          uint64_t swap, uint64_t swap_mask,
                                          char *buf, size_t max);
 
+void uct_ib_log_dump_qp_peer_info(uct_ib_iface_t *iface,
+                                  const struct ibv_ah_attr *ah_attr,
+                                  uint32_t dest_qpn, char *buf, size_t max);
+
 void uct_ib_log_dump_recv_completion(uct_ib_iface_t *iface, uint32_t local_qp,
                                      uint32_t sender_qp, uint16_t sender_lid,
                                      void *data, size_t length,
diff --git a/src/uct/ib/base/ib_verbs.h b/src/uct/ib/base/ib_verbs.h
index 83727ddf5df..524139cb03a 100644
--- a/src/uct/ib/base/ib_verbs.h
+++ b/src/uct/ib/base/ib_verbs.h
@@ -326,4 +326,26 @@ static inline ucs_status_t uct_ib_qp_max_send_sge(struct ibv_qp *qp,
     return UCS_OK;
 }
 
+static inline ucs_status_t
+uct_ib_query_qp_peer_info(struct ibv_qp *qp, struct ibv_ah_attr *ah_attr,
+                          uint32_t *dest_qpn)
+{
+    struct ibv_qp_attr qp_attr           = {};
+    struct ibv_qp_init_attr qp_init_attr = {};
+    int ret;
+
+    ret = ibv_query_qp(qp, &qp_attr, IBV_QP_AV | IBV_QP_DEST_QPN,
+                       &qp_init_attr);
+    if (ret) {
+        ucs_error("failed to query qp 0x%x (ret=%d): %m", qp->qp_num, ret);
+        return UCS_ERR_IO_ERROR;
+    }
+
+    *dest_qpn = qp_attr.dest_qp_num;
+
+    memcpy(ah_attr, &qp_attr.ah_attr, sizeof(*ah_attr));
+
+    return UCS_OK;
+}
+
 #endif /* UCT_IB_VERBS_H */
diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_dv.c b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c
index fd03bb39463..cc2ca9059c1 100644
--- a/src/uct/ib/mlx5/dv/ib_mlx5_dv.c
+++ b/src/uct/ib/mlx5/dv/ib_mlx5_dv.c
@@ -246,6 +246,41 @@ ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp,
     return UCS_OK;
 }
 
+static ucs_status_t
+uct_ib_mlx5_devx_query_qp(uct_ib_mlx5_qp_t *qp, void *in, size_t inlen,
+                          void *out, size_t outlen)
+{
+    int ret;
+
+    UCT_IB_MLX5DV_SET(query_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_QUERY_QP);
+    UCT_IB_MLX5DV_SET(query_qp_in, in, qpn, qp->qp_num);
+
+    switch (qp->type) {
+    case UCT_IB_MLX5_OBJ_TYPE_VERBS:
+        ret = mlx5dv_devx_qp_query(qp->verbs.qp, in, inlen, out, outlen);
+        if (ret) {
+            ucs_error("mlx5dv_devx_qp_query(%x) failed, syndrome %x: %m",
+                      UCT_IB_MLX5_CMD_OP_QUERY_QP,
+                      UCT_IB_MLX5DV_GET(query_qp_out, out, syndrome));
+            return UCS_ERR_IO_ERROR;
+        }
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_DEVX:
+        ret = mlx5dv_devx_obj_query(qp->devx.obj, in, inlen, out, outlen);
+        if (ret) {
+            ucs_error("mlx5dv_devx_obj_query(%x) failed, syndrome %x: %m",
+                      UCT_IB_MLX5_CMD_OP_QUERY_QP,
+                      UCT_IB_MLX5DV_GET(query_qp_out, out, syndrome));
+            return UCS_ERR_IO_ERROR;
+        }
+        break;
+    case UCT_IB_MLX5_OBJ_TYPE_LAST:
+        return UCS_ERR_UNSUPPORTED;
+    }
+
+    return UCS_OK;
+}
+
 ucs_status_t uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp,
                                               enum ibv_qp_state state)
 {
@@ -329,6 +364,55 @@ void uct_ib_mlx5_devx_set_qpc_port_affinity(uct_ib_mlx5_md_t *md,
     }
     UCT_IB_MLX5DV_SET(qpc, qpc, lag_tx_port_affinity, tx_port);
 }
+
+ucs_status_t
+uct_ib_mlx5_devx_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
+                                    struct ibv_ah_attr *ah_attr,
+                                    uint32_t *dest_qpn)
+{
+    char in[UCT_IB_MLX5DV_ST_SZ_BYTES(query_qp_in)]   = {};
+    char out[UCT_IB_MLX5DV_ST_SZ_BYTES(query_qp_out)] = {};
+    void *ctx;
+    ucs_status_t status;
+
+    status = uct_ib_mlx5_devx_query_qp(qp, in, sizeof(in), out, sizeof(out));
+    if (status != UCS_OK) {
+        return status;
+    }
+
+    ctx                        = UCT_IB_MLX5DV_ADDR_OF(query_qp_out, out, qpc);
+    *dest_qpn                  = UCT_IB_MLX5DV_GET(qpc, ctx, remote_qpn);
+    ah_attr->dlid              = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.rlid);
+    ah_attr->sl                = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.sl);
+    ah_attr->port_num          = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.vhca_port_num);
+    ah_attr->static_rate       = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.stat_rate);
+    ah_attr->src_path_bits     = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.mlid);
+    ah_attr->is_global         = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.grh) ||
+                                 uct_ib_iface_is_roce(iface);
+    ah_attr->grh.sgid_index    = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.src_addr_index);
+    ah_attr->grh.traffic_class = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.tclass);
+    ah_attr->grh.flow_label    = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.flow_label);
+    ah_attr->grh.hop_limit     = UCT_IB_MLX5DV_GET(qpc, ctx,
+                                                   primary_address_path.hop_limit);
+
+    if (ah_attr->is_global) {
+        memcpy(ah_attr->grh.dgid.raw,
+               UCT_IB_MLX5DV_ADDR_OF(qpc, ctx, primary_address_path.rgid_rip),
+               sizeof(ah_attr->grh.dgid.raw));
+    }
+
+    return UCS_OK;
+}
+
 #endif
 
 ucs_status_t uct_ib_mlx5dv_arm_cq(uct_ib_mlx5_cq_t *cq, int solicited)
diff --git a/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h b/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h
index de6a185d098..452c637bddc 100644
--- a/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h
+++ b/src/uct/ib/mlx5/dv/ib_mlx5_ifc.h
@@ -70,6 +70,7 @@ enum {
     UCT_IB_MLX5_CMD_OP_RTR2RTS_QP              = 0x504,
     UCT_IB_MLX5_CMD_OP_2ERR_QP                 = 0x507,
     UCT_IB_MLX5_CMD_OP_2RST_QP                 = 0x50a,
+    UCT_IB_MLX5_CMD_OP_QUERY_QP                = 0x50b,
     UCT_IB_MLX5_CMD_OP_CREATE_RMP              = 0x90c,
     UCT_IB_MLX5_CMD_OP_CREATE_DCT              = 0x710,
     UCT_IB_MLX5_CMD_OP_DRAIN_DCT               = 0x712,
@@ -83,8 +84,8 @@ enum {
 };
 
 enum {
-    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_MAX   = 0,
-    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR   = 1
+    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_MAX = 0,
+    UCT_IB_MLX5_HCA_CAP_OPMOD_GET_CUR = 1
 };
 
 enum {
@@ -1522,6 +1523,38 @@ struct uct_ib_mlx5_modify_qp_in_bits {
     uint8_t         reserved_at_60[0x20];
 };
 
+struct uct_ib_mlx5_query_qp_out_bits {
+    uint8_t         status[0x8];
+    uint8_t         reserved_at_8[0x18];
+
+    uint8_t         syndrome[0x20];
+
+    uint8_t         reserved_at_40[0x40];
+
+    uint8_t         opt_param_mask[0x20];
+
+    uint8_t         reserved_at_a0[0x20];
+
+    struct uct_ib_mlx5_qpc_bits qpc;
+
+    uint8_t         reserved_at_800[0x80];
+
+    uint8_t         pas[0][0x40];
+};
+
+struct uct_ib_mlx5_query_qp_in_bits {
+    uint8_t         opcode[0x10];
+    uint8_t         reserved_at_10[0x10];
+
+    uint8_t         reserved_at_20[0x10];
+    uint8_t         op_mod[0x10];
+
+    uint8_t         reserved_at_40[0x8];
+    uint8_t         qpn[0x18];
+
+    uint8_t         reserved_at_60[0x20];
+};
+
 enum {
     UCT_IB_MLX5_EVENT_TYPE_SRQ_LAST_WQE       = 0x13
 };
diff --git a/src/uct/ib/mlx5/ib_mlx5.c b/src/uct/ib/mlx5/ib_mlx5.c
index afd57bd4738..3dd0b55b502 100644
--- a/src/uct/ib/mlx5/ib_mlx5.c
+++ b/src/uct/ib/mlx5/ib_mlx5.c
@@ -738,6 +738,20 @@ ucs_status_t uct_ib_mlx5_modify_qp_state(uct_ib_mlx5_md_t *md,
     }
 }
 
+ucs_status_t
+uct_ib_mlx5_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
+                               struct ibv_ah_attr *ah_attr, uint32_t *dest_qpn)
+{
+    uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
+
+    if (md->flags & UCT_IB_MLX5_MD_FLAG_DEVX) {
+        return uct_ib_mlx5_devx_query_qp_peer_info(iface, qp, ah_attr,
+                                                   dest_qpn);
+    } else {
+        return uct_ib_query_qp_peer_info(qp->verbs.qp, ah_attr, dest_qpn);
+    }
+}
+
 ucs_status_t uct_ib_mlx5_md_get_atomic_mr_id(uct_ib_md_t *ibmd, uint8_t *mr_id)
 {
     uct_ib_mlx5_md_t *md = ucs_derived_of(ibmd, uct_ib_mlx5_md_t);
diff --git a/src/uct/ib/mlx5/ib_mlx5.h b/src/uct/ib/mlx5/ib_mlx5.h
index e8c5182f443..46ec5dbac44 100644
--- a/src/uct/ib/mlx5/ib_mlx5.h
+++ b/src/uct/ib/mlx5/ib_mlx5.h
@@ -495,6 +495,10 @@ ucs_status_t uct_ib_mlx5_modify_qp_state(uct_ib_mlx5_md_t *md,
                                          uct_ib_mlx5_qp_t *qp,
                                          enum ibv_qp_state state);
 
+ucs_status_t
+uct_ib_mlx5_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
+                               struct ibv_ah_attr *ah_attr, uint32_t *dest_qpn);
+
 void uct_ib_mlx5_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp);
 
 /**
@@ -611,6 +615,11 @@ void uct_ib_mlx5_devx_set_qpc_port_affinity(uct_ib_mlx5_md_t *md,
                                             uint8_t path_index, void *qpc,
                                             uint32_t *opt_param_mask);
 
+ucs_status_t
+uct_ib_mlx5_devx_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
+                                    struct ibv_ah_attr *ah_attr,
+                                    uint32_t *dest_qpn);
+
 static inline ucs_status_t
 uct_ib_mlx5_md_buf_alloc(uct_ib_mlx5_md_t *md, size_t size, int silent,
                          void **buf_p, uct_ib_mlx5_devx_umem_t *mem,
@@ -700,6 +709,14 @@ uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp, enum ibv_qp_state state)
     return UCS_ERR_UNSUPPORTED;
 }
 
+static inline ucs_status_t
+uct_ib_mlx5_devx_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
+                                    struct ibv_ah_attr *ah_attr,
+                                    uint32_t *dest_qpn)
+{
+    return UCS_ERR_UNSUPPORTED;
+}
+
 static inline void
 uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp) { }
 #endif
diff --git a/src/uct/ib/mlx5/ib_mlx5_log.c b/src/uct/ib/mlx5/ib_mlx5_log.c
index cb9f375f196..1c5fe375acd 100644
--- a/src/uct/ib/mlx5/ib_mlx5_log.c
+++ b/src/uct/ib/mlx5/ib_mlx5_log.c
@@ -53,17 +53,33 @@ static const char *uct_ib_mlx5_cqe_err_opcode(uct_ib_mlx5_err_cqe_t *ecqe)
     }
 }
 
+static int uct_ib_mlx5_is_qp_require_av_seg(int qp_type)
+{
+    if (qp_type == IBV_QPT_UD) {
+        return 1;
+    }
+#if HAVE_TL_DC
+    if (qp_type == UCT_IB_QPT_DCI) {
+        return 1;
+    }
+#endif
+    return 0;
+}
+
 ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
                                              uct_ib_mlx5_err_cqe_t *ecqe,
                                              uct_ib_mlx5_txwq_t *txwq,
                                              ucs_log_level_t log_level)
 {
-    ucs_status_t status   = UCS_ERR_IO_ERROR;
-    char         err_info[256] = {};
-    char         wqe_info[256] = {};
-    uint16_t     wqe_index;
-    uint32_t     qp_num;
-    void         *wqe;
+    ucs_status_t status = UCS_ERR_IO_ERROR;
+    char err_info[256]  = {};
+    char wqe_info[256]  = {};
+    char peer_info[128] = {};
+    uint16_t wqe_index;
+    uint32_t qp_num;
+    void *wqe;
+    unsigned dest_qpn;
+    struct ibv_ah_attr ah_attr;
 
     wqe_index = ntohs(ecqe->wqe_counter);
     qp_num    = ntohl(ecqe->s_wqe_opcode_qpn) & UCS_MASK(UCT_IB_QPN_ORDER);
@@ -132,6 +148,17 @@ ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
         wqe = UCS_PTR_BYTE_OFFSET(txwq->qstart, MLX5_SEND_WQE_BB * wqe_index);
         uct_ib_mlx5_wqe_dump(iface, wqe, txwq->qstart, txwq->qend, INT_MAX, 0,
                              NULL, wqe_info, sizeof(wqe_info) - 1, NULL);
+
+        /* If av is not required by the transport need to dump remote QP info,
+         * because it will not be shown in the wqe dump */
+        if (!uct_ib_mlx5_is_qp_require_av_seg(iface->config.qp_type)) {
+            /* Best-effort query, must not override the returned status */
+            if (uct_ib_mlx5_query_qp_peer_info(iface, &txwq->super, &ah_attr,
+                                               &dest_qpn) == UCS_OK) {
+                uct_ib_log_dump_qp_peer_info(iface, &ah_attr, dest_qpn,
+                                             peer_info, sizeof(peer_info));
+            }
+        }
     } else {
         snprintf(wqe_info, sizeof(wqe_info) - 1, "opcode %s",
                  uct_ib_mlx5_cqe_err_opcode(ecqe));
@@ -139,12 +166,12 @@ ucs_status_t uct_ib_mlx5_completion_with_err(uct_ib_iface_t *iface,
 
     ucs_log(log_level,
             "%s on "UCT_IB_IFACE_FMT"/%s (synd 0x%x vend 0x%x hw_synd %d/%d)\n"
-            "%s QP 0x%x wqe[%d]: %s",
+            "%s QP 0x%x wqe[%d]: %s %s",
             err_info, UCT_IB_IFACE_ARG(iface),
             uct_ib_iface_is_roce(iface) ? "RoCE" : "IB",
             ecqe->syndrome, ecqe->vendor_err_synd, ecqe->hw_synd_type >> 4,
             ecqe->hw_err_synd, uct_ib_qp_type_str(iface->config.qp_type),
-            qp_num, wqe_index, wqe_info);
+            qp_num, wqe_index, wqe_info, peer_info);
 
 out:
     return status;
@@ -228,19 +255,6 @@ static size_t uct_ib_mlx5_dump_dgram(char *buf, size_t max, void *seg, int is_et
            UCT_IB_MLX5_AV_FULL_SIZE : UCT_IB_MLX5_AV_BASE_SIZE;
 }
 
-static int uct_ib_mlx5_is_qp_require_av_seg(int qp_type)
-{
-    if (qp_type == IBV_QPT_UD) {
-        return 1;
-    }
-#if HAVE_TL_DC
-    if (qp_type == UCT_IB_QPT_DCI) {
-        return 1;
-    }
-#endif
-    return 0;
-}
-
 static void uct_ib_mlx5_wqe_dump(uct_ib_iface_t *iface, void *wqe, void *qstart,
                                  void *qend, int max_sge, int dump_qp,
                                  uct_log_data_dump_func_t packet_dump_cb,
diff --git a/src/uct/ib/rc/verbs/rc_verbs_iface.c b/src/uct/ib/rc/verbs/rc_verbs_iface.c
index 174491ce150..83bae53da4c 100644
--- a/src/uct/ib/rc/verbs/rc_verbs_iface.c
+++ b/src/uct/ib/rc/verbs/rc_verbs_iface.c
@@ -79,12 +79,15 @@ uct_rc_verbs_update_tx_res(uct_rc_iface_t *iface, uct_rc_verbs_ep_t *ep,
 static void uct_rc_verbs_handle_failure(uct_ib_iface_t *ib_iface, void *arg,
                                         ucs_status_t ep_status)
 {
-    struct ibv_wc     *wc      = arg;
-    uct_rc_iface_t    *iface   = ucs_derived_of(ib_iface, uct_rc_iface_t);
-    ucs_log_level_t    log_lvl = UCS_LOG_LEVEL_FATAL;
+    struct ibv_wc *wc       = arg;
+    uct_rc_iface_t *iface   = ucs_derived_of(ib_iface, uct_rc_iface_t);
+    ucs_log_level_t log_lvl = UCS_LOG_LEVEL_FATAL;
+    char peer_info[128]     = {};
+    unsigned dest_qpn;
     uct_rc_verbs_ep_t *ep;
-    ucs_status_t       status;
-    unsigned           count;
+    ucs_status_t status;
+    unsigned count;
+    struct ibv_ah_attr ah_attr;
 
     ep = ucs_derived_of(uct_rc_iface_lookup_ep(iface, wc->qp_num),
                         uct_rc_verbs_ep_t);
@@ -111,10 +114,16 @@ static void uct_rc_verbs_handle_failure(uct_ib_iface_t *ib_iface, void *arg,
                                       &ep->super.super.super, ep_status);
     log_lvl = uct_base_iface_failure_log_level(&ib_iface->super, status,
                                                ep_status);
+    status  = uct_ib_query_qp_peer_info(ep->qp, &ah_attr, &dest_qpn);
+    if (status == UCS_OK) {
+        uct_ib_log_dump_qp_peer_info(ib_iface, &ah_attr, dest_qpn, peer_info,
+                                     sizeof(peer_info));
+    }
 
     ucs_log(log_lvl,
-            "send completion with error: %s qpn 0x%x wrid 0x%lx vendor_err 0x%x",
-            ibv_wc_status_str(wc->status), wc->qp_num, wc->wr_id, wc->vendor_err);
+            "send completion with error: %s [qpn 0x%x wrid 0x%lx "
+            "vendor_err 0x%x]\n%s", ibv_wc_status_str(wc->status), wc->qp_num,
+            wc->wr_id, wc->vendor_err, peer_info);
 }
 
 ucs_status_t uct_rc_verbs_wc_to_ucs_status(enum ibv_wc_status status)