diff --git a/src/ucs/memory/rcache.c b/src/ucs/memory/rcache.c index 7cf2ae8c50a..0c05154bae5 100644 --- a/src/ucs/memory/rcache.c +++ b/src/ucs/memory/rcache.c @@ -336,7 +336,7 @@ static void ucs_rcache_invalidate_range(ucs_rcache_t *rcache, ucs_pgt_addr_t sta } /* Lock must be held in write mode */ -static void ucs_rcache_check_inv_queue(ucs_rcache_t *rcache) +static void ucs_rcache_check_inv_queue_internal(ucs_rcache_t *rcache) { ucs_rcache_inv_entry_t *entry; @@ -489,7 +489,7 @@ ucs_rcache_check_overlap(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, ucs_trace_func("rcache=%s, *start=0x%lx, *end=0x%lx", rcache->name, *start, *end); - ucs_rcache_check_inv_queue(rcache); + ucs_rcache_check_inv_queue_internal(rcache); ucs_rcache_find_regions(rcache, *start, *end - 1, ®ion_list); @@ -739,6 +739,13 @@ void ucs_rcache_region_put(ucs_rcache_t *rcache, ucs_rcache_region_t *region) UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_PUTS, 1); } +void ucs_rcache_check_inv_queue_slow(ucs_rcache_t *rcache) +{ + pthread_rwlock_wrlock(&rcache->lock); + ucs_rcache_check_inv_queue_internal(rcache); + pthread_rwlock_unlock(&rcache->lock); +} + static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params, const char *name, ucs_stats_node_t *stats_parent) { @@ -839,7 +846,7 @@ static UCS_CLASS_CLEANUP_FUNC(ucs_rcache_t) ucm_unset_event_handler(self->params.ucm_events, ucs_rcache_unmapped_callback, self); - ucs_rcache_check_inv_queue(self); + ucs_rcache_check_inv_queue_internal(self); ucs_rcache_purge(self); if (self->lru.count > 0) { diff --git a/src/ucs/memory/rcache_int.h b/src/ucs/memory/rcache_int.h index 2a144adbc2c..c69889a5459 100644 --- a/src/ucs/memory/rcache_int.h +++ b/src/ucs/memory/rcache_int.h @@ -7,7 +7,11 @@ #ifndef UCS_REG_CACHE_INT_H_ #define UCS_REG_CACHE_INT_H_ +#include "rcache.h" + #include +#include +#include #include @@ -61,4 +65,16 @@ struct ucs_rcache { UCS_STATS_NODE_DECLARE(stats) }; + +void ucs_rcache_check_inv_queue_slow(ucs_rcache_t *rcache); + + +static UCS_F_ALWAYS_INLINE void +ucs_rcache_check_inv_queue_fast(ucs_rcache_t *rcache) +{ + if (ucs_unlikely(!ucs_queue_is_empty(&rcache->inv_q))) { + ucs_rcache_check_inv_queue_slow(rcache); + } +} + #endif diff --git a/src/uct/base/uct_md.h b/src/uct/base/uct_md.h index 7754c457e63..97033935d26 100644 --- a/src/uct/base/uct_md.h +++ b/src/uct/base/uct_md.h @@ -15,6 +15,7 @@ #include #include +#include #include diff --git a/src/uct/ib/base/ib_md.h b/src/uct/ib/base/ib_md.h index dc3a2dba952..c98fecf5b29 100644 --- a/src/uct/ib/base/ib_md.h +++ b/src/uct/ib/base/ib_md.h @@ -417,4 +417,13 @@ ucs_status_t uct_ib_md_handle_mr_list_multithreaded(uct_ib_md_t *md, void *address, size_t length, uint64_t access, size_t chunk, struct ibv_mr **mrs); + +static UCS_F_ALWAYS_INLINE void +uct_ib_md_progress(uct_ib_md_t *md) +{ + if (ucs_likely(md->rcache != NULL)){ + ucs_rcache_check_inv_queue_fast(md->rcache); + } +} + #endif diff --git a/src/uct/ib/dc/dc_mlx5.c b/src/uct/ib/dc/dc_mlx5.c index 42e14b2032a..69dc15a58dd 100644 --- a/src/uct/ib/dc/dc_mlx5.c +++ b/src/uct/ib/dc/dc_mlx5.c @@ -239,29 +239,37 @@ uct_dc_mlx5_poll_tx(uct_dc_mlx5_iface_t *iface) return 1; } -static unsigned uct_dc_mlx5_iface_progress(void *arg) +static UCS_F_ALWAYS_INLINE unsigned +uct_dc_mlx5_iface_progress_common(uct_dc_mlx5_iface_t *iface, int poll_flags) { - uct_dc_mlx5_iface_t *iface = arg; unsigned count; - count = uct_rc_mlx5_iface_common_poll_rx(&iface->super, 0); + count = uct_rc_mlx5_iface_common_poll_rx(&iface->super, poll_flags); + if (count > 0) { + return count; + } + + count = uct_dc_mlx5_poll_tx(iface); if (count > 0) { return count; } - return uct_dc_mlx5_poll_tx(iface); + + uct_ib_md_progress(uct_ib_iface_md(&iface->super.super.super)); + return 0; +} + +static unsigned uct_dc_mlx5_iface_progress(void *arg) +{ + uct_dc_mlx5_iface_t *iface = arg; + + return uct_dc_mlx5_iface_progress_common(iface, 0); } static unsigned uct_dc_mlx5_iface_progress_tm(void *arg) { uct_dc_mlx5_iface_t *iface = arg; - unsigned count; - count = uct_rc_mlx5_iface_common_poll_rx(&iface->super, - UCT_RC_MLX5_POLL_FLAG_TM); - if (count > 0) { - return count; - } - return uct_dc_mlx5_poll_tx(iface); + return uct_dc_mlx5_iface_progress_common(iface, UCT_RC_MLX5_POLL_FLAG_TM); } static void UCS_CLASS_DELETE_FUNC_NAME(uct_dc_mlx5_iface_t)(uct_iface_t*); diff --git a/src/uct/ib/rc/accel/rc_mlx5_iface.c b/src/uct/ib/rc/accel/rc_mlx5_iface.c index 5335f4c56b8..7316736c215 100644 --- a/src/uct/ib/rc/accel/rc_mlx5_iface.c +++ b/src/uct/ib/rc/accel/rc_mlx5_iface.c @@ -140,16 +140,30 @@ uct_rc_mlx5_iface_poll_tx(uct_rc_mlx5_iface_common_t *iface) return 1; } -unsigned uct_rc_mlx5_iface_progress(void *arg) +static UCS_F_ALWAYS_INLINE unsigned +uct_rc_mlx5_iface_progress_common(uct_rc_mlx5_iface_common_t *iface, + int poll_flag) { - uct_rc_mlx5_iface_common_t *iface = arg; unsigned count; - count = uct_rc_mlx5_iface_common_poll_rx(iface, UCT_RC_MLX5_POLL_FLAG_HAS_EP); + count = uct_rc_mlx5_iface_common_poll_rx(iface, poll_flag); if (count > 0) { return count; } - return uct_rc_mlx5_iface_poll_tx(iface); + count = uct_rc_mlx5_iface_poll_tx(iface); + if (count > 0) { + return count; + } + + uct_ib_md_progress(uct_ib_iface_md(&iface->super.super)); + return 0; +} + +unsigned uct_rc_mlx5_iface_progress(void *arg) +{ + uct_rc_mlx5_iface_common_t *iface = arg; + + return uct_rc_mlx5_iface_progress_common(iface, UCT_RC_MLX5_POLL_FLAG_HAS_EP); } static ucs_status_t uct_rc_mlx5_iface_query(uct_iface_h tl_iface, uct_iface_attr_t *iface_attr) @@ -320,15 +334,9 @@ ucs_status_t uct_rc_mlx5_iface_create_qp(uct_rc_mlx5_iface_common_t *iface, static UCS_F_MAYBE_UNUSED unsigned uct_rc_mlx5_iface_progress_tm(void *arg) { uct_rc_mlx5_iface_common_t *iface = arg; - unsigned count; - count = uct_rc_mlx5_iface_common_poll_rx(iface, - UCT_RC_MLX5_POLL_FLAG_HAS_EP | + return uct_rc_mlx5_iface_progress_common(iface, UCT_RC_MLX5_POLL_FLAG_HAS_EP | UCT_RC_MLX5_POLL_FLAG_TM); - if (count > 0) { - return count; - } - return uct_rc_mlx5_iface_poll_tx(iface); } #if IBV_HW_TM diff --git a/src/uct/ib/rc/verbs/rc_verbs_iface.c b/src/uct/ib/rc/verbs/rc_verbs_iface.c index 6338a1be01c..4d23196d8d9 100644 --- a/src/uct/ib/rc/verbs/rc_verbs_iface.c +++ b/src/uct/ib/rc/verbs/rc_verbs_iface.c @@ -139,7 +139,13 @@ static unsigned uct_rc_verbs_iface_progress(void *arg) return count; } - return uct_rc_verbs_iface_poll_tx(iface); + count = uct_rc_verbs_iface_poll_tx(iface); + if (count > 0) { + return count; + } + + uct_ib_md_progress(uct_ib_iface_md(&iface->super.super)); + return 0; } static void uct_rc_verbs_iface_init_inl_wrs(uct_rc_verbs_iface_t *iface) diff --git a/src/uct/ib/ud/base/ud_iface.c b/src/uct/ib/ud/base/ud_iface.c index 43822ac64f5..9a734220478 100644 --- a/src/uct/ib/ud/base/ud_iface.c +++ b/src/uct/ib/ud/base/ud_iface.c @@ -847,6 +847,8 @@ static void uct_ud_iface_timer(int timer_id, void *arg) ucs_twheel_sweep(&iface->async.slow_timer, now); uct_ud_iface_async_progress(iface); uct_ud_leave(iface); + + uct_ib_md_progress(uct_ib_iface_md(&iface->super)); } void uct_ud_iface_release_desc(uct_recv_desc_t *self, void *desc)