From 85d2d9d0f1a942dcb0e3e20eddc7df14983e48f3 Mon Sep 17 00:00:00 2001 From: Artemy Kovalyov Date: Fri, 13 Oct 2023 12:08:05 +0300 Subject: [PATCH] UCS/RCACHE: Add support for dynamic region alignment --- src/ucp/core/ucp_mm.c | 47 +++--- src/ucp/core/ucp_mm.inl | 4 +- src/ucs/memory/rcache.c | 218 +++++++++++++++------------- src/ucs/memory/rcache.h | 9 +- src/ucs/memory/rcache.inl | 9 +- src/ucs/memory/rcache_int.h | 5 - src/uct/cuda/gdr_copy/gdr_copy_md.c | 12 +- src/uct/rocm/copy/rocm_copy_md.c | 7 +- src/uct/sm/mm/xpmem/mm_xpmem.c | 5 +- test/gtest/common/test_obj_size.cc | 2 +- test/gtest/ucs/test_rcache.cc | 76 +++++++++- 11 files changed, 229 insertions(+), 165 deletions(-) diff --git a/src/ucp/core/ucp_mm.c b/src/ucp/core/ucp_mm.c index 57dbdb23632..3481f40b7be 100644 --- a/src/ucp/core/ucp_mm.c +++ b/src/ucp/core/ucp_mm.c @@ -582,9 +582,10 @@ ucp_memh_create(ucp_context_h context, void *address, size_t length, return UCS_OK; } -static ucs_status_t -ucp_memh_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, - ucs_memory_type_t mem_type, ucp_mem_h *memh_p) +static ucs_status_t ucp_memh_rcache_get(ucs_rcache_t *rcache, void *address, + size_t length, + ucs_memory_type_t mem_type, + size_t alignment, ucp_mem_h *memh_p) { ucp_mem_attr_t attr = { .mem_type = mem_type @@ -592,8 +593,8 @@ ucp_memh_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, ucs_rcache_region_t *rregion; ucs_status_t status; - status = ucs_rcache_get(rcache, address, length, PROT_READ | PROT_WRITE, - &attr, &rregion); + status = ucs_rcache_get(rcache, address, length, alignment, + PROT_READ | PROT_WRITE, &attr, &rregion); if (status != UCS_OK) { return status; } @@ -713,26 +714,18 @@ static ucs_status_t ucp_memh_init_uct_reg(ucp_context_h context, ucp_mem_h memh, return status; } -static void -ucp_memh_reg_align(ucp_context_h context, void **address_p, size_t *length_p, - ucp_md_map_t reg_md_map) +static size_t ucp_memh_reg_align(ucp_context_h context, ucp_md_map_t reg_md_map) { size_t reg_align = UCS_RCACHE_MIN_ALIGNMENT; const uct_md_attr_v2_t *md_attr; ucp_md_index_t md_index; - void *start, *end; ucs_for_each_bit(md_index, reg_md_map) { md_attr = &context->tl_mds[md_index].attr; reg_align = ucs_max(md_attr->reg_alignment, reg_align); } - start = ucs_align_down_pow2_ptr(*address_p, reg_align); - end = ucs_align_up_pow2_ptr(UCS_PTR_BYTE_OFFSET(*address_p, *length_p), - reg_align); - - *address_p = start; - *length_p = UCS_PTR_BYTE_DIFF(start, end); + return reg_align; } ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, @@ -745,6 +738,7 @@ ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, size_t reg_length; ucs_status_t status; ucs_memory_info_t mem_info; + size_t reg_align; if (context->config.ext.reg_whole_alloc_bitmap & UCS_BIT(mem_type)) { ucp_memory_detect_internal(context, address, length, &mem_info); @@ -755,13 +749,7 @@ ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, reg_length = length; } - ucp_memh_reg_align(context, ®_address, ®_length, reg_md_map); - - ucs_trace( - "memh_get_slow: %s address %p/%p length %zu/%zu %s md_map %" PRIx64 - " flags 0x%x", - alloc_name, address, reg_address, length, reg_length, - ucs_memory_type_names[mem_type], reg_md_map, uct_flags); + reg_align = ucp_memh_reg_align(context, reg_md_map); UCP_THREAD_CS_ENTER(&context->mt_lock); if (context->rcache == NULL) { @@ -769,7 +757,7 @@ ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, UCT_ALLOC_METHOD_LAST, 0, &memh); } else { status = ucp_memh_rcache_get(context->rcache, reg_address, reg_length, - mem_type, &memh); + mem_type, reg_align, &memh); } if (status != UCS_OK) { @@ -777,6 +765,15 @@ ucs_status_t ucp_memh_get_slow(ucp_context_h context, void *address, } ucs_assert(memh->mem_type == mem_type); + ucs_assert(ucs_padding((intptr_t)ucp_memh_address(memh), reg_align) == 0); + ucs_assert(ucs_padding(ucp_memh_length(memh), reg_align) == 0); + + ucs_trace( + "memh_get_slow: %s address %p/%p length %zu/%zu %s md_map %" PRIx64 + " flags 0x%x", + alloc_name, address, ucp_memh_address(memh), length, + ucp_memh_length(memh), ucs_memory_type_names[mem_type], reg_md_map, + uct_flags); status = ucp_memh_register(context, memh, reg_md_map, uct_flags, alloc_name); @@ -1617,7 +1614,7 @@ ucp_memh_import_slow(ucp_context_h context, ucs_rcache_t *existing_rcache, status = ucp_memh_rcache_get(rcache, unpacked->address, unpacked->length, unpacked->mem_type, - &memh); + UCS_RCACHE_MIN_ALIGNMENT, &memh); if (status != UCS_OK) { goto err_rcache_destroy; } @@ -1697,7 +1694,7 @@ ucp_memh_import(ucp_context_h context, const void *export_mkey_buffer, ucs_assert(rcache != NULL); rregion = ucs_rcache_lookup_unsafe(rcache, unpacked_memh.address, - unpacked_memh.length, + unpacked_memh.length, 1, PROT_READ | PROT_WRITE); if (rregion != NULL) { rcache_memh = ucs_derived_of(rregion, ucp_mem_t); diff --git a/src/ucp/core/ucp_mm.inl b/src/ucp/core/ucp_mm.inl index 54a13ce9d84..a660d1c1b7d 100644 --- a/src/ucp/core/ucp_mm.inl +++ b/src/ucp/core/ucp_mm.inl @@ -50,8 +50,8 @@ ucp_memh_get(ucp_context_h context, void *address, size_t length, if (ucs_likely(context->rcache != NULL)) { UCP_THREAD_CS_ENTER(&context->mt_lock); - rregion = ucs_rcache_lookup_unsafe(context->rcache, address, length, - PROT_READ | PROT_WRITE); + rregion = ucs_rcache_lookup_unsafe(context->rcache, address, length, + 1, PROT_READ | PROT_WRITE); if (rregion == NULL) { goto not_found; } diff --git a/src/ucs/memory/rcache.c b/src/ucs/memory/rcache.c index d2f5f515415..f25243780e7 100644 --- a/src/ucs/memory/rcache.c +++ b/src/ucs/memory/rcache.c @@ -102,11 +102,6 @@ ucs_config_field_t ucs_config_rcache_table[] = { {"RCACHE_OVERHEAD", "auto", "Registration cache lookup overhead", ucs_offsetof(ucs_rcache_config_t, overhead), UCS_CONFIG_TYPE_TIME_UNITS}, - {"RCACHE_ADDR_ALIGN", UCS_PP_MAKE_STRING(UCS_SYS_CACHE_LINE_SIZE), - "Registration cache address alignment, must be power of 2\n" - "between " UCS_PP_MAKE_STRING(UCS_PGT_ADDR_ALIGN) "and system page size", - ucs_offsetof(ucs_rcache_config_t, alignment), UCS_CONFIG_TYPE_MEMUNITS}, - {"RCACHE_MAX_REGIONS", "inf", "Maximal number of regions in the registration cache", ucs_offsetof(ucs_rcache_config_t, max_regions), @@ -184,8 +179,6 @@ void ucs_rcache_region_log(const char *file, int line, const char *function, void ucs_rcache_set_default_params(ucs_rcache_params_t *rcache_params) { rcache_params->region_struct_size = sizeof(ucs_rcache_region_t); - rcache_params->alignment = UCS_RCACHE_MIN_ALIGNMENT; - rcache_params->max_alignment = ucs_get_page_size(); rcache_params->ucm_events = 0; rcache_params->ucm_event_priority = 1000; rcache_params->ops = NULL; @@ -201,7 +194,6 @@ void ucs_rcache_set_params(ucs_rcache_params_t *rcache_params, { ucs_rcache_set_default_params(rcache_params); - rcache_params->alignment = rcache_config->alignment; rcache_params->ucm_event_priority = rcache_config->event_prio; rcache_params->max_regions = rcache_config->max_regions; rcache_params->max_size = rcache_config->max_size; @@ -373,7 +365,7 @@ static void ucs_rcache_region_collect_callback(const ucs_pgtable_t *pgtable, static void ucs_rcache_find_regions(ucs_rcache_t *rcache, ucs_pgt_addr_t from, ucs_pgt_addr_t to, ucs_list_link_t *list) { - ucs_list_head_init(list); + ucs_trace("%s: find regions in 0x%lx..0x%lx", rcache->name, from, to); ucs_pgtable_search_range(&rcache->pgtable, from, to, ucs_rcache_region_collect_callback, list); } @@ -546,6 +538,7 @@ static void ucs_rcache_invalidate_range(ucs_rcache_t *rcache, ucs_pgt_addr_t sta ucs_trace_func("rcache=%s, start=0x%lx, end=0x%lx", rcache->name, start, end); + ucs_list_head_init(®ion_list); ucs_rcache_find_regions(rcache, start, end - 1, ®ion_list); ucs_list_for_each_safe(region, tmp, ®ion_list, tmp_list) { /* all regions on the list are in the page table */ @@ -763,15 +756,86 @@ static void ucs_rcache_lru_evict(ucs_rcache_t *rcache) } } +/* Lock must be held */ +static ucs_status_t +ucs_rcache_check_overlap_one(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, + ucs_pgt_addr_t *end, size_t *alignment, int *prot, + ucs_rcache_region_t *region) +{ + int mem_prot; + + UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_MERGES, 1); + /* + * If we don't provide some of the permissions the other region had, + * we might want to expand our permissions to support them. We can + * do that only if the memory range actually has those permissions. + * This will prevent the users of the other region to kick us out + * the next time. + */ + if (!ucs_test_all_flags(*prot, region->prot)) { + /* A slow path because searching /proc/maps in order to + * check memory protection is very expensive. + * + * TODO: currently rcache is optimized for the case where most of + * the regions have same protection. + */ + mem_prot = UCS_PROFILE_CALL_ALWAYS(ucs_get_mem_prot, *start, *end); + if (!ucs_test_all_flags(mem_prot, *prot)) { + ucs_rcache_region_trace(rcache, region, + "do not merge "UCS_RCACHE_PROT_FMT + " with mem "UCS_RCACHE_PROT_FMT, + UCS_RCACHE_PROT_ARG(*prot), + UCS_RCACHE_PROT_ARG(mem_prot)); + /* The memory protection can not satisfy that of the + * region. However mem_reg still may be able to deal with it. + * Do the safest thing: invalidate cached region + */ + ucs_rcache_region_invalidate_internal( + rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); + return UCS_ERR_CANCELED; + } else if (ucs_test_all_flags(mem_prot, region->prot)) { + *prot |= region->prot; + } else { + /* Could not support other region's permissions - so do not merge + * with it. If anybody will use the other region, this will kick + * out our region, and may potentially lead to ineffective use + * of the cache. We can't solve it as long as we have only one + * page table, since it does not allow overlap. + */ + ucs_rcache_region_trace(rcache, region, + "do not merge mem "UCS_RCACHE_PROT_FMT" with", + UCS_RCACHE_PROT_ARG(mem_prot)); + ucs_rcache_region_invalidate_internal( + rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); + return UCS_ERR_CANCELED; + } + } + + ucs_rcache_region_trace(rcache, region, + "merge 0x%lx..0x%lx "UCS_RCACHE_PROT_FMT" with", + *start, *end, UCS_RCACHE_PROT_ARG(*prot)); + *alignment = ucs_max(*alignment, region->alignment); + *start = ucs_min(*start, region->super.start); + *end = ucs_max(*end, region->super.end); + + /* coverity[double_unlock] */ + /* coverity[double_lock] */ + ucs_rcache_region_invalidate_internal( + rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); + + return UCS_OK; +} + /* Lock must be held */ static ucs_status_t ucs_rcache_check_overlap(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, - ucs_pgt_addr_t *end, int *prot, int *merged, - ucs_rcache_region_t **region_p) + ucs_pgt_addr_t *end, size_t *alignment, int *prot, + int *merged, ucs_rcache_region_t **region_p) { ucs_rcache_region_t *region, *tmp; + ucs_pgt_addr_t old_start, old_end; ucs_list_link_t region_list; - int mem_prot; + ucs_status_t status; ucs_trace_func("rcache=%s, *start=0x%lx, *end=0x%lx", rcache->name, *start, *end); @@ -780,78 +844,43 @@ ucs_rcache_check_overlap(ucs_rcache_t *rcache, ucs_pgt_addr_t *start, /* coverity[double_unlock] */ ucs_rcache_check_gc_list(rcache, 1); + ucs_list_head_init(®ion_list); ucs_rcache_find_regions(rcache, *start, *end - 1, ®ion_list); - /* TODO check if any of the regions is locked */ + region = ucs_list_next(®ion_list, ucs_rcache_region_t, tmp_list); + if (ucs_list_is_only(®ion_list, ®ion->tmp_list) && + (*start >= region->super.start) && (*end <= region->super.end) && + ucs_rcache_region_test(region, *prot, *alignment)) { + /* Found a region which contains the given address range */ + ucs_rcache_region_hold(rcache, region); + *region_p = region; + return UCS_ERR_ALREADY_EXISTS; + } - ucs_list_for_each_safe(region, tmp, ®ion_list, tmp_list) { - if ((*start >= region->super.start) && (*end <= region->super.end) && - ucs_rcache_region_test(region, *prot)) - { - /* Found a region which contains the given address range */ - ucs_rcache_region_hold(rcache, region); - *region_p = region; - return UCS_ERR_ALREADY_EXISTS; + /* TODO check if any of the regions is locked */ + do { + old_start = *start; + old_end = *end; + + ucs_list_for_each_safe(region, tmp, ®ion_list, tmp_list) { + status = ucs_rcache_check_overlap_one(rcache, start, end, alignment, + prot, region); + if (status == UCS_OK) { + *merged = 1; + } } - UCS_STATS_UPDATE_COUNTER(rcache->stats, UCS_RCACHE_MERGES, 1); - /* - * If we don't provide some of the permissions the other region had, - * we might want to expand our permissions to support them. We can - * do that only if the memory range actually has those permissions. - * This will prevent the users of the other region to kick us out - * the next time. + *start = ucs_align_down_pow2(*start, *alignment); + *end = ucs_align_up_pow2(*end, *alignment); + + /* Overlapping regions might necessitate a larger alignment, which, + * in turn, can result in even more overlapping regions. */ - if (!ucs_test_all_flags(*prot, region->prot)) { - /* A slow path because searching /proc/maps in order to - * check memory protection is very expensive. - * - * TODO: currently rcache is optimized for the case where most of - * the regions have same protection. - */ - mem_prot = UCS_PROFILE_CALL_ALWAYS(ucs_get_mem_prot, *start, *end); - if (!ucs_test_all_flags(mem_prot, *prot)) { - ucs_rcache_region_trace(rcache, region, - "do not merge "UCS_RCACHE_PROT_FMT - " with mem "UCS_RCACHE_PROT_FMT, - UCS_RCACHE_PROT_ARG(*prot), - UCS_RCACHE_PROT_ARG(mem_prot)); - /* The memory protection can not satisfy that of the - * region. However mem_reg still may be able to deal with it. - * Do the safest thing: invalidate cached region - */ - ucs_rcache_region_invalidate_internal( - rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); - continue; - } else if (ucs_test_all_flags(mem_prot, region->prot)) { - *prot |= region->prot; - } else { - /* Could not support other region's permissions - so do not merge - * with it. If anybody will use the other region, this will kick - * out our region, and may potentially lead to ineffective use - * of the cache. We can't solve it as long as we have only one - * page table, since it does not allow overlap. - */ - ucs_rcache_region_trace(rcache, region, - "do not merge mem "UCS_RCACHE_PROT_FMT" with", - UCS_RCACHE_PROT_ARG(mem_prot)); - ucs_rcache_region_invalidate_internal( - rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); - continue; - } - } + ucs_list_head_init(®ion_list); + ucs_rcache_find_regions(rcache, *start, old_start - 1, ®ion_list); + ucs_rcache_find_regions(rcache, old_end, *end - 1, ®ion_list); + } while (!ucs_list_is_empty(®ion_list)); - ucs_rcache_region_trace(rcache, region, - "merge 0x%lx..0x%lx "UCS_RCACHE_PROT_FMT" with", - *start, *end, UCS_RCACHE_PROT_ARG(*prot)); - *start = ucs_min(*start, region->super.start); - *end = ucs_max(*end, region->super.end); - *merged = 1; - /* coverity[double_unlock] */ - /* coverity[double_lock] */ - ucs_rcache_region_invalidate_internal( - rcache, region, UCS_RCACHE_REGION_PUT_FLAG_IN_PGTABLE); - } return UCS_OK; } @@ -887,9 +916,9 @@ static ucs_status_t ucs_rcache_fill_pfn(ucs_rcache_region_t *region) return status; } -ucs_status_t -ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, - int prot, void *arg, ucs_rcache_region_t **region_p) +ucs_status_t ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, + size_t length, size_t alignment, int prot, + void *arg, ucs_rcache_region_t **region_p) { ucs_rcache_region_t *region; ucs_pgt_addr_t start, end; @@ -905,17 +934,15 @@ ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, retry: /* Align to page size */ - start = ucs_align_down_pow2((uintptr_t)address, - rcache->params.alignment); - end = ucs_align_up_pow2 ((uintptr_t)address + length, - rcache->params.alignment); + start = ucs_align_down_pow2((uintptr_t)address, alignment); + end = ucs_align_up_pow2 ((uintptr_t)address + length, alignment); region = NULL; merged = 0; /* Check overlap with existing regions */ /* coverity[double_lock] */ status = UCS_PROFILE_CALL(ucs_rcache_check_overlap, rcache, &start, &end, - &prot, &merged, ®ion); + &alignment, &prot, &merged, ®ion); if (status == UCS_ERR_ALREADY_EXISTS) { /* Found a matching region (it could have been added after we released * the lock) @@ -965,6 +992,7 @@ ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, region->lru_flags = 0; region->refcount = 1; region->status = UCS_INPROGRESS; + region->alignment = alignment; ++rcache->num_regions; @@ -1033,7 +1061,8 @@ void ucs_rcache_region_hold(ucs_rcache_t *rcache, ucs_rcache_region_t *region) } ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, - int prot, void *arg, ucs_rcache_region_t **region_p) + size_t alignment, int prot, void *arg, + ucs_rcache_region_t **region_p) { ucs_pgt_addr_t start = (uintptr_t)address; ucs_pgt_region_t *pgt_region; @@ -1050,8 +1079,7 @@ ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, if (ucs_likely(pgt_region != NULL)) { region = ucs_derived_of(pgt_region, ucs_rcache_region_t); if (((start + length) <= region->super.end) && - ucs_rcache_region_test(region, prot)) - { + ucs_rcache_region_test(region, prot, alignment)) { ucs_rcache_region_hold(rcache, region); ucs_rcache_region_validate_pfn(rcache, region); ucs_rcache_region_lru_get(rcache, region); @@ -1070,7 +1098,7 @@ ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, * - found unregistered region */ return UCS_PROFILE_CALL(ucs_rcache_create_region, rcache, address, length, - prot, arg, region_p); + alignment, prot, arg, region_p); } void ucs_rcache_region_put(ucs_rcache_t *rcache, ucs_rcache_region_t *region) @@ -1241,18 +1269,6 @@ static UCS_CLASS_INIT_FUNC(ucs_rcache_t, const ucs_rcache_params_t *params, goto err; } - if (!ucs_is_pow2(params->alignment) || - (params->alignment < UCS_RCACHE_MIN_ALIGNMENT) || - (params->alignment > params->max_alignment)) - { - ucs_error("invalid regcache alignment (%zu): must be a power of 2 " - "between %zu and %zu", - params->alignment, UCS_RCACHE_MIN_ALIGNMENT, - params->max_alignment); - status = UCS_ERR_INVALID_PARAM; - goto err; - } - self->name = ucs_strdup(name, "ucs rcache name"); if (self->name == NULL) { status = UCS_ERR_NO_MEMORY; diff --git a/src/ucs/memory/rcache.h b/src/ucs/memory/rcache.h index 6ae1da835f6..ec52cd37ff4 100644 --- a/src/ucs/memory/rcache.h +++ b/src/ucs/memory/rcache.h @@ -130,10 +130,6 @@ struct ucs_rcache_params { size_t region_struct_size; /**< Size of memory region structure, must be at least the size of @ref ucs_rcache_region_t */ - size_t alignment; /**< Force-align regions to this size. - Must be smaller or equal to - system page size. */ - size_t max_alignment; /**< Maximum alignment */ int ucm_events; /**< UCM events to register. Currently UCM_EVENT_VM_UNMAPPED and UCM_EVENT_MEM_TYPE_FREE are supported */ @@ -167,6 +163,7 @@ struct ucs_rcache_region { ucs_list_link_t lru_list; /**< LRU list element */ ucs_list_link_t tmp_list; /**< Temp list element */ ucs_list_link_t comp_list; /**< Completion list element */ + size_t alignment; volatile uint32_t refcount; /**< Reference count, including +1 if it's in the page table */ ucs_status_t status; /**< Current status code */ @@ -210,6 +207,7 @@ void ucs_rcache_destroy(ucs_rcache_t *rcache); * @param [in] rcache Memory registration cache. * @param [in] address Address to register or resolve. * @param [in] length Length of buffer to register or resolve. + * @param [in] alignment Alignment for registration buffer. * @param [in] prot Requested access flags, PROT_xx (same as passed to mmap). * @param [in] arg Custom argument passed down to memory registration * callback, if a memory registration happens during @@ -223,7 +221,8 @@ void ucs_rcache_destroy(ucs_rcache_t *rcache); * @return Error code. */ ucs_status_t ucs_rcache_get(ucs_rcache_t *rcache, void *address, size_t length, - int prot, void *arg, ucs_rcache_region_t **region_p); + size_t alignment, int prot, void *arg, + ucs_rcache_region_t **region_p); /** diff --git a/src/ucs/memory/rcache.inl b/src/ucs/memory/rcache.inl index 024b660d518..16d30da2ca4 100644 --- a/src/ucs/memory/rcache.inl +++ b/src/ucs/memory/rcache.inl @@ -10,10 +10,11 @@ #include "rcache_int.h" static UCS_F_ALWAYS_INLINE int -ucs_rcache_region_test(ucs_rcache_region_t *region, int prot) +ucs_rcache_region_test(ucs_rcache_region_t *region, int prot, size_t alignment) { return (region->flags & UCS_RCACHE_REGION_FLAG_REGISTERED) && - ucs_test_all_flags(region->prot, prot); + ucs_test_all_flags(region->prot, prot) && + ((alignment == 1) || (region->alignment >= alignment)); } @@ -47,7 +48,7 @@ ucs_rcache_region_lru_remove(ucs_rcache_t *rcache, ucs_rcache_region_t *region) static UCS_F_ALWAYS_INLINE ucs_rcache_region_t * ucs_rcache_lookup_unsafe(ucs_rcache_t *rcache, void *address, size_t length, - int prot) + size_t alignment, int prot) { ucs_pgt_addr_t start = (uintptr_t)address; ucs_pgt_region_t *pgt_region; @@ -68,7 +69,7 @@ ucs_rcache_lookup_unsafe(ucs_rcache_t *rcache, void *address, size_t length, region = ucs_derived_of(pgt_region, ucs_rcache_region_t); if (((start + length) > region->super.end) || - !ucs_rcache_region_test(region, prot)) + !ucs_rcache_region_test(region, prot, alignment)) { return NULL; } diff --git a/src/ucs/memory/rcache_int.h b/src/ucs/memory/rcache_int.h index 6e399cd4a0a..90b7c5cf084 100644 --- a/src/ucs/memory/rcache_int.h +++ b/src/ucs/memory/rcache_int.h @@ -131,11 +131,6 @@ void ucs_rcache_atfork_disable(); size_t ucs_rcache_distribution_get_num_bins(); -ucs_status_t -ucs_rcache_create_region(ucs_rcache_t *rcache, void *address, size_t length, - int prot, void *arg, ucs_rcache_region_t **region_p); - - void ucs_mem_region_destroy_internal(ucs_rcache_t *rcache, ucs_rcache_region_t *region, int drop_lock); diff --git a/src/uct/cuda/gdr_copy/gdr_copy_md.c b/src/uct/cuda/gdr_copy/gdr_copy_md.c index e193f9d050c..16d136ea13e 100644 --- a/src/uct/cuda/gdr_copy/gdr_copy_md.c +++ b/src/uct/cuda/gdr_copy/gdr_copy_md.c @@ -185,7 +185,6 @@ uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length, const uct_md_mem_reg_params_t *params, uct_mem_h *memh_p) { uct_gdr_copy_mem_t *mem_hndl = NULL; - void *start, *end; ucs_status_t status; mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle"); @@ -194,13 +193,10 @@ uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length, return UCS_ERR_NO_MEMORY; } - start = ucs_align_down_pow2_ptr(address, GPU_PAGE_SIZE); - end = ucs_align_up_pow2_ptr(UCS_PTR_BYTE_OFFSET(address, length), GPU_PAGE_SIZE); - ucs_assert_always(start <= end); - - status = uct_gdr_copy_mem_reg_internal(uct_md, start, - UCS_PTR_BYTE_DIFF(start, end), - 0, mem_hndl); + ucs_assert(ucs_padding((intptr_t)address, GPU_PAGE_SIZE) == 0); + ucs_assert(ucs_padding(length, GPU_PAGE_SIZE) == 0); + status = uct_gdr_copy_mem_reg_internal(uct_md, address, length, 0, + mem_hndl); if (status != UCS_OK) { ucs_free(mem_hndl); return status; diff --git a/src/uct/rocm/copy/rocm_copy_md.c b/src/uct/rocm/copy/rocm_copy_md.c index 29197920a29..22b74167099 100644 --- a/src/uct/rocm/copy/rocm_copy_md.c +++ b/src/uct/rocm/copy/rocm_copy_md.c @@ -314,8 +314,9 @@ uct_rocm_copy_mem_rcache_reg(uct_md_h uct_md, void *address, size_t length, ucs_status_t status; uct_rocm_copy_mem_t *memh; - status = ucs_rcache_get(md->rcache, (void *)address, length, PROT_READ|PROT_WRITE, - &flags, &rregion); + status = ucs_rcache_get(md->rcache, (void *)address, length, + ucs_get_page_size(), PROT_READ | PROT_WRITE, &flags, + &rregion); if (status != UCS_OK) { return status; } @@ -431,8 +432,6 @@ uct_rocm_copy_md_open(uct_component_h component, const char *md_name, if (md_config->enable_rcache != UCS_NO) { ucs_rcache_set_params(&rcache_params, &md_config->rcache); rcache_params.region_struct_size = sizeof(uct_rocm_copy_rcache_region_t); - rcache_params.alignment = ucs_get_page_size(); - rcache_params.max_alignment = ucs_get_page_size(); rcache_params.ucm_events = UCM_EVENT_MEM_TYPE_FREE; rcache_params.ucm_event_priority = md_config->rcache.event_prio; rcache_params.context = md; diff --git a/src/uct/sm/mm/xpmem/mm_xpmem.c b/src/uct/sm/mm/xpmem/mm_xpmem.c index d1500347996..35bd07fb887 100644 --- a/src/uct/sm/mm/xpmem/mm_xpmem.c +++ b/src/uct/sm/mm/xpmem/mm_xpmem.c @@ -248,8 +248,6 @@ uct_xpmem_rmem_add(xpmem_segid_t xsegid, uct_xpmem_remote_mem_t **rmem_p) } rcache_params.region_struct_size = sizeof(uct_xpmem_remote_region_t); - rcache_params.alignment = ucs_get_page_size(); - rcache_params.max_alignment = ucs_get_page_size(); rcache_params.ucm_events = 0; rcache_params.ucm_event_priority = 0; rcache_params.ops = &uct_xpmem_rcache_ops; @@ -370,7 +368,8 @@ uct_xpmem_mem_attach_common(xpmem_segid_t xsegid, uintptr_t remote_address, end = ucs_align_up_pow2 (remote_address + length, ucs_get_page_size()); status = ucs_rcache_get(rmem->rcache, (void*)start, end - start, - PROT_READ|PROT_WRITE, NULL, &rcache_region); + ucs_get_page_size(), PROT_READ | PROT_WRITE, NULL, + &rcache_region); if (status != UCS_OK) { goto err_rmem_put; } diff --git a/test/gtest/common/test_obj_size.cc b/test/gtest/common/test_obj_size.cc index c18288c27c6..7933162d129 100644 --- a/test/gtest/common/test_obj_size.cc +++ b/test/gtest/common/test_obj_size.cc @@ -59,7 +59,7 @@ UCS_TEST_F(test_obj_size, size) { /* TODO reduce request size to 240 or less after removing old protocols state */ EXPECTED_SIZE(ucp_request_t, 264); EXPECTED_SIZE(ucp_recv_desc_t, 48); - EXPECTED_SIZE(ucp_mem_t, 152); + EXPECTED_SIZE(ucp_mem_t, 160); EXPECTED_SIZE(uct_ep_t, 8); EXPECTED_SIZE(uct_base_ep_t, 8); EXPECTED_SIZE(uct_rkey_bundle_t, 24); diff --git a/test/gtest/ucs/test_rcache.cc b/test/gtest/ucs/test_rcache.cc index 0fdb31f365e..a3662a8a5b9 100644 --- a/test/gtest/ucs/test_rcache.cc +++ b/test/gtest/ucs/test_rcache.cc @@ -20,8 +20,6 @@ static ucs_rcache_params_t get_default_rcache_params(void *context, const ucs_rcache_ops_t *ops) { ucs_rcache_params_t params = {sizeof(ucs_rcache_region_t), - UCS_PGT_ADDR_ALIGN, - ucs_get_page_size(), UCM_EVENT_VM_UNMAPPED, 1000, ops, @@ -102,10 +100,13 @@ class test_rcache : public ucs::test { return params; } - region *get(void *address, size_t length, int prot = PROT_READ|PROT_WRITE) { + region *get(void *address, size_t length, int prot = PROT_READ | PROT_WRITE, + size_t alignment = UCS_PGT_ADDR_ALIGN) + { ucs_status_t status; ucs_rcache_region_t *r; - status = ucs_rcache_get(m_rcache, address, length, prot, NULL, &r); + status = ucs_rcache_get(m_rcache, address, length, alignment, prot, + NULL, &r); ASSERT_UCS_OK(status); EXPECT_TRUE(r != NULL); struct region *region = ucs_derived_of(r, struct region); @@ -401,6 +402,66 @@ UCS_MT_TEST_F(test_rcache, merge, 6) { munmap(mem, size1 + pad + size2); } +UCS_TEST_F(test_rcache, merge_aligned) +{ + /* + * 0 4 5 9 + * +---------+-----+---------+ + * | region1 | pad | region2 | 13 + * +---+-----+-----+----+----+----------+ + * | region3 | + * +--------------------+----+----------+----+ + * | merged | aligned | + * +--------------------+--------------------+ + * 0 8 16 + */ + static const size_t size = 4 * ucs_get_page_size(); + static const size_t align = 8 * ucs_get_page_size(); + static const size_t total_size = 16 * ucs_get_page_size(); + void *mem = NULL; + + region *region1, *region2, *region3, *region1_2, *region2_2, *region3_2; + void *ptr1, *ptr2, *ptr3; + + EXPECT_EQ(posix_memalign(&mem, align, total_size), 0); + memset(mem, 0, total_size); + + /* Create region1 */ + ptr1 = (char*)mem; + region1 = get(ptr1, size); + + /* Create region2 */ + ptr2 = (char*)mem + 5 * ucs_get_page_size(); + region2 = get(ptr2, size); + + /* Create region3 which should merge region1 and region2 */ + ptr3 = (char*)mem + 9 * ucs_get_page_size(); + region3 = get(ptr3, size); + + region3_2 = get(ptr3, size, PROT_READ | PROT_WRITE, align); + EXPECT_NE(region3, region3_2) << /* should be different */ + "region3 0x" << std::hex << region3->super.super.start << "..0x" << + region3->super.super.end << " region3_2 0x" << + region3_2->super.super.start << "..0x" << region3_2->super.super.end; + + EXPECT_EQ(region3_2->super.super.start, (uintptr_t)mem); + EXPECT_EQ(region3_2->super.super.end, (uintptr_t)mem + total_size); + + region1_2 = get(ptr1, size); + region2_2 = get(ptr2, size); + EXPECT_EQ(region3_2, region2_2); /* should be merged */ + EXPECT_EQ(region3_2, region1_2); /* should be merged too */ + + put(region1_2); + put(region2_2); + put(region3_2); + put(region1); + put(region2); + put(region3); + + free(mem); +} + UCS_MT_TEST_F(test_rcache, merge_inv, 6) { /* * Merge with another region which causes immediate invalidation of the @@ -655,7 +716,8 @@ UCS_MT_TEST_F(test_rcache_no_register, register_failure, 10) { ucs_status_t status; ucs_rcache_region_t *r; - status = ucs_rcache_get(m_rcache, ptr, size, PROT_READ|PROT_WRITE, NULL, &r); + status = ucs_rcache_get(m_rcache, ptr, size, UCS_PGT_ADDR_ALIGN, + PROT_READ | PROT_WRITE, NULL, &r); EXPECT_EQ(UCS_ERR_IO_ERROR, status); EXPECT_EQ(0u, m_reg_count); @@ -701,7 +763,8 @@ UCS_MT_TEST_F(test_rcache_no_register, merge_invalid_prot_slow, 5) ucs_status_t status; ucs_rcache_region_t *r; - status = ucs_rcache_get(m_rcache, ptr2, size2, PROT_WRITE, NULL, &r); + status = ucs_rcache_get(m_rcache, ptr2, size2, UCS_PGT_ADDR_ALIGN, + PROT_WRITE, NULL, &r); EXPECT_EQ(UCS_ERR_IO_ERROR, status); barrier(); @@ -717,7 +780,6 @@ class test_rcache_with_limit : public test_rcache { ucs_rcache_params_t params = test_rcache::rcache_params(); params.max_regions = 2; params.max_size = 1000; - params.alignment = 16; return params; }