From 607cfec4e2819c6ab263aa559547c9c9a4c580b4 Mon Sep 17 00:00:00 2001 From: "Luis E. P" Date: Mon, 3 Aug 2020 18:03:27 -0500 Subject: [PATCH] UCS: Improve clear_cache performance for aarch64 by avoiding unnecessary cache invalidates if caches are coherent --- src/ucs/arch/aarch64/cpu.h | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/ucs/arch/aarch64/cpu.h b/src/ucs/arch/aarch64/cpu.h index d5551ba83a7..b7a4d277cab 100644 --- a/src/ucs/arch/aarch64/cpu.h +++ b/src/ucs/arch/aarch64/cpu.h @@ -160,14 +160,19 @@ static inline void ucs_arch_clear_cache(void *start, void *end) uintptr_t ptr; unsigned icache; unsigned dcache; + unsigned dic; + unsigned idc; unsigned ctr_el0; /* Get cache line size, using ctr_el0 register * * Bits Name Function * ***************************** - * [31] - Reserved, res1. - * [30:28] - Reserved, res0. + * [31] - Reserved, RES1. + * [30] - Reserved, RES0. + * [29] DIC Instruction cache invalidation requirements for data to instruction + * coherence. + * [28] IDC Data cache clean requirements for instruction to data coherence. * [27:24] CWG Cache Write-Back granule. Log2 of the number of words of the * maximum size of memory that can be overwritten as a result of * the eviction of a cache entry that has had a memory location @@ -196,14 +201,28 @@ static inline void ucs_arch_clear_cache(void *start, void *end) asm volatile ("mrs\t%0, ctr_el0":"=r" (ctr_el0)); icache = sizeof(int) << (ctr_el0 & 0xf); dcache = sizeof(int) << ((ctr_el0 >> 16) & 0xf); + dic = (ctr_el0 >> 29) & 0x1; + idc = (ctr_el0 >> 28) & 0x1; - for (ptr = ucs_align_down((uintptr_t)start, dcache); ptr < (uintptr_t)end; ptr += dcache) { - asm volatile ("dc cvau, %0" :: "r" (ptr) : "memory"); + /* + * Check if Data cache clean to the Point of Unification is required for instruction to + * data coherence + */ + if (idc == 0) { + for (ptr = ucs_align_down((uintptr_t)start, dcache); ptr < (uintptr_t)end; ptr += dcache) { + asm volatile ("dc cvau, %0" :: "r" (ptr) : "memory"); + } } - ucs_aarch64_dsb(ish); - for (ptr = ucs_align_down((uintptr_t)start, icache); ptr < (uintptr_t)end; ptr += icache) { - asm volatile ("ic ivau, %0" :: "r" (ptr) : "memory"); + /* + * Check if Instruction cache invalidation to the Point of Unification is required for + * data to instruction coherence. + */ + if (dic == 0) { + ucs_aarch64_dsb(ish); + for (ptr = ucs_align_down((uintptr_t)start, icache); ptr < (uintptr_t)end; ptr += icache) { + asm volatile ("ic ivau, %0" :: "r" (ptr) : "memory"); + } } ucs_aarch64_dsb(ish); ucs_aarch64_isb();