Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCS/ARCH: Add SVE memcpy #5954

Merged
merged 1 commit into from
Dec 1, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion src/ucs/arch/aarch64/cpu.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
* Copyright (C) ARM Ltd. 2016-2020. ALL RIGHTS RESERVED.
* Copyright (C) Stony Brook University. 2016-2020. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -19,6 +20,9 @@
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#ifdef __ARM_FEATURE_SVE
#include <arm_sve.h>
#endif
lyu marked this conversation as resolved.
Show resolved Hide resolved


#define UCS_ARCH_CACHE_LINE_SIZE 64
Expand Down Expand Up @@ -231,10 +235,30 @@ static inline void ucs_arch_clear_cache(void *start, void *end)
}
#endif

#if defined(__ARM_FEATURE_SVE)
static inline void *memcpy_aarch64_sve(void *dest, const void *src, size_t len)
{
uint8_t *dest_u8 = (uint8_t*) dest;
const uint8_t *src_u8 = (uint8_t*) src;
uint64_t i = 0;
svbool_t pg = svwhilelt_b8_u64(i, (uint64_t)len);

do {
yosefe marked this conversation as resolved.
Show resolved Hide resolved
svst1_u8(pg, &dest_u8[i], svld1_u8(pg, &src_u8[i]));
i += svcntb();
pg = svwhilelt_b8_u64(i, (uint64_t)len);
} while (svptest_first(svptrue_b8(), pg));

return dest;
}
#endif

static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
{
#if defined(HAVE_AARCH64_THUNDERX2)
return __memcpy_thunderx2(dst, src,len);
return __memcpy_thunderx2(dst, src, len);
#elif defined(__ARM_FEATURE_SVE)
return memcpy_aarch64_sve(dst, src, len);
#else
return memcpy(dst, src, len);
#endif
Expand All @@ -245,6 +269,8 @@ ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
{
#if defined(HAVE_AARCH64_THUNDERX2)
__memcpy_thunderx2(dst, src,len);
#elif defined(__ARM_FEATURE_SVE)
memcpy_aarch64_sve(dst, src, len);
#else
memcpy(dst, src, len);
#endif
Expand Down