Skip to content

Commit

Permalink
[SYCL][ESIMD] Use passthrough for mask_expand_load (#15664)
Browse files Browse the repository at this point in the history
The elements that are not read (those whose mask bit is off) had undefined values, causing sporadic failures.

We can't use the pass_thru argument of gather because that breaks Gen12, which
was working before.

Closes: #15257
Closes: #15653

---------

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
  • Loading branch information
sarnex authored Oct 11, 2024
1 parent ec57ad7 commit ec979f5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
11 changes: 9 additions & 2 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14257,7 +14257,10 @@ mask_expand_load(const T *p, simd_mask<N> mask, PropertyListT props = {}) {
// becomes an index for compressed store/expanded load operation.
simd<uint32_t, N> offset =
cbit(simd<uint32_t, N>(offsets::value) & pack_mask(mask));
return gather(p, offset * sizeof(T), mask, props);
simd<T, N> pass_thru = 0;
simd<T, N> res = gather(p, offset * sizeof(T), mask, props);
res.merge(pass_thru, !mask);
return res;
}

/// template <typename T, int N, typename AccessorTy,
Expand Down Expand Up @@ -14305,7 +14308,11 @@ mask_expand_load(AccessorTy acc, uint32_t global_offset, simd_mask<N> mask,
// becomes an index for compressed store/expanded load operation.
simd<uint32_t, N> offset =
cbit(simd<uint32_t, N>(offsets::value) & pack_mask(mask));
return gather<T>(acc, offset * sizeof(T) + global_offset, mask, props);
simd<T, N> pass_thru = 0;
simd<T, N> res =
gather<T>(acc, offset * sizeof(T) + global_offset, mask, props);
res.merge(pass_thru, !mask);
return res;
}

/// template <typename T, int N,
Expand Down
3 changes: 0 additions & 3 deletions sycl/test-e2e/ESIMD/mask_expand_load.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out
// RUN: %{run} %t.out

// https://github.com/intel/llvm/issues/14826
// XFAIL: arch-intel_gpu_pvc

// This is a basic test to validate the expanded load API.

#include "esimd_test_utils.hpp"
Expand Down

0 comments on commit ec979f5

Please sign in to comment.