Skip to content

Commit

Permalink
Extend AoSoA with aligned sub arrays
Browse files Browse the repository at this point in the history
The fixed-size sub arrays of AoSoA are now appropriately aligned by default. This behavior can be customized through the new FieldAlignment template parameter of the mapping.
  • Loading branch information
bernhardmgruber committed Jan 6, 2024
1 parent 1e2c4bc commit f6b1102
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 28 deletions.
35 changes: 22 additions & 13 deletions include/llama/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,12 @@ namespace llama
typename ArrayExtents,
typename RecordDim,
typename ArrayExtents::value_type Lanes,
mapping::FieldAlignment FA,
typename LinearizeArrayIndexFunctor,
template<typename>
typename PermuteFields>
inline constexpr std::size_t
aosoaLanes<mapping::AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayIndexFunctor, PermuteFields>>
aosoaLanes<mapping::AoSoA<ArrayExtents, RecordDim, Lanes, FA, LinearizeArrayIndexFunctor, PermuteFields>>
= Lanes;
} // namespace internal

Expand Down Expand Up @@ -364,19 +365,23 @@ namespace llama
typename LinearizeArrayIndex,
typename ArrayExtents::value_type LanesSrc,
typename ArrayExtents::value_type LanesDst,
mapping::FieldAlignment AlignSrc,
mapping::FieldAlignment AlignDst,
template<typename>
typename PermuteFields>
struct Copy<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayIndex, PermuteFields>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayIndex, PermuteFields>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>,
std::enable_if_t<LanesSrc != LanesDst>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayIndex, PermuteFields>, SrcBlob>&
srcView,
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayIndex, PermuteFields>, DstBlob>&
dstView,
const View<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
SrcBlob>& srcView,
View<
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>,
DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
Expand All @@ -393,16 +398,18 @@ namespace llama
template<typename>
typename PermuteFields,
typename ArrayExtents::value_type LanesSrc,
mapping::FieldAlignment AlignSrc,
mapping::Blobs DstBlobs,
mapping::SubArrayAlignment DstSubArrayAlignment>
struct Copy<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayIndex, PermuteFields>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
mapping::SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, LinearizeArrayIndex, PermuteFields>, SrcBlob>&
srcView,
const View<
mapping::AoSoA<ArrayExtents, RecordDim, LanesSrc, AlignSrc, LinearizeArrayIndex, PermuteFields>,
SrcBlob>& srcView,
View<
mapping::
SoA<ArrayExtents, RecordDim, DstBlobs, DstSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
Expand All @@ -423,20 +430,22 @@ namespace llama
template<typename>
typename PermuteFields,
typename ArrayExtents::value_type LanesDst,
mapping::FieldAlignment AlignDst,
mapping::Blobs SrcBlobs,
mapping::SubArrayAlignment SrcSubArrayAlignment>
struct Copy<
mapping::SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayIndex, PermuteFields>>
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>>
{
template<typename SrcBlob, typename DstBlob>
void operator()(
const View<
mapping::
SoA<ArrayExtents, RecordDim, SrcBlobs, SrcSubArrayAlignment, LinearizeArrayIndex, PermuteFields>,
SrcBlob>& srcView,
View<mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, LinearizeArrayIndex, PermuteFields>, DstBlob>&
dstView,
View<
mapping::AoSoA<ArrayExtents, RecordDim, LanesDst, AlignDst, LinearizeArrayIndex, PermuteFields>,
DstBlob>& dstView,
std::size_t threadId,
std::size_t threadCount)
{
Expand Down
32 changes: 26 additions & 6 deletions include/llama/mapping/AoSoA.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ namespace llama::mapping

/// Array of struct of arrays mapping. Used to create a \ref View via \ref allocView.
/// \tparam Lanes The size of the inner arrays of this array of struct of arrays.
/// \tparam TFieldAlignment If Align, padding bytes are inserted to guarantee that the fixed-size sub arrays of
/// each block are properly aligned. If Pack, the sub arrays are tightly packed.
/// \tparam PermuteFields Defines how the record dimension's fields should be permuted. See \ref
/// PermuteFieldsInOrder, \ref PermuteFieldsIncreasingAlignment, \ref PermuteFieldsDecreasingAlignment and
/// \ref PermuteFieldsMinimizePadding.
Expand All @@ -35,6 +37,7 @@ namespace llama::mapping
typename TArrayExtents,
typename TRecordDim,
typename TArrayExtents::value_type Lanes,
FieldAlignment TFieldAlignment = FieldAlignment::Align,
typename TLinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
template<typename> typename PermuteFields = PermuteFieldsInOrder>
struct AoSoA : MappingBase<TArrayExtents, TRecordDim>
Expand All @@ -45,6 +48,7 @@ namespace llama::mapping

public:
inline static constexpr typename TArrayExtents::value_type lanes = Lanes;
inline static constexpr FieldAlignment fieldAlignment = TFieldAlignment;
using LinearizeArrayIndexFunctor = TLinearizeArrayIndexFunctor;
using Permuter = PermuteFields<FlatRecordDim<TRecordDim>>;
inline static constexpr std::size_t blobCount = 1;
Expand All @@ -61,7 +65,8 @@ namespace llama::mapping

LLAMA_FN_HOST_ACC_INLINE constexpr auto blobSize(size_type) const -> size_type
{
const auto rs = static_cast<size_type>(sizeOf<TRecordDim>);
const auto rs = static_cast<size_type>(
flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align>);
return roundUpToMultiple(LinearizeArrayIndexFunctor{}.size(Base::extents()) * rs, Lanes * rs);
}

Expand All @@ -86,8 +91,15 @@ namespace llama::mapping
Permuter::template permute<flatRecordCoord<TRecordDim, RecordCoord<RecordCoords...>>>;
const auto blockIndex = flatArrayIndex / Lanes;
const auto laneIndex = flatArrayIndex % Lanes;
const auto offset = static_cast<size_type>(sizeOf<TRecordDim> * Lanes) * blockIndex
+ static_cast<size_type>(flatOffsetOf<typename Permuter::FlatRecordDim, flatFieldIndex, false>) * Lanes
const auto offset
= static_cast<size_type>(
flatSizeOf<typename Permuter::FlatRecordDim, fieldAlignment == FieldAlignment::Align> * Lanes)
* blockIndex
+ static_cast<size_type>(flatOffsetOf<
typename Permuter::FlatRecordDim,
flatFieldIndex,
fieldAlignment == FieldAlignment::Align>)
* Lanes
+ static_cast<size_type>(sizeof(GetType<TRecordDim, RecordCoord<RecordCoords...>>)) * laneIndex;
return {0, offset};
}
Expand All @@ -98,19 +110,27 @@ namespace llama::mapping
LLAMA_EXPORT
template<
std::size_t Lanes,
FieldAlignment Alignment = FieldAlignment::Align,
typename LinearizeArrayIndexFunctor = LinearizeArrayIndexRight,
template<typename> typename PermuteFields = PermuteFieldsInOrder>
struct BindAoSoA
{
template<typename ArrayExtents, typename RecordDim>
using fn = AoSoA<ArrayExtents, RecordDim, Lanes, LinearizeArrayIndexFunctor, PermuteFields>;
using fn = AoSoA<ArrayExtents, RecordDim, Lanes, Alignment, LinearizeArrayIndexFunctor, PermuteFields>;
};

/// Primary variable template of the isAoSoA trait: false for an arbitrary mapping type. The partial
/// specialization for \ref AoSoA mappings sets the value to true.
LLAMA_EXPORT
template<typename Mapping>
inline constexpr bool isAoSoA = false;

LLAMA_EXPORT
template<typename AD, typename RD, typename AD::value_type L, typename Lin, template<typename> typename Perm>
inline constexpr bool isAoSoA<AoSoA<AD, RD, L, Lin, Perm>> = true;
template<
typename AD,
typename RD,
typename AD::value_type L,
FieldAlignment A,
typename Lin,
template<typename>
typename Perm>
inline constexpr bool isAoSoA<AoSoA<AD, RD, L, A, Lin, Perm>> = true;
} // namespace llama::mapping
14 changes: 12 additions & 2 deletions tests/copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,20 @@ namespace
llama::mapping::LinearizeArrayIndexRight>,
// llama::mapping::SoA<ArrayExtents, RecordDim, llama::mapping::Blobs::Single,
// llama::mapping::SubArrayAlignment::Align, llama::mapping::LinearizeArrayIndexLeft>,
llama::mapping::AoSoA<ArrayExtents, RecordDim, 4, llama::mapping::LinearizeArrayIndexRight>,
llama::mapping::AoSoA<
ArrayExtents,
RecordDim,
4,
llama::mapping::FieldAlignment::Align,
llama::mapping::LinearizeArrayIndexRight>,
// llama::mapping::AoSoA<ArrayExtents, RecordDim, 4, llama::mapping::LinearizeArrayIndexLeft>,
// llama::mapping::AoSoA<ArrayExtents, RecordDim, 8, llama::mapping::LinearizeArrayIndexRight>,
llama::mapping::AoSoA<ArrayExtents, RecordDim, 8, llama::mapping::LinearizeArrayIndexLeft>>;
llama::mapping::AoSoA<
ArrayExtents,
RecordDim,
8,
llama::mapping::FieldAlignment::Pack,
llama::mapping::LinearizeArrayIndexLeft>>;

using AllMappings = mp_append<AoSMappings, OtherMappings>;

Expand Down
18 changes: 14 additions & 4 deletions tests/dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,24 @@ TEST_CASE("dump.Particle.SoA_MB")
dump(llama::mapping::MultiBlobSoA<ArrayExtents, Particle>{extents});
}

TEST_CASE("dump.Particle.AoSoA8")
TEST_CASE("dump.Particle.AoSoA8.Align")
{
dump(llama::mapping::AoSoA<ArrayExtents, Particle, 8>{extents});
dump(llama::mapping::AoSoA<ArrayExtents, Particle, 8, llama::mapping::FieldAlignment::Align>{extents});
}

TEST_CASE("dump.Particle.AoSoA32")
TEST_CASE("dump.Particle.AoSoA8.Pack")
{
dump(llama::mapping::AoSoA<ArrayExtents, Particle, 32>{extents});
dump(llama::mapping::AoSoA<ArrayExtents, Particle, 8, llama::mapping::FieldAlignment::Pack>{extents});
}

// Hands a 32-lane AoSoA mapping of Particle with FieldAlignment::Align, built from `extents`, to dump().
TEST_CASE("dump.Particle.AoSoA32.Align")
{
    using AlignedAoSoA32
        = llama::mapping::AoSoA<ArrayExtents, Particle, 32, llama::mapping::FieldAlignment::Align>;
    dump(AlignedAoSoA32{extents});
}

// Hands a 32-lane AoSoA mapping of Particle with FieldAlignment::Pack, built from `extents`, to dump().
TEST_CASE("dump.Particle.AoSoA32.Pack")
{
    using PackedAoSoA32
        = llama::mapping::AoSoA<ArrayExtents, Particle, 32, llama::mapping::FieldAlignment::Pack>;
    dump(PackedAoSoA32{extents});
}

TEST_CASE("dump.Particle.Split.SoA.AoS.1Buffer")
Expand Down
70 changes: 67 additions & 3 deletions tests/mapping.AoSoA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ TEST_CASE("mapping.maxLanes")
STATIC_REQUIRE(llama::mapping::maxLanes<RecordDim2, 512> == 32);
}

TEST_CASE("mapping.AoSoA.4.address")
TEST_CASE("mapping.AoSoA.4.Pack.address")
{
auto test = [](auto arrayExtents)
{
using Mapping = llama::mapping::AoSoA<decltype(arrayExtents), Particle, 4>;
using Mapping
= llama::mapping::AoSoA<decltype(arrayExtents), Particle, 4, llama::mapping::FieldAlignment::Pack>;
auto mapping = Mapping{arrayExtents};
using ArrayIndex = typename Mapping::ArrayExtents::Index;

Expand Down Expand Up @@ -86,10 +87,73 @@ TEST_CASE("mapping.AoSoA.4.address")
test(llama::ArrayExtents<int, 16, 16>{});
}

// Checks the byte offsets and the blob size reported by an AoSoA mapping of Particle with 4 lanes and
// FieldAlignment::Align. The same expectations are verified for several array extent types (see the calls at
// the end of the test case).
TEST_CASE("mapping.AoSoA.4.Align.address")
{
// Generic lambda so the identical checks can run against differently typed array extents.
auto test = [](auto arrayExtents)
{
using Mapping
= llama::mapping::AoSoA<decltype(arrayExtents), Particle, 4, llama::mapping::FieldAlignment::Align>;
auto mapping = Mapping{arrayExtents};
using ArrayIndex = typename Mapping::ArrayExtents::Index;

// Index {0, 0}: the expected offsets of the 8-byte fields advance in steps of 32 (= 8 bytes * 4 lanes) and
// those of the four <3, *> fields in steps of 4 (= 1 byte * 4 lanes).
// NOTE(review): the field sizes are inferred from the expected values below; Particle's definition is not
// part of this test case - confirm against it.
{
const auto ai = ArrayIndex{0, 0};
CHECK(mapping.template blobNrAndOffset<0, 0>(ai).offset == 0);
CHECK(mapping.template blobNrAndOffset<0, 1>(ai).offset == 32);
CHECK(mapping.template blobNrAndOffset<0, 2>(ai).offset == 64);
CHECK(mapping.template blobNrAndOffset<1>(ai).offset == 96);
CHECK(mapping.template blobNrAndOffset<2, 0>(ai).offset == 128);
CHECK(mapping.template blobNrAndOffset<2, 1>(ai).offset == 160);
CHECK(mapping.template blobNrAndOffset<2, 2>(ai).offset == 192);
CHECK(mapping.template blobNrAndOffset<3, 0>(ai).offset == 224);
CHECK(mapping.template blobNrAndOffset<3, 1>(ai).offset == 228);
CHECK(mapping.template blobNrAndOffset<3, 2>(ai).offset == 232);
CHECK(mapping.template blobNrAndOffset<3, 3>(ai).offset == 236);
}

// Index {0, 1}: each expected offset equals the corresponding {0, 0} offset plus one element of that field
// (8, 4 or 1 bytes), i.e. the next lane within the same sub array.
{
const auto ai = ArrayIndex{0, 1};
CHECK(mapping.template blobNrAndOffset<0, 0>(ai).offset == 8);
CHECK(mapping.template blobNrAndOffset<0, 1>(ai).offset == 40);
CHECK(mapping.template blobNrAndOffset<0, 2>(ai).offset == 72);
CHECK(mapping.template blobNrAndOffset<1>(ai).offset == 100);
CHECK(mapping.template blobNrAndOffset<2, 0>(ai).offset == 136);
CHECK(mapping.template blobNrAndOffset<2, 1>(ai).offset == 168);
CHECK(mapping.template blobNrAndOffset<2, 2>(ai).offset == 200);
CHECK(mapping.template blobNrAndOffset<3, 0>(ai).offset == 225);
CHECK(mapping.template blobNrAndOffset<3, 1>(ai).offset == 229);
CHECK(mapping.template blobNrAndOffset<3, 2>(ai).offset == 233);
CHECK(mapping.template blobNrAndOffset<3, 3>(ai).offset == 237);
}

// Index {1, 0}: each expected offset equals the corresponding {0, 0} offset plus 1024.
// NOTE(review): presumably this is flat index 16, i.e. block 4 of 256-byte blocks - confirm against the
// default linearization and Particle's aligned size.
{
const auto ai = ArrayIndex{1, 0};
CHECK(mapping.template blobNrAndOffset<0, 0>(ai).offset == 1024);
CHECK(mapping.template blobNrAndOffset<0, 1>(ai).offset == 1056);
CHECK(mapping.template blobNrAndOffset<0, 2>(ai).offset == 1088);
CHECK(mapping.template blobNrAndOffset<1>(ai).offset == 1120);
CHECK(mapping.template blobNrAndOffset<2, 0>(ai).offset == 1152);
CHECK(mapping.template blobNrAndOffset<2, 1>(ai).offset == 1184);
CHECK(mapping.template blobNrAndOffset<2, 2>(ai).offset == 1216);
CHECK(mapping.template blobNrAndOffset<3, 0>(ai).offset == 1248);
CHECK(mapping.template blobNrAndOffset<3, 1>(ai).offset == 1252);
CHECK(mapping.template blobNrAndOffset<3, 2>(ai).offset == 1256);
CHECK(mapping.template blobNrAndOffset<3, 3>(ai).offset == 1260);
}

// A single blob is expected; 16384 bytes corresponds to 64 bytes per record if the extents hold
// 16 * 16 = 256 records.
STATIC_REQUIRE(mapping.blobCount == 1);
CHECK(mapping.blobSize(0) == 16384);
};
// Run the checks with fully dynamic, partially dynamic and fully static extents; each call presumably
// describes a 16x16 array.
test(llama::ArrayExtentsDynamic<std::size_t, 2>{16, 16});
test(llama::ArrayExtents<int, 16, llama::dyn>{16});
test(llama::ArrayExtents<int, llama::dyn, 16>{16});
test(llama::ArrayExtents<int, 16, 16>{});
}

TEST_CASE("AoSoA.size_round_up")
{
using AoSoA = llama::mapping::AoSoA<llama::ArrayExtentsDynamic<std::size_t, 1>, Particle, 4>;
constexpr auto psize = llama::sizeOf<Particle>;
constexpr auto psize = llama::sizeOf<Particle, true>;

CHECK(AoSoA{{0}}.blobSize(0) == 0 * psize);
CHECK(AoSoA{{1}}.blobSize(0) == 4 * psize);
Expand Down

0 comments on commit f6b1102

Please sign in to comment.