diff --git a/DataFormats/Portable/README.md b/DataFormats/Portable/README.md index 1724c3123d9fc..7a9b80bea684d 100644 --- a/DataFormats/Portable/README.md +++ b/DataFormats/Portable/README.md @@ -126,3 +126,33 @@ should explicitly use the `PortableHostObject` and `PortableHostCollection Modules that implement portable interfaces (_e.g._ producers) should use the generic types based on `ALPAKA_ACCELERATOR_NAMESPACE::PortableObject` or `PortableObject`, and `ALPAKA_ACCELERATOR_NAMESPACE::PortableCollection` or `PortableCollection`. + +## Multi layout collections + +Some use cases require multiple sets of columns of different sizes. This can be achieved in a single +`PortableCollection` using `PortableCollection2`, `PortableCollection3` and so on up to +`PortableCollection5<...>`. The numbered, fixed size wrappers are needed in order to be added to the ROOT dictionary. +Behind the scenes recursive `PortableHostMultiCollection` and +`ALPAKA_ACCELERATOR_NAMESPACE::PortableDeviceMultiCollection` (note the reversed parameter order) provide +the actual class definitions. + +## ROOT dictionary declaration helper scripts + +In order to be serialized by ROOT, the products need to be added to its dictionary. This happens during `scram build` +as instructed in `/src/classes_def.xml` and `/src/alpaka/classes_cuda_def.xml` and +`/src/alpaka/classes_rocm_def.xml`. Two scripts generate the code to be added to the xml files. +Both scripts expect the collections to be aliased as in: +``` +using TestDeviceMultiCollection3 = PortableCollection3; +``` + +For the host xml, SoA layouts have to be listed and duplicates should be removed manually if multiple +collections share the same layout. 
The scripts are called as follows: +``` +./DataFormats/Portable/scripts/portableHostCollectionHints portabletest::TestHostMultiCollection3 \ + portabletest::TestSoALayout portabletest::TestSoALayout2 portabletest::TestSoALayout3 + +./DataFormats/Portable/scripts/portableDeviceCollectionHints portabletest::TestHostMultiCollection3 +``` +The layouts should not be added as parameters for the device collection. These scripts can be used equally with the +single layout collections or multi layout collections. \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableCollection.h b/DataFormats/Portable/interface/PortableCollection.h index abc64b99cb0d3..3f69ffdd95491 100644 --- a/DataFormats/Portable/interface/PortableCollection.h +++ b/DataFormats/Portable/interface/PortableCollection.h @@ -22,12 +22,18 @@ namespace traits { using CollectionType = PortableHostCollection; }; + template + class PortableMultiCollectionTrait; } // namespace traits // type alias for a generic SoA-based product template >> using PortableCollection = typename traits::PortableCollectionTrait::CollectionType; +// type alias for a generic SoA-based product +template +using PortableMultiCollection = typename traits::PortableMultiCollectionTrait::CollectionType; + // define how to copy PortableCollection between host and device namespace cms::alpakatools { template @@ -40,6 +46,16 @@ namespace cms::alpakatools { } }; + template + struct CopyToHost> { + template + static auto copyAsync(TQueue& queue, PortableDeviceMultiCollection const& srcData) { + PortableHostMultiCollection dstData(srcData.sizes(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; + template struct CopyToDevice> { template @@ -50,6 +66,17 @@ namespace cms::alpakatools { return dstData; } }; + + template + struct CopyToDevice> { + template + static auto copyAsync(TQueue& queue, PortableHostMultiCollection const& srcData) { + using TDevice = typename 
alpaka::trait::DevType::type; + PortableDeviceMultiCollection dstData(srcData.sizes(), queue); + alpaka::memcpy(queue, dstData.buffer(), srcData.buffer()); + return dstData; + } + }; } // namespace cms::alpakatools #endif // DataFormats_Portable_interface_PortableCollection_h diff --git a/DataFormats/Portable/interface/PortableCollectionCommon.h b/DataFormats/Portable/interface/PortableCollectionCommon.h new file mode 100644 index 0000000000000..c1f98abc8f9f3 --- /dev/null +++ b/DataFormats/Portable/interface/PortableCollectionCommon.h @@ -0,0 +1,103 @@ +#ifndef DataFormats_Portable_interface_PortableCollectionCommon_h +#define DataFormats_Portable_interface_PortableCollectionCommon_h + +#include +#include +#include + +namespace portablecollection { + + // Note: if there are other uses for this, it could be moved to a central place + template + constexpr void constexpr_for(F&& f) { + if constexpr (Start < End) { + f(std::integral_constant()); + constexpr_for(std::forward(f)); + } + } + + template + struct CollectionLeaf { + CollectionLeaf() = default; + CollectionLeaf(std::byte* buffer, int32_t elements) : layout_(buffer, elements), view_(layout_) {} + template + CollectionLeaf(std::byte* buffer, std::array const& sizes) + : layout_(buffer, sizes[Idx]), view_(layout_) { + static_assert(N >= Idx); + } + using Layout = T; + using View = typename Layout::View; + using ConstView = typename Layout::ConstView; + Layout layout_; // + View view_; //! + // Make sure types are not void. 
+ static_assert(not std::is_same::value); + }; + + template + struct CollectionImpl : public CollectionLeaf, public CollectionImpl { + CollectionImpl() = default; + CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} + + template + CollectionImpl(std::byte* buffer, std::array const& sizes) + : CollectionLeaf(buffer, sizes), + CollectionImpl(CollectionLeaf::layout_.metadata().nextByte(), sizes) {} + }; + + template + struct CollectionImpl : public CollectionLeaf { + CollectionImpl() = default; + CollectionImpl(std::byte* buffer, int32_t elements) : CollectionLeaf(buffer, elements) {} + + template + CollectionImpl(std::byte* buffer, std::array const& sizes) : CollectionLeaf(buffer, sizes) { + static_assert(N == Idx + 1); + } + }; + + template + struct Collections : public CollectionImpl<0, Args...> {}; + + // return the type at the Idx position in Args... + template + using TypeResolver = typename std::tuple_element>::type; + + // count how many times the type T occurs in Args... + template + inline constexpr std::size_t typeCount = ((std::is_same::value ? 1 : 0) + ... + 0); + + // count the non-void elements of Args... + template + inline constexpr std::size_t membersCount = sizeof...(Args); + + // if the type T occurs in Tuple, TupleTypeIndex has a static member value with the corresponding index; + // otherwise there is no such data member. 
+ template + struct TupleTypeIndex {}; + + template + struct TupleTypeIndex> { + static_assert(typeCount == 0, "the requested type appears more than once among the arguments"); + static constexpr std::size_t value = 0; + }; + + template + struct TupleTypeIndex> { + static_assert(not std::is_same_v); + static_assert(typeCount == 1, "the requested type does not appear among the arguments"); + static constexpr std::size_t value = 1 + TupleTypeIndex>::value; + }; + + // if the type T occurs in Args..., TypeIndex has a static member value with the corresponding index; + // otherwise there is no such data member. + template + using TypeIndex = TupleTypeIndex>; + + // return the index where the type T occurs in Args... + template + inline constexpr std::size_t typeIndex = TypeIndex::value; + +} // namespace portablecollection + +#endif // DataFormats_Portable_interface_PortableCollectionCommon_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableDeviceCollection.h b/DataFormats/Portable/interface/PortableDeviceCollection.h index 84ed057e82f8c..568e15e2ca6df 100644 --- a/DataFormats/Portable/interface/PortableDeviceCollection.h +++ b/DataFormats/Portable/interface/PortableDeviceCollection.h @@ -9,6 +9,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "DataFormats/Portable/interface/PortableCollectionCommon.h" // generic SoA-based product in device memory template >> @@ -75,4 +76,215 @@ class PortableDeviceCollection { View view_; //! 
}; -#endif // DataFormats_Portable_interface_PortableDeviceCollection_h +// generic SoA-based product in device memory +template +class PortableDeviceMultiCollection { + //static_assert(alpaka::isDevice); + static_assert(not std::is_same_v, + "Use PortableHostCollection instead of PortableDeviceCollection"); + + template + static constexpr std::size_t count_t_ = portablecollection::typeCount; + + template + static constexpr std::size_t index_t_ = portablecollection::typeIndex; + + static constexpr std::size_t members_ = sizeof...(Args) + 1; + +public: + using Buffer = cms::alpakatools::device_buffer; + using ConstBuffer = cms::alpakatools::const_device_buffer; + using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; + + using SizesArray = std::array; + + template + using Layout = portablecollection::TypeResolver; + + //template + //using View = typename Layout::View; + // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args... + template + using View = typename std::tuple_element>::type::View; + + //template + //using ConstView = typename Layout::ConstView; + // Workaround for flaky expansion of tempaltes by nvcc (expanding with "Args" instead of "Args..." 
+ template + using ConstView = typename std::tuple_element>::type::ConstView; + +private: + template + using Leaf = portablecollection::CollectionLeaf>; + + template + Leaf& get() { + return static_cast&>(impl_); + } + + template + Leaf const& get() const { + return static_cast const&>(impl_); + } + + template + Leaf>& get() { + return static_cast>&>(impl_); + } + + template + Leaf> const& get() const { + return static_cast> const&>(impl_); + } + +public: + PortableDeviceMultiCollection() = default; + + PortableDeviceMultiCollection(int32_t elements, TDev const& device) + : buffer_{cms::alpakatools::make_device_buffer(device, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + template >> + PortableDeviceMultiCollection(int32_t elements, TQueue const& queue) + : buffer_{cms::alpakatools::make_device_buffer(queue, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + static int32_t computeDataSize(const SizesArray& sizes) { + int32_t ret = 0; + portablecollection::constexpr_for<0, members_>( + [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); + return ret; + } + + PortableDeviceMultiCollection(const SizesArray& sizes, TDev const& device) + // allocate device memory + : buffer_{cms::alpakatools::make_device_buffer(device, computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + 
portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + template >> + PortableDeviceMultiCollection(const SizesArray& sizes, TQueue const& queue) + // allocate device memory asynchronously on the given work queue + : buffer_{cms::alpakatools::make_device_buffer(queue, computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + // non-copyable + PortableDeviceMultiCollection(PortableDeviceMultiCollection const&) = delete; + PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection const&) = delete; + + // movable + PortableDeviceMultiCollection(PortableDeviceMultiCollection&&) = default; + PortableDeviceMultiCollection& operator=(PortableDeviceMultiCollection&&) = default; + + // default destructor + ~PortableDeviceMultiCollection() = default; + + // access the View by index + template Idx)>> + View& view() { + return get().view_; + } + + template Idx)>> + ConstView const& view() const { + return get().view_; + } + + template Idx)>> + ConstView const& const_view() const { + return get().view_; + } + + template Idx)>> + View& operator*() { + return get().view_; + } + + template Idx)>> + ConstView const& operator*() const { + return get().view_; + } + + template Idx)>> + View* operator->() { + return &get().view_; + } + + template Idx)>> + ConstView const* operator->() const { + return &get().view_; + } + + // access the View by type + template + typename T::View& view() { + return get().view_; + } + + template + typename T::ConstView const& view() const { + return get().view_; + } + + template + typename T::ConstView const& const_view() const { + return 
get().view_; + } + + template + typename T::View& operator*() { + return get().view_; + } + + template + typename T::ConstView const& operator*() const { + return get().view_; + } + + template + typename T::View* operator->() { + return &get().view_; + } + + template + typename T::ConstView const* operator->() const { + return &get().view_; + } + + // access the Buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // Extract the sizes array + SizesArray sizes() const { + SizesArray ret; + portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); + return ret; + } + +private: + std::optional buffer_; //! + Implementation impl_; // (serialized: this is where the layouts live) +}; + +#endif // DataFormats_Portable_interface_PortableDeviceCollection_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/PortableHostCollection.h b/DataFormats/Portable/interface/PortableHostCollection.h index 8b098688455e8..61dde4c58f425 100644 --- a/DataFormats/Portable/interface/PortableHostCollection.h +++ b/DataFormats/Portable/interface/PortableHostCollection.h @@ -9,6 +9,7 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "DataFormats/Portable/interface/PortableCollectionCommon.h" // generic SoA-based product in host memory template @@ -86,4 +87,236 @@ class PortableHostCollection { View view_; //! 
}; +// generic SoA-based product in host memory +template +class PortableHostMultiCollection { + template + static constexpr std::size_t count_t_ = portablecollection::typeCount; + + template + static constexpr std::size_t index_t_ = portablecollection::typeIndex; + + static constexpr std::size_t members_ = portablecollection::membersCount; + +public: + using Buffer = cms::alpakatools::host_buffer; + using ConstBuffer = cms::alpakatools::const_host_buffer; + using Implementation = portablecollection::CollectionImpl<0, T0, Args...>; + + using SizesArray = std::array; + + template Idx)>> + using Layout = portablecollection::TypeResolver; + template Idx)>> + using View = typename Layout::View; + template Idx)>> + using ConstView = typename Layout::ConstView; + +private: + template + using Leaf = portablecollection::CollectionLeaf>; + + template + Leaf& get() { + return static_cast&>(impl_); + } + + template + Leaf const& get() const { + return static_cast const&>(impl_); + } + + template + portablecollection::CollectionLeaf, T>& get() { + return static_cast, T>&>(impl_); + } + + template + const portablecollection::CollectionLeaf, T>& get() const { + return static_cast, T>&>(impl_); + } + + static int32_t computeDataSize(const std::array& sizes) { + int32_t ret = 0; + portablecollection::constexpr_for<0, members_>( + [&sizes, &ret](auto i) { ret += Layout::computeDataSize(sizes[i]); }); + return ret; + } + +public: + PortableHostMultiCollection() = default; + + PortableHostMultiCollection(int32_t elements, alpaka_common::DevHost const& host) + // allocate pageable host memory + : buffer_{cms::alpakatools::make_host_buffer(Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + template >> + PortableHostMultiCollection(int32_t elements, 
TQueue const& queue) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(queue, Layout<>::computeDataSize(elements))}, + impl_{buffer_->data(), elements} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + assert(reinterpret_cast(buffer_->data()) % Layout<>::alignment == 0); + static_assert(members_ == 1); + } + + PortableHostMultiCollection(const std::array& sizes, alpaka_common::DevHost const& host) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + template >> + PortableHostMultiCollection(const std::array& sizes, TQueue const& queue) + // allocate pinned host memory associated to the given work queue, accessible by the queue's device + : buffer_{cms::alpakatools::make_host_buffer(queue, computeDataSize(sizes))}, + impl_{buffer_->data(), sizes} { + // Alpaka set to a default alignment of 128 bytes defining ALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 + portablecollection::constexpr_for<0, members_>( + [&](auto i) { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); }); + constexpr auto alignment = Layout<0>::alignment; + portablecollection::constexpr_for<1, members_>( + [&alignment](auto i) { static_assert(alignment == Layout::alignment); }); + } + + // non-copyable + PortableHostMultiCollection(PortableHostMultiCollection 
const&) = delete; + PortableHostMultiCollection& operator=(PortableHostMultiCollection const&) = delete; + + // movable + PortableHostMultiCollection(PortableHostMultiCollection&&) = default; + PortableHostMultiCollection& operator=(PortableHostMultiCollection&&) = default; + + // default destructor + ~PortableHostMultiCollection() = default; + + // access the View by index + template Idx)>> + View& view() { + return get().view_; + } + + template Idx)>> + ConstView const& view() const { + return get().view_; + } + + template Idx)>> + ConstView const& const_view() const { + return get().view_; + } + + template Idx)>> + View& operator*() { + return get().view_; + } + + template Idx)>> + ConstView const& operator*() const { + return get().view_; + } + + template Idx)>> + View* operator->() { + return &get().view_; + } + + template Idx)>> + ConstView const* operator->() const { + return &get().view_; + } + + // access the View by type + template + typename T::View& view() { + return get().view_; + } + + template + typename T::ConstView const& view() const { + return get().view_; + } + + template + typename T::ConstView const& const_view() const { + return get().view_; + } + + template + typename T::View& operator*() { + return get().view_; + } + + template + typename T::ConstView const& operator*() const { + return get().view_; + } + + template + typename T::View* operator->() { + return &get().view_; + } + + template + typename T::ConstView const* operator->() const { + return &get().view_; + } + + // access the Buffer + Buffer buffer() { return *buffer_; } + ConstBuffer buffer() const { return *buffer_; } + ConstBuffer const_buffer() const { return *buffer_; } + + // Extract the sizes array + SizesArray sizes() const { + SizesArray ret; + portablecollection::constexpr_for<0, members_>([&](auto i) { ret[i] = get().layout_.metadata().size(); }); + return ret; + } + // part of the ROOT read streamer + static void ROOTReadStreamer(PortableHostMultiCollection* newObj, 
Implementation& onfileImpl) { + newObj->~PortableHostMultiCollection(); + // use the global "host" object returned by cms::alpakatools::host() + std::array sizes; + portablecollection::constexpr_for<0, members_>([&sizes, &onfileImpl](auto i) { + sizes[i] = static_cast const&>(onfileImpl).layout_.metadata().size(); + }); + new (newObj) PortableHostMultiCollection(sizes, cms::alpakatools::host()); + portablecollection::constexpr_for<0, members_>([&newObj, &onfileImpl](auto i) { + static_cast&>(newObj->impl_).layout_.ROOTReadStreamer(static_cast const&>(onfileImpl).layout_); + static_cast&>(onfileImpl).layout_.ROOTStreamerCleaner(); + }); + } + +private: + std::optional buffer_; //! + Implementation impl_; // (serialized: this is where the layouts live) +}; + +// Singleton case does not need to be aliased. A special template covers it. + +// This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. +template +using PortableHostCollection2 = ::PortableHostMultiCollection; + +template +using PortableHostCollection3 = ::PortableHostMultiCollection; + +template +using PortableHostCollection4 = ::PortableHostMultiCollection; + +template +using PortableHostCollection5 = ::PortableHostMultiCollection; + #endif // DataFormats_Portable_interface_PortableHostCollection_h diff --git a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h index e207665b757e2..bc6a6e730e238 100644 --- a/DataFormats/Portable/interface/PortableHostCollectionReadRules.h +++ b/DataFormats/Portable/interface/PortableHostCollectionReadRules.h @@ -37,6 +37,35 @@ static void readPortableHostCollection_v1(char *target, TVirtualObject *from_buf Collection::ROOTReadStreamer(newObj, onfile.layout_); } +// read function for PortableHostCollection, called for every event +template +static void readPortableHostMultiCollection_v1(char *target, TVirtualObject *from_buffer) { + // extract 
the actual types + using Collection = T; + using Implementation = typename Collection::Implementation; + + // valid only for PortableHostCollection + //static_assert(std::is_same_v>); + + // proxy for the object being read from file + struct OnFile { + Implementation &impl_; + }; + + // address in memory of the buffer containing the object being read from file + char *address = static_cast(from_buffer->GetObject()); + // offset of the "layout_" data member + static ptrdiff_t impl_offset = from_buffer->GetClass()->GetDataMemberOffset("impl_"); + // reference to the Layout object being read from file + OnFile onfile = {*(Implementation *)(address + impl_offset)}; + + // pointer to the Collection object being constructed in memory + Collection *newObj = (Collection *)target; + + // move the data from the on-file layout to the newly constructed object + Collection::ROOTReadStreamer(newObj, onfile.impl_); +} + // put set_PortableHostCollection_read_rules in the ROOT namespace to let it forward declare GenerateInitInstance namespace ROOT { @@ -67,10 +96,42 @@ namespace ROOT { return true; } + + // set the read rules for PortableHostMultiCollection; + // this is called only once, when the dictionary is loaded. 
+ template + static bool set_PortableHostMultiCollection_read_rules(std::string const &type) { + // forward declaration + TGenericClassInfo *GenerateInitInstance(T const *); + + // build the read rules + std::vector readrules(1); + ROOT::Internal::TSchemaHelper &rule = readrules[0]; + rule.fTarget = "buffer_,impl_"; + rule.fSourceClass = type; + rule.fSource = type + "::Implementation impl_;"; + rule.fCode = type + "::ROOTReadStreamer(newObj, onfile.impl_)"; + rule.fVersion = "[1-]"; + rule.fChecksum = ""; + rule.fInclude = ""; + rule.fEmbed = false; + rule.fFunctionPtr = reinterpret_cast(::readPortableHostMultiCollection_v1); + rule.fAttributes = ""; + + // set the read rules + TGenericClassInfo *instance = GenerateInitInstance((T const *)nullptr); + instance->SetReadRules(readrules); + + return true; + } } // namespace ROOT #define SET_PORTABLEHOSTCOLLECTION_READ_RULES(COLLECTION) \ static bool EDM_CONCATENATE(set_PortableHostCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ ROOT::set_PortableHostCollection_read_rules(EDM_STRINGIZE(COLLECTION)) +#define SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(COLLECTION) \ + static bool EDM_CONCATENATE(set_PortableHostMultiCollection_read_rules_done_at_, __LINE__) [[maybe_unused]] = \ + ROOT::set_PortableHostMultiCollection_read_rules(EDM_STRINGIZE(COLLECTION)) + #endif // DataFormats_Portable_interface_PortableHostCollectionReadRules_h diff --git a/DataFormats/Portable/interface/PortableObject.h b/DataFormats/Portable/interface/PortableObject.h index c9aadb160bb05..9df5ce0bcc96f 100644 --- a/DataFormats/Portable/interface/PortableObject.h +++ b/DataFormats/Portable/interface/PortableObject.h @@ -54,4 +54,4 @@ namespace cms::alpakatools { }; } // namespace cms::alpakatools -#endif // DataFormats_Portable_interface_PortableObject_h +#endif // DataFormats_Portable_interface_PortableObject_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/alpaka/PortableCollection.h 
b/DataFormats/Portable/interface/alpaka/PortableCollection.h index 1f9fa22e49cd8..e7bd78e4c0023 100644 --- a/DataFormats/Portable/interface/alpaka/PortableCollection.h +++ b/DataFormats/Portable/interface/alpaka/PortableCollection.h @@ -18,4 +18,56 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // namespace ALPAKA_ACCELERATOR_NAMESPACE -#endif // DataFormats_Portable_interface_alpaka_PortableCollection_h +namespace ALPAKA_ACCELERATOR_NAMESPACE { + +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + // Singleton case does not need to be aliased. A special template covers it. + + // This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. + template + using PortableCollection2 = ::PortableHostMultiCollection; + + template + using PortableCollection3 = ::PortableHostMultiCollection; + + template + using PortableCollection4 = ::PortableHostMultiCollection; + + template + using PortableCollection5 = ::PortableHostMultiCollection; +#else + // Singleton case does not need to be aliased. A special template covers it. + + // This aliasing is needed to work with ROOT serialization. Bare templates make dictionary compilation fail. 
+ template + using PortableCollection2 = ::PortableDeviceMultiCollection; + + template + using PortableCollection3 = ::PortableDeviceMultiCollection; + + template + using PortableCollection4 = ::PortableDeviceMultiCollection; + + template + using PortableCollection5 = ::PortableDeviceMultiCollection; +#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +namespace traits { +// specialise the trait for the device provided by the ALPAKA_ACCELERATOR_NAMESPACE +#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED + template + class PortableMultiCollectionTrait { + using CollectionType = ::PortableHostMultiCollection; + }; +#else + template + class PortableMultiCollectionTrait { + using CollectionType = ::PortableDeviceMultiCollection; + }; +#endif + +} // namespace traits + +#endif // DataFormats_Portable_interface_alpaka_PortableCollection_h \ No newline at end of file diff --git a/DataFormats/Portable/interface/alpaka/PortableObject.h b/DataFormats/Portable/interface/alpaka/PortableObject.h index 417173176b203..05a5e2d7b64cd 100644 --- a/DataFormats/Portable/interface/alpaka/PortableObject.h +++ b/DataFormats/Portable/interface/alpaka/PortableObject.h @@ -18,4 +18,4 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // namespace ALPAKA_ACCELERATOR_NAMESPACE -#endif // DataFormats_Portable_interface_alpaka_PortableObject_h +#endif // DataFormats_Portable_interface_alpaka_PortableObject_h \ No newline at end of file diff --git a/DataFormats/Portable/scripts/portableDeviceCollectionHints b/DataFormats/Portable/scripts/portableDeviceCollectionHints new file mode 100755 index 0000000000000..56bc9e90350e0 --- /dev/null +++ b/DataFormats/Portable/scripts/portableDeviceCollectionHints @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import sys + +# Get collectionand SoAs names +if len(sys.argv) != 2: + raise RuntimeError("Expecting one portable collection name.") + +collectionName = sys.argv[1] + +print("In /src/alpaka/classes_cuda_def.xml (with 
necessary includes in /src/aplaka/classes_cuda.h):\n") +print("") +print(" ") +print(" " % collectionName) +print(" \" persistent=\"false\"/>" % collectionName) +print(" >\" persistent=\"false\"/>" % collectionName) +print("\n") + +print("In /src/alpaka/classes_rocm_def.xml (with necessary includes in /src/aplaka/classes_rocm.h):\n") +print("") +print(" ") +print(" " % collectionName) +print(" \" persistent=\"false\"/>" % collectionName) +print(" >\" persistent=\"false\"/>" % collectionName) +print("\n") \ No newline at end of file diff --git a/DataFormats/Portable/scripts/portableHostCollectionHints b/DataFormats/Portable/scripts/portableHostCollectionHints new file mode 100755 index 0000000000000..d92e9cb2f132e --- /dev/null +++ b/DataFormats/Portable/scripts/portableHostCollectionHints @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 + +import sys + +# Get collectionand SoAs names +if len(sys.argv) < 3: + raise RuntimeError("Expecting at least one portable collection name and one layout name.") + +collectionName = sys.argv[1] +layouts = sys.argv[2:] +for i in range(len(layouts)): + layouts[i] += "<128, false>" + +print("In /src/classes_def.xml (with necessary includes in /src/classes.h):\n") +print("") +for l in layouts: + print(" "% l) +print() +if len(layouts) > 1: + print(" ") + for i in range(0, len(layouts)): + print(" \"/>") + print("\n ") + for i in range(0, len(layouts)): + print(" \"/>" % (i, layouts[i])) + print("") +print(" ") +print(" "% collectionName) +print(" \" splitLevel=\"0\"/>"% collectionName) +print("") diff --git a/DataFormats/PortableTestObjects/interface/TestHostCollection.h b/DataFormats/PortableTestObjects/interface/TestHostCollection.h index f7f4ffd64b7d8..65150341ad1d2 100644 --- a/DataFormats/PortableTestObjects/interface/TestHostCollection.h +++ b/DataFormats/PortableTestObjects/interface/TestHostCollection.h @@ -9,6 +9,10 @@ namespace portabletest { // SoA with x, y, z, id fields in host memory using TestHostCollection = 
PortableHostCollection; + using TestHostMultiCollection2 = PortableHostCollection2; + + using TestHostMultiCollection3 = PortableHostCollection3; + } // namespace portabletest #endif // DataFormats_PortableTestObjects_interface_TestHostCollection_h diff --git a/DataFormats/PortableTestObjects/interface/TestSoA.h b/DataFormats/PortableTestObjects/interface/TestSoA.h index 0fd5c6d956269..acb4f6ed83308 100644 --- a/DataFormats/PortableTestObjects/interface/TestSoA.h +++ b/DataFormats/PortableTestObjects/interface/TestSoA.h @@ -34,6 +34,34 @@ namespace portabletest { using TestSoA = TestSoALayout<>; + GENERATE_SOA_LAYOUT(TestSoALayout2, + // columns: one value per element + SOA_COLUMN(double, x2), + SOA_COLUMN(double, y2), + SOA_COLUMN(double, z2), + SOA_COLUMN(int32_t, id2), + // scalars: one value for the whole structure + SOA_SCALAR(double, r2), + // Eigen columns + // the typedef is needed because commas confuse macros + SOA_EIGEN_COLUMN(Matrix, m2)) + + using TestSoA2 = TestSoALayout2<>; + + GENERATE_SOA_LAYOUT(TestSoALayout3, + // columns: one value per element + SOA_COLUMN(double, x3), + SOA_COLUMN(double, y3), + SOA_COLUMN(double, z3), + SOA_COLUMN(int32_t, id3), + // scalars: one value for the whole structure + SOA_SCALAR(double, r3), + // Eigen columns + // the typedef is needed because commas confuse macros + SOA_EIGEN_COLUMN(Matrix, m3)) + + using TestSoA3 = TestSoALayout3<>; + } // namespace portabletest #endif // DataFormats_PortableTestObjects_interface_TestSoA_h diff --git a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h index cde180c95b607..1facc29e19fd3 100644 --- a/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h +++ b/DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h @@ -20,6 +20,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // SoA with x, y, z, id fields in device global memory using TestDeviceCollection = 
PortableCollection; + using TestDeviceMultiCollection2 = PortableCollection2; + + using TestDeviceMultiCollection3 = PortableCollection3; + } // namespace portabletest } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml index fc3678362dc71..9e9cc36321fc7 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml @@ -6,4 +6,12 @@ + + + + + + + + diff --git a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml index 65ce1c888572b..8bc27b95ccb63 100644 --- a/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml +++ b/DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml @@ -6,4 +6,12 @@ + + + + + + + + diff --git a/DataFormats/PortableTestObjects/src/classes.cc b/DataFormats/PortableTestObjects/src/classes.cc index e11de1a3d7d36..88e2dea528d49 100644 --- a/DataFormats/PortableTestObjects/src/classes.cc +++ b/DataFormats/PortableTestObjects/src/classes.cc @@ -4,4 +4,6 @@ #include "DataFormats/PortableTestObjects/interface/TestHostObject.h" SET_PORTABLEHOSTCOLLECTION_READ_RULES(portabletest::TestHostCollection); +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection2); +SET_PORTABLEHOSTMULTICOLLECTION_READ_RULES(portabletest::TestHostMultiCollection3); SET_PORTABLEHOSTOBJECT_READ_RULES(portabletest::TestHostObject); diff --git a/DataFormats/PortableTestObjects/src/classes_def.xml b/DataFormats/PortableTestObjects/src/classes_def.xml index 54e9bc24249c2..b1162f5440329 100644 --- a/DataFormats/PortableTestObjects/src/classes_def.xml +++ b/DataFormats/PortableTestObjects/src/classes_def.xml @@ -7,4 +7,45 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc index e1834ff95a31f..cbeae5e4fef81 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/TestAlpakaAnalyzer.cc @@ -78,6 +78,50 @@ namespace { assert(view.metadata().addressOf_m() == &view[0].m().coeffRef(0, 0)); } + template + void checkViewAddresses2(T const& view) { + assert(view.metadata().addressOf_x2() == view.x2()); + assert(view.metadata().addressOf_x2() == &view.x2(0)); + assert(view.metadata().addressOf_x2() == &view[0].x2()); + assert(view.metadata().addressOf_y2() == view.y2()); + assert(view.metadata().addressOf_y2() == &view.y2(0)); + assert(view.metadata().addressOf_y2() == &view[0].y2()); + assert(view.metadata().addressOf_z2() == view.z2()); + assert(view.metadata().addressOf_z2() == &view.z2(0)); + assert(view.metadata().addressOf_z2() == &view[0].z2()); + assert(view.metadata().addressOf_id2() == view.id2()); + assert(view.metadata().addressOf_id2() == &view.id2(0)); + assert(view.metadata().addressOf_id2() == &view[0].id2()); + assert(view.metadata().addressOf_m2() == view.m2()); + assert(view.metadata().addressOf_m2() == &view.m2(0).coeffRef(0, 0)); + assert(view.metadata().addressOf_m2() == &view[0].m2().coeffRef(0, 0)); + assert(view.metadata().addressOf_r2() == &view.r2()); + //assert(view.metadata().addressOf_r2() == &view.r2(0)); // cannot access a scalar with an index + //assert(view.metadata().addressOf_r2() == &view[0].r2()); // cannot access a scalar via a SoA row-like accessor + } + + template + void checkViewAddresses3(T const& view) { + assert(view.metadata().addressOf_x3() == view.x3()); + assert(view.metadata().addressOf_x3() == &view.x3(0)); + assert(view.metadata().addressOf_x3() == &view[0].x3()); + assert(view.metadata().addressOf_y3() == view.y3()); + assert(view.metadata().addressOf_y3() == &view.y3(0)); + assert(view.metadata().addressOf_y3() == 
&view[0].y3()); + assert(view.metadata().addressOf_z3() == view.z3()); + assert(view.metadata().addressOf_z3() == &view.z3(0)); + assert(view.metadata().addressOf_z3() == &view[0].z3()); + assert(view.metadata().addressOf_id3() == view.id3()); + assert(view.metadata().addressOf_id3() == &view.id3(0)); + assert(view.metadata().addressOf_id3() == &view[0].id3()); + assert(view.metadata().addressOf_m3() == view.m3()); + assert(view.metadata().addressOf_m3() == &view.m3(0).coeffRef(0, 0)); + assert(view.metadata().addressOf_m3() == &view[0].m3().coeffRef(0, 0)); + assert(view.metadata().addressOf_r3() == &view.r3()); + //assert(view.metadata().addressOf_r3() == &view.r3(0)); // cannot access a scalar with an index + //assert(view.metadata().addressOf_r3() == &view[0].r3()); // cannot access a scalar via a SoA row-like accessor + } + } // namespace class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { @@ -85,6 +129,9 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { TestAlpakaAnalyzer(edm::ParameterSet const& config) : source_{config.getParameter("source")}, token_{consumes(source_)}, + //tokenMulti_{consumes(source_)}, + tokenMulti2_{consumes(source_)}, + tokenMulti3_{consumes(source_)}, expectSize_{config.getParameter("expectSize")}, expectXvalues_{config.getParameter>("expectXvalues")} { if (std::string const& eb = config.getParameter("expectBackend"); not eb.empty()) { @@ -165,6 +212,117 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { << ", got " << cms::alpakatools::toString(backend); } } + + // portabletest::TestHostMultiCollection const& productMulti = event.get(tokenMulti_); + // auto const& viewMulti0 = productMulti.const_view<0>(); + // auto& mviewMulti0 = productMulti.view<0>(); + // auto const& cmviewMulti0 = productMulti.view<0>(); + // auto const& viewMulti1 = productMulti.const_view<1>(); + // auto& mviewMulti1 = productMulti.view<1>(); + // auto const& cmviewMulti1 = productMulti.view<1>(); + + 
portabletest::TestHostMultiCollection2 const& productMulti2 = event.get(tokenMulti2_); + auto const& viewMulti2_0 = productMulti2.const_view<0>(); + auto& mviewMulti2_0 = productMulti2.view<0>(); + auto const& cmviewMulti2_0 = productMulti2.view<0>(); + auto const& viewMulti2_1 = productMulti2.const_view<1>(); + auto& mviewMulti2_1 = productMulti2.view<1>(); + auto const& cmviewMulti2_1 = productMulti2.view<1>(); + + checkViewAddresses(viewMulti2_0); + checkViewAddresses(mviewMulti2_0); + checkViewAddresses(cmviewMulti2_0); + checkViewAddresses2(viewMulti2_1); + checkViewAddresses2(mviewMulti2_1); + checkViewAddresses2(cmviewMulti2_1); + + assert(viewMulti2_0.r() == 1.); + for (int32_t i = 0; i < viewMulti2_0.metadata().size(); ++i) { + auto vi = viewMulti2_0[i]; + // std::stringstream s; + // s << "i=" << i << " x=" << vi.x() << " y=" << vi.y() << " z=" << vi.z() << " id=" << vi.id() << "'\nm=" << vi.m(); + // std::cout << s.str() << std::endl; + if (not expectXvalues_.empty() and vi.x() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x(); + } + //assert(vi.x() == 0.); + assert(vi.y() == 0.); + assert(vi.z() == 0.); + assert(vi.id() == i); + assert(vi.m() == matrix * i); + } + assert(viewMulti2_1.r2() == 2.); + for (int32_t i = 0; i < viewMulti2_1.metadata().size(); ++i) { + auto vi = viewMulti2_1[i]; + if (not expectXvalues_.empty() and vi.x2() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x2(); + } + assert(vi.y2() == 0.); + assert(vi.z2() == 0.); + assert(vi.id2() == i); + assert(vi.m2() == matrix * i); + } + + portabletest::TestHostMultiCollection3 const& productMulti3 = event.get(tokenMulti3_); + auto const& viewMulti3_0 = productMulti3.const_view<0>(); + auto& mviewMulti3_0 = 
productMulti3.view<0>(); + auto const& cmviewMulti3_0 = productMulti3.view<0>(); + auto const& viewMulti3_1 = productMulti3.const_view<1>(); + auto& mviewMulti3_1 = productMulti3.view<1>(); + auto const& cmviewMulti3_1 = productMulti3.view<1>(); + auto const& viewMulti3_2 = productMulti3.const_view<2>(); + auto& mviewMulti3_2 = productMulti3.view<2>(); + auto const& cmviewMulti3_2 = productMulti3.view<2>(); + + checkViewAddresses(viewMulti3_0); + checkViewAddresses(mviewMulti3_0); + checkViewAddresses(cmviewMulti3_0); + checkViewAddresses2(viewMulti3_1); + checkViewAddresses2(mviewMulti3_1); + checkViewAddresses2(cmviewMulti3_1); + checkViewAddresses3(viewMulti3_2); + checkViewAddresses3(mviewMulti3_2); + checkViewAddresses3(cmviewMulti3_2); + + assert(viewMulti3_0.r() == 1.); + for (int32_t i = 0; i < viewMulti3_0.metadata().size(); ++i) { + auto vi = viewMulti3_0[i]; + if (not expectXvalues_.empty() and vi.x() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x(); + } + assert(vi.y() == 0.); + assert(vi.z() == 0.); + assert(vi.id() == i); + assert(vi.m() == matrix * i); + } + assert(viewMulti3_1.r2() == 2.); + for (int32_t i = 0; i < viewMulti3_1.metadata().size(); ++i) { + auto vi = viewMulti3_1[i]; + if (not expectXvalues_.empty() and vi.x2() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x2(); + } + assert(vi.y2() == 0.); + assert(vi.z2() == 0.); + assert(vi.id2() == i); + assert(vi.m2() == matrix * i); + } + + assert(viewMulti3_2.r3() == 3.); + for (int32_t i = 0; i < viewMulti3_2.metadata().size(); ++i) { + auto vi = viewMulti3_2[i]; + if (not expectXvalues_.empty() and vi.x3() != expectXvalues_[i % expectXvalues_.size()]) { + throw cms::Exception("Assert") << "Index " << i << " expected 
value " + << expectXvalues_[i % expectXvalues_.size()] << ", got " << vi.x3(); + } + assert(vi.y3() == 0.); + assert(vi.z3() == 0.); + assert(vi.id3() == i); + assert(vi.m3() == matrix * i); + } } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -189,6 +347,9 @@ class TestAlpakaAnalyzer : public edm::global::EDAnalyzer<> { const edm::EDGetTokenT token_; edm::EDGetTokenT backendToken_; std::optional expectBackend_; + //const edm::EDGetTokenT tokenMulti_; + const edm::EDGetTokenT tokenMulti2_; + const edm::EDGetTokenT tokenMulti3_; const int expectSize_; const std::vector expectXvalues_; }; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc index e574da64ef84e..c56ecc3cf1234 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc @@ -19,10 +19,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoKernel { public: template >> - ALPAKA_FN_ACC void operator()(TAcc const& acc, - portabletest::TestDeviceCollection::View view, - int32_t size, - double xvalue) const { + ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, double xvalue) const { + // global index of the thread within the grid const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; const portabletest::Array flags = {{6, 4, 2, 0}}; @@ -32,12 +30,56 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } // make a strided loop over the kernel grid, covering up to "size" elements - for (int32_t i : elements_with_stride(acc, size)) { + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { view[i] = {xvalue, 0., 0., i, flags, matrix * i}; } } }; + class TestAlgoMultiKernel2 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestDeviceMultiCollection2::View<1> view, + double xvalue) 
const { + // global index of the thread within the grid + const int32_t thread = alpaka::getIdx(acc)[0u]; + const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; + + // set this only once in the whole kernel grid + if (thread == 0) { + view.r2() = 2.; + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { + view[i] = {xvalue, 0., 0., i, matrix * i}; + } + } + }; + + class TestAlgoMultiKernel3 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestDeviceMultiCollection3::View<2> view, + double xvalue) const { + // global index of the thread within the grid + const int32_t thread = alpaka::getIdx(acc)[0u]; + const portabletest::Matrix matrix{{1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}, {3, 6, 9, 12, 15, 18}}; + + // set this only once in the whole kernel grid + if (thread == 0) { + view.r3() = 3.; + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, view.metadata().size())) { + view[i] = {xvalue, 0., 0., i, matrix * i}; + } + } + }; + void TestAlgo::fill(Queue& queue, portabletest::TestDeviceCollection& collection, double xvalue) const { // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) uint32_t items = 64; @@ -50,7 +92,25 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // - elements within a single thread on a CPU backend auto workDiv = make_workdiv(groups, items); - alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), collection->metadata().size(), xvalue); + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + } + + void TestAlgo::fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue) const { + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items 
= 64; + + // use as many groups as needed to cover the whole problem + uint32_t groups = divide_up_by(collection->metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view<1>().metadata().size(), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + auto workDiv2 = make_workdiv(groups2, items); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); } class TestAlgoStructKernel { @@ -80,6 +140,27 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { alpaka::exec(queue, workDiv, TestAlgoStructKernel{}, object.data(), x, y, z, id); } + void TestAlgo::fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue) const { + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + uint32_t groups = divide_up_by(collection.view().metadata().size(), items); + uint32_t groups2 = divide_up_by(collection.view().metadata().size(), items); + uint32_t groups3 = divide_up_by(collection.view().metadata().size(), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + auto workDiv2 = make_workdiv(groups2, items); + auto workDiv3 = make_workdiv(groups3, items); + + alpaka::exec(queue, workDiv, TestAlgoKernel{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv2, TestAlgoMultiKernel2{}, collection.view(), xvalue); + alpaka::exec(queue, workDiv3, TestAlgoMultiKernel3{}, collection.view(), xvalue); + } + class TestAlgoKernelUpdate { public: template >> @@ -103,6 +184,84 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { } }; + class 
TestAlgoKernelUpdateMulti2 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestSoA::ConstView input, + portabletest::TestSoA2::ConstView input2, + AlpakaESTestDataEDevice::ConstView esData, + portabletest::TestSoA::View output, + portabletest::TestSoA2::View output2) const { + // set this only once in the whole kernel grid + if (once_per_grid(acc)) { + output.r() = input.r(); + output2.r2() = input2.r2(); + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, output.metadata().size())) { + double x = input[i].x(); + if (i < esData.size()) { + x += esData.val(i) + esData.val2(i); + } + output[i] = {x, input[i].y(), input[i].z(), input[i].id(), input[i].flags(), input[i].m()}; + } + for (int32_t i : elements_with_stride(acc, output2.metadata().size())) { + double x2 = input2[i].x2(); + if (i < esData.size()) { + x2 += esData.val(i) + esData.val2(i); + } + output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()}; + } + } + }; + + class TestAlgoKernelUpdateMulti3 { + public: + template >> + ALPAKA_FN_ACC void operator()(TAcc const& acc, + portabletest::TestSoA::ConstView input, + portabletest::TestSoA2::ConstView input2, + portabletest::TestSoA3::ConstView input3, + AlpakaESTestDataEDevice::ConstView esData, + portabletest::TestSoA::View output, + portabletest::TestSoA2::View output2, + portabletest::TestSoA3::View output3) const { + // set this only once in the whole kernel grid + if (once_per_grid(acc)) { + output.r() = input.r(); + output2.r2() = input2.r2(); + output3.r3() = input3.r3(); + } + + // make a strided loop over the kernel grid, covering up to "size" elements + for (int32_t i : elements_with_stride(acc, output.metadata().size())) { + double x = input[i].x(); + if (i < esData.size()) { + x += esData.val(i) + esData.val2(i); + if (0 == i) + printf("Setting x[0] to %f\n", x); + } + output[i] = {x, input[i].y(), 
input[i].z(), input[i].id(), input[i].flags(), input[i].m()}; + } + for (int32_t i : elements_with_stride(acc, output2.metadata().size())) { + double x2 = input2[i].x2(); + if (i < esData.size()) { + x2 += esData.val(i) + esData.val2(i); + } + output2[i] = {x2, input2[i].y2(), input2[i].z2(), input2[i].id2(), input2[i].m2()}; + } + for (int32_t i : elements_with_stride(acc, output3.metadata().size())) { + double x3 = input3[i].x3(); + if (i < esData.size()) { + x3 += esData.val(i) + esData.val2(i); + } + output3[i] = {x3, input3[i].y3(), input3[i].z3(), input3[i].id3(), input3[i].m3()}; + } + } + }; + portabletest::TestDeviceCollection TestAlgo::update(Queue& queue, portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const { @@ -124,4 +283,64 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return collection; } + portabletest::TestDeviceMultiCollection2 TestAlgo::updateMulti2(Queue& queue, + portabletest::TestDeviceMultiCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const { + portabletest::TestDeviceMultiCollection2 collection{input.sizes(), queue}; + + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + auto sizes = collection.sizes(); + uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + + alpaka::exec(queue, + workDiv, + TestAlgoKernelUpdateMulti2{}, + input.view(), + input.view(), + esData.view(), + collection.view(), + collection.view()); + + return collection; + } + + portabletest::TestDeviceMultiCollection3 TestAlgo::updateMulti3(Queue& queue, + portabletest::TestDeviceMultiCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const { + 
portabletest::TestDeviceMultiCollection3 collection{input.sizes(), queue}; + + // use 64 items per group (this value is arbitrary, but it's a reasonable starting point) + uint32_t items = 64; + + // use as many groups as needed to cover the whole problem + auto sizes = collection.sizes(); + uint32_t groups = divide_up_by(*std::max_element(sizes.begin(), sizes.end()), items); + + // map items to + // - threads with a single element per thread on a GPU backend + // - elements within a single thread on a CPU backend + auto workDiv = make_workdiv(groups, items); + + alpaka::exec(queue, + workDiv, + TestAlgoKernelUpdateMulti3{}, + input.view(), + input.view(), + input.view(), + esData.view(), + collection.view(), + collection.view(), + collection.view()); + + return collection; + } + } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h index e9eca3f364b54..9d620984ed186 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.h @@ -17,6 +17,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceCollection update(Queue& queue, portabletest::TestDeviceCollection const& input, AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceMultiCollection2 updateMulti2(Queue& queue, + portabletest::TestDeviceMultiCollection2 const& input, + AlpakaESTestDataEDevice const& esData) const; + portabletest::TestDeviceMultiCollection3 updateMulti3(Queue& queue, + portabletest::TestDeviceMultiCollection3 const& input, + AlpakaESTestDataEDevice const& esData) const; + + void fillMulti2(Queue& queue, portabletest::TestDeviceMultiCollection2& collection, double xvalue = 0.) const; + void fillMulti3(Queue& queue, portabletest::TestDeviceMultiCollection3& collection, double xvalue = 0.) 
const; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc index 78054eb48827e..499ce4b522e5f 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducer.cc @@ -23,18 +23,30 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaGlobalProducer(edm::ParameterSet const& config) : esToken_(esConsumes(config.getParameter("eventSetupSource"))), deviceToken_{produces()}, + deviceTokenMulti2_{produces()}, + deviceTokenMulti3_{produces()}, size_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size2_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size3_{config.getParameter("size").getParameter( EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))} {} void produce(edm::StreamID, device::Event& iEvent, device::EventSetup const& iSetup) const override { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); portabletest::TestDeviceCollection deviceProduct{size_, iEvent.queue()}; + portabletest::TestDeviceMultiCollection2 deviceProductMulti2{{{size_, size2_}}, iEvent.queue()}; + portabletest::TestDeviceMultiCollection3 deviceProductMulti3{{{size_, size2_, size3_}}, iEvent.queue()}; // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), deviceProduct); + algo_.fillMulti2(iEvent.queue(), deviceProductMulti2); + algo_.fillMulti3(iEvent.queue(), deviceProductMulti3); iEvent.emplace(deviceToken_, std::move(deviceProduct)); + iEvent.emplace(deviceTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.emplace(deviceTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -53,7 +65,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const 
device::ESGetToken esToken_; const device::EDPutToken deviceToken_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc index 95d1423fdf2bc..253b8dcad8988 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaGlobalProducerE.cc @@ -24,16 +24,26 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaGlobalProducerE(edm::ParameterSet const& config) : esToken_(esConsumes(config.getParameter("eventSetupSource"))), getToken_(consumes(config.getParameter("source"))), - putToken_{produces()} {} + getTokenMulti2_(consumes(config.getParameter("source"))), + getTokenMulti3_(consumes(config.getParameter("source"))), + putToken_{produces()}, + putTokenMulti2_{produces()}, + putTokenMulti3_{produces()} {} void produce(edm::StreamID, device::Event& iEvent, device::EventSetup const& iSetup) const override { auto const& esData = iSetup.getData(esToken_); auto const& input = iEvent.get(getToken_); + auto const& inputMulti2 = iEvent.get(getTokenMulti2_); + auto const& inputMulti3 = iEvent.get(getTokenMulti3_); // run the algorithm, potentially asynchronously auto deviceProduct = algo_.update(iEvent.queue(), input, esData); + auto deviceProductMulti2 = algo_.updateMulti2(iEvent.queue(), inputMulti2, esData); + auto deviceProductMulti3 = algo_.updateMulti3(iEvent.queue(), inputMulti3, esData); iEvent.emplace(putToken_, std::move(deviceProduct)); + iEvent.emplace(putTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.emplace(putTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -47,7 +57,11 @@ 
namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esToken_; const device::EDGetToken getToken_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const device::EDPutToken putToken_; + const device::EDPutToken putTokenMulti2_; + const device::EDPutToken putTokenMulti3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc index d65850985d8fd..3d4e4692a6961 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaProducer.cc @@ -19,7 +19,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlpakaProducer : public global::EDProducer<> { public: TestAlpakaProducer(edm::ParameterSet const& config) - : objectToken_{produces()}, collectionToken_{produces()}, size_{config.getParameter("size")} {} + : objectToken_{produces()}, + collectionToken_{produces()}, + deviceTokenMulti2_{produces()}, + deviceTokenMulti3_{produces()}, + size_{config.getParameter("size")}, + size2_{config.getParameter("size2")}, + size3_{config.getParameter("size3")} {} void produce(edm::StreamID sid, device::Event& event, device::EventSetup const&) const override { // run the algorithm, potentially asynchronously @@ -29,21 +35,38 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { portabletest::TestDeviceObject deviceObject{event.queue()}; algo_.fillObject(event.queue(), deviceObject, 5., 12., 13., 42); + portabletest::TestDeviceCollection deviceProduct{size_, event.queue()}; + algo_.fill(event.queue(), deviceProduct); + + portabletest::TestDeviceMultiCollection2 deviceMultiProduct2{{{size_, size2_}}, event.queue()}; + algo_.fillMulti2(event.queue(), deviceMultiProduct2); + + portabletest::TestDeviceMultiCollection3 deviceMultiProduct3{{{size_, size2_, size3_}}, event.queue()}; + algo_.fillMulti3(event.queue(), 
deviceMultiProduct3); + // put the asynchronous products into the event without waiting - event.emplace(objectToken_, std::move(deviceObject)); event.emplace(collectionToken_, std::move(deviceCollection)); + event.emplace(objectToken_, std::move(deviceObject)); + event.emplace(deviceTokenMulti2_, std::move(deviceMultiProduct2)); + event.emplace(deviceTokenMulti3_, std::move(deviceMultiProduct3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; desc.add("size"); + desc.add("size2"); + desc.add("size3"); descriptions.addWithDefaultLabel(desc); } private: const device::EDPutToken objectToken_; const device::EDPutToken collectionToken_; + const device::EDPutToken deviceTokenMulti2_; + const device::EDPutToken deviceTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc index 8eee00da8e774..74cd08e39f56a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamProducer.cc @@ -25,10 +25,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { public: TestAlpakaStreamProducer(edm::ParameterSet const& config) : size_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size2_{config.getParameter("size").getParameter( + EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))}, + size3_{config.getParameter("size").getParameter( EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE))} { getToken_ = consumes(config.getParameter("source")); esToken_ = esConsumes(config.getParameter("eventSetupSource")); devicePutToken_ = produces(config.getParameter("productInstanceName")); + devicePutTokenMulti2_ = produces(config.getParameter("productInstanceName")); + devicePutTokenMulti3_ = 
produces(config.getParameter("productInstanceName")); } void produce(device::Event& iEvent, device::EventSetup const& iSetup) override { @@ -36,11 +42,19 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto const& esData = iSetup.getData(esToken_); auto deviceProduct = std::make_unique(size_, iEvent.queue()); + auto deviceProductMulti2 = std::make_unique( + portabletest::TestDeviceMultiCollection2::SizesArray{{size_, size2_}}, iEvent.queue()); + auto deviceProductMulti3 = std::make_unique( + portabletest::TestDeviceMultiCollection3::SizesArray{{size_, size2_, size3_}}, iEvent.queue()); // run the algorithm, potentially asynchronously algo_.fill(iEvent.queue(), *deviceProduct); + algo_.fillMulti2(iEvent.queue(), *deviceProductMulti2); + algo_.fillMulti3(iEvent.queue(), *deviceProductMulti3); iEvent.put(devicePutToken_, std::move(deviceProduct)); + iEvent.put(devicePutTokenMulti2_, std::move(deviceProductMulti2)); + iEvent.put(devicePutTokenMulti3_, std::move(deviceProductMulti3)); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -62,7 +76,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { edm::EDGetTokenT getToken_; device::ESGetToken, AlpakaESTestRecordB> esToken_; device::EDPutToken devicePutToken_; + device::EDPutToken devicePutTokenMulti2_; + device::EDPutToken devicePutTokenMulti3_; const int32_t size_; + const int32_t size2_; + const int32_t size3_; // implementation of the algorithm TestAlgo algo_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc index 5c53e5aa9de16..613c31498746a 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlpakaStreamSynchronizingProducer.cc @@ -27,6 +27,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestAlpakaStreamSynchronizingProducer(edm::ParameterSet 
const& iConfig) : esTokenDevice_(esConsumes()), putToken_{produces()}, + putTokenMulti2_{produces()}, + putTokenMulti3_{produces()}, helper_{iConfig, consumesCollector()}, hostHelper_{iConfig, consumesCollector()}, expectedInt_{iConfig.getParameter("expectedInt")} {} @@ -44,6 +46,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void produce(device::Event& iEvent, device::EventSetup const& iSetup) override { iEvent.emplace(putToken_, helper_.moveFrom()); + iEvent.emplace(putTokenMulti2_, helper_.moveFromMulti2()); + iEvent.emplace(putTokenMulti3_, helper_.moveFromMulti3()); } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { @@ -57,6 +61,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { private: const device::ESGetToken esTokenDevice_; const edm::EDPutTokenT putToken_; + const edm::EDPutTokenT putTokenMulti2_; + const edm::EDPutTokenT putTokenMulti3_; TestHelperClass helper_; cms::alpakatest::TestHostOnlyHelperClass const hostHelper_; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc index a6c9a7370d717..aa4c26e4c93bd 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.cc @@ -5,6 +5,8 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { TestHelperClass::TestHelperClass(edm::ParameterSet const& iConfig, edm::ConsumesCollector iC) : getToken_(iC.consumes(iConfig.getParameter("source"))), + getTokenMulti2_(iC.consumes(iConfig.getParameter("source"))), + getTokenMulti3_(iC.consumes(iConfig.getParameter("source"))), esTokenHost_(iC.esConsumes()), esTokenDevice_(iC.esConsumes()) {} @@ -14,9 +16,15 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { [[maybe_unused]] auto esDataHostHandle = iSetup.getHandle(esTokenHost_); [[maybe_unused]] auto const& esDataDevice = iSetup.getData(esTokenDevice_); portabletest::TestDeviceCollection const& deviceProduct = iEvent.get(getToken_); + 
portabletest::TestDeviceMultiCollection2 const& deviceProductMulti2 = iEvent.get(getTokenMulti2_); + portabletest::TestDeviceMultiCollection3 const& deviceProductMulti3 = iEvent.get(getTokenMulti3_); hostProduct_ = portabletest::TestHostCollection{deviceProduct->metadata().size(), iEvent.queue()}; + hostProductMulti2_ = portabletest::TestHostMultiCollection2{deviceProductMulti2.sizes(), iEvent.queue()}; + hostProductMulti3_ = portabletest::TestHostMultiCollection3{deviceProductMulti3.sizes(), iEvent.queue()}; alpaka::memcpy(iEvent.queue(), hostProduct_.buffer(), deviceProduct.const_buffer()); + alpaka::memcpy(iEvent.queue(), hostProductMulti2_.buffer(), deviceProductMulti2.const_buffer()); + alpaka::memcpy(iEvent.queue(), hostProductMulti3_.buffer(), deviceProductMulti3.const_buffer()); } } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h index bc10779d9229d..d592fb40b4210 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestHelperClass.h @@ -24,14 +24,20 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { void makeAsync(device::Event const& iEvent, device::EventSetup const& iSetup); portabletest::TestHostCollection moveFrom() { return std::move(hostProduct_); } + portabletest::TestHostMultiCollection2 moveFromMulti2() { return std::move(hostProductMulti2_); } + portabletest::TestHostMultiCollection3 moveFromMulti3() { return std::move(hostProductMulti3_); } private: const device::EDGetToken getToken_; + const device::EDGetToken getTokenMulti2_; + const device::EDGetToken getTokenMulti3_; const edm::ESGetToken esTokenHost_; const device::ESGetToken esTokenDevice_; // hold the output product between acquire() and produce() portabletest::TestHostCollection hostProduct_; + portabletest::TestHostMultiCollection2 hostProductMulti2_; + portabletest::TestHostMultiCollection3 
hostProductMulti3_; }; } // namespace ALPAKA_ACCELERATOR_NAMESPACE diff --git a/HeterogeneousCore/AlpakaTest/test/writer.py b/HeterogeneousCore/AlpakaTest/test/writer.py index d23ac528629b8..87c0fc37e45e3 100644 --- a/HeterogeneousCore/AlpakaTest/test/writer.py +++ b/HeterogeneousCore/AlpakaTest/test/writer.py @@ -15,6 +15,8 @@ # either run the producer on a gpu (if available) and copy the product to the cpu, or run the producer directly on the cpu process.testProducer = cms.EDProducer('TestAlpakaProducer@alpaka', size = cms.int32(42), + size2 = cms.int32(33), + size3 = cms.int32(61) # alpaka.backend can be set to a specific backend to force using it, or be omitted or left empty to use the defult backend; # depending on the architecture and available hardware, the supported backends are "serial_sync", "cuda_async", "rocm_async" #alpaka = cms.untracked.PSet( @@ -33,7 +35,9 @@ # run a second producer explicitly on the cpu process.testProducerSerial = makeSerialClone(process.testProducer, - size = cms.int32(99) + size = cms.int32(99), + size2 = cms.int32(51), + size3 = cms.int32(43) ) # analyse the second set of products