Remove the usage of ExampleDefaultAcc in all examples.

Examples are now executed for all enabled accelerators. Also remove the use of `ExampleDefaultAcc` from the test `MapIdxPitchBytes`.
psychocoderHPC · Jun 5, 2024 · af8f62c · af8f62c
1 parent c5ec200
commit af8f62c
Show file tree

Hide file tree

Showing 21 changed files with 453 additions and 171 deletions.
diff --git a/cmake/alpakaCommon.cmake b/cmake/alpakaCommon.cmake
@@ -581,7 +581,7 @@ if(alpaka_ACC_GPU_HIP_ENABLE)
         # https://github.com/llvm/llvm-project/commit/b86e0992bfa6
         # https://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#150
         # for example, is required to create alpaka::EnabledAccTags
-        # TODO(SimeonEhrig): restict HIP version, if first HIP version is release using Clang 19 
+        # TODO(SimeonEhrig): restrict the HIP version once the first HIP version using Clang 19 is released
         alpaka_set_compiler_options(HOST_DEVICE target alpaka "$<$<COMPILE_LANGUAGE:HIP>:SHELL:-frelaxed-template-template-args>")
 
         alpaka_compiler_option(HIP_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps 'CMakeFiles/<targetname>.dir'" OFF)

diff --git a/docs/source/basic/library.rst b/docs/source/basic/library.rst
@@ -124,7 +124,7 @@ Kernels can also be defined via lambda expressions.
 
       int main() {
           // ...
-	  using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+	  using Acc = alpaka::AccGpuCudaRt<Dim, Idx>;
 
 	  auto kernel = [] ALPAKA_FN_ACC (Acc const & acc /* , ... */) -> void {
 	      // ...

diff --git a/example/bufferCopy/src/bufferCopy.cpp b/example/bufferCopy/src/bufferCopy.cpp
@@ -4,7 +4,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 
 #include <cstdint>
 #include <iostream>
@@ -64,7 +64,12 @@ struct FillBufferKernel
     }
 };
 
-auto main() -> int
+// In standard projects, you typically do not execute the code with every available accelerator.
+// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+// selected accelerator only. If you use the example as the starting point for your project, you can rename the
+// example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     // Define the index domain
     using Dim = alpaka::DimInt<3u>;
@@ -81,7 +86,7 @@ auto main() -> int
     // - AccCpuTbbBlocks
     // - AccCpuSerial
     // using Acc = alpaka::AccCpuSerial<Dim, Idx>;
-    using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+    using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
     std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;
     // Defines the synchronization behavior of a queue
     //
@@ -90,12 +95,6 @@ auto main() -> int
     using DevQueue = alpaka::Queue<Acc, AccQueueProperty>;
 
     // Define the device accelerator
-    //
-    // It is possible to choose from a set of accelerators:
-    // - AccCpuThreads
-    // - AccCpuOmp2Threads
-    // - AccCpuOmp2Blocks
-    // - AccCpuSerial
     using Host = alpaka::AccCpuSerial<Dim, Idx>;
     // Defines the synchronization behavior of a queue
     //
@@ -257,3 +256,19 @@ auto main() -> int
 
     return EXIT_SUCCESS;
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}
diff --git a/example/complex/src/complex.cpp b/example/complex/src/complex.cpp
@@ -3,7 +3,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 
 #include <cstdint>
 #include <iostream>
@@ -28,29 +28,17 @@ struct ComplexKernel
     }
 };
 
-auto main() -> int
+// In standard projects, you typically do not execute the code with every available accelerator.
+// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+// selected accelerator only. If you use the example as the starting point for your project, you can rename the
+// example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     using Idx = std::size_t;
 
     // Define the accelerator
-    //
-    // It is possible to choose from a set of accelerators:
-    // - AccGpuCudaRt
-    // - AccGpuHipRt
-    // - AccCpuThreads
-    // - AccCpuOmp2Threads
-    // - AccCpuOmp2Blocks
-    // - AccCpuTbbBlocks
-    // - AccCpuSerial
-    //
-    // Each accelerator has strengths and weaknesses. Therefore,
-    // they need to be choosen carefully depending on the actual
-    // use case. Furthermore, some accelerators only support a
-    // particular workdiv, but workdiv can also be generated
-    // automatically.
-
-    // By exchanging the Acc and Queue types you can select where to execute the kernel.
-    using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Idx>;
+    using Acc = alpaka::TagToAcc<TAccTag, alpaka::DimInt<1>, Idx>;
     std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;
 
     // Defines the synchronization behavior of a queue
@@ -88,3 +76,19 @@ auto main() -> int
 
     return EXIT_SUCCESS;
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}
diff --git a/example/convolution1D/src/convolution1D.cpp b/example/convolution1D/src/convolution1D.cpp
@@ -3,7 +3,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 
 #include <cmath>
 #include <iomanip>
@@ -64,7 +64,12 @@ auto FuzzyEqual(float a, float b) -> bool
     return std::fabs(a - b) < std::numeric_limits<float>::epsilon() * 10.0f;
 }
 
-auto main() -> int
+// In standard projects, you typically do not execute the code with every available accelerator.
+// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+// selected accelerator only. If you use the example as the starting point for your project, you can rename the
+// example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     // Size of 1D arrays to be used in convolution integral
     // Here instead of "convolution kernel" the term "filter" is used because kernel has a different meaning in GPU
@@ -80,7 +85,7 @@ auto main() -> int
     using Idx = std::size_t;
 
     // Define the accelerator
-    using DevAcc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+    using DevAcc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
     using QueueProperty = alpaka::Blocking;
     using QueueAcc = alpaka::Queue<DevAcc, QueueProperty>;
     using BufAcc = alpaka::Buf<DevAcc, DataType, Dim, Idx>;
@@ -176,3 +181,19 @@ auto main() -> int
     std::cout << "All results are correct!\n";
     return EXIT_SUCCESS;
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}
diff --git a/example/convolution2D/src/convolution2D.cpp b/example/convolution2D/src/convolution2D.cpp
@@ -3,7 +3,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 
 #include <iomanip>
 #include <iostream>
@@ -208,15 +208,20 @@ auto FuzzyEqual(float a, float b) -> bool
     return std::fabs(a - b) < std::numeric_limits<float>::epsilon() * 1000.0f;
 }
 
-auto main() -> int
+// In standard projects, you typically do not execute the code with every available accelerator.
+// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+// selected accelerator only. If you use the example as the starting point for your project, you can rename the
+// example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     // Define the index domain
     using Dim = alpaka::DimInt<2>;
     // Index type
     using Idx = std::uint32_t;
     using Vec = alpaka::Vec<Dim, Idx>;
     // Define the accelerator
-    using DevAcc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+    using DevAcc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
     using QueueAcc = alpaka::Queue<DevAcc, alpaka::NonBlocking>;
 
     using DataType = float;
@@ -379,3 +384,19 @@ auto main() -> int
     std::cout << "Sampled result checks are correct!\n";
     return EXIT_SUCCESS;
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}
diff --git a/example/counterBasedRng/src/counterBasedRng.cpp b/example/counterBasedRng/src/counterBasedRng.cpp
@@ -3,7 +3,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 #include <alpaka/rand/RandPhiloxStateless.hpp>
 
 #include <chrono>
@@ -92,25 +92,19 @@ class CounterBasedRngKernel
     }
 };
 
-auto main() -> int
+// In standard projects, you typically do not execute the code with every available accelerator.
+// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+// selected accelerator only. If you use the example as the starting point for your project, you can rename the
+// example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     // Define the index domain
     using Dim = alpaka::DimInt<3u>;
     using Idx = std::size_t;
 
     // Define the accelerator
-    //
-    // It is possible to choose from a set of accelerators:
-    // - AccGpuCudaRt
-    // - AccGpuHipRt
-    // - AccCpuThreads
-    // - AccCpuFibers
-    // - AccCpuOmp2Threads
-    // - AccCpuOmp2Blocks
-    // - AccCpuTbbBlocks
-    // - AccCpuSerial
-    // using Acc = alpaka::AccCpuSerial<Dim, Idx>;
-    using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+    using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
     std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;
 
     using AccHost = alpaka::AccCpuSerial<Dim, Idx>;
@@ -222,3 +216,19 @@ auto main() -> int
         return EXIT_FAILURE;
     }
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}
diff --git a/example/heatEquation/src/heatEquation.cpp b/example/heatEquation/src/heatEquation.cpp
@@ -3,7 +3,7 @@
  */
 
 #include <alpaka/alpaka.hpp>
-#include <alpaka/example/ExampleDefaultAcc.hpp>
+#include <alpaka/example/ExecuteForEachAccTag.hpp>
 
 #include <algorithm>
 #include <cmath>
@@ -62,7 +62,13 @@ auto exactSolution(double const x, double const t) -> double
 //! Every time step the kernel will be executed numNodesX-times
 //! After every step the curr-buffer will be set to the calculated values
 //! from the next-buffer.
-auto main() -> int
+//!
+//! In standard projects, you typically do not execute the code with every available accelerator.
+//! Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
+//! selected accelerator only. If you use the example as the starting point for your project, you can rename the
+//! example() function to main() and move the accelerator tag to the function body.
+template<typename TAccTag>
+auto example(TAccTag const&) -> int
 {
     // Parameters (a user is supposed to change numNodesX, numTimeSteps)
     uint32_t const numNodesX = 1000;
@@ -84,9 +90,8 @@ auto main() -> int
     using Dim = alpaka::DimInt<1u>;
     using Idx = uint32_t;
 
-    // Select accelerator-types for host and device
-    // using Acc = alpaka::AccCpuSerial<Dim, Idx>;
-    using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
+    // Define the accelerator
+    using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
     std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;
 
     // Select specific devices
@@ -179,3 +184,19 @@ auto main() -> int
         return EXIT_FAILURE;
     }
 }
+
+auto main() -> int
+{
+    // Execute the example once for each enabled accelerator.
+    // If you would like to execute it for a single accelerator only you can use the following code.
+    //  \code{.cpp}
+    //  auto tag = alpaka::TagCpuSerial{};
+    //  return example(tag);
+    //  \endcode
+    //
+    // valid tags:
+    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
+    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
+    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
+    return alpaka::executeForEachAccTag([=](auto const& tag) { return example(tag); });
+}