alpaka-group · bernhardmgruber · Nov 16, 2023 · Nov 16, 2023 · Nov 16, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -166,6 +166,7 @@ if (LLAMA_BUILD_EXAMPLES)
 	add_subdirectory("examples/memmap")
 	add_subdirectory("examples/stream")
 	add_subdirectory("examples/falsesharing")
+	add_subdirectory("examples/comptime")
 
 	# alpaka examples
 	find_package(alpaka 1.0)

diff --git a/examples/comptime/CMakeLists.txt b/examples/comptime/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright 2023 Bernhard Manfred Gruber
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+cmake_minimum_required (VERSION 3.18.3)
+project(llama-comptime CXX)
+
+# User-tunable value that is handed to comptime.cpp as the LLAMA_COMPTIME_RECORD_DIM_SIZE macro.
+set(LLAMA_COMPTIME_RECORD_DIM_SIZE 20 CACHE STRING "comptime example record dimension size")
+
+if (NOT TARGET llama::llama)
+	find_package(llama REQUIRED)
+endif()
+add_executable(${PROJECT_NAME} comptime.cpp)
+target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama)
+# target_compile_definitions() expects plain NAME=VALUE items; a -D prefix is not needed.
+target_compile_definitions(${PROJECT_NAME} PRIVATE LLAMA_COMPTIME_RECORD_DIM_SIZE=${LLAMA_COMPTIME_RECORD_DIM_SIZE})
+
+# Raise the bracket nesting limit of clang-based compilers (presumably hit by large record dimensions).
+if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+	target_compile_options(${PROJECT_NAME} PRIVATE -fbracket-depth=1500)
+endif()
diff --git a/examples/comptime/benchmark.sh b/examples/comptime/benchmark.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# Times the build of the llama-comptime target for increasing record dimension sizes and
+# prints one "<size> <elapsed>" line per step. cmake is invoked on "..", so the script is
+# presumably meant to be started from a build directory one level below the configured source tree.
+
+# the record dim has 1095 entries
+for ((size = 0; size <= 1000; size += 20)); do
+  cmake "-DLLAMA_COMPTIME_RECORD_DIM_SIZE=$size" .. > /dev/null 2>&1
+  # \time bypasses the shell's time keyword so the external command (with -f) runs; it reports on
+  # stderr, which "2>&1" routes into the capture before make's stdout is sent to /dev/null
+  elapsed=$(\time -f "%e" make llama-comptime 2>&1 > /dev/null)
+  echo $size $elapsed
+done
diff --git a/examples/comptime/comptime.cpp b/examples/comptime/comptime.cpp
@@ -0,0 +1,52 @@
+// Copyright 2023 Bernhard Manfred Gruber
+// SPDX-License-Identifier: LGPL-3.0-or-later
+
+// Example that instantiates a LLAMA mapping and view for a record dimension of configurable size.
+// The size is injected by the build system through LLAMA_COMPTIME_RECORD_DIM_SIZE.
+
+#ifndef LLAMA_COMPTIME_RECORD_DIM_SIZE
+#    define LLAMA_COMPTIME_RECORD_DIM_SIZE 20
+#endif
+
+#include "../common/ttjet_13tev_june2019.hpp"
+
+#include <cstddef>
+#include <exception>
+#include <iostream>
+#include <llama/llama.hpp>
+#include <type_traits>
+
+// Record dimension under test: the first LLAMA_COMPTIME_RECORD_DIM_SIZE entries of Event.
+using RecordDim = boost::mp11::mp_take_c<Event, LLAMA_COMPTIME_RECORD_DIM_SIZE>;
+
+/// Allocates a view over RecordDim and assigns a value-initialized object to every leaf field of
+/// every record. Returns 0 on success and 1 if a std::exception was caught.
+auto main() -> int
+try
+{
+    constexpr auto extents = llama::ArrayExtents{1024 * 1024};
+    using ArrayExtents = std::remove_const_t<decltype(extents)>;
+    //    const auto packedAoSMapping = llama::mapping::PackedAoS<ArrayExtents, RecordDim>{extents};
+    const auto alignedAoSMapping = llama::mapping::AlignedAoS<ArrayExtents, RecordDim>{extents};
+    //    const auto multiBlobSoAMapping = llama::mapping::MultiBlobSoA<ArrayExtents, RecordDim>{extents};
+    //    const auto aosoa8Mapping = llama::mapping::AoSoA<ArrayExtents, RecordDim, 8>{extents};
+    //    const auto aosoa32Mapping = llama::mapping::AoSoA<ArrayExtents, RecordDim, 32>{extents};
+    //    const auto aosoa64Mapping = llama::mapping::AoSoA<ArrayExtents, RecordDim, 64>{extents};
+
+    auto view = llama::allocViewUninitialized(alignedAoSMapping);
+    // touch every leaf field so the access code for each of them gets instantiated
+    llama::forEachLeafCoord<RecordDim>(
+        [&](auto coord)
+        {
+            // coord was produced for RecordDim, so the field type is looked up in RecordDim as well
+            using Type = llama::GetType<RecordDim, decltype(coord)>;
+            for(std::size_t i = 0; i < extents[0]; i++)
+                view(i)(coord) = Type{};
+        });
+}
+catch(const std::exception& e)
+{
+    std::cerr << "Exception: " << e.what() << '\n';
+    // flowing off the end of main's function-try-block handler would report exit status 0
+    return 1;
+}
diff --git a/include/llama/Core.hpp b/include/llama/Core.hpp
@@ -603,8 +603,9 @@ namespace llama
 
             // final padding, so next struct can start right away
             if constexpr(Align && IncludeTailPadding)
-                size = roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf<TypeList> here, at
-                                                          // the cost of more template instantiations
+                if(maxAlign > 0)
+                    size = roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf<TypeList>
+                                                              // here, at the cost of more template instantiations
             return size;
         }
 

diff --git a/tests/core.cpp b/tests/core.cpp
@@ -3,6 +3,24 @@
 
 #include "common.hpp"
 
+// Compile-time checks of llama::flatSizeOf<TypeList, Align, IncludeTailPadding>.
+TEST_CASE("flatSizeOf")
+{
+    using CharIntChar = mp_list<char, std::int32_t, char>;
+
+    // unaligned: members are laid out back to back, the tail padding flag makes no difference
+    STATIC_REQUIRE(llama::flatSizeOf<CharIntChar, false, false> == 6);
+    STATIC_REQUIRE(llama::flatSizeOf<CharIntChar, false, true> == 6);
+    // aligned, no tail padding: the int32 starts at offset 4, the last char ends at 9
+    STATIC_REQUIRE(llama::flatSizeOf<CharIntChar, true, false> == 9);
+    // aligned with tail padding: 9 is rounded up to the next multiple of 4
+    STATIC_REQUIRE(llama::flatSizeOf<CharIntChar, true, true> == 12);
+
+    STATIC_REQUIRE(llama::flatSizeOf<mp_list<std::int32_t>, true, true> == 4);
+    // empty type list: size stays 0 even with alignment and tail padding requested
+    STATIC_REQUIRE(llama::flatSizeOf<mp_list<>, true, true> == 0);
+}
+
 TEST_CASE("prettyPrintType")
 {
     auto str = prettyPrintType<Particle>();