diff --git a/CMakeLists.txt b/CMakeLists.txt index 521d7cc5db..ff46a4de2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -166,6 +166,7 @@ if (LLAMA_BUILD_EXAMPLES) add_subdirectory("examples/memmap") add_subdirectory("examples/stream") add_subdirectory("examples/falsesharing") + add_subdirectory("examples/comptime") # alpaka examples find_package(alpaka 1.0) diff --git a/examples/comptime/CMakeLists.txt b/examples/comptime/CMakeLists.txt new file mode 100644 index 0000000000..20b203584c --- /dev/null +++ b/examples/comptime/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright 2023 Bernhard Manfred Gruber +# SPDX-License-Identifier: LGPL-3.0-or-later + +cmake_minimum_required (VERSION 3.18.3) +project(llama-comptime CXX) + +set(LLAMA_COMPTIME_RECORD_DIM_SIZE 20 CACHE STRING "comptime example record dimension size") + +if (NOT TARGET llama::llama) + find_package(llama REQUIRED) +endif() +add_executable(${PROJECT_NAME} comptime.cpp) +target_link_libraries(${PROJECT_NAME} PRIVATE llama::llama) +target_compile_definitions(${PROJECT_NAME} PRIVATE -DLLAMA_COMPTIME_RECORD_DIM_SIZE=${LLAMA_COMPTIME_RECORD_DIM_SIZE}) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + target_compile_options(${PROJECT_NAME} PRIVATE -fbracket-depth=1500) +endif() diff --git a/examples/comptime/benchmark.sh b/examples/comptime/benchmark.sh new file mode 100755 index 0000000000..aa1f9bb7bd --- /dev/null +++ b/examples/comptime/benchmark.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# the record dim has 1095 entries +for i in {0..1000..20}; do + cmake -DLLAMA_COMPTIME_RECORD_DIM_SIZE=$i .. > /dev/null 2>&1 + s=$(\time -f "%e" make llama-comptime 2>&1 > /dev/null) + echo $i $s +done diff --git a/examples/comptime/comptime.cpp b/examples/comptime/comptime.cpp new file mode 100644 index 0000000000..1158379461 --- /dev/null +++ b/examples/comptime/comptime.cpp @@ -0,0 +1,38 @@ +// Copyright 2023 Bernhard Manfred Gruber +// SPDX-License-Identifier: LGPL-3.0-or-later + +#ifndef LLAMA_COMPTIME_RECORD_DIM_SIZE +# define LLAMA_COMPTIME_RECORD_DIM_SIZE 20 +#endif + +#include "../common/ttjet_13tev_june2019.hpp" + +#include + +using RecordDim = boost::mp11::mp_take_c; + +auto main() -> int +try +{ + constexpr auto extents = llama::ArrayExtents{1024 * 1024}; + using ArrayExtents = std::remove_const_t; + // const auto packedAoSMapping = llama::mapping::PackedAoS{extents}; + const auto alignedAoSMapping = llama::mapping::AlignedAoS{extents}; + // const auto multiBlobSoAMapping = llama::mapping::MultiBlobSoA{extents}; + // const auto aosoa8Mapping = llama::mapping::AoSoA{extents}; + // const auto aosoa32Mapping = llama::mapping::AoSoA{extents}; + // const auto aosoa64Mapping = llama::mapping::AoSoA{extents}; + + auto view = llama::allocViewUninitialized(alignedAoSMapping); + llama::forEachLeafCoord( + [&](auto coord) + { + using Type = llama::GetType; + for(std::size_t i = 0; i < extents[0]; i++) + view(i)(coord) = Type{}; + }); +} +catch(const std::exception& e) +{ + std::cerr << "Exception: " << e.what() << '\n'; +} diff --git a/include/llama/Core.hpp b/include/llama/Core.hpp index 22cd85fbe0..b4ae58d379 100644 --- a/include/llama/Core.hpp +++ b/include/llama/Core.hpp @@ -603,8 +603,9 @@ namespace llama // final padding, so next struct can start right away if constexpr(Align && IncludeTailPadding) - size = roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf here, at - // the cost of more template instantiations + if(maxAlign > 0) + size = roundUpToMultiple(size, maxAlign); // TODO(bgruber): we could use flatAlignOf + // here, at the cost of more template instantiations return size; } diff --git a/tests/core.cpp b/tests/core.cpp index ab2495de6d..0ab050c6d5 100644 --- a/tests/core.cpp +++ b/tests/core.cpp @@ -3,6 +3,16 @@ #include "common.hpp" +TEST_CASE("flatSizeOf") +{ + STATIC_REQUIRE(llama::flatSizeOf, true, true> == 4); + STATIC_REQUIRE(llama::flatSizeOf, true, true> == 12); + STATIC_REQUIRE(llama::flatSizeOf, false, true> == 6); + STATIC_REQUIRE(llama::flatSizeOf, true, false> == 9); + STATIC_REQUIRE(llama::flatSizeOf, false, false> == 6); + STATIC_REQUIRE(llama::flatSizeOf, true, true> == 0); +} + TEST_CASE("prettyPrintType") { auto str = prettyPrintType();