Skip to content

Commit

Permalink
Rework internal allocations
Browse files Browse the repository at this point in the history
  • Loading branch information
tpadioleau committed Oct 9, 2024
1 parent 1fd4a85 commit 0892e27
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 84 deletions.
59 changes: 29 additions & 30 deletions common/src/KokkosFFT_transpose.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ auto get_map_axes(const ViewType& view, int axis) {
return get_map_axes(view, axis_type<1>({axis}));
}

/// \brief Builds the list of extents obtained by permuting the dimensions of
/// a view.
///
/// Entry i of the result is the extent of \p view along dimension map[i],
/// converted to int.
///
/// \tparam ViewType Type exposing a static rank() and an extent(dim) member.
///
/// \param view [in] View whose extents are read.
/// \param map [in] For each output dimension, the index of the dimension of
/// \p view to take the extent from.
/// \return The permuted extents, one entry per dimension of \p view.
template <class ViewType>
axis_type<ViewType::rank()> compute_transpose_extents(
    ViewType const& view, axis_type<ViewType::rank()> const& map) {
  constexpr std::size_t rank = ViewType::rank();

  std::array<int, rank> permuted;
  std::size_t dst = 0;
  while (dst < rank) {
    // map.at() keeps the bounds-checked lookup of the source dimension.
    const auto src = map.at(dst);
    permuted[dst]  = static_cast<int>(view.extent(src));
    ++dst;
  }

  return permuted;
}

template <class InViewType, class OutViewType, std::size_t DIMS>
void prep_transpose_view(InViewType& in, OutViewType& out,
axis_type<DIMS> map) {
Expand Down Expand Up @@ -108,8 +121,8 @@ void prep_transpose_view(InViewType& in, OutViewType& out,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<2> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<2> /*_map*/) {
constexpr std::size_t DIM = 2;

using range_type = Kokkos::MDRangePolicy<
Expand All @@ -125,16 +138,14 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::parallel_for(
"KokkosFFT::transpose", range,
KOKKOS_LAMBDA(int i0, int i1) { out(i1, i0) = in(i0, i1); });
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<3> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<3> _map) {
constexpr std::size_t DIM = 3;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -151,8 +162,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
tile_type{{4, 4, 4}} // [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, 3> map = {_map[0], _map[1], _map[2]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range, KOKKOS_LAMBDA(int i0, int i1, int i2) {
Expand All @@ -166,8 +175,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<4> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<4> _map) {
constexpr std::size_t DIM = 4;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -185,8 +194,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range,
Expand All @@ -202,8 +209,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<5> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<5> _map) {
constexpr std::size_t DIM = 5;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -222,8 +229,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3], _map[4]};
Kokkos::parallel_for(
"KokkosFFT::transpose", range,
Expand All @@ -240,8 +245,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<6> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<6> _map) {
constexpr std::size_t DIM = 6;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -261,8 +266,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2],
_map[3], _map[4], _map[5]};
Kokkos::parallel_for(
Expand All @@ -281,8 +284,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<7> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<7> _map) {
constexpr std::size_t DIM = 6;
constexpr std::size_t rank = InViewType::rank();

Expand All @@ -302,8 +305,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6]};
Kokkos::parallel_for(
Expand All @@ -326,8 +327,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
}

template <typename ExecutionSpace, typename InViewType, typename OutViewType>
void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<8> _map) {
void transpose_impl(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<8> _map) {
constexpr std::size_t DIM = 6;

constexpr std::size_t rank = InViewType::rank();
Expand All @@ -349,8 +350,6 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
// [TO DO] Choose optimal tile sizes for each device
);

prep_transpose_view(in, out, _map);

Kokkos::Array<int, rank> map = {_map[0], _map[1], _map[2], _map[3],
_map[4], _map[5], _map[6], _map[7]};
Kokkos::parallel_for(
Expand Down Expand Up @@ -396,8 +395,8 @@ void transpose_impl(const ExecutionSpace& exec_space, InViewType& in,
*/
template <typename ExecutionSpace, typename InViewType, typename OutViewType,
std::size_t DIM = 1>
void transpose(const ExecutionSpace& exec_space, InViewType& in,
OutViewType& out, axis_type<DIM> map) {
void transpose(const ExecutionSpace& exec_space, const InViewType& in,
const OutViewType& out, axis_type<DIM> map) {
static_assert(
KokkosFFT::Impl::are_operatable_views_v<ExecutionSpace, InViewType,
OutViewType>,
Expand Down
7 changes: 7 additions & 0 deletions common/src/KokkosFFT_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,13 @@ auto extract_extents(const ViewType& view) {
return extents;
}

/// \brief Creates a layout object whose leading dimensions are set from a
/// list of extents.
///
/// The layout is default-constructed, then the first N entries of its
/// `dimension` array are overwritten with \p extents. Entries beyond N keep
/// whatever value the default constructor of Layout gave them.
///
/// \tparam Layout Layout type; must be default-constructible and expose a
/// fixed-size array member named `dimension`.
/// \tparam N Number of extents to copy.
///
/// \param extents [in] Extents to store in the layout, in dimension order.
/// \return The layout holding the given extents.
template <typename Layout, std::size_t N>
Layout create_layout(const std::array<int, N>& extents) {
  Layout layout;
  // Reject ranks that would make copy_n write past the end of the
  // fixed-size dimension array.
  static_assert(
      N <= sizeof(layout.dimension) / sizeof(layout.dimension[0]),
      "create_layout: number of extents exceeds the capacity of "
      "Layout::dimension.");
  std::copy_n(extents.begin(), N, layout.dimension);
  return layout;
}

template <typename ViewType, typename Label>
void create_view(ViewType& out, const Label& label,
const std::array<int, 1>& extents) {
Expand Down
Loading

0 comments on commit 0892e27

Please sign in to comment.