Add a host-pinned memory resource that can be used as upstream for pool_memory_resource. #1392

Merged Jan 18, 2024 · 50 commits

Commits
fae33fa
Add host_pinned_memory_resource and tests.
harrism Nov 28, 2023
15be572
Add missing maybe_unused alignment parameter and fix briefreturn
harrism Nov 28, 2023
e8c227b
Merge branch 'branch-24.02' into fea-host-pinned-mr
harrism Dec 5, 2023
2b37372
Respond to review feedback:
harrism Dec 6, 2023
c43a8c1
Add new util to get a fraction of available device mem, move availabl…
harrism Dec 19, 2023
d238daa
Deprecate old pool_mr ctors (optional initial size) and add new ctors…
harrism Dec 19, 2023
3d65d4c
Update all tests and resources to use new pool ctors and util
harrism Dec 19, 2023
66d85b4
Rename fraction_of_free_device_memory to percent_of_free_device_memory
harrism Dec 20, 2023
265de9b
clang-tidy Ignore 50 and 100 magic numbers
harrism Dec 20, 2023
0be364b
Remove straggler includes of removed file.
harrism Dec 20, 2023
266afa9
Merge branch 'branch-24.02' into fea-explicit-initial-pool-size
harrism Dec 20, 2023
5d66f40
Another missed include.
harrism Dec 20, 2023
fae5b73
Add detail::available_device_memory back as an alias of rmm::availabl…
harrism Jan 9, 2024
92c0653
merge branch 24.02
harrism Jan 9, 2024
2acf759
copyright
harrism Jan 9, 2024
a70b24e
Merge branch 'fea-explicit-initial-pool-size' into fea-host-pinned-mr
harrism Jan 9, 2024
b6edcd1
Rename file to match class and remove default alignment from some all…
harrism Jan 9, 2024
782ff55
document (and deprecate) available_device_memory alias
harrism Jan 9, 2024
4ef844a
Merge branch 'fea-explicit-initial-pool-size' into fea-host-pinned-mr
harrism Jan 9, 2024
ce58ff5
Add documentation for alignment params
harrism Jan 9, 2024
0b4c968
Respond to feedback from @wence-
harrism Jan 9, 2024
2f827a5
Merge branch 'fea-explicit-initial-pool-size' into fea-host-pinned-mr
harrism Jan 9, 2024
4f91478
Include doxygen deprecated output in docs
wence- Jan 9, 2024
f581809
Minor docstring fixes
wence- Jan 9, 2024
bafd70a
Don't use zero for default size in test.
harrism Jan 10, 2024
a77d215
Add non-detail alignment utilities
harrism Jan 10, 2024
07dffa3
Duplicate (for now) alignment utilities in rmm:: namespace since outs…
harrism Jan 10, 2024
8afff2d
Don't deprecate anything just yet (until cuDF/cuGraph updated)
harrism Jan 10, 2024
0140bd4
Merge branch 'fea-explicit-initial-pool-size' of github.com:harrism/r…
harrism Jan 10, 2024
91752c8
Make percent_of_free_device_memory do what it says on the tin.
harrism Jan 10, 2024
baf429c
Fix remaining uses of pool ctor in docs and code
harrism Jan 10, 2024
c90e81c
Fix overflow in percent_of_free_device_memory
harrism Jan 10, 2024
c2843be
Fix Cython to provide explicit initial size
harrism Jan 10, 2024
6e0aeaa
Respond to review suggestions in aligned.hpp
harrism Jan 10, 2024
c3c61e1
Fix quoted auto includes
harrism Jan 10, 2024
014ac5b
missed file for detail changes
harrism Jan 10, 2024
909b733
Add utilities doxygen group
harrism Jan 11, 2024
0fc3fba
Add utilities to sphinx docs
harrism Jan 11, 2024
9a876b5
Merge branch 'fea-explicit-initial-pool-size' into fea-host-pinned-mr
harrism Jan 11, 2024
b819738
Merge branch 'branch-24.02' into fea-host-pinned-mr
harrism Jan 15, 2024
27fe52c
Some cleanup of aligned_allocate/deallocate
harrism Jan 17, 2024
da934ba
Implement aligned alloc/dealloc and fix tests.
harrism Jan 17, 2024
7d51fea
Merge branch 'branch-24.02' into fea-host-pinned-mr
harrism Jan 17, 2024
85286b0
copyright year
harrism Jan 17, 2024
f7b0ca5
static_assert MR properties.
harrism Jan 18, 2024
52fc2f1
I don't know how those deprecated calls snuck back in.
harrism Jan 18, 2024
6162699
Rename aligned_[de]allocate to aligned_host_[de]allocate and clarify …
harrism Jan 18, 2024
fa140ae
Fix docs per feedback
harrism Jan 18, 2024
aafa18a
Factor out mr test utilities.
harrism Jan 18, 2024
92c8e23
Fix docstring for operator==
harrism Jan 18, 2024
10 changes: 5 additions & 5 deletions include/rmm/aligned.hpp
@@ -43,9 +43,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
/**
* @brief Returns whether or not `value` is a power of 2.
*
* @param[in] value to check.
* @param[in] value value to check.
*
* @return Whether the input a power of two with non-negative exponent
* @return True if the input a power of two with non-negative exponent, false otherwise.
Contributor suggestion:
- * @return True if the input a power of two with non-negative exponent, false otherwise.
+ * @return True if the input is a power of two with non-negative exponent, false otherwise.

Contributor: Double-nit (no need to act on it): non-negative integer exponent (all integers can be expressed as powers of two if we admit real exponents).

*/
[[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept
{
@@ -57,7 +57,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
*
* @param[in] alignment to check
*
* @return Whether the alignment is valid
* @return True if the alignment is valid, false otherwise.
*/
[[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept
{
@@ -70,7 +70,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
* @param[in] value value to align
* @param[in] alignment amount, in bytes, must be a power of 2
*
* @return Return the aligned value, as one would expect
* @return the aligned value
*/
[[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
@@ -84,7 +84,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
* @param[in] value value to align
* @param[in] alignment amount, in bytes, must be a power of 2
*
* @return Return the aligned value, as one would expect
* @return the aligned value
*/
[[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
10 changes: 7 additions & 3 deletions include/rmm/detail/aligned.hpp
@@ -125,6 +125,7 @@ namespace rmm::detail {
* from `alloc`.
*
* If `alignment` is not a power of 2, behavior is undefined.
* If `Alloc` does not allocate host-accessible memory, behavior is undefined.
*
* @param bytes The desired size of the allocation
* @param alignment Desired alignment of allocation
@@ -137,7 +138,7 @@ namespace rmm::detail {
template <typename Alloc>
void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
{
assert(rmm::is_pow2(alignment));
assert(rmm::is_supported_alignment(alignment));

// allocate memory for bytes, plus potential alignment correction,
// plus store of the correction offset
@@ -179,9 +180,12 @@ void* aligned_allocate(std::size_t bytes, std::size_t alignment, Alloc alloc)
*/
template <typename Dealloc>
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
void aligned_deallocate(void* ptr, std::size_t bytes, std::size_t alignment, Dealloc dealloc)
void aligned_deallocate(void* ptr,
[[maybe_unused]] std::size_t bytes,
[[maybe_unused]] std::size_t alignment,
Dealloc dealloc) noexcept
{
(void)alignment;
assert(rmm::is_supported_alignment(alignment));

// Get offset from the location immediately prior to the aligned pointer
// NOLINTNEXTLINE
223 changes: 223 additions & 0 deletions include/rmm/mr/pinned_host_memory_resource.hpp
@@ -0,0 +1,223 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/aligned.hpp>
#include <rmm/detail/aligned.hpp>
#include <rmm/detail/error.hpp>

#include <cuda/memory_resource>
#include <cuda/stream_ref>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <utility>

namespace rmm::mr {

/**
* @brief Memory resource class for allocating pinned host memory.
*
* This class uses CUDA's `cudaHostAlloc` to allocate pinned host memory. It implements the
* `cuda::mr::memory_resource` and `cuda::mr::device_memory_resource` concepts, and
* the `cuda::mr::host_accessible` and `cuda::mr::device_accessible` properties.
*/
class pinned_host_memory_resource {
public:
// Disable clang-tidy complaining about the easily swappable size and alignment parameters
// of allocate and deallocate
// NOLINTBEGIN(bugprone-easily-swappable-parameters)

/**
* @brief Allocates pinned host memory of size at least \p bytes bytes.
*
* @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
* CUDA out of memory error.
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
* reason.
*
* @param bytes The size, in bytes, of the allocation.
* @param alignment Alignment in bytes. Default alignment is used if unspecified.
*
* @return Pointer to the newly allocated memory.
*/
static void* allocate(std::size_t bytes,
[[maybe_unused]] std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT)
{
// don't allocate anything if the user requested zero bytes
if (0 == bytes) { return nullptr; }

return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) {
void* ptr{nullptr};
RMM_CUDA_TRY_ALLOC(cudaHostAlloc(&ptr, size, cudaHostAllocDefault));
return ptr;
});
}

/**
* @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated.
* @param bytes Size of the allocation.
* @param alignment Alignment in bytes. Default alignment is used if unspecified.
*/
static void deallocate(void* ptr,
std::size_t bytes,
std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) noexcept
{
rmm::detail::aligned_deallocate(
ptr, bytes, alignment, [](void* ptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); });
}

/**
* @brief Allocates pinned host memory of size at least \p bytes bytes.
*
* @note Stream argument is ignored and behavior is identical to allocate.
*
* @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
* CUDA out of memory error.
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
* error.
*
* @param bytes The size, in bytes, of the allocation.
* @param stream CUDA stream on which to perform the allocation (ignored).
* @return Pointer to the newly allocated memory.
*/
static void* allocate_async(std::size_t bytes, [[maybe_unused]] cuda::stream_ref stream)
{
return allocate(bytes);
}

/**
* @brief Allocates pinned host memory of size at least \p bytes bytes and alignment \p alignment.
*
* @note Stream argument is ignored and behavior is identical to allocate.
*
* @throws `rmm::out_of_memory` if the requested allocation could not be fulfilled due to a
* CUDA out of memory error.
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled due to any other
* error.
*
* @param bytes The size, in bytes, of the allocation.
* @param alignment Alignment in bytes.
* @param stream CUDA stream on which to perform the allocation (ignored).
* @return Pointer to the newly allocated memory.
*/
static void* allocate_async(std::size_t bytes,
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream)
{
return allocate(bytes, alignment);
}

/**
* @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes.
*
* @note Stream argument is ignored and behavior is identical to deallocate.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated.
* @param bytes Size of the allocation.
* @param stream CUDA stream on which to perform the deallocation (ignored).
*/
static void deallocate_async(void* ptr,
std::size_t bytes,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
return deallocate(ptr, bytes);
}

/**
* @brief Deallocate memory pointed to by \p ptr of size \p bytes bytes and alignment \p
* alignment bytes.
*
* @note Stream argument is ignored and behavior is identical to deallocate.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated.
* @param bytes Size of the allocation.
* @param alignment Alignment in bytes.
* @param stream CUDA stream on which to perform the deallocation (ignored).
*/
static void deallocate_async(void* ptr,
std::size_t bytes,
std::size_t alignment,
[[maybe_unused]] cuda::stream_ref stream) noexcept
{
return deallocate(ptr, bytes, alignment);
}
// NOLINTEND(bugprone-easily-swappable-parameters)

/**
* @briefreturn{true if the specified resource is the same type as this resource, otherwise
* false.}
Contributor: This docstring implies it's possible to compare with another type of resource and get false, but the implementation doesn't allow that. Do we need to update the implementation or the docstrings?

Contributor: Oh yeah, I had that thought. Is there a blanket "false" implementation in the base class somehow?

Member Author: I think this is how comparison works with cuda::mr. Basically, if you try to compare with another type of resource, compilation will fail. Note that refactoring to cuda::mr will necessitate changing the semantics RMM currently (mostly) has for MR equality comparison. #1402

Member Author: Note also that pinned_host_memory_resource is NOT a device_memory_resource. It simply implements the cuda::mr::memory_resource and cuda::mr::async_memory_resource concepts.

Member Author: (Also note there is no base class.)

Member Author: I changed the docstring so it doesn't say that false can be returned. Note that we should probably follow up with more explicit tests of this MR and future MRs like it. Right now, though, our test machinery for MRs assumes they are all device_memory_resource, so while I can pass a pool_memory_resource<pinned_host_memory_resource> to all the MR tests, I can't pass just pinned_host_memory_resource currently. (It does get tested as the upstream in the former case, though, including its operator==.)

Contributor: Okay. If there's no base class, I've just lost track of how the class hierarchy works. I don't have any further comments here, but I'll need to refresh myself on how things are supposed to work someday.
*/
bool operator==(const pinned_host_memory_resource&) const { return true; }

/**
* @briefreturn{true if the specified resource is not the same type as this resource, otherwise
* false.}
*/
bool operator!=(const pinned_host_memory_resource&) const { return false; }

/**
* @brief Query whether the resource supports reporting free and available memory.
*
* @return false
*/
static bool supports_get_mem_info() { return false; }

/**
* @brief Query the total amount of memory and free memory available for allocation by this
* resource.
*
* @throws nothing
*
* @return std::pair containing 0 for both total and free memory.
*/
[[nodiscard]] static std::pair<std::size_t, std::size_t> get_mem_info(cuda::stream_ref) noexcept
{
return {0, 0};
}

/**
* @brief Enables the `cuda::mr::device_accessible` property
*
* This property declares that a `pinned_host_memory_resource` provides device accessible memory
*/
friend void get_property(pinned_host_memory_resource const&, cuda::mr::device_accessible) noexcept
{
}

/**
* @brief Enables the `cuda::mr::host_accessible` property
*
* This property declares that a `pinned_host_memory_resource` provides host accessible memory
*/
friend void get_property(pinned_host_memory_resource const&, cuda::mr::host_accessible) noexcept
{
}
};

static_assert(cuda::mr::async_resource_with<pinned_host_memory_resource,
cuda::mr::device_accessible,
cuda::mr::host_accessible>);
} // namespace rmm::mr
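To connect the new class back to the PR title, here is a hedged sketch of using it as the upstream of `pool_memory_resource`. It assumes the header paths shown in this diff and the PR's new explicit-initial-size pool constructor, and it requires CUDA, so it is not compilable standalone:

```cpp
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/pinned_host_memory_resource.hpp>

int main()
{
  // A pool of pinned host memory: pinned_host_memory_resource satisfies the
  // cuda::mr (async) resource concepts, so it can serve as the pool's upstream.
  rmm::mr::pinned_host_memory_resource pinned_mr;
  rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource> pool_mr{
    &pinned_mr, std::size_t{1} << 26};  // explicit initial pool size (64 MiB), per this PR

  void* ptr = pool_mr.allocate(1024);
  pool_mr.deallocate(ptr, 1024);
  return 0;
}
```

The pool amortizes the relatively expensive `cudaHostAlloc`/`cudaFreeHost` calls by sub-allocating from large pinned blocks.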
34 changes: 24 additions & 10 deletions tests/mr/device/mr_ref_test.hpp
@@ -54,11 +54,25 @@ namespace rmm::test {
* @brief Returns whether a pointer points to a device-accessible memory
* allocation (device, managed, or device-accessible pinned host memory).
*/
inline bool is_device_memory(void* ptr)
inline bool is_device_accessible_memory(void* ptr)
{
cudaPointerAttributes attributes{};
if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged);
return (attributes.type == cudaMemoryTypeDevice) or (attributes.type == cudaMemoryTypeManaged) or
((attributes.type == cudaMemoryTypeHost) and (attributes.devicePointer != nullptr));
}

inline bool is_host_memory(void* ptr)
{
cudaPointerAttributes attributes{};
if (cudaSuccess != cudaPointerGetAttributes(&attributes, ptr)) { return false; }
return attributes.type == cudaMemoryTypeHost;
}

inline bool is_properly_aligned(void* ptr)
{
if (is_host_memory(ptr)) { return rmm::is_pointer_aligned(ptr, rmm::RMM_DEFAULT_HOST_ALIGNMENT); }
return rmm::is_pointer_aligned(ptr, rmm::CUDA_ALLOCATION_ALIGNMENT);
}

enum size_in_bytes : size_t {};
@@ -79,8 +93,8 @@ inline void test_allocate(resource_ref ref, std::size_t bytes)
try {
void* ptr = ref.allocate(bytes);
EXPECT_NE(nullptr, ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(ptr));
EXPECT_TRUE(is_device_memory(ptr));
EXPECT_TRUE(is_properly_aligned(ptr));
EXPECT_TRUE(is_device_accessible_memory(ptr));
ref.deallocate(ptr, bytes);
} catch (rmm::out_of_memory const& e) {
EXPECT_NE(std::string{e.what()}.find("out_of_memory"), std::string::npos);
@@ -95,8 +109,8 @@ inline void test_allocate_async(async_resource_ref ref,
void* ptr = ref.allocate_async(bytes, stream);
if (not stream.is_default()) { stream.synchronize(); }
EXPECT_NE(nullptr, ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(ptr));
EXPECT_TRUE(is_device_memory(ptr));
EXPECT_TRUE(is_properly_aligned(ptr));
EXPECT_TRUE(is_device_accessible_memory(ptr));
ref.deallocate_async(ptr, bytes, stream);
if (not stream.is_default()) { stream.synchronize(); }
} catch (rmm::out_of_memory const& e) {
@@ -203,7 +217,7 @@ inline void test_random_allocations(resource_ref ref,
alloc.size = distribution(generator);
EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
EXPECT_NE(nullptr, alloc.ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr));
EXPECT_TRUE(is_properly_aligned(alloc.ptr));
});

std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) {
@@ -229,7 +243,7 @@ inline void test_random_async_allocations(async_resource_ref ref,
EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size));
if (not stream.is_default()) { stream.synchronize(); }
EXPECT_NE(nullptr, alloc.ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr));
EXPECT_TRUE(is_properly_aligned(alloc.ptr));
});

std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) {
@@ -270,7 +284,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref,
EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size));
auto new_allocation = allocations.back();
EXPECT_NE(nullptr, new_allocation.ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr));
EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
} else {
auto const index = static_cast<int>(index_distribution(generator) % active_allocations);
active_allocations--;
@@ -317,7 +331,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref,
EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size));
auto new_allocation = allocations.back();
EXPECT_NE(nullptr, new_allocation.ptr);
EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr));
EXPECT_TRUE(is_properly_aligned(new_allocation.ptr));
} else {
auto const index = static_cast<int>(index_distribution(generator) % active_allocations);
active_allocations--;