Add Statistics Resource Adaptor and cython bindings to `tracking_reso…

…urce_adaptor` and `statistics_resource_adaptor` (#626) Closes #622 and Closes #623 This PR updates the C++ `tracking_resource_adaptor` with stack trace information and also adds a new MR, `statistics_resource_adaptor`. Summary of all changes: - `tracking_resource_adaptor` changes: - Added Cython wrapper `rmm.mr.TrackingResourceAdaptor` which wraps all available methods - Updated `tracking_resource_adaptor` to correctly log stack trace information with `capture_stacks=True` - Added `statistics_resource_adaptor` memory resource: - This MR will keep track of the current, peak and total allocated bytes and number of allocations - Added Cython wrapper `rmm.mr.StatisticsResourceAdaptor` which wraps all available methods These two MRs can be used separately or together to track memory allocations, check for memory leaks, and identify incorrect deallocations. While both MRs can track the current number of allocations, they have different areas of focus. The `tracking_resource_adaptor` is designed more towards identifying and fixing memory leaks, and will log stack trace information for every memory allocation. This MR will have significant performance impacts since it logs a large amount of information for every allocation. The `statistics_resource_adaptor` is a lightweight MR that adds simple counters to track the allocated bytes and allocation count. This MR will have significantly less of a performance impact but cannot identify the cause of memory leaks, only that they exist. This MR is also great at tracking peak memory usage and can be helpful in identifying areas that require large amounts of memory or helping developers measure memory usage reductions during optimization.
Authors: - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Keith Kraus (https://github.com/kkraus14) - Rong Ou (https://github.com/rongou) - Mark Harris (https://github.com/harrism) - Jake Hemstad (https://github.com/jrhemstad) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #626
rapidsai · Jun 8, 2021 · 2f85111 · 2f85111
1 parent aa2a2f3
commit 2f85111
Show file tree

Hide file tree

Showing 11 changed files with 922 additions and 11 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -81,6 +81,7 @@ endif(CUDA_STATIC_RUNTIME)
 
 target_link_libraries(rmm INTERFACE rmm::Thrust)
 target_link_libraries(rmm INTERFACE spdlog::spdlog_header_only)
+target_link_libraries(rmm INTERFACE dl)
 target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
 
 # Set logging level. Must go before including gtests and benchmarks.

diff --git a/include/rmm/detail/stack_trace.hpp b/include/rmm/detail/stack_trace.hpp
@@ -25,6 +25,8 @@
 #include <sstream>
 
 #if defined(RMM_ENABLE_STACK_TRACES)
+#include <cxxabi.h>
+#include <dlfcn.h>
 #include <execinfo.h>
 #include <memory>
 #include <vector>
@@ -60,12 +62,32 @@ class stack_trace {
 #if defined(RMM_ENABLE_STACK_TRACES)
     std::unique_ptr<char*, decltype(&::free)> strings(
       backtrace_symbols(st.stack_ptrs.data(), st.stack_ptrs.size()), &::free);
+
     if (strings.get() == nullptr) {
       os << "But no stack trace could be found!" << std::endl;
     } else {
-      ///@todo: support for demangling of C++ symbol names
+      // Iterate over the stack pointers converting to a string
       for (std::size_t i = 0; i < st.stack_ptrs.size(); ++i) {
-        os << "#" << i << " in " << strings.get()[i] << std::endl;
+        // Leading index
+        os << "#" << i << " in ";
+
+        auto const str = [&] {
+          Dl_info info;
+          if (dladdr(st.stack_ptrs[i], &info)) {
+            int status = -1;  // Demangle the name. This can occasionally fail
+
+            std::unique_ptr<char, decltype(&::free)> demangled(
+              abi::__cxa_demangle(info.dli_sname, nullptr, 0, &status), &::free);
+            // If demangling fails, fall back to the raw (mangled) dli_sname.
+            if (status == 0 or info.dli_sname) {
+              auto name = status == 0 ? demangled.get() : info.dli_sname;
+              return name + std::string(" from ") + info.dli_fname;
+            }
+          }
+          return std::string(strings.get()[i]);
+        }();
+
+        os << str << std::endl;
       }
     }
 #else

diff --git a/include/rmm/mr/device/statistics_resource_adaptor.hpp b/include/rmm/mr/device/statistics_resource_adaptor.hpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <shared_mutex>
+#include <utility>
+
+namespace rmm {
+namespace mr {
+/**
+ * @brief Resource that uses `Upstream` to allocate memory and tracks statistics
+ * on memory allocations.
+ *
+ * An instance of this resource can be constructed with an existing, upstream
+ * resource in order to satisfy allocation requests, but any existing
+ * allocations will be untracked. Tracking statistics stores the current, peak
+ * and total memory allocations for both the number of bytes and number of calls
+ * to the memory resource. `statistics_resource_adaptor` is intended as a debug
+ * adaptor and shouldn't be used in performance-sensitive code.
+ *
+ * Both counters are guarded by a single shared mutex: allocation and
+ * deallocation take it exclusively, the counter getters take it shared.
+ *
+ * @tparam Upstream Type of the upstream resource used for
+ * allocation/deallocation.
+ */
+template <typename Upstream>
+class statistics_resource_adaptor final : public device_memory_resource {
+ public:
+  // Shared (reader) and exclusive (writer) lock types for the counter mutex.
+  // can be a std::shared_mutex once C++17 is adopted
+  using read_lock_t  = std::shared_lock<std::shared_timed_mutex>;
+  using write_lock_t = std::unique_lock<std::shared_timed_mutex>;
+
+  /**
+   * @brief Utility struct for counting the current, peak, and total value of a number
+   *
+   * `operator+=` adds to both `value` and `total` and then refreshes `peak`;
+   * `operator-=` only lowers `value` and leaves `peak` and `total` untouched.
+   */
+  struct counter {
+    int64_t value{0};  // Current value
+    int64_t peak{0};   // Max value of `value`
+    int64_t total{0};  // Sum of all added values
+
+    /**
+     * @brief Add `x` to the current value and the running total, updating the peak.
+     *
+     * @param x Amount to add
+     * @return Reference to this counter
+     */
+    counter& operator+=(int64_t x)
+    {
+      value += x;
+      total += x;
+      peak = std::max(value, peak);
+      return *this;
+    }
+
+    /**
+     * @brief Subtract `x` from the current value only.
+     *
+     * @param x Amount to subtract
+     * @return Reference to this counter
+     */
+    counter& operator-=(int64_t x)
+    {
+      value -= x;
+      return *this;
+    }
+  };
+
+  /**
+   * @brief Construct a new statistics resource adaptor using `upstream` to satisfy
+   * allocation requests.
+   *
+   * @throws `rmm::logic_error` if `upstream == nullptr`
+   *
+   * @param upstream The resource used for allocating/deallocating device memory
+   */
+  statistics_resource_adaptor(Upstream* upstream) : upstream_{upstream}
+  {
+    RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
+  }
+
+  // Note: `std::shared_timed_mutex` is neither copyable nor movable, so the defaulted
+  // move operations below are implicitly defined as deleted for this class.
+  statistics_resource_adaptor()                                   = delete;
+  virtual ~statistics_resource_adaptor()                          = default;
+  statistics_resource_adaptor(statistics_resource_adaptor const&) = delete;
+  statistics_resource_adaptor(statistics_resource_adaptor&&)      = default;
+  statistics_resource_adaptor& operator=(statistics_resource_adaptor const&) = delete;
+  statistics_resource_adaptor& operator=(statistics_resource_adaptor&&) = default;
+
+  /**
+   * @brief Return pointer to the upstream resource.
+   *
+   * @return Upstream* Pointer to the upstream resource.
+   */
+  Upstream* get_upstream() const noexcept { return upstream_; }
+
+  /**
+   * @brief Checks whether the upstream resource supports streams.
+   *
+   * @return true The upstream resource supports streams
+   * @return false The upstream resource does not support streams.
+   */
+  bool supports_streams() const noexcept override { return upstream_->supports_streams(); }
+
+  /**
+   * @brief Query whether the resource supports the get_mem_info API.
+   *
+   * @return bool true if the upstream resource supports get_mem_info, false otherwise.
+   */
+  bool supports_get_mem_info() const noexcept override
+  {
+    return upstream_->supports_get_mem_info();
+  }
+
+  /**
+   * @brief Returns a `counter` struct for this adaptor containing the current,
+   * peak, and total number of allocated bytes for this
+   * adaptor since it was created.
+   *
+   * The counter is copied while holding a shared lock on the internal mutex.
+   *
+   * @return counter struct containing bytes count
+   */
+  counter get_bytes_counter() const noexcept
+  {
+    read_lock_t lock(mtx_);
+
+    return bytes_;
+  }
+
+  /**
+   * @brief Returns a `counter` struct for this adaptor containing the current,
+   * peak, and total number of allocation counts for this adaptor since it was
+   * created.
+   *
+   * The counter is copied while holding a shared lock on the internal mutex.
+   *
+   * @return counter struct containing allocations count
+   */
+  counter get_allocations_counter() const noexcept
+  {
+    read_lock_t lock(mtx_);
+
+    return allocations_;
+  }
+
+ private:
+  /**
+   * @brief Allocates memory of size at least `bytes` using the upstream
+   * resource and records the allocation in the statistics counters.
+   *
+   * The counters are only updated after the upstream allocation has returned,
+   * so an upstream allocation that throws is not counted.
+   *
+   * The returned pointer has at least 256B alignment.
+   *
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
+   * by the upstream resource.
+   *
+   * @param bytes The size, in bytes, of the allocation
+   * @param stream Stream on which to perform the allocation
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
+  {
+    void* p = upstream_->allocate(bytes, stream);
+
+    // increment the stats
+    {
+      write_lock_t lock(mtx_);
+
+      // Update both counters while holding the exclusive lock. The cast makes the
+      // std::size_t -> int64_t conversion explicit.
+      bytes_ += static_cast<int64_t>(bytes);
+      allocations_ += 1;
+    }
+
+    return p;
+  }
+
+  /**
+   * @brief Free allocation of size `bytes` pointed to by `p`
+   *
+   * The current byte count is reduced by the `bytes` value passed in; it is not
+   * checked against the size that was originally allocated for `p`.
+   *
+   * @throws Nothing.
+   *
+   * @param p Pointer to be deallocated
+   * @param bytes Size of the allocation
+   * @param stream Stream on which to perform the deallocation
+   */
+  void do_deallocate(void* p, std::size_t bytes, cuda_stream_view stream) override
+  {
+    upstream_->deallocate(p, bytes, stream);
+
+    {
+      write_lock_t lock(mtx_);
+
+      // Decrement the current allocated counts.
+      bytes_ -= static_cast<int64_t>(bytes);
+      allocations_ -= 1;
+    }
+  }
+
+  /**
+   * @brief Compare the upstream resource to another.
+   *
+   * If `other` is also a `statistics_resource_adaptor<Upstream>`, the two
+   * upstream resources are compared; otherwise this adaptor's upstream is
+   * compared against `other` directly.
+   *
+   * @throws Nothing.
+   *
+   * @param other The other resource to compare to
+   * @return true If the two resources are equivalent
+   * @return false If the two resources are not equal
+   */
+  bool do_is_equal(device_memory_resource const& other) const noexcept override
+  {
+    if (this == &other) { return true; }
+
+    auto const* cast = dynamic_cast<statistics_resource_adaptor<Upstream> const*>(&other);
+    return cast != nullptr ? upstream_->is_equal(*cast->get_upstream())
+                           : upstream_->is_equal(other);
+  }
+
+  /**
+   * @brief Get free and available memory from upstream resource.
+   *
+   * @throws `rmm::cuda_error` if unable to retrieve memory info.
+   *
+   * @param stream Stream on which to get the mem info.
+   * @return std::pair containing free_size and total_size of memory
+   */
+  std::pair<std::size_t, std::size_t> do_get_mem_info(cuda_stream_view stream) const override
+  {
+    return upstream_->get_mem_info(stream);
+  }
+
+  counter bytes_;                        // peak, current and total allocated bytes
+  counter allocations_;                  // peak, current and total allocation count
+  std::shared_timed_mutex mutable mtx_;  // mutex for thread safe access to bytes_ and allocations_
+  Upstream* upstream_;  // the upstream resource used for satisfying allocation requests
+};
+
+/**
+ * @brief Convenience factory that wraps `upstream` in a
+ * `statistics_resource_adaptor`.
+ *
+ * The adaptor is constructed directly in the return statement rather than in a
+ * named local variable.
+ *
+ * @tparam Upstream Type of the upstream `device_memory_resource`.
+ * @param upstream Pointer to the upstream resource
+ * @return A `statistics_resource_adaptor<Upstream>` constructed from `upstream`
+ */
+template <typename Upstream>
+statistics_resource_adaptor<Upstream> make_statistics_adaptor(Upstream* upstream)
+{
+  using adaptor_t = statistics_resource_adaptor<Upstream>;
+  return adaptor_t{upstream};
+}
+
+}  // namespace mr
+}  // namespace rmm
diff --git a/include/rmm/mr/device/tracking_resource_adaptor.hpp b/include/rmm/mr/device/tracking_resource_adaptor.hpp
@@ -81,7 +81,7 @@ class tracking_resource_adaptor final : public device_memory_resource {
   }
 
   tracking_resource_adaptor()                                 = delete;
-  ~tracking_resource_adaptor()                                = default;
+  virtual ~tracking_resource_adaptor()                        = default;
   tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
   tracking_resource_adaptor(tracking_resource_adaptor&&)      = default;
   tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
@@ -136,24 +136,42 @@ class tracking_resource_adaptor final : public device_memory_resource {
   std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; }
 
   /**
-   * @brief Log any outstanding allocations via RMM_LOG_DEBUG
+   * @brief Gets a string containing the outstanding allocation pointers, their
+   * size, and optionally the stack trace for when each pointer was allocated.
+   *
+   * Stack traces are only included if this resource adaptor was created with
+   * `capture_stack == true`. Otherwise, outstanding allocation pointers will be
+   * shown with their size and empty stack traces.
    *
+   * @return std::string Containing the outstanding allocation pointers.
    */
-  void log_outstanding_allocations() const
+  std::string get_outstanding_allocations_str() const
   {
-#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
     read_lock_t lock(mtx_);
-    if (not allocations_.empty()) {
-      std::ostringstream oss;
+
+    std::ostringstream oss;
+
+    if (!allocations_.empty()) {
       for (auto const& al : allocations_) {
         oss << al.first << ": " << al.second.allocation_size << " B";
         if (al.second.strace != nullptr) {
           oss << " : callstack:" << std::endl << *al.second.strace;
         }
         oss << std::endl;
       }
-      RMM_LOG_DEBUG("Outstanding Allocations: {}", oss.str());
     }
+
+    return oss.str();
+  }
+
+  /**
+   * @brief Log any outstanding allocations via RMM_LOG_DEBUG
+   *
+   */
+  void log_outstanding_allocations() const
+  {
+#if SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
+    RMM_LOG_DEBUG("Outstanding Allocations: {}", get_outstanding_allocations_str());
 #endif  // SPDLOG_ACTIVE_LEVEL <= SPDLOG_LEVEL_DEBUG
   }
 
@@ -199,7 +217,33 @@ class tracking_resource_adaptor final : public device_memory_resource {
     upstream_->deallocate(p, bytes, stream);
     {
       write_lock_t lock(mtx_);
-      allocations_.erase(p);
+
+      // Look up the bookkeeping entry recorded for this pointer, if any.
+      const auto found = allocations_.find(p);
+
+      // Ensure the allocation is found and the number of bytes match
+      if (found == allocations_.end()) {
+        // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
+        // std::terminate
+        // NOTE(review): `bytes` is still subtracted from `allocated_bytes_` after this block even
+        // though `p` was never tracked -- confirm that is intended.
+        RMM_LOG_ERROR(
+          "Deallocating a pointer that was not tracked. Ptr: {:p} [{}B], Current Num. Allocations: "
+          "{}",
+          fmt::ptr(p),
+          bytes,
+          this->allocations_.size());
+      } else {
+        // Read the recorded size BEFORE erasing: erasing invalidates `found`, so dereferencing
+        // it afterwards would be undefined behavior.
+        const auto allocated_bytes = found->second.allocation_size;
+
+        allocations_.erase(found);
+
+        if (allocated_bytes != bytes) {
+          // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
+          // std::terminate
+          RMM_LOG_ERROR(
+            "Alloc bytes ({}) and Dealloc bytes ({}) do not match", allocated_bytes, bytes);
+
+          // Use the size recorded at allocation time so the running byte total stays consistent.
+          bytes = allocated_bytes;
+        }
+      }
     }
     allocated_bytes_ -= bytes;
   }