From 35bdff288f2f9aa6d1e975fce6665897d056f6c2 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 12 May 2021 11:37:55 +1000 Subject: [PATCH 1/7] Make streams explicit in device_buffer --- include/rmm/device_buffer.hpp | 51 ++++----- include/rmm/device_scalar.hpp | 4 +- tests/cuda_stream_tests.cpp | 4 +- tests/device_buffer_tests.cu | 196 ++++++++++++++++++++++------------ 4 files changed, 159 insertions(+), 96 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 97095f3b2..61457274c 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -95,11 +95,11 @@ class device_buffer { * @param mr Memory resource to use for the device memory allocation. */ explicit device_buffer(std::size_t size, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate(size); + allocate_async(size, stream); } /** @@ -118,12 +118,12 @@ class device_buffer { */ device_buffer(void const* source_data, std::size_t size, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate(size); - copy(source_data, size); + allocate_async(size, stream); + copy_async(source_data, size, stream); } /** @@ -143,7 +143,7 @@ class device_buffer { * @param mr The resource to use for allocating the new `device_buffer` */ device_buffer(device_buffer const& other, - cuda_stream_view stream = cuda_stream_view{}, + cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) : device_buffer{other.data(), other.size(), stream, mr} { @@ -203,14 +203,14 @@ class device_buffer { // compatible, just reuse the existing memory if ((capacity() > other.size()) and _mr->is_equal(*other._mr)) { resize(other.size(), other.stream()); - copy(other.data(), other.size()); + copy_async(other.data(), other.size(), other.stream()); } else { // Otherwise, need to deallocate and allocate new memory - deallocate(); + deallocate_async(stream()); set_stream(other.stream()); _mr = other._mr; - allocate(other.size()); - copy(other.data(), other.size()); + allocate_async(other.size(), stream()); + copy_async(other.data(), other.size(), stream()); } } return *this; @@ -231,7 +231,7 @@ class device_buffer { device_buffer& operator=(device_buffer&& other) noexcept { if (&other != this) { - deallocate(); + deallocate_async(stream()); _data = other._data; _size = other._size; @@ -256,7 +256,7 @@ class device_buffer { */ ~device_buffer() noexcept { - deallocate(); + deallocate_async(stream()); _mr = nullptr; _stream = cuda_stream_view{}; } @@ -286,7 +286,7 @@ class device_buffer { * @param new_size The requested new size, in bytes * @param stream The stream to use for allocation and copy */ - void resize(std::size_t new_size, cuda_stream_view stream = cuda_stream_view{}) + void resize(std::size_t new_size, cuda_stream_view stream) { set_stream(stream); // If the requested size is smaller than the current capacity, just update @@ -297,7 +297,7 @@ class device_buffer { void* const new_data = _mr->allocate(new_size, this->stream()); RMM_CUDA_TRY( cudaMemcpyAsync(new_data, data(), size(), cudaMemcpyDefault, this->stream().value())); - deallocate(); + deallocate_async(stream); _data = new_data; _size = new_size; _capacity = new_size; @@ -317,7 +317,7 @@ class device_buffer { * * @param stream The stream on which the allocation and copy are performed */ - void shrink_to_fit(cuda_stream_view stream = cuda_stream_view{}) + void shrink_to_fit(cuda_stream_view stream) { set_stream(stream); if (size() != capacity()) { @@ -394,19 +394,18 @@ class device_buffer { ///< allocate/deallocate device memory /** - * @brief Allocates the specified amount of memory and updates the - * size/capacity accordingly. + * @brief Allocates the specified amount of memory and updates the size/capacity accordingly. * * If `bytes == 0`, sets `_data = nullptr`. * * @param bytes The amount of memory to allocate - * @param stream The stream on which to allocate + * @param stream The stream on which to allocate. Not synchronized. */ - void allocate(std::size_t bytes) + void allocate_async(std::size_t bytes, cuda_stream_view stream) { _size = bytes; _capacity = bytes; - _data = (bytes > 0) ? _mr->allocate(bytes, stream()) : nullptr; + _data = (bytes > 0) ? _mr->allocate(bytes, stream) : nullptr; } /** @@ -416,10 +415,11 @@ class device_buffer { * If the buffer doesn't hold any memory, i.e., `capacity() == 0`, doesn't * call the resource deallocation. * + * @param stream The stream on which to deallocate. Not synchronized. */ - void deallocate() noexcept + void deallocate_async(cuda_stream_view stream) noexcept { - if (capacity() > 0) { _mr->deallocate(data(), capacity(), stream()); } + if (capacity() > 0) { _mr->deallocate(data(), capacity(), stream); } _size = 0; _capacity = 0; _data = nullptr; @@ -436,13 +436,14 @@ class device_buffer { * * @param source The pointer to copy from * @param bytes The number of bytes to copy + * @param stream The stream on which to perform the copy. Not synchronized. */ - void copy(void const* source, std::size_t bytes) + void copy_async(void const* source, std::size_t bytes, cuda_stream_view stream) { if (bytes > 0) { RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr."); - RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value())); + RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream.value())); } } }; diff --git a/include/rmm/device_scalar.hpp b/include/rmm/device_scalar.hpp index c54fc7937..40a7b43bc 100644 --- a/include/rmm/device_scalar.hpp +++ b/include/rmm/device_scalar.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -314,7 +314,7 @@ class device_scalar { device_scalar &operator=(device_scalar &&) = delete; private: - rmm::device_buffer buffer{sizeof(T)}; + rmm::device_buffer buffer{sizeof(T), cuda_stream_default}; inline void _memcpy(void *dst, const void *src, cuda_stream_view stream) const { diff --git a/tests/cuda_stream_tests.cpp b/tests/cuda_stream_tests.cpp index 59ac07f3d..55e3185fe 100644 --- a/tests/cuda_stream_tests.cpp +++ b/tests/cuda_stream_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ TEST_F(CudaStreamTest, Equality) EXPECT_NE(view_a, rmm::cuda_stream()); EXPECT_NE(stream_a, rmm::cuda_stream()); - rmm::device_buffer buff(0); + rmm::device_buffer buff{}; EXPECT_EQ(buff.stream(), view_default); } diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 95ea23a93..28e0836c2 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -51,7 +53,7 @@ TYPED_TEST_CASE(DeviceBufferTest, resources); TYPED_TEST(DeviceBufferTest, DefaultMemoryResource) { - rmm::device_buffer buff(this->size); + rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); @@ -97,32 +99,36 @@ TYPED_TEST(DeviceBufferTest, CopyFromRawDevicePointer) { void *device_memory{nullptr}; EXPECT_EQ(cudaSuccess, cudaMalloc(&device_memory, this->size)); - rmm::device_buffer buff(device_memory, this->size); + rmm::device_buffer buff(device_memory, this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + // TODO check for equality between the contents of the two allocations + buff.stream().synchronize(); EXPECT_EQ(cudaSuccess, cudaFree(device_memory)); } TYPED_TEST(DeviceBufferTest, CopyFromRawHostPointer) { std::vector host_data(this->size); - rmm::device_buffer buff(static_cast(host_data.data()), this->size); + rmm::device_buffer buff( + static_cast(host_data.data()), this->size, rmm::cuda_stream_view{}); EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(this->size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); EXPECT_EQ(rmm::mr::get_current_device_resource(), buff.memory_resource()); EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + buff.stream().synchronize(); // TODO check for equality between the contents of the two allocations } TYPED_TEST(DeviceBufferTest, CopyFromNullptr) { // can copy from a nullptr only if size == 0 - rmm::device_buffer buff(nullptr, 0); + rmm::device_buffer buff(nullptr, 0, rmm::cuda_stream_view{}); EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); @@ -133,7 +139,7 @@ TYPED_TEST(DeviceBufferTest, CopyFromNullptr) TYPED_TEST(DeviceBufferTest, CopyFromNullptrNonZero) { // can copy from a nullptr only if size == 0 - EXPECT_THROW(rmm::device_buffer buff(nullptr, 1), rmm::logic_error); + EXPECT_THROW(rmm::device_buffer buff(nullptr, 1, rmm::cuda_stream_view{}), rmm::logic_error); } TYPED_TEST(DeviceBufferTest, CopyConstructor) @@ -141,12 +147,12 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); // Initialize buffer - thrust::sequence(thrust::device, + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), static_cast(buff.data()) + buff.size(), 0); - rmm::device_buffer buff_copy(buff); // uses default stream and MR + rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); // uses default MR EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); EXPECT_EQ(buff.size(), buff_copy.size()); @@ -155,7 +161,7 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource())); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); - EXPECT_TRUE(thrust::equal(thrust::device, + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), static_cast(buff.data()), static_cast(buff.data()) + buff.size(), static_cast(buff_copy.data()))); @@ -166,25 +172,25 @@ TYPED_TEST(DeviceBufferTest, CopyConstructor) EXPECT_TRUE(buff_copy2.memory_resource()->is_equal(*buff.memory_resource())); EXPECT_EQ(buff_copy2.stream(), buff.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); // Resizing smaller to make `size()` < `capacity()` auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); - rmm::device_buffer buff_copy(buff); + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); + rmm::device_buffer buff_copy(buff, rmm::cuda_stream_default); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); EXPECT_EQ(buff.size(), buff_copy.size()); @@ -195,19 +201,20 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSize) EXPECT_TRUE(buff_copy.memory_resource()->is_equal(*rmm::mr::get_current_device_resource())); EXPECT_EQ(buff_copy.stream(), rmm::cuda_stream_view{}); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); @@ -217,24 +224,24 @@ TYPED_TEST(DeviceBufferTest, CopyConstructorExplicitMr) EXPECT_TRUE(buff.memory_resource()->is_equal(*buff_copy.memory_resource())); EXPECT_NE(buff.stream(), buff_copy.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); // Resizing smaller to make `size()` < `capacity()` auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); - // Can't do this until RMM cmake is setup to build cuda files - // thrust::sequence(thrust::device, static_cast(buff.data()), - // static_cast(buffer.data()) + buff.size(), - // 0); + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); rmm::device_buffer buff_copy(buff, this->stream, &this->mr); EXPECT_NE(nullptr, buff_copy.data()); EXPECT_NE(buff.data(), buff_copy.data()); @@ -247,15 +254,21 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) EXPECT_TRUE(buff.memory_resource()->is_equal(*buff_copy.memory_resource())); EXPECT_NE(buff.stream(), buff_copy.stream()); - // EXPECT_TRUE( - // thrust::equal(thrust::device, static_cast(buff.data()), - // static_cast(buff.data()) + buff.size(), - // static_cast(buff_copy.data()))); + EXPECT_TRUE(thrust::equal(rmm::exec_policy(buff_copy.stream()), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(buff_copy.data()))); } TYPED_TEST(DeviceBufferTest, CopyAssignmentToDefault) { - rmm::device_buffer const from(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(from.data()), + static_cast(from.data()) + from.size(), + 0); + rmm::device_buffer to{}; EXPECT_NO_THROW(to = from); EXPECT_NE(nullptr, to.data()); @@ -265,13 +278,29 @@ TYPED_TEST(DeviceBufferTest, CopyAssignmentToDefault) EXPECT_EQ(from.capacity(), to.capacity()); EXPECT_EQ(from.stream(), to.stream()); EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory + + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(from.data()), + static_cast(from.data()) + from.size(), + static_cast(to.data()))); } TYPED_TEST(DeviceBufferTest, CopyAssignment) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(from.data()), + static_cast(from.data()) + from.size(), + 0); + + rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(to.data()), + static_cast(to.data()) + to.size(), + 100); + EXPECT_NO_THROW(to = from); EXPECT_NE(nullptr, to.data()); EXPECT_NE(nullptr, from.data()); @@ -280,14 +309,31 @@ TYPED_TEST(DeviceBufferTest, CopyAssignment) EXPECT_EQ(from.capacity(), to.capacity()); EXPECT_EQ(from.stream(), to.stream()); EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory + + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(to.data()), + static_cast(to.data()) + to.size(), + static_cast(from.data()))); } TYPED_TEST(DeviceBufferTest, CopyAssignmentCapacityLargerThanSize) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); - from.resize(from.size() - 1); - rmm::device_buffer to(42, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(from.data()), + static_cast(from.data()) + from.size(), + 0); + + from.resize(from.size() - 1, rmm::cuda_stream_default); + + rmm::device_buffer to(42, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(to.data()), + static_cast(to.data()) + to.size(), + 100); + EXPECT_NO_THROW(to = from); EXPECT_NE(nullptr, to.data()); EXPECT_NE(nullptr, from.data()); @@ -297,12 +343,16 @@ TYPED_TEST(DeviceBufferTest, CopyAssignmentCapacityLargerThanSize) to.capacity()); // copy doesn't copy the larger capacity EXPECT_EQ(from.stream(), to.stream()); EXPECT_EQ(from.memory_resource(), to.memory_resource()); - // TODO Check contents of memory + + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(to.data()), + static_cast(to.data()) + to.size(), + static_cast(from.data()))); } TYPED_TEST(DeviceBufferTest, SelfCopyAssignment) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto p = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); @@ -320,7 +370,7 @@ TYPED_TEST(DeviceBufferTest, SelfCopyAssignment) TYPED_TEST(DeviceBufferTest, MoveConstructor) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto p = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); @@ -340,7 +390,7 @@ TYPED_TEST(DeviceBufferTest, MoveConstructor) EXPECT_EQ(nullptr, buff.data()); EXPECT_EQ(0, buff.size()); EXPECT_EQ(0, buff.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, buff.stream()); + EXPECT_EQ(rmm::cuda_stream_default, buff.stream()); EXPECT_NE(nullptr, buff.memory_resource()); } @@ -374,7 +424,7 @@ TYPED_TEST(DeviceBufferTest, MoveConstructorStream) TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); auto p = from.data(); auto size = from.size(); auto capacity = from.capacity(); @@ -396,20 +446,20 @@ TYPED_TEST(DeviceBufferTest, MoveAssignmentToDefault) EXPECT_EQ(nullptr, from.data()); EXPECT_EQ(0, from.size()); EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, from.stream()); + EXPECT_EQ(rmm::cuda_stream_default, from.stream()); EXPECT_NE(nullptr, from.memory_resource()); } TYPED_TEST(DeviceBufferTest, MoveAssignment) { - rmm::device_buffer from(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); auto p = from.data(); auto size = from.size(); auto capacity = from.capacity(); auto mr = from.memory_resource(); auto stream = from.stream(); - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); EXPECT_NO_THROW(to = std::move(from)); // contents of `from` should be in `to` @@ -424,13 +474,13 @@ TYPED_TEST(DeviceBufferTest, MoveAssignment) EXPECT_EQ(nullptr, from.data()); EXPECT_EQ(0, from.size()); EXPECT_EQ(0, from.capacity()); - EXPECT_EQ(rmm::cuda_stream_view{}, from.stream()); + EXPECT_EQ(rmm::cuda_stream_default, from.stream()); EXPECT_NE(nullptr, from.memory_resource()); } TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto p = buff.data(); auto size = buff.size(); auto capacity = buff.capacity(); @@ -448,31 +498,43 @@ TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) TYPED_TEST(DeviceBufferTest, ResizeSmaller) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); + + thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + 0); + auto old_data = buff.data(); + rmm::device_buffer old_content( + old_data, buff.size(), rmm::cuda_stream_default, &this->mr); // for comparison + auto new_size = this->size - 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(this->size, buff.capacity()); // Capacity should be unchanged // Resizing smaller means the existing allocation should remain unchanged EXPECT_EQ(old_data, buff.data()); - EXPECT_NO_THROW(buff.shrink_to_fit()); + EXPECT_NO_THROW(buff.shrink_to_fit(rmm::cuda_stream_default)); EXPECT_NE(nullptr, buff.data()); // A reallocation should have occured EXPECT_NE(old_data, buff.data()); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(buff.capacity(), buff.size()); - // TODO Verify device memory contents are equal + EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), + static_cast(buff.data()), + static_cast(buff.data()) + buff.size(), + static_cast(old_content.data()))); } TYPED_TEST(DeviceBufferTest, ResizeBigger) { - rmm::device_buffer buff(this->size, rmm::cuda_stream_view{}, &this->mr); + rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); auto old_data = buff.data(); auto new_size = this->size + 1; - buff.resize(new_size); + buff.resize(new_size, rmm::cuda_stream_default); EXPECT_EQ(new_size, buff.size()); EXPECT_EQ(new_size, buff.capacity()); // Resizing bigger means the data should point to a new allocation From d7ef4bceec3cb390f1dc3b4dc0474f1df9549913 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 18 May 2021 13:43:44 +1000 Subject: [PATCH 2/7] Delete device_buffer copy constructor and copy assignment operator --- include/rmm/device_buffer.hpp | 46 +++++------------------------------ 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 61457274c..0afc8f6e6 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -72,6 +72,11 @@ namespace rmm { */ class device_buffer { public: + // The copy constructor and copy assignment operator without a stream are deleted because they + // provide no way to specify an explicit stream + device_buffer(device_buffer const& other) = delete; + device_buffer& operator=(device_buffer const& other) = delete; + /** * @brief Default constructor creates an empty `device_buffer` */ @@ -175,46 +180,7 @@ class device_buffer { other.set_stream(cuda_stream_view{}); } - /** - * @brief Copies the contents of `other` into this `device_buffer`. - * - * All operations on the data in this `device_buffer` on all streams must be - * complete before using this operator, otherwise behavior is undefined. - * - * If the existing capacity is large enough, and the memory resources are - * compatible, then this `device_buffer`'s existing memory will be reused and - * `other`s contents will simply be copied on `other.stream()`. I.e., if - * `capcity() > other.size()` and - * `memory_resource()->is_equal(*other.memory_resource())`. - * - * Otherwise, the existing memory will be deallocated using - * `memory_resource()` on `stream()` and new memory will be allocated using - * `other.memory_resource()` on `other.stream()`. - * - * @throws rmm::bad_alloc if allocation fails - * @throws rmm::cuda_error if the copy from `other` fails - * - * @param other The `device_buffer` to copy. - */ - device_buffer& operator=(device_buffer const& other) - { - if (&other != this) { - // If the current capacity is large enough and the resources are - // compatible, just reuse the existing memory - if ((capacity() > other.size()) and _mr->is_equal(*other._mr)) { - resize(other.size(), other.stream()); - copy_async(other.data(), other.size(), other.stream()); - } else { - // Otherwise, need to deallocate and allocate new memory - deallocate_async(stream()); - set_stream(other.stream()); - _mr = other._mr; - allocate_async(other.size(), stream()); - copy_async(other.data(), other.size(), stream()); - } - } - return *this; - } + device_buffer& operator=(device_buffer const& other) = delete; /** * @brief Move assignment operator moves the contents from `other`. From ac74cc4eea00eaaab3c18cebae7283b4688882c3 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Tue, 18 May 2021 13:50:20 +1000 Subject: [PATCH 3/7] Remove copy assignment tests --- include/rmm/device_buffer.hpp | 2 - tests/device_buffer_tests.cu | 110 +--------------------------------- 2 files changed, 1 insertion(+), 111 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 0afc8f6e6..b1d82c04c 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -180,8 +180,6 @@ class device_buffer { other.set_stream(cuda_stream_view{}); } - device_buffer& operator=(device_buffer const& other) = delete; - /** * @brief Move assignment operator moves the contents from `other`. * diff --git a/tests/device_buffer_tests.cu b/tests/device_buffer_tests.cu index 28e0836c2..448c9259d 100644 --- a/tests/device_buffer_tests.cu +++ b/tests/device_buffer_tests.cu @@ -260,114 +260,6 @@ TYPED_TEST(DeviceBufferTest, CopyCapacityLargerThanSizeExplicitMr) static_cast(buff_copy.data()))); } -TYPED_TEST(DeviceBufferTest, CopyAssignmentToDefault) -{ - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - - thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(from.data()), - static_cast(from.data()) + from.size(), - 0); - - rmm::device_buffer to{}; - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_EQ(from.capacity(), to.capacity()); - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - - EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(from.data()), - static_cast(from.data()) + from.size(), - static_cast(to.data()))); -} - -TYPED_TEST(DeviceBufferTest, CopyAssignment) -{ - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - - thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(from.data()), - static_cast(from.data()) + from.size(), - 0); - - rmm::device_buffer to(this->size - 1, rmm::cuda_stream_default, &this->mr); - - thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(to.data()), - static_cast(to.data()) + to.size(), - 100); - - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_EQ(from.capacity(), to.capacity()); - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - - EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(to.data()), - static_cast(to.data()) + to.size(), - static_cast(from.data()))); -} - -TYPED_TEST(DeviceBufferTest, CopyAssignmentCapacityLargerThanSize) -{ - rmm::device_buffer from(this->size, rmm::cuda_stream_default, &this->mr); - - thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(from.data()), - static_cast(from.data()) + from.size(), - 0); - - from.resize(from.size() - 1, rmm::cuda_stream_default); - - rmm::device_buffer to(42, rmm::cuda_stream_default, &this->mr); - - thrust::sequence(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(to.data()), - static_cast(to.data()) + to.size(), - 100); - - EXPECT_NO_THROW(to = from); - EXPECT_NE(nullptr, to.data()); - EXPECT_NE(nullptr, from.data()); - EXPECT_NE(from.data(), to.data()); - EXPECT_EQ(from.size(), to.size()); - EXPECT_NE(from.capacity(), - to.capacity()); // copy doesn't copy the larger capacity - EXPECT_EQ(from.stream(), to.stream()); - EXPECT_EQ(from.memory_resource(), to.memory_resource()); - - EXPECT_TRUE(thrust::equal(rmm::exec_policy(rmm::cuda_stream_default), - static_cast(to.data()), - static_cast(to.data()) + to.size(), - static_cast(from.data()))); -} - -TYPED_TEST(DeviceBufferTest, SelfCopyAssignment) -{ - rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); - auto p = buff.data(); - auto size = buff.size(); - auto capacity = buff.capacity(); - auto mr = buff.memory_resource(); - auto stream = buff.stream(); - - buff = buff; // self-assignment shouldn't modify the buffer - EXPECT_NE(nullptr, buff.data()); - EXPECT_EQ(p, buff.data()); - EXPECT_EQ(size, buff.size()); - EXPECT_EQ(capacity, buff.capacity()); - EXPECT_EQ(stream, buff.stream()); - EXPECT_EQ(mr, buff.memory_resource()); -} - TYPED_TEST(DeviceBufferTest, MoveConstructor) { rmm::device_buffer buff(this->size, rmm::cuda_stream_default, &this->mr); @@ -487,7 +379,7 @@ TYPED_TEST(DeviceBufferTest, SelfMoveAssignment) auto mr = buff.memory_resource(); auto stream = buff.stream(); - buff = buff; // self-assignment shouldn't modify the buffer + buff = std::move(buff); // self-move-assignment shouldn't modify the buffer EXPECT_NE(nullptr, buff.data()); EXPECT_EQ(p, buff.data()); EXPECT_EQ(size, buff.size()); From 688a93957b3e37d8cb128f7c8ebc5c7b4658a362 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 19 May 2021 12:02:58 +1000 Subject: [PATCH 4/7] A bit more consistency and a move example --- include/rmm/device_buffer.hpp | 38 +++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index b1d82c04c..69ee6b4e1 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -50,8 +50,12 @@ namespace rmm { * cuda_stream_view stream = cuda_stream_view{}; * device_buffer custom_buff(100, stream, &mr); * - * // deep copies `buff` into a new device buffer using the default stream - * device_buffer buff_copy(buff); + * // deep copies `buff` into a new device buffer using the specified stream + * device_buffer buff_copy(buff, stream); + * + * // moves the memory in `from_buff` to `to_buff`. Deallocates previously allocated + * // to_buff memory on `to_buff.stream()`. + * device_buffer to_buff(std::move(from_buff)); * * // deep copies `buff` into a new device buffer using the specified stream * device_buffer buff_copy(buff, stream); @@ -104,7 +108,7 @@ class device_buffer { mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate_async(size, stream); + allocate_async(size); } /** @@ -127,8 +131,8 @@ class device_buffer { mr::device_memory_resource* mr = mr::get_current_device_resource()) : _stream{stream}, _mr{mr} { - allocate_async(size, stream); - copy_async(source_data, size, stream); + allocate_async(size); + copy_async(source_data, size); } /** @@ -195,7 +199,7 @@ class device_buffer { device_buffer& operator=(device_buffer&& other) noexcept { if (&other != this) { - deallocate_async(stream()); + deallocate_async(); _data = other._data; _size = other._size; @@ -220,7 +224,7 @@ class device_buffer { */ ~device_buffer() noexcept { - deallocate_async(stream()); + deallocate_async(); _mr = nullptr; _stream = cuda_stream_view{}; } @@ -261,7 +265,7 @@ class device_buffer { void* const new_data = _mr->allocate(new_size, this->stream()); RMM_CUDA_TRY( cudaMemcpyAsync(new_data, data(), size(), cudaMemcpyDefault, this->stream().value())); - deallocate_async(stream); + deallocate_async(); _data = new_data; _size = new_size; _capacity = new_size; @@ -360,16 +364,17 @@ class device_buffer { /** * @brief Allocates the specified amount of memory and updates the size/capacity accordingly. * + * Allocates on `stream()` using the memory resource passed to the constructor. + * * If `bytes == 0`, sets `_data = nullptr`. * * @param bytes The amount of memory to allocate - * @param stream The stream on which to allocate. Not synchronized. */ - void allocate_async(std::size_t bytes, cuda_stream_view stream) + void allocate_async(std::size_t bytes) { _size = bytes; _capacity = bytes; - _data = (bytes > 0) ? _mr->allocate(bytes, stream) : nullptr; + _data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr; } /** @@ -379,11 +384,11 @@ class device_buffer { * If the buffer doesn't hold any memory, i.e., `capacity() == 0`, doesn't * call the resource deallocation. * - * @param stream The stream on which to deallocate. Not synchronized. + * Deallocates on `stream()` using the memory resource passed to the constructor. */ - void deallocate_async(cuda_stream_view stream) noexcept + void deallocate_async() noexcept { - if (capacity() > 0) { _mr->deallocate(data(), capacity(), stream); } + if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); } _size = 0; _capacity = 0; _data = nullptr; @@ -400,14 +405,13 @@ class device_buffer { * * @param source The pointer to copy from * @param bytes The number of bytes to copy - * @param stream The stream on which to perform the copy. Not synchronized. */ - void copy_async(void const* source, std::size_t bytes, cuda_stream_view stream) + void copy_async(void const* source, std::size_t bytes) { if (bytes > 0) { RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr."); - RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream.value())); + RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value())); } } }; From 314c94e78c302101b0c11c4eb3cd28edcf1f876b Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 19 May 2021 13:38:19 +1000 Subject: [PATCH 5/7] Improve comment --- include/rmm/device_buffer.hpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/rmm/device_buffer.hpp b/include/rmm/device_buffer.hpp index 69ee6b4e1..300f62ae6 100644 --- a/include/rmm/device_buffer.hpp +++ b/include/rmm/device_buffer.hpp @@ -66,11 +66,9 @@ namespace rmm { * // Default construction. Buffer is empty * device_buffer buff_default{}; * - * // If the requested size is larger than the current size, resizes allocation - * // to the new size and deep copies any previous contents. Otherwise, simply - * // updates the value of `size()` to the newly requested size without any - * // allocations or copies. Uses the optionally specified stream or the default - * // stream if none specified. + * // If the requested size is larger than the current size, resizes allocation to the new size and + * // deep copies any previous contents. Otherwise, simply updates the value of `size()` to the + * // newly requested size without any allocations or copies. Uses the specified stream. * buff_default.resize(100, stream); *``` */ From df69121f175cda2f0b5e6a35dffc541d7ed61dda Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Wed, 19 May 2021 17:49:46 +1000 Subject: [PATCH 6/7] Fix Cython --- python/rmm/_lib/device_buffer.pxd | 9 +++------ python/rmm/_lib/device_buffer.pyx | 4 ++-- python/rmm/_lib/tests/test_device_buffer.pyx | 5 ++++- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pxd b/python/rmm/_lib/device_buffer.pxd index 3b6b4face..635b1ed8a 100644 --- a/python/rmm/_lib/device_buffer.pxd +++ b/python/rmm/_lib/device_buffer.pxd @@ -23,14 +23,11 @@ from rmm._lib.memory_resource cimport DeviceMemoryResource cdef extern from "rmm/device_buffer.hpp" namespace "rmm" nogil: cdef cppclass device_buffer: device_buffer() - device_buffer(size_t size) except + device_buffer(size_t size, cuda_stream_view stream) except + - device_buffer(const void* source_data, size_t size) except + device_buffer(const void* source_data, size_t size, cuda_stream_view stream) except + - device_buffer(const device_buffer& other) except + - void resize(size_t new_size) except + - void shrink_to_fit() except + + void resize(size_t new_size, cuda_stream_view stream) except + + void shrink_to_fit(cuda_stream_view stream) except + void* data() size_t size() size_t capacity() @@ -60,7 +57,7 @@ cdef class DeviceBuffer: cpdef bytes tobytes(self, Stream stream=*) cdef size_t c_size(self) except * - cpdef void resize(self, size_t new_size) except * + cpdef void resize(self, size_t new_size, Stream stream=*) except * cpdef size_t capacity(self) except * cdef void* c_data(self) except * diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 3401b4802..5b9a42960 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -284,8 +284,8 @@ cdef class DeviceBuffer: cdef size_t c_size(self) except *: return self.c_obj.get()[0].size() - cpdef void resize(self, size_t new_size) except *: - self.c_obj.get()[0].resize(new_size) + cpdef void resize(self, size_t new_size, Stream stream=DEFAULT_STREAM) except *: + self.c_obj.get()[0].resize(new_size, stream.view()) cpdef size_t capacity(self) except *: return self.c_obj.get()[0].capacity() diff --git a/python/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/_lib/tests/test_device_buffer.pyx index d346e8ed0..372b92a24 100644 --- a/python/rmm/_lib/tests/test_device_buffer.pyx +++ b/python/rmm/_lib/tests/test_device_buffer.pyx @@ -18,14 +18,17 @@ import numpy as np from libcpp.memory cimport make_unique from libcpp.utility cimport move +from rmm._lib.cuda_stream_view cimport cuda_stream_default + from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer def test_release(): expect = DeviceBuffer.to_device(b'abc') cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') + got = DeviceBuffer.c_from_unique_ptr( - make_unique[device_buffer](buf.c_release()) + make_unique[device_buffer](buf.c_release(), cuda_stream_default.value()) ) np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host()) From 77f8f695539516a92f88395e9db72a3937b2b303 Mon Sep 17 00:00:00 2001 From: Mark Harris Date: Thu, 20 May 2021 09:01:58 +1000 Subject: [PATCH 7/7] Python style --- python/rmm/_lib/device_buffer.pyx | 4 +++- python/rmm/_lib/tests/test_device_buffer.pyx | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/rmm/_lib/device_buffer.pyx b/python/rmm/_lib/device_buffer.pyx index 5b9a42960..cbe0bdb33 100644 --- a/python/rmm/_lib/device_buffer.pyx +++ b/python/rmm/_lib/device_buffer.pyx @@ -284,7 +284,9 @@ cdef class DeviceBuffer: cdef size_t c_size(self) except *: return self.c_obj.get()[0].size() - cpdef void resize(self, size_t new_size, Stream stream=DEFAULT_STREAM) except *: + cpdef void resize(self, + size_t new_size, + Stream stream=DEFAULT_STREAM) except *: self.c_obj.get()[0].resize(new_size, stream.view()) cpdef size_t capacity(self) except *: diff --git a/python/rmm/_lib/tests/test_device_buffer.pyx b/python/rmm/_lib/tests/test_device_buffer.pyx index 372b92a24..c4f5e5ae3 100644 --- a/python/rmm/_lib/tests/test_device_buffer.pyx +++ b/python/rmm/_lib/tests/test_device_buffer.pyx @@ -19,7 +19,6 @@ from libcpp.memory cimport make_unique from libcpp.utility cimport move from rmm._lib.cuda_stream_view cimport cuda_stream_default - from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer @@ -28,7 +27,8 @@ def test_release(): cdef DeviceBuffer buf = DeviceBuffer.to_device(b'abc') got = DeviceBuffer.c_from_unique_ptr( - make_unique[device_buffer](buf.c_release(), cuda_stream_default.value()) + make_unique[device_buffer](buf.c_release(), + cuda_stream_default.value()) ) np.testing.assert_equal(expect.copy_to_host(), got.copy_to_host())