Explicit streams in device_buffer #775

Merged
Changes from 4 commits
95 changes: 32 additions & 63 deletions include/rmm/device_buffer.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -50,8 +50,12 @@ namespace rmm {
* cuda_stream_view stream = cuda_stream_view{};
* device_buffer custom_buff(100, stream, &mr);
*
* // deep copies `buff` into a new device buffer using the default stream
* device_buffer buff_copy(buff);
* // deep copies `buff` into a new device buffer using the specified stream
* device_buffer buff_copy(buff, stream);
*
* // moves the memory in `from_buff` to `to_buff`. Deallocates previously allocated
* // to_buff memory on `to_buff.stream()`.
* device_buffer to_buff(std::move(from_buff));
*
* // deep copies `buff` into a new device buffer using the specified stream
* device_buffer buff_copy(buff, stream);
@@ -72,6 +76,11 @@ namespace rmm {
*/
class device_buffer {
public:
// The copy constructor and copy assignment operator without a stream are deleted because they
// provide no way to specify an explicit stream
device_buffer(device_buffer const& other) = delete;
device_buffer& operator=(device_buffer const& other) = delete;

/**
* @brief Default constructor creates an empty `device_buffer`
*/
@@ -95,11 +104,11 @@ class device_buffer {
* @param mr Memory resource to use for the device memory allocation.
*/
explicit device_buffer(std::size_t size,
cuda_stream_view stream = cuda_stream_view{},
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
allocate(size);
allocate_async(size);
}

/**
@@ -118,12 +127,12 @@ class device_buffer {
*/
device_buffer(void const* source_data,
std::size_t size,
cuda_stream_view stream = cuda_stream_view{},
cuda_stream_view stream,
mr::device_memory_resource* mr = mr::get_current_device_resource())
: _stream{stream}, _mr{mr}
{
allocate(size);
copy(source_data, size);
allocate_async(size);
copy_async(source_data, size);
}

/**
@@ -143,7 +152,7 @@ class device_buffer {
* @param mr The resource to use for allocating the new `device_buffer`
*/
device_buffer(device_buffer const& other,
cuda_stream_view stream = cuda_stream_view{},
cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
: device_buffer{other.data(), other.size(), stream, mr}
{
@@ -175,47 +184,6 @@ class device_buffer {
other.set_stream(cuda_stream_view{});
}

/**
* @brief Copies the contents of `other` into this `device_buffer`.
*
* All operations on the data in this `device_buffer` on all streams must be
* complete before using this operator, otherwise behavior is undefined.
*
* If the existing capacity is large enough, and the memory resources are
* compatible, then this `device_buffer`'s existing memory will be reused and
* `other`s contents will simply be copied on `other.stream()`. I.e., if
* `capcity() > other.size()` and
* `memory_resource()->is_equal(*other.memory_resource())`.
*
* Otherwise, the existing memory will be deallocated using
* `memory_resource()` on `stream()` and new memory will be allocated using
* `other.memory_resource()` on `other.stream()`.
*
* @throws rmm::bad_alloc if allocation fails
* @throws rmm::cuda_error if the copy from `other` fails
*
* @param other The `device_buffer` to copy.
*/
device_buffer& operator=(device_buffer const& other)
{
if (&other != this) {
// If the current capacity is large enough and the resources are
// compatible, just reuse the existing memory
if ((capacity() > other.size()) and _mr->is_equal(*other._mr)) {
resize(other.size(), other.stream());
copy(other.data(), other.size());
} else {
// Otherwise, need to deallocate and allocate new memory
deallocate();
set_stream(other.stream());
_mr = other._mr;
allocate(other.size());
copy(other.data(), other.size());
}
}
return *this;
}

/**
* @brief Move assignment operator moves the contents from `other`.
*
@@ -231,7 +199,7 @@ class device_buffer {
device_buffer& operator=(device_buffer&& other) noexcept
{
if (&other != this) {
deallocate();
deallocate_async();

_data = other._data;
_size = other._size;
@@ -256,7 +224,7 @@ class device_buffer {
*/
~device_buffer() noexcept
{
deallocate();
deallocate_async();
_mr = nullptr;
_stream = cuda_stream_view{};
}
@@ -286,7 +254,7 @@ class device_buffer {
* @param new_size The requested new size, in bytes
* @param stream The stream to use for allocation and copy
*/
void resize(std::size_t new_size, cuda_stream_view stream = cuda_stream_view{})
void resize(std::size_t new_size, cuda_stream_view stream)
{
set_stream(stream);
// If the requested size is smaller than the current capacity, just update
@@ -297,7 +265,7 @@ class device_buffer {
void* const new_data = _mr->allocate(new_size, this->stream());
RMM_CUDA_TRY(
cudaMemcpyAsync(new_data, data(), size(), cudaMemcpyDefault, this->stream().value()));
deallocate();
deallocate_async();
_data = new_data;
_size = new_size;
_capacity = new_size;
@@ -317,7 +285,7 @@ class device_buffer {
*
* @param stream The stream on which the allocation and copy are performed
*/
void shrink_to_fit(cuda_stream_view stream = cuda_stream_view{})
void shrink_to_fit(cuda_stream_view stream)
{
set_stream(stream);
if (size() != capacity()) {
@@ -394,19 +362,19 @@ class device_buffer {
///< allocate/deallocate device memory

/**
* @brief Allocates the specified amount of memory and updates the
* size/capacity accordingly.
* @brief Allocates the specified amount of memory and updates the size/capacity accordingly.
*
* Allocates on `stream()` using the memory resource passed to the constructor.
*
* If `bytes == 0`, sets `_data = nullptr`.
*
* @param bytes The amount of memory to allocate
* @param stream The stream on which to allocate
*/
void allocate(std::size_t bytes)
void allocate_async(std::size_t bytes)
{
_size = bytes;
_capacity = bytes;
_data = (bytes > 0) ? _mr->allocate(bytes, stream()) : nullptr;
_data = (bytes > 0) ? memory_resource()->allocate(bytes, stream()) : nullptr;
}

/**
@@ -416,10 +384,11 @@ class device_buffer {
* If the buffer doesn't hold any memory, i.e., `capacity() == 0`, doesn't
* call the resource deallocation.
*
* Deallocates on `stream()` using the memory resource passed to the constructor.
*/
void deallocate() noexcept
void deallocate_async() noexcept
{
if (capacity() > 0) { _mr->deallocate(data(), capacity(), stream()); }
if (capacity() > 0) { memory_resource()->deallocate(data(), capacity(), stream()); }
_size = 0;
_capacity = 0;
_data = nullptr;
@@ -437,7 +406,7 @@ class device_buffer {
* @param source The pointer to copy from
* @param bytes The number of bytes to copy
*/
void copy(void const* source, std::size_t bytes)
void copy_async(void const* source, std::size_t bytes)
{
if (bytes > 0) {
RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
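A minimal caller-side sketch against the API as changed above; `stream`, `buff`, and `buff_copy` are placeholder names for illustration only and are not part of this diff:

```cpp
#include <rmm/cuda_stream.hpp>
#include <rmm/device_buffer.hpp>

int main()
{
  rmm::cuda_stream stream;  // owning, non-default CUDA stream

  // The size constructor now requires an explicit stream (no default argument).
  rmm::device_buffer buff(100, stream.view());

  // The stream-less copy constructor is deleted; deep copies take a stream.
  rmm::device_buffer buff_copy(buff, stream.view());

  // resize() and shrink_to_fit() likewise take the stream explicitly.
  buff.resize(200, stream.view());
  buff.shrink_to_fit(stream.view());

  return 0;
}
```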
4 changes: 2 additions & 2 deletions include/rmm/device_scalar.hpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -314,7 +314,7 @@ class device_scalar {
device_scalar &operator=(device_scalar &&) = delete;

private:
rmm::device_buffer buffer{sizeof(T)};
rmm::device_buffer buffer{sizeof(T), cuda_stream_default};

inline void _memcpy(void *dst, const void *src, cuda_stream_view stream) const
{
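A standalone sketch of the same construction used in the `device_scalar` member initializer above: with the stream default removed, the default stream must be named explicitly. The header choice, `make_scalar_storage`, and the `int` payload are assumptions for illustration:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

// Allocate sizeof(int) bytes on the default stream, mirroring
// `rmm::device_buffer buffer{sizeof(T), cuda_stream_default};`.
rmm::device_buffer make_scalar_storage()
{
  return rmm::device_buffer{sizeof(int), rmm::cuda_stream_default};
}
```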
4 changes: 2 additions & 2 deletions tests/cuda_stream_tests.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ TEST_F(CudaStreamTest, Equality)
EXPECT_NE(view_a, rmm::cuda_stream());
EXPECT_NE(stream_a, rmm::cuda_stream());

rmm::device_buffer buff(0);
rmm::device_buffer buff{};
EXPECT_EQ(buff.stream(), view_default);
}

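For reference, a sketch of the behavior the updated test asserts: a default-constructed buffer is empty and reports the default stream. The function name is illustrative only:

```cpp
#include <cassert>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

void check_default_stream()
{
  rmm::device_buffer buff{};  // empty buffer; no allocation performed
  assert(buff.stream() == rmm::cuda_stream_default);
}
```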