
Make device_buffer streams explicit and enforce move construction #8280

Merged
2 changes: 1 addition & 1 deletion cpp/benchmarks/iterator/iterator_benchmark.cu
@@ -50,7 +50,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite
nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init);

// Allocate temporary storage
- rmm::device_buffer d_temp_storage(temp_storage_bytes);
+ rmm::device_buffer d_temp_storage(temp_storage_bytes, rmm::cuda_stream_default);

// Run reduction
cub::DeviceReduce::Reduce(
9 changes: 6 additions & 3 deletions cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu
@@ -16,6 +16,7 @@

#include "../fixture/benchmark_fixture.hpp"
#include "../synchronization/synchronization.hpp"
#include "rmm/device_buffer.hpp"

#include <cudf_test/column_wrapper.hpp>

@@ -186,10 +187,12 @@ void type_dispatcher_benchmark(::benchmark::State& state)
cudf::mutable_table_view source_table{source_columns};

// For no dispatching
- std::vector<rmm::device_buffer> h_vec(n_cols,
-   rmm::device_buffer(source_size * sizeof(TypeParam)));
+ std::vector<rmm::device_buffer> h_vec(n_cols);
  std::vector<TypeParam*> h_vec_p(n_cols);
- for (int c = 0; c < n_cols; c++) { h_vec_p[c] = static_cast<TypeParam*>(h_vec[c].data()); }
+ std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) {
+   col.resize(source_size * sizeof(TypeParam), rmm::cuda_stream_default);
+   return static_cast<TypeParam*>(col.data());
+ });
rmm::device_uvector<TypeParam*> d_vec(n_cols, rmm::cuda_stream_default);

if (dispatching_type == NO_DISPATCHING) {
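The benchmark change above is forced by the stream-explicit API: the removed fill-constructor copy-constructs `rmm::device_buffer` without a stream, which is no longer possible, so the buffers are now default-constructed and grown with `resize()` on an explicit stream. Below is a minimal, self-contained sketch of that pattern (not part of this diff; the element type `int` and the sizes are placeholders rather than the benchmark's parameters).

```c++
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <algorithm>
#include <cstddef>
#include <vector>

int main()
{
  std::size_t const n_cols      = 8;
  std::size_t const source_size = 1024;

  // Default-construct empty buffers; filling the vector with copies of a prototype
  // buffer would now require device_buffer's stream-taking copy constructor.
  std::vector<rmm::device_buffer> h_vec(n_cols);
  std::vector<int*> h_vec_p(n_cols);

  // Grow each buffer on an explicit stream and record its device pointer.
  std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& buf) {
    buf.resize(source_size * sizeof(int), rmm::cuda_stream_default);
    return static_cast<int*>(buf.data());
  });

  return 0;
}
```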
16 changes: 10 additions & 6 deletions cpp/docs/DEVELOPER_GUIDE.md
@@ -414,21 +414,25 @@ Allocates a specified number of bytes of untyped, uninitialized device memory using a
`device_memory_resource`. If no resource is explicitly provided, uses
`rmm::mr::get_current_device_resource()`.

- `rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s
- device memory, whereas a move moves ownership of the device memory from one `device_buffer` to
- another.
+ `rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the
+ `device_buffer`'s device memory on the specified stream, whereas a move transfers ownership of
+ the device memory from one `device_buffer` to another.

```c++
// Allocates at least 100 bytes of uninitialized device memory
// using the specified resource and stream
rmm::device_buffer buff(100, stream, mr);
void * raw_data = buff.data(); // Raw pointer to underlying device memory

- rmm::device_buffer copy(buff); // Deep copies `buff` into `copy`
- rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to`
+ // Deep copies `buff` into `copy` on `stream`
+ rmm::device_buffer copy(buff, stream);
+
+ // Moves contents of `buff` into `moved_to`
+ rmm::device_buffer moved_to(std::move(buff));

custom_memory_resource *mr...;
- rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource
+ // Allocates 100 bytes from the custom_memory_resource
+ rmm::device_buffer custom_buff(100, stream, mr);
```

#### `rmm::device_scalar<T>`
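A minimal sketch of the semantics described in the updated guide text (not part of this diff, and assuming only the stream-explicit constructors shown above): a deep copy is enqueued on the stream you pass, so that stream must be synchronized before the copy is consumed elsewhere, while a move transfers ownership without any device work.

```c++
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

#include <utility>

void example(rmm::cuda_stream_view stream)
{
  rmm::device_buffer buff(100, stream);          // allocation ordered on `stream`
  rmm::device_buffer copy(buff, stream);         // deep copy, enqueued on `stream`
  rmm::device_buffer moved_to(std::move(buff));  // move: no stream needed, no device work

  stream.synchronize();  // make sure the deep copy has completed before using `copy`
                         // on another stream or from the host
}
```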
34 changes: 15 additions & 19 deletions cpp/include/cudf/column/column.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -49,13 +49,6 @@ class column {
column& operator=(column const& other) = delete;
column& operator=(column&& other) = delete;

- /**
-  * @brief Construct a new column by deep copying the contents of `other`.
-  *
-  * @param other The column to copy
-  */
- column(column const& other);

/**
* @brief Construct a new column object by deep copying the contents of
*`other`.
@@ -68,7 +61,7 @@
* @param mr Device memory resource to use for all device memory allocations
*/
column(column const& other,
-        rmm::cuda_stream_view stream,
+        rmm::cuda_stream_view stream = rmm::cuda_stream_view{},
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
@@ -165,18 +158,21 @@
/**
* @brief Sets the column's null value indicator bitmask to `new_null_mask`.
*
-  * @throws cudf::logic_error if new_null_count is larger than 0 and the size
-  * of `new_null_mask` does not match the size of this column.
-  *
-  * @param new_null_mask New null value indicator bitmask (lvalue overload &
-  * copied) to set the column's null value indicator mask. May be empty if
-  * `new_null_count` is 0 or `UNKOWN_NULL_COUNT`.
-  * @param new_null_count Optional, the count of null elements. If unknown,
-  * specify `UNKNOWN_NULL_COUNT` to indicate that the null count should be
-  * computed on the first invocation of `null_count()`.
+  * @throws cudf::logic_error if new_null_count is larger than 0 and the size of `new_null_mask`
+  * does not match the size of this column.
+  *
+  * @param new_null_mask New null value indicator bitmask (lvalue overload & copied) to set the
+  * column's null value indicator mask. May be empty if `new_null_count` is 0 or
+  * `UNKNOWN_NULL_COUNT`.
+  * @param new_null_count Optional, the count of null elements. If unknown, specify
+  * `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on the first invocation
+  * of `null_count()`.
+  * @param stream The stream on which to perform the allocation and copy. Uses the default CUDA
+  * stream if none is specified.
*/
void set_null_mask(rmm::device_buffer const& new_null_mask,
-                    size_type new_null_count = UNKNOWN_NULL_COUNT);
+                    size_type new_null_count = UNKNOWN_NULL_COUNT,
+                    rmm::cuda_stream_view stream = rmm::cuda_stream_view{});

/**
* @brief Updates the count of null elements.
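A brief usage sketch of the two declarations changed above (not part of this diff): both the deep-copy constructor and `set_null_mask()` now accept an explicit stream. The column factory and `create_null_mask()` calls are illustrative assumptions about the surrounding cudf API, not part of this PR.

```c++
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>

void example(rmm::cuda_stream_view stream)
{
  // Build a small INT32 column without a null mask (assumed factory signature).
  auto col = cudf::make_numeric_column(
    cudf::data_type{cudf::type_id::INT32}, 1000, cudf::mask_state::UNALLOCATED, stream);

  // Attach an all-null bitmask; the copy of `mask` happens on `stream`
  // via the new stream parameter of set_null_mask().
  rmm::device_buffer mask = cudf::create_null_mask(col->size(), cudf::mask_state::ALL_NULL);
  col->set_null_mask(mask, col->size(), stream);

  // Deep-copy the column on an explicit stream; the plain copy constructor was removed.
  cudf::column col_copy(*col, stream);
}
```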
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
@@ -337,7 +337,7 @@ struct list_child_constructor {
auto const num_child_rows{
cudf::detail::get_value<size_type>(list_offsets, list_offsets.size() - 1, stream)};

- auto const child_null_mask =
+ auto child_null_mask =
source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable()
? construct_child_nullmask(
list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr)
@@ -354,7 +354,7 @@

auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(),
num_child_rows,
-   child_null_mask.first,
+   std::move(child_null_mask.first),
child_null_mask.second,
stream,
mr);
@@ -652,7 +652,7 @@ struct list_child_constructor {
std::make_unique<column>(structs_list_offsets, stream, mr),
std::make_unique<column>(structs_member, stream, mr),
structs_list_null_count,
-   rmm::device_buffer(structs_list_nullmask),
+   rmm::device_buffer(structs_list_nullmask, stream),
stream,
mr);
};