Batch of fixes for index overflows in grid stride loops. #10448

Merged: 10 commits, Mar 23, 2022
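The changes below all follow the same recipe: the per-thread index of a grid-stride loop is widened from the 32-bit signed cudf::size_type to std::size_t so that blockIdx.x * blockDim.x can no longer overflow on very large inputs. As a rough before/after sketch (illustrative kernels with invented names, not code from this PR):

#include <cstddef>

// Before (sketch): the 32-bit index overflows once blockIdx.x * blockDim.x
// exceeds INT_MAX, so the bounds check no longer protects the access.
__global__ void scale_overflowing(int n, float a, float* data)
{
  int i      = threadIdx.x + blockIdx.x * blockDim.x;  // can overflow
  int stride = blockDim.x * gridDim.x;
  while (i < n) {
    data[i] *= a;
    i += stride;
  }
}

// After (sketch): a 64-bit index, with the block/grid products forced into
// 64-bit arithmetic, stays valid for element counts well past 2^31.
__global__ void scale_fixed(std::size_t n, float a, float* data)
{
  std::size_t i      = std::size_t{blockIdx.x} * blockDim.x + threadIdx.x;
  std::size_t stride = std::size_t{blockDim.x} * gridDim.x;
  while (i < n) {
    data[i] *= a;
    i += stride;
  }
}

Where the widened index later feeds signed arithmetic (the rolling-window kernels below), it is cast back to a signed type first.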
cpp/include/cudf/detail/valid_if.cuh (2 changes: 1 addition & 1 deletion)

@@ -48,7 +48,7 @@ __global__ void valid_if_kernel(
 {
   constexpr size_type leader_lane{0};
   auto const lane_id{threadIdx.x % warp_size};
-  size_type i = threadIdx.x + blockIdx.x * blockDim.x;
+  std::size_t i = threadIdx.x + blockIdx.x * blockDim.x;
   size_type warp_valid_count{0};

   auto active_mask = __ballot_sync(0xFFFF'FFFF, i < size);
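Several of the kernels touched here (valid_if, replace_nulls) pair the grid-stride loop with a warp ballot, so the widened index also has to keep the lanes' loop exits consistent. A standalone sketch of that pattern with a 64-bit index (count_positive is a made-up kernel, not the cudf one):

#include <cstddef>

// Illustrative warp-ballot grid-stride count. Every lane re-ballots with the
// set of lanes still inside the loop, and the leader lane (lane 0, which has
// the smallest index in its warp and therefore iterates at least as long as
// its warp-mates) publishes the warp's total.
__global__ void count_positive(float const* data, std::size_t size, int* count)
{
  std::size_t i      = std::size_t{blockIdx.x} * blockDim.x + threadIdx.x;
  std::size_t stride = std::size_t{blockDim.x} * gridDim.x;
  int warp_count     = 0;

  auto active_mask = __ballot_sync(0xFFFF'FFFF, i < size);
  while (i < size) {
    warp_count += __popc(__ballot_sync(active_mask, data[i] > 0.0f));
    i += stride;
    active_mask = __ballot_sync(active_mask, i < size);
  }
  if (threadIdx.x % 32 == 0) { atomicAdd(count, warp_count); }
}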
cpp/src/copying/scatter.cu (11 changes: 8 additions & 3 deletions)

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

@@ -47,7 +47,7 @@ __global__ void marking_bitmask_kernel(mutable_column_device_view destination,
                                        MapIterator scatter_map,
                                        size_type num_scatter_rows)
 {
-  size_type row = threadIdx.x + blockIdx.x * blockDim.x;
+  std::size_t row = threadIdx.x + blockIdx.x * blockDim.x;

   while (row < num_scatter_rows) {
     size_type const output_row = scatter_map[row];

@@ -351,8 +351,13 @@ std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>>

   // Transform negative indices to index + target size
   auto scatter_rows = indices.size();
+  // note: the intermediate ((in % n_rows) + n_rows) will overflow a size_type for any value of `in`
+  // > (2^31)/2, but the end result after the final (% n_rows) will fit. so we'll do the computation
+  // using a signed 64 bit value.
   auto scatter_iter = thrust::make_transform_iterator(
-    map_begin, [n_rows] __device__(size_type in) { return ((in % n_rows) + n_rows) % n_rows; });
+    map_begin, [n_rows = static_cast<int64_t>(n_rows)] __device__(size_type in) -> size_type {
+      return ((static_cast<int64_t>(in) % n_rows) + n_rows) % n_rows;
+    });

   // Dispatch over data type per column
   auto result = std::vector<std::unique_ptr<column>>(target.num_columns());
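The comment added above is easiest to see with concrete numbers. A host-side sketch of the same wrap-around logic (wrap_index is a made-up helper, not a cudf function):

#include <cassert>
#include <cstdint>

// Negative scatter indices count from the end of the target, so -1 maps to
// n_rows - 1. The intermediate (in % n_rows) + n_rows can exceed INT32_MAX
// when n_rows is close to 2^31, which is why the arithmetic is widened to
// int64_t before the final % n_rows brings the value back into [0, n_rows).
int32_t wrap_index(int32_t in, int32_t n_rows_narrow)
{
  auto const n_rows = static_cast<int64_t>(n_rows_narrow);
  return static_cast<int32_t>(((in % n_rows) + n_rows) % n_rows);
}

int main()
{
  assert(wrap_index(3, 10) == 3);                      // non-negative: unchanged
  assert(wrap_index(-1, 10) == 9);                     // last row
  assert(wrap_index(-1, INT32_MAX) == INT32_MAX - 1);  // no intermediate overflow
  return 0;
}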
cpp/src/replace/nulls.cu (5 changes: 3 additions & 2 deletions)

@@ -63,7 +63,7 @@ __global__ void replace_nulls_strings(cudf::column_device_view input,
                                       cudf::size_type* valid_counter)
 {
   cudf::size_type nrows = input.size();
-  cudf::size_type i = blockIdx.x * blockDim.x + threadIdx.x;
+  std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;

   uint32_t active_mask = 0xffffffff;
   active_mask = __ballot_sync(active_mask, i < nrows);

@@ -115,7 +115,7 @@ __global__ void replace_nulls(cudf::column_device_view input,
                              cudf::size_type* output_valid_count)
 {
   cudf::size_type nrows = input.size();
-  cudf::size_type i = blockIdx.x * blockDim.x + threadIdx.x;
+  std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;

   uint32_t active_mask = 0xffffffff;
   active_mask = __ballot_sync(active_mask, i < nrows);

@@ -247,6 +247,7 @@ std::unique_ptr<cudf::column> replace_nulls_column_kernel_forwarder::operator()<

   std::unique_ptr<cudf::column> offsets = cudf::strings::detail::make_offsets_child_column(
     sizes_view.begin<int32_t>(), sizes_view.end<int32_t>(), stream, mr);
+
   auto offsets_view = offsets->mutable_view();

   auto const bytes =
cpp/src/rolling/jit/kernel.cu (6 changes: 3 additions & 3 deletions)

@@ -51,7 +51,7 @@ __global__ void gpu_rolling_new(cudf::size_type nrows,
                                 FollowingWindowType following_window_begin,
                                 cudf::size_type min_periods)
 {
-  cudf::size_type i = blockIdx.x * blockDim.x + threadIdx.x;
+  std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
   cudf::size_type stride = blockDim.x * gridDim.x;

   cudf::size_type warp_valid_count{0};

@@ -66,8 +66,8 @@ __global__ void gpu_rolling_new(cudf::size_type nrows,
   cudf::size_type following_window = get_window(following_window_begin, i);

   // compute bounds
-  cudf::size_type start = min(nrows, max(0, i - preceding_window + 1));
-  cudf::size_type end = min(nrows, max(0, i + following_window + 1));
+  cudf::size_type start = min(nrows, max(0, static_cast<size_type>(i) - preceding_window + 1));
+  cudf::size_type end = min(nrows, max(0, static_cast<size_type>(i) + following_window + 1));
   cudf::size_type start_index = min(start, end);
   cudf::size_type end_index = max(start, end);
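The cast back to size_type in the bounds above is what keeps max(0, ...) meaningful: with an unsigned 64-bit i, the expression i - preceding_window + 1 would wrap to a huge positive value instead of going negative. A small host-side illustration of that pitfall (standalone sketch, not cudf code):

#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
  std::size_t i        = 0;  // first row
  int preceding_window = 2;

  // Unsigned arithmetic wraps: 0 - 2 + 1 becomes a huge positive value,
  // so the clamp to zero never triggers.
  std::size_t wrong = std::max<std::size_t>(0, i - preceding_window + 1);

  // Casting the index back to a signed type keeps the intended clamp.
  long long ok = std::max<long long>(0, static_cast<long long>(i) - preceding_window + 1);

  std::printf("wrong = %zu, ok = %lld\n", wrong, ok);  // wrong = 18446744073709551615, ok = 0
  return 0;
}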
cpp/src/rolling/rolling_detail.cuh (12 changes: 7 additions & 5 deletions)

@@ -1008,7 +1008,7 @@ __launch_bounds__(block_size) __global__
                       PrecedingWindowIterator preceding_window_begin,
                       FollowingWindowIterator following_window_begin)
 {
-  size_type i = blockIdx.x * block_size + threadIdx.x;
+  std::size_t i = blockIdx.x * block_size + threadIdx.x;
   size_type stride = block_size * gridDim.x;

   size_type warp_valid_count{0};

@@ -1020,10 +1020,12 @@ __launch_bounds__(block_size) __global__
   int64_t following_window = following_window_begin[i];

   // compute bounds
-  auto start = static_cast<size_type>(
-    min(static_cast<int64_t>(input.size()), max(0L, i - preceding_window + 1)));
-  auto end = static_cast<size_type>(
-    min(static_cast<int64_t>(input.size()), max(0L, i + following_window + 1)));
+  auto start =
+    static_cast<size_type>(min(static_cast<int64_t>(input.size()),
+                               max(0L, static_cast<cudf::size_type>(i) - preceding_window + 1)));
+  auto end =
+    static_cast<size_type>(min(static_cast<int64_t>(input.size()),
+                               max(0L, static_cast<cudf::size_type>(i) + following_window + 1)));
   size_type start_index = min(start, end);
   size_type end_index = max(start, end);
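The rolling kernels do the window-bound arithmetic in int64_t and only narrow to size_type after clamping into [0, input.size()], so neither the negative intermediate nor a large index can overflow. A self-contained sketch of that clamp-and-narrow step (clamped_bound is a hypothetical helper, not the cudf function):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Compute a clamped window bound: widen everything to int64_t, clamp into
// [0, nrows], then narrow. The narrowing cast is safe because the clamped
// value is bounded by nrows, which itself fits in 32 bits.
int32_t clamped_bound(std::size_t row, int64_t window_offset, int32_t nrows)
{
  int64_t const raw     = static_cast<int64_t>(row) + window_offset + 1;
  int64_t const clamped = std::min<int64_t>(nrows, std::max<int64_t>(0, raw));
  return static_cast<int32_t>(clamped);
}

int main()
{
  assert(clamped_bound(0, -5, 100) == 0);     // window reaches before the start: clamp to 0
  assert(clamped_bound(99, +5, 100) == 100);  // window reaches past the end: clamp to nrows
  assert(clamped_bound(10, -3, 100) == 8);    // interior row: 10 - 3 + 1
  return 0;
}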
cpp/src/transform/compute_column.cu (2 changes: 1 addition & 1 deletion)

@@ -72,7 +72,7 @@ __launch_bounds__(max_block_size) __global__
   auto evaluator =
     cudf::ast::detail::expression_evaluator<has_nulls>(table, device_expression_data);

-  for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) {
+  for (std::size_t row_index = start_idx; row_index < table.num_rows(); row_index += stride) {
     auto output_dest = ast::detail::mutable_column_expression_result<has_nulls>(output_column);
     evaluator.evaluate(output_dest, row_index, thread_intermediate_storage);
   }
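One thing the widened loop index relies on here: comparing a std::size_t against the signed row count is safe only because that count is never negative, so its implicit conversion to the unsigned type cannot change the comparison's meaning. A tiny illustration of that conversion rule (standalone sketch, not cudf code):

#include <cstddef>
#include <cstdio>

int main()
{
  std::size_t row_index = 0;

  // A non-negative signed count converts to std::size_t without changing
  // the result of the comparison used by the grid-stride loop.
  int num_rows = 1000;
  std::printf("%d\n", row_index < static_cast<std::size_t>(num_rows));  // prints 1

  // With a negative signed operand the same conversion would be wrong:
  // -1 becomes SIZE_MAX, so an "empty" bound looks enormous.
  int negative = -1;
  std::printf("%d\n", row_index < static_cast<std::size_t>(negative));  // prints 1, not 0
  return 0;
}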