Skip to content

Commit

Permalink
Tests for RNN seq API (#2493)
Browse files Browse the repository at this point in the history
  • Loading branch information
shurale-nkn authored Dec 12, 2023
1 parent f4864aa commit a64e600
Show file tree
Hide file tree
Showing 14 changed files with 7,239 additions and 1,652 deletions.
2 changes: 1 addition & 1 deletion driver/rnn_seq_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ int RNNSeqDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
}

// Unless the seed is persistent between runs, validation against the cache stored in a file is impossible.
srand(0);
prng::reset_seed();

auto fill_array_via_gen = [](auto& dst, size_t dst_sz, double range_l, double range_r) {
for(size_t it = 0; it < dst_sz; it++)
Expand Down
180 changes: 101 additions & 79 deletions src/ocl/rnnocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,26 @@ void RNNDescriptor::RNNForwardTraining_MS(Handle& handle,
const std::vector<size_t> hcy_dst_stride{
static_cast<size_t>(hidden_size * max_batch), static_cast<size_t>(hidden_size), 1};

if(in_n.at(0) < max_batch)
{
float beta = 0.;
const std::vector<size_t> zero_set_size{1,
static_cast<size_t>(max_batch - in_n.at(0)),
static_cast<size_t>(hidden_size)};
auto set_batch_offset = in_n.at(0) * hidden_size;

auto set_desc =
miopen::TensorDescriptor(wDesc.GetType(), zero_set_size, hcy_dst_stride);
if(hy != nullptr)
{
SetTensor(handle, set_desc, hy, &beta, hcy_layer_offset + set_batch_offset);
}
if(cy != nullptr)
{
SetTensor(handle, set_desc, cy, &beta, hcy_layer_offset + set_batch_offset);
}
}

for(int time_i = seq_len - 1; time_i >= 0; time_i--)
{
auto copy_batch = (time_i == seq_len - 1) ? in_n.at(time_i)
Expand Down Expand Up @@ -2879,86 +2899,89 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors(
}
else
{
sp_size[1] = batch_n - in_n.at(0);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
w_size[1] = 1;
w_size[2] = wei_len;
w_desc = miopen::TensorDescriptor(wDesc.GetType(), w_size, w_stride);
if(batch_n - in_n.at(0) > 0)
{
sp_size[1] = batch_n - in_n.at(0);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
w_size[1] = 1;
w_size[2] = wei_len;
w_desc = miopen::TensorDescriptor(wDesc.GetType(), w_size, w_stride);

OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + in_n.at(0) * hy_stride,
wei_shift_bias_temp,
hid_shift + in_n.at(0) * hy_stride,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + in_n.at(0) * hy_stride,
wei_shift_bias_temp,
hid_shift + in_n.at(0) * hy_stride,
true);
// Update time
profileRNNkernels(handle, 1, ctime);

if(dirMode != 0u)
{
if(in_n.at(0) == in_n.at(seqLen - 1))
{
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + wei_len,
wei_shift_bias_temp + wei_len,
hid_shift + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
else
if(dirMode != 0u)
{
int cur_batch = 0;
for(int ti = 0; ti < seqLen; ti++)
if(in_n.at(0) == in_n.at(seqLen - 1))
{
if(ti != (seqLen - 1))
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
hid_shift + wei_len,
wei_shift_bias_temp + wei_len,
hid_shift + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
else
{
int cur_batch = 0;
for(int ti = 0; ti < seqLen; ti++)
{
offset = hid_shift + cur_batch * hy_stride;
if(ti != (seqLen - 1))
{
offset = hid_shift + cur_batch * hy_stride;

sp_size[1] = in_n.at(ti + 1);
sp_size[2] = wei_len;
sp_desc =
miopen::TensorDescriptor(wDesc.GetType(), sp_size, sp_stride);
sp_size[1] = in_n.at(ti + 1);
sp_size[2] = wei_len;
sp_desc = miopen::TensorDescriptor(
wDesc.GetType(), sp_size, sp_stride);

OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
static_cast<int>(offset) + wei_len,
wei_shift_bias_temp + wei_len,
static_cast<int>(offset) + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
OpTensor(handle,
miopenTensorOpAdd,
&alpha0,
sp_desc,
reserveSpace,
&alpha1,
w_desc,
w,
&beta_t,
sp_desc,
reserveSpace,
static_cast<int>(offset) + wei_len,
wei_shift_bias_temp + wei_len,
static_cast<int>(offset) + wei_len,
true);
// Update time
profileRNNkernels(handle, 1, ctime);
}
cur_batch += in_n.at(ti);
}
cur_batch += in_n.at(ti);
}
}
}
Expand Down Expand Up @@ -5374,18 +5397,17 @@ void RNNDescriptor::RNNBackwardDataPackedTensors(
// dinput
if(inputMode == miopenRNNskip)
{
sp_size[1] = batch_n;
sp_size[2] = hy_h;
x_size[1] = batch_n;
x_size[2] = hy_h;
x_desc = miopen::TensorDescriptor(rnn_data_type, x_size, x_stride);
sp_desc = miopen::TensorDescriptor(rnn_data_type, sp_size, sp_stride);
const std::vector<int> dx_size{1, batch_n, hy_h};
x_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, x_stride);
sp_desc = miopen::TensorDescriptor(rnn_data_type, dx_size, sp_stride);

alpha0 = 1;
alpha1 = 1;
beta_t = 0;

for(int gi = 0; gi < nHiddenTensorsPerLayer * bi; gi++)
CopyTensor(handle, sp_desc, workSpace, x_desc, dx, 0, 0, true);
profileRNNkernels(handle, 1, ctime);
for(int gi = 1; gi < nHiddenTensorsPerLayer * bi; gi++)
{
OpTensor(handle,
miopenTensorOpAdd,
Expand Down
4 changes: 4 additions & 0 deletions src/rnn/rnn_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ void RNNTensorBaseLayoutConverter::ChangeTensorGPUDataPadding(
const std::vector<size_t> packed_stride =
get_packed_stride(copy_size, tensor_desc.GetLayoutVector());

// Nothing to copy; skip to avoid an error from zero-sized lens in the TensorDescriptor constructor.
if(!std::all_of(copy_size.cbegin(), copy_size.cend(), [](size_t x) { return x > 0; }))
continue;

const auto packed_desc =
miopen::TensorDescriptor(tensor_desc.GetType(), copy_size, packed_stride);
const auto padded_desc =
Expand Down
31 changes: 20 additions & 11 deletions src/seq_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ SeqTensorDescriptor::SeqTensorDescriptor(miopenDataType_t t,
: SeqTensorDescriptor(t,
layout_in,
ConvertLengthsOrThrow(lens_in, "Lengths must be > 0"),
ConvertLengthsOrThrow(seq_len, "SequenceLengths must be >= 0"),
ConvertLengthsOrThrow(seq_len, "SequenceLengths must be >= 0", true),
{},
padding_marker_in,
use_seq_len,
Expand Down Expand Up @@ -429,22 +429,31 @@ std::vector<size_t> SeqTensorDescriptor::GetBatchesPerSequence() const
}
else
{
batches.reserve(sequence_len[0]);
auto block_begin = sequence_len.rbegin();
auto sample_ptr = sequence_len.rbegin();
auto batch_size = sequence_len.size();

batches.insert(batches.end(), *block_begin, batch_size);
while(block_begin != sequence_len.rend() && *block_begin == 0)
++block_begin;

while(sample_ptr != sequence_len.rend())
if(block_begin != sequence_len.rend())
{
if(*sample_ptr != *block_begin)
auto sample_ptr = block_begin;
auto batch_size = sequence_len.rend() - block_begin;

batches.insert(batches.end(), *block_begin, batch_size);

while(sample_ptr != sequence_len.rend())
{
batch_size = batch_size - (sample_ptr - block_begin);
const auto seq_count = *sample_ptr - *block_begin;
batches.insert(batches.end(), seq_count, batch_size);
block_begin = sample_ptr;
if(*sample_ptr != *block_begin)
{
batch_size = batch_size - (sample_ptr - block_begin);
const auto seq_count = *sample_ptr - *block_begin;
batches.insert(batches.end(), seq_count, batch_size);

block_begin = sample_ptr;
}
sample_ptr++;
}
sample_ptr++;
}
}
return batches;
Expand Down
Loading

0 comments on commit a64e600

Please sign in to comment.