Skip to content

Commit

Permalink
Fix issue #1476
Browse files Browse the repository at this point in the history
  • Loading branch information
aska-0096 committed Apr 9, 2022
1 parent fb4590d commit 8e5fdfe
Show file tree
Hide file tree
Showing 27 changed files with 973 additions and 943 deletions.
22 changes: 20 additions & 2 deletions src/hip/general_tensor_reorder_sol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ static inline std::string GetKernelNameType(std::size_t type_size)
MIOPEN_THROW("data type not supported");
}

static inline std::string GetKernelFileName(std::size_t data_size,
const GeneralReorderParam* kparam)
{
if(kparam == nullptr)
MIOPEN_THROW("Memory access fault, kparam is a nullptr");
std::ostringstream kernel_file_name;
kernel_file_name << "general_tensor_reorder_" << kparam->tile_x << "x" << kparam->tile_y << "_";
kernel_file_name << GetKernelNameType(data_size) << ".cpp";
return kernel_file_name.str();
}

static inline std::string GetKernelName(std::size_t data_size,
uint32_t order_0,
uint32_t order_1,
Expand Down Expand Up @@ -140,9 +151,10 @@ solver::KernelInfo GenericReorderSolutionImpl::GetKernelInfo() const
(block_size * kernel_param_heuristic.tile_x);
std::size_t grid_size = dim_total;

std::string kernel_name = GetKernelName();
std::string kernel_name = GetKernelName();
std::string kernel_file_name = GetKernelFileName();
solver::KernelInfo kernel;
kernel.kernel_file = "general_tensor_reorder.cpp";
kernel.kernel_file = kernel_file_name;
kernel.kernel_name = kernel_name;
kernel.g_wk.clear();
kernel.g_wk.push_back(grid_size * block_size);
Expand Down Expand Up @@ -191,6 +203,12 @@ std::vector<OpKernelArg> GenericReorderSolutionImpl::GetKernelArg() const
return opArgs;
}

std::string GenericReorderSolutionImpl::GetKernelFileName() const
{
return tensor_reorder::GetKernelFileName(miopen::GetTypeSize(data_type),
&kernel_param_heuristic);
}

std::string GenericReorderSolutionImpl::GetKernelName() const
{
return tensor_reorder::GetKernelName(miopen::GetTypeSize(data_type),
Expand Down
1 change: 1 addition & 0 deletions src/include/miopen/general_tensor_reorder_sol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ struct GenericReorderSolutionImpl
// TODO batched transpose API
solver::KernelInfo GetKernelInfo() const;
std::vector<OpKernelArg> GetKernelArg() const;
std::string GetKernelFileName() const;
std::string GetKernelName() const;
bool IsSkippable() const;
size_t GetOutputTensorSize() const;
Expand Down
6 changes: 2 additions & 4 deletions src/include/miopen/tensor_reorder_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,9 @@ MakeTensorReorderAttributes(const ExecutionContext& ctx_,
uint32_t order_2_,
uint32_t order_3_)
{
std::unique_ptr<TensorReorderAttributesBase> default_ptr;
if(!ctx_.use_hip_kernels)
{
return default_ptr;
return nullptr;
}
// Default using general reorder
if(data_type_ == miopenBFloat16)
Expand Down Expand Up @@ -235,9 +234,8 @@ MakeTensorReorderAttributes(const ExecutionContext& ctx_,
case 5:
return std::make_unique<BatchedTransposeSolution_3012>(
ctx_, data_type_, dim_0_, dim_1_, dim_2_, dim_3_);
default: MIOPEN_THROW("Unsupported reorder sequence"); break;
default: MIOPEN_THROW("Unsupported reorder sequence"); return nullptr;
}
return default_ptr;
}

} // namespace miopen
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0132, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0213, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0231, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0312, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0321, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1023, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1032, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1203, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1230, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1302, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1320, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2013, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2031, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2103, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2130, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2301, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2310, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3012, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3021, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3102, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3120, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3201, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3210, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0132, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0213, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0231, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0312, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0321, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1023, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1032, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1203, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1230, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1302, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1320, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2013, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2031, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2103, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2130, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2301, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2310, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3012, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3021, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3102, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3120, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3201, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3210, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0132, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0213, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0231, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0312, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0321, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1023, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1032, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1203, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1230, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1302, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1320, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2013, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2031, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2103, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2130, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2301, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2310, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3012, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3021, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3102, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3120, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3201, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3210, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0132, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0213, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0231, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0312, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r0321, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1023, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1032, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1203, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1230, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1302, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r1320, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2013, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2031, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2103, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2130, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2301, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r2310, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3012, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3021, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3102, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3120, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3201, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(16x256, r3210, half, ushort, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0132, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0213, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0231, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0312, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0321, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1023, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1032, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1203, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1230, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1302, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1320, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2013, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2031, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2103, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2130, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2301, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2310, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3012, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3021, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3102, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3120, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3201, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3210, byte, uchar, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0132, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0213, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0231, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0312, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0321, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1023, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1032, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1203, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1230, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1302, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1320, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2013, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2031, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2103, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2130, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2301, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2310, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3012, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3021, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3102, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3120, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3201, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3210, dword, float, 256, TENSOR_REORDER_OCCUPANCY)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "general_tensor_reorder_kernel_util.hpp"
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0132, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0213, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0231, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0312, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r0321, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1023, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1032, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1203, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1230, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1302, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r1320, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2013, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2031, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2103, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2130, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2301, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r2310, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3012, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3021, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3102, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3120, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3201, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
DEFINE_GENERAL_4D_REORDER_KERNEL(1x256, r3210, dwordx2, double, 256, TENSOR_REORDER_OCCUPANCY)
Loading

0 comments on commit 8e5fdfe

Please sign in to comment.