Skip to content

Commit

Permalink
Merge pull request #5 from ROCmSoftwarePlatform/develop
Browse files Browse the repository at this point in the history
Merge the newest develop
  • Loading branch information
wenjh authored Mar 8, 2021
2 parents 6959e3e + b7630f7 commit 49bd201
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ pipeline {
steps{
script{
try{
buildHipClangJob('g++', flags: '-DCMAKE_BUILD_TYPE=release', image: image+'-gfxall', gpu_arch: "all")
buildHipClangJob('g++', flags: '-DCMAKE_BUILD_TYPE=release', image: image+'-gfxall', gpu_arch: "gfx900;gfx906;gfx908")
}
catch(e){
echo "throwing error exception for the stage"
Expand All @@ -1091,7 +1091,7 @@ pipeline {
steps{
script{
try{
buildHipClangJob('/opt/rocm/llvm/bin/clang++', flags: '-DCMAKE_BUILD_TYPE=release', image: image+'rocm-gfxall', prefixpath: '/opt/rocm', gpu_arch: "all")
buildHipClangJob('/opt/rocm/llvm/bin/clang++', flags: '-DCMAKE_BUILD_TYPE=release', image: image+'rocm-gfxall', prefixpath: '/opt/rocm', gpu_arch: "gfx900;gfx906;gfx908")
}
catch(e){
echo "throwing error exception for the stage"
Expand Down
4 changes: 2 additions & 2 deletions src/kernels/MIOpenBatchNormBwdSpatial.cl
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ MIOpenBatchNormBwdSpatial(const __global _FLOAT* __restrict x_in,
nidx = remkey / MIO_BN_HW;
hwidx = remkey - (nidx * MIO_BN_HW);
index = nidx * MIO_BN_CHW + chwid + hwidx;
if(index < MIO_BN_NCHW)
if(index < (MIO_BN_NCHW - 3))
{
read4 = *((const global _FLOAT4*)(x_in + index));
mean += (_FLOAT_PREC)read4.x;
Expand Down Expand Up @@ -429,7 +429,7 @@ MIOpenBatchNormBwdSpatial(const __global _FLOAT* __restrict x_in,
nidx = remkey / MIO_BN_HW;
hwidx = remkey - (nidx * MIO_BN_HW);
index = nidx * MIO_BN_CHW + chwid + hwidx;
if(index < MIO_BN_NCHW)
if(index < (MIO_BN_NCHW - 3))
{
xread4 = *((const global _FLOAT4*)(x_in + index));
dyRead4 = *((const global _FLOAT4*)(dy_in + index));
Expand Down
2 changes: 1 addition & 1 deletion src/kernels/MIOpenBatchNormFwdTrainSpatial.cl
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ MIOpenBatchNormFwdTrainSpatial(const __global _FLOAT* __restrict in,
nidx = remkey / MIO_BN_HW;
hwidx = remkey - (nidx * MIO_BN_HW);
index = nidx * MIO_BN_CHW + chwid + hwidx;
if(index < MIO_BN_NCHW)
if(index < (MIO_BN_NCHW - 3))
{
read4 = *((const global _FLOAT4*)(in + index));
mean += (_FLOAT_PREC)read4.x;
Expand Down
17 changes: 16 additions & 1 deletion src/ocl/convolutionocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@

#include <boost/range/adaptors.hpp>

/// MIOpenGEMM issues with ROCm 3.7, most likely related to the
/// issues in the OpenCL compiler. Not reproducible in ROCm 4.0.
#define WORKAROUND_MIOPENGEMM_ROCM37 \
(MIOPEN_USE_MIOPENGEMM && HIP_PACKAGE_VERSION_MAJOR == 3 && HIP_PACKAGE_VERSION_MINOR == 7)

namespace miopen {

MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM)
Expand Down Expand Up @@ -572,7 +577,17 @@ static void DirConvFindCore(Handle& handle,
}
// if not 1x1
else if(workSpace != nullptr &&
workSpaceSize >= (conv.ForwardGetWorkSpaceSizeGEMM(wDesc, yDesc)))
workSpaceSize >= (conv.ForwardGetWorkSpaceSizeGEMM(wDesc, yDesc))
#if WORKAROUND_MIOPENGEMM_ROCM37
&&
!(conv.GetSpatialDimension() == 2 && conv.group_count == 4 && in_c == 4 &&
in_spatial[0] == 161 && in_spatial[1] == 700 && wDesc.GetLengths()[0] == 32 &&
wDesc.GetLengths()[1] == 1 && wei_spatial[0] == 5 && wei_spatial[1] == 20 &&
miopen::all_of(conv.GetConvPads(), [](auto v) { return v == 0; }) &&
miopen::all_of(conv.GetConvStrides(), [](auto v) { return v == 2; }) &&
miopen::all_of(conv.GetConvDilations(), [](auto v) { return v == 1; }))
#endif
)
{
if(conv.group_count > 1)
{
Expand Down
42 changes: 42 additions & 0 deletions test/conv_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@

#define TEST_DIRECT_SUPPORTED_CONFIG_ONLY (!MIOPEN_USE_ROCBLAS && !MIOPEN_USE_MIOPENTENSILE)

#define WORKAROUND_MI100_ROM37_HIP_COMPILER_CRASH \
(HIP_PACKAGE_VERSION_MAJOR == 3 && HIP_PACKAGE_VERSION_MINOR == 7)

#if TEST_DIRECT_SUPPORTED_CONFIG_ONLY
static inline bool is_direct_fwd_bwd_data_supported(miopen::Handle& handle,
const miopen::ConvolutionDescriptor convDesc,
Expand Down Expand Up @@ -105,6 +108,36 @@ static inline bool is_direct_bwd_wrw_supported(miopen::Handle& handle,
}
#endif

#if WORKAROUND_MI100_ROM37_HIP_COMPILER_CRASH
static inline bool skip_config(miopen::Handle& handle,
const miopen::ConvolutionDescriptor convDesc,
const miopen::TensorDescriptor& xDesc,
const miopen::TensorDescriptor& wDesc,
const miopen::TensorDescriptor& yDesc)
{
if(convDesc.mode != miopenConvolution)
return false;

auto ctx =
miopen::ConvolutionContext{xDesc, wDesc, yDesc, convDesc, miopen::conv::Direction::Forward};

ctx.do_search = false;
ctx.save_srch_req = false;
ctx.general_compile_options = "";
ctx.disable_perfdb_access = true;
ctx.SetStream(&handle);
ctx.SetupFloats();
ctx.DetectRocm();

return ctx.GetStream().GetDeviceName() == "gfx908" && ctx.Is2d() && ctx.IsFp16() &&
ctx.IsLayoutDefault() && ctx.use_hip_kernels && ctx.group_counts == 1 &&
ctx.batch_sz == 1 && ctx.n_inputs == 192 && ctx.in_height == 28 && ctx.in_width == 28 &&
ctx.n_outputs == 1 && ctx.kernel_size_h == 3 && ctx.kernel_size_w == 3 &&
ctx.pad_w == 1 && ctx.pad_h == 1 && ctx.kernel_stride_w == 1 &&
ctx.kernel_stride_h == 1 && ctx.kernel_dilation_w == 1 && ctx.kernel_dilation_h == 1;
}
#endif

static inline bool is_gemm_workspace_valid(miopen::Handle& handle,
const miopen::ConvolutionDescriptor convDesc,
const miopen::TensorDescriptor& xDesc,
Expand Down Expand Up @@ -1825,6 +1858,15 @@ struct conv_driver : test_driver
}
#endif

#if WORKAROUND_MI100_ROM37_HIP_COMPILER_CRASH
if(skip_config(get_handle(), filter, input.desc, weights.desc, output.desc))
{
skip_forward = true;
skip_backward_data = true;
skip_backward_weights = true;
}
#endif

// bwd53 kernel (large images supported) doesnt support stride !=1 and dilation and
// pad.
if(filter.GetSpatialDimension() == 2 && in_spatial_len[1] >= 2048 &&
Expand Down

0 comments on commit 49bd201

Please sign in to comment.