
Merge branch 'develop' into wip-rocmtest-trial-vega10
# RESOLVED Conflicts:
#	Jenkinsfile
atamazov committed Apr 14, 2021
2 parents 9a642d4 + 6dc32fe commit 05c96e7
Showing 19 changed files with 299 additions and 40 deletions.
10 changes: 5 additions & 5 deletions Jenkinsfile
@@ -296,7 +296,7 @@ pipeline {
ulimit -c unlimited
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DBUILD_DEV=On -DCMAKE_BUILD_TYPE=debug -DMIOPEN_TEST_FLAGS=--disable-verification-cache ..
CTEST_PARALLEL_LEVEL=4 MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM=0 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 make -j\$(nproc) check
CTEST_PARALLEL_LEVEL=4 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 make -j\$(nproc) check
"""

}
@@ -342,7 +342,7 @@ pipeline {
ulimit -c unlimited
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DMIOPEN_USE_COMGR=On -DBUILD_DEV=On -DCMAKE_BUILD_TYPE=debug -DMIOPEN_TEST_FLAGS='--verbose --disable-verification-cache' ..
CTEST_PARALLEL_LEVEL=2 MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM=0 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 MIOPEN_LOG_LEVEL=5 make -j\$(nproc) check
CTEST_PARALLEL_LEVEL=2 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 MIOPEN_LOG_LEVEL=5 make -j\$(nproc) check
"""
}
steps{
@@ -368,7 +368,7 @@ pipeline {
ulimit -c unlimited
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DBUILD_DEV=On -DBUILD_SHARED_LIBS=Off -DCMAKE_BUILD_TYPE=release -DMIOPEN_TEST_FLAGS=--disable-verification-cache ..
CTEST_PARALLEL_LEVEL=4 MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM=0 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 make -j\$(nproc) check
CTEST_PARALLEL_LEVEL=4 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 make -j\$(nproc) check
"""
}
steps{
@@ -400,7 +400,7 @@ pipeline {
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DBUILD_DEV=On -DCMAKE_BUILD_TYPE=release ..
make -j test_conv2d
MIOPEN_FIND_MODE=normal CTEST_PARALLEL_LEVEL=4 MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM=0 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 bin/test_conv2d --disable-verification-cache
MIOPEN_FIND_MODE=normal CTEST_PARALLEL_LEVEL=4 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 bin/test_conv2d --disable-verification-cache
"""
}
steps{
@@ -427,7 +427,7 @@ pipeline {
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DBUILD_DEV=On -DCMAKE_BUILD_TYPE=release ..
make -j test_conv2d
MIOPEN_FIND_MODE=fast CTEST_PARALLEL_LEVEL=4 MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM=0 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 bin/test_conv2d --disable-verification-cache
MIOPEN_FIND_MODE=fast CTEST_PARALLEL_LEVEL=4 MIOPEN_CONV_PRECISE_ROCBLAS_TIMING=0 bin/test_conv2d --disable-verification-cache
"""
}
steps{
1 change: 1 addition & 0 deletions README.md
@@ -20,6 +20,7 @@ MIOpen supports two programming models -
* [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library
* [Boost](http://www.boost.org/) at version 1.72 (other versions are not supported)
* MIOpen uses `boost-system` and `boost-filesystem` packages to enable persistent [kernel cache](https://rocmsoftwareplatform.github.io/MIOpen/doc/html/cache.html)
* [SQLite3](https://sqlite.org/index.html) for reading and writing the performance database
* [MIOpenTENSILE](https://github.com/ROCmSoftwarePlatform/MIOpenTensile) Users can enable this library using the cmake configuration flag `-DMIOPEN_USE_MIOPENTENSILE=On`.
* [rocBlas](https://github.com/ROCmSoftwarePlatform/rocBLAS)
* Minimum version branch for pre-ROCm 3.5 [master-rocm-2.10](https://github.com/ROCmSoftwarePlatform/rocBLAS/tree/master-rocm-2.10)
8 changes: 8 additions & 0 deletions driver/lrn_driver.hpp
@@ -250,6 +250,14 @@ int LRNDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
{
scale = std::vector<Tgpu>(workSpaceNbVal, static_cast<Tgpu>(0));
scalehost = std::vector<Tref>(workSpaceNbVal, static_cast<Tref>(0));
if(inflags.GetValueInt("forw") == 2)
{
for(int i = 0; i < scale.size(); i++)
{
scale[i] = RAN_GEN<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
scalehost[i] = Tref(scale[i]);
}
}
}
din = std::vector<Tgpu>(in_sz, static_cast<Tgpu>(0));
dout = std::vector<Tgpu>(out_sz, static_cast<Tgpu>(0));
6 changes: 3 additions & 3 deletions driver/tensor_driver.hpp
@@ -31,9 +31,9 @@
#include <miopen/miopen.h>
#include <miopen/tensor.hpp>
#include <miopen/tensor_extra.hpp>
#include <miopen/tensor_layout.hpp>
#include <numeric>
#include <vector>
#include "../test/tensor_layout.hpp"

std::vector<int> GetTensorLengths(miopenTensorDescriptor_t& tensor)
{
@@ -125,14 +125,14 @@ int SetTensorNd(miopenTensorDescriptor_t t,
}

// Dimension lengths vector 'len' comes with a default layout.
std::string len_layout = tensor_layout_get_default(layout.size());
std::string len_layout = miopen::tensor_layout_get_default(layout.size());
if(len_layout.empty())
{
return SetTensorNd(t, len, data_type);
}

std::vector<int> strides;
tensor_layout_to_strides(len, len_layout, layout, strides);
miopen::tensor_layout_to_strides(len, len_layout, layout, strides);

return miopenSetTensorDescriptor(t, data_type, len.size(), len.data(), strides.data());
}
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -153,6 +153,7 @@ set( MIOpen_Source
include/miopen/mlir_build.hpp
include/miopen/oclkernel.hpp
include/miopen/tensor.hpp
include/miopen/tensor_layout.hpp
include/miopen/tensor_ops.hpp
include/miopen/pooling.hpp
include/miopen/lrn.hpp
24 changes: 24 additions & 0 deletions src/conv/problem_description.cpp
@@ -28,6 +28,7 @@

#include <miopen/conv/data_invoke_params.hpp>
#include <miopen/conv/wrw_invoke_params.hpp>
#include <miopen/tensor_layout.hpp>

#include <sstream>

@@ -59,6 +60,29 @@ std::ostream& operator<<(std::ostream& stream, std::function<void(std::ostream&)
return stream;
}

void ProblemDescription::HeuristicUpdateLayouts()
{
const std::string labels = tensor_layout_get_default(in_layout.size());

static const std::vector<std::string> supported_layouts = {"NCHW", "NHWC", "NCDHW"};
for(const std::string& layout : supported_layouts)
{
// Skip layouts that don't match the number of dimensions
if(layout.size() != labels.size())
continue;

if(in.IsPossibleLayout(labels, layout) && out.IsPossibleLayout(labels, layout) &&
weights.IsPossibleLayout(labels, layout))
{
in_layout = layout;
weights_layout = layout;
out_layout = layout;
return;
}
}
// If we did not find a consistent layout, leave them as-is
}

void ProblemDescription::BuildConfKey(std::string& conf_key) const
{
std::ostringstream ss;
26 changes: 22 additions & 4 deletions src/convolution.cpp
@@ -36,6 +36,7 @@
#include <miopen/mlo_internal.hpp>
#include <miopen/solver.hpp>
#include <miopen/tensor.hpp>
#include <miopen/tensor_layout.hpp>
#include <miopen/algorithm.hpp>

#include <cassert>
@@ -140,9 +141,11 @@ const std::vector<int>& ConvolutionDescriptor::GetTransposeConvPads() const

int ConvolutionDescriptor::GetGroupCount() const { return group_count; }

TensorDescriptor ConvolutionDescriptor::GetForwardOutputTensor(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
miopenDataType_t yType) const
TensorDescriptor
ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
const std::string& yLayout,
miopenDataType_t yType) const
{
const std::size_t spatial_dim = GetSpatialDimension();

@@ -255,10 +258,25 @@ TensorDescriptor ConvolutionDescriptor::GetForwardOutputTensor(const TensorDescr
out_lens[0] = in_n;
out_lens[1] = out_c;

const std::string default_layout = tensor_layout_get_default(xDesc.GetSize());
std::vector<std::size_t> out_strides;
tensor_layout_to_strides(out_lens, default_layout, yLayout, out_strides);

return TensorDescriptor((xDesc.GetType() == miopenInt8 || xDesc.GetType() == miopenInt8x4
? (yType == miopenInt32 ? yType : miopenFloat)
: xDesc.GetType()),
out_lens);
out_lens,
out_strides);
}

TensorDescriptor ConvolutionDescriptor::GetForwardOutputTensor(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
miopenDataType_t yType) const
{
// output layout same as input
const std::string default_layout = tensor_layout_get_default(xDesc.GetSize());
const std::string in_layout = xDesc.GetLayout(default_layout);
return GetForwardOutputTensorWithLayout(xDesc, wDesc, in_layout, yType);
}

/// There is assumption that if Winograd is applicable and granularity loss is low, then there is no
4 changes: 4 additions & 0 deletions src/include/miopen/conv/problem_description.hpp
@@ -144,6 +144,7 @@ struct ProblemDescription
direction(direction_),
bias(bias_)
{
HeuristicUpdateLayouts();
}

// Conv descriptor getters
@@ -303,6 +304,9 @@ struct ProblemDescription
}

bool IsLayoutDefault() const;

void HeuristicUpdateLayouts();

void BuildConfKey(std::string& conf_key) const;

NetworkConfig BuildConfKey() const
5 changes: 5 additions & 0 deletions src/include/miopen/convolution.hpp
@@ -99,6 +99,11 @@ struct ConvolutionDescriptor : miopenConvolutionDescriptor

int GetGroupCount() const;

TensorDescriptor GetForwardOutputTensorWithLayout(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
const std::string& yLayout,
miopenDataType_t yType = miopenFloat) const;

TensorDescriptor GetForwardOutputTensor(const TensorDescriptor& xDesc,
const TensorDescriptor& wDesc,
miopenDataType_t yType = miopenFloat) const;
17 changes: 11 additions & 6 deletions src/include/miopen/tensor.hpp
@@ -32,6 +32,7 @@
#include <miopen/each_args.hpp>
#include <miopen/returns.hpp>
#include <miopen/errors.hpp>
#include <miopen/functional.hpp>

#include <algorithm>
#include <cassert>
@@ -175,13 +176,17 @@ struct TensorDescriptor : miopenTensorDescriptor

std::string ToString() const;

template <class Vector, class Op>
static inline std::vector<int64_t> sort_permutation(const Vector& data, Op op)
bool IsPossibleLayout(const std::string& labels, const std::string& layout) const;

static inline std::vector<int64_t> find_permutation(const std::vector<std::size_t>& lens,
const std::vector<std::size_t>& strides)
{
std::vector<std::int64_t> result(data.size());
std::vector<std::int64_t> result(lens.size());
std::iota(result.begin(), result.end(), 0);
std::sort(
result.begin(), result.end(), [&](auto x, auto y) { return op(data[x], data[y]); });
std::stable_sort(
result.begin(),
result.end(),
by(std::greater<>{}, [&](auto x) { return std::make_tuple(strides[x], lens[x]); }));
return result;
}

@@ -196,7 +201,7 @@ struct TensorDescriptor : miopenTensorDescriptor
// Copy construct the result string from labels. This allocates the space at one go
// and is faster than calling push_back in transform.
auto result = labels;
auto p = sort_permutation(strides, std::greater<>{});
auto p = find_permutation(lens, strides);
std::transform(p.begin(), p.end(), result.begin(), [&](auto i) { return labels[i]; });
return result;
}
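For illustration only (not part of this commit), a minimal sketch of what the new stable, stride-major find_permutation buys. It assumes the vector-based TensorDescriptor constructor and the GetLayout(labels) helper referenced elsewhere in this change; the 2x3x4x5 shape and its strides are hypothetical.

// Illustration: a descriptor whose strides describe packed NHWC storage
// should now report the "NHWC" label order from GetLayout("NCHW").
#include <miopen/miopen.h>
#include <miopen/tensor.hpp>
#include <cassert>
#include <vector>

int main()
{
    const std::vector<std::size_t> lens    = {2, 3, 4, 5};   // N, C, H, W lengths
    const std::vector<std::size_t> strides = {60, 1, 15, 3}; // strides of a packed NHWC buffer
    miopen::TensorDescriptor desc(miopenFloat, lens, strides);
    // find_permutation orders dimensions by descending stride (ties broken by length,
    // and stably), so the label string is permuted into the actual memory order.
    assert(desc.GetLayout("NCHW") == "NHWC");
    return 0;
}

Replacing the stride-only sort with std::stable_sort keyed on (stride, length) keeps the result deterministic when length-1 dimensions share a stride, which the old sort_permutation over strides alone did not guarantee.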
84 changes: 84 additions & 0 deletions src/include/miopen/tensor_layout.hpp
@@ -0,0 +1,84 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/
#ifndef GUARD_TENSOR_LAYOUT_HPP
#define GUARD_TENSOR_LAYOUT_HPP

#include <miopen/errors.hpp>
#include <map>
#include <algorithm>
#include <vector>
#include <string>
#include <iterator>

namespace miopen {

template <typename T>
void tensor_layout_to_strides(const std::vector<T>& len,
const std::string& len_layout,
const std::string& layout,
std::vector<T>& strides)
{
// Bind the layout and the dimension lengths together into a map.
std::map<char, T> dim_to_len;
std::transform(len.begin(),
len.end(),
len_layout.begin(),
std::inserter(dim_to_len, dim_to_len.end()),
[](T l, char dim) { return std::make_pair(dim, l); });

// Now construct the strides according to the layout by multiplying the
// dimension lengths together.
std::transform(len_layout.begin(),
len_layout.end(),
std::back_inserter(strides),
[&layout, &dim_to_len](char cur_layout_char) {
auto pos = layout.find(cur_layout_char);
if(pos == std::string::npos)
{
MIOPEN_THROW(std::string("mismatched layout string, unexpect char: ")
.append(1, cur_layout_char));
}
return std::accumulate(layout.begin() + pos + 1,
layout.end(),
1,
[&dim_to_len](T accumulator, char l) {
return accumulator * dim_to_len[l];
});
});
}

inline std::string tensor_layout_get_default(int size)
{
if(size == 4)
return "NCHW";
if(size == 5)
return "NCDHW";
return "";
}

} // namespace miopen

#endif
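
For reference, a minimal usage sketch of the two helpers defined above (illustration only, not part of this commit; the 2x3x4x5 shape and the NHWC target layout are hypothetical).

// Illustration: derive strides for NHWC storage from lengths given in NCHW order.
#include <miopen/tensor_layout.hpp>
#include <cassert>
#include <numeric>
#include <string>
#include <vector>

int main()
{
    const std::vector<std::size_t> len = {2, 3, 4, 5};                   // N, C, H, W lengths
    const std::string len_layout = miopen::tensor_layout_get_default(4); // "NCHW"

    std::vector<std::size_t> strides;
    miopen::tensor_layout_to_strides(len, len_layout, "NHWC", strides);

    // Each stride is the product of the lengths of the dimensions that follow it
    // in the requested layout: N -> H*W*C = 60, C -> 1, H -> W*C = 15, W -> C = 3.
    assert((strides == std::vector<std::size_t>{60, 1, 15, 3}));
    return 0;
}

This is the same pair of calls that SetTensorNd in driver/tensor_driver.hpp and the convolution and pooling output-descriptor changes in this commit rely on.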
9 changes: 8 additions & 1 deletion src/pooling.cpp
@@ -26,6 +26,7 @@
#include <miopen/pooling.hpp>
#include <miopen/logger.hpp>
#include <miopen/tensor.hpp>
#include <miopen/tensor_layout.hpp>
#include <miopen/datatype.hpp>

#include <cassert>
@@ -216,7 +217,13 @@ TensorDescriptor PoolingDescriptor::GetForwardOutputTensor(const TensorDescripto
{
std::vector<int> out_dim(xDesc.GetSize());
GetForwardOutputDimNd(xDesc, xDesc.GetSize(), out_dim.data());
return TensorDescriptor(xDesc.GetType(), out_dim);

const std::string default_layout = tensor_layout_get_default(xDesc.GetSize());
const std::string in_layout = xDesc.GetLayout(default_layout);
std::vector<int> out_strides;
tensor_layout_to_strides(out_dim, default_layout, in_layout, out_strides);

return TensorDescriptor(xDesc.GetType(), out_dim, out_strides);
}

std::size_t PoolingDescriptor::GetWorkSpaceSize(const TensorDescriptor& yDesc) const
8 changes: 8 additions & 0 deletions src/tensor.cpp
@@ -28,6 +28,7 @@
#include <miopen/errors.hpp>
#include <miopen/logger.hpp>
#include <miopen/tensor.hpp>
#include <miopen/tensor_layout.hpp>
#include <numeric>
#include <string>

@@ -122,6 +123,13 @@ std::size_t TensorDescriptor::GetElementSpace() const
1;
}

bool TensorDescriptor::IsPossibleLayout(const std::string& labels, const std::string& layout) const
{
std::vector<size_t> derived_strides;
tensor_layout_to_strides(lens, labels, layout, derived_strides);
return derived_strides == strides;
}

std::size_t TensorDescriptor::GetNumBytes() const
{
std::size_t typesize = 0;
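Finally, a hedged example of the new IsPossibleLayout predicate (illustration only, not part of this commit; the same hypothetical 2x3x4x5 tensor as before). A descriptor with packed NHWC strides satisfies "NHWC" but not "NCHW", which is the consistency check that ProblemDescription::HeuristicUpdateLayouts applies to the input, weight, and output descriptors.

// Illustration: IsPossibleLayout(labels, layout) is true only if the strides
// derived for 'layout' match the descriptor's actual strides.
#include <miopen/miopen.h>
#include <miopen/tensor.hpp>
#include <cassert>
#include <vector>

int main()
{
    const std::vector<std::size_t> lens    = {2, 3, 4, 5};   // N, C, H, W lengths
    const std::vector<std::size_t> strides = {60, 1, 15, 3}; // packed NHWC strides
    miopen::TensorDescriptor desc(miopenFloat, lens, strides);

    assert(desc.IsPossibleLayout("NCHW", "NHWC"));  // matches NHWC storage
    assert(!desc.IsPossibleLayout("NCHW", "NCHW")); // not packed NCHW
    return 0;
}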