Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MLIR] Binary backend - step 1 - nonxdlops fwd path #841

Merged
merged 16 commits into from
Apr 23, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ RUN pip install -r /doc-requirements.txt
RUN if [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@7568654c938d42e9a91c6b18fb382f5b978d12fd; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@5fe0bf4a8dc59f3ab62df929297280915372ce16; fi

RUN cd ~ && \
export MLIR_COMMIT=31d92f4c64ae6fa6b7c5d543f68b69300b4513ce && \
export MLIR_COMMIT=331e8782b1b4a107cfb1f93bb90ae59bd95a6dad && \
wget https://github.com/ROCmSoftwarePlatform/llvm-project-mlir/archive/$MLIR_COMMIT.tar.gz && \
tar -xvzf $MLIR_COMMIT.tar.gz && \
rm -rf $MLIR_COMMIT.tar.gz && \
Expand Down
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ pipeline {
ulimit -c unlimited
cd build
CXX=/opt/rocm/llvm/bin/clang++ cmake -DMIOPEN_USE_MLIR=On -DBUILD_DEV=On -DCMAKE_BUILD_TYPE=release -DMIOPEN_TEST_FLAGS='--verbose --disable-verification-cache' ..
CTEST_PARALLEL_LEVEL=4 MIOPEN_LOG_LEVEL=5 make -j\$(nproc) check
CTEST_PARALLEL_LEVEL=4 MIOPEN_LOG_LEVEL=5 make -j\$(nproc) test_conv_igemm_mlir
atamazov marked this conversation as resolved.
Show resolved Hide resolved
"""
}
steps{
Expand Down
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ set( MIOpen_Source
conv/invokers/gen_x_w_y_pad.cpp
conv/invokers/ocl_wrw_rdc.cpp
conv/invokers/impl_gemm.cpp
conv/invokers/mlir_impl_gemm.cpp
conv/invokers/impl_gemm_dynamic.cpp
invoker_cache.cpp
tensor.cpp
Expand Down Expand Up @@ -228,6 +229,7 @@ set( MIOpen_Source
solver/conv_hip_implicit_gemm_mlir_cpp_fwd.cpp
solver/conv_hip_implicit_gemm_mlir_cpp_bwd.cpp
solver/conv_hip_implicit_gemm_mlir_cpp_wrw.cpp
solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp
solver/conv_hip_implicit_gemm_wrw_v4r4.cpp
solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp
solver/conv_hip_implicit_gemm_xdlops_common.cpp
Expand Down
153 changes: 153 additions & 0 deletions src/conv/invokers/mlir_impl_gemm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#include <miopen/conv/invokers/mlir_impl_gemm.hpp>
atamazov marked this conversation as resolved.
Show resolved Hide resolved
#include <miopen/memref.hpp>

#include <miopen/conv/data_invoke_params.hpp>
#include <miopen/algorithm.hpp>
#include <miopen/handle.hpp>
#include <miopen/tensor_ops.hpp>

#include <boost/any.hpp>

namespace miopen {
namespace conv {

namespace {
using MemRef4DGeneric = StridedMemRefType<void, 4>;

struct MlirConvArgs
{
MemRef4DGeneric filter;
MemRef4DGeneric input;
MemRef4DGeneric output;
};

// Rearrange strides correctly
// In MLIR: the layout, sizes and strides are coherent. The layout information is not
// embedded into the permutation of strides.
// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1}
// - For NHWC, sizes = {N, H, W, C}; strides = {C*H*W, W*C, C, 1}
jerryyin marked this conversation as resolved.
Show resolved Hide resolved

// In MIOpen however, size and strides are not aligned. Permutation of the strides are used to
// infer actual layout
// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1}
// - For NHWC, sizes = {N, C, H, W}; strides = {C*H*W, 1, W*C, C}
auto permuteDimsStrides(const std::vector<size_t>& dims, const std::vector<size_t>& strides)
{
auto sorted_dims = dims;
auto sorted_strides = strides;
auto p = TensorDescriptor::sort_permutation(strides, std::greater<>{});
std::transform(p.begin(), p.end(), sorted_dims.begin(), [&](auto i) { return dims[i]; });
std::transform(p.begin(), p.end(), sorted_strides.begin(), [&](auto i) { return strides[i]; });
return std::make_tuple(sorted_dims, sorted_strides);
};

void permuteDimStridesAllDir(const conv::ProblemDescription& conv_problem,
std::vector<size_t>& in_dims,
std::vector<size_t>& in_strides,
std::vector<size_t>& weights_dims,
std::vector<size_t>& weights_strides,
std::vector<size_t>& out_dims,
std::vector<size_t>& out_strides)
{
const TensorDescriptor& in = conv_problem.GetIn();
std::make_tuple(in_dims, in_strides) = permuteDimsStrides(in.GetLengths(), in.GetStrides());

const TensorDescriptor& weights = conv_problem.GetWeights();
std::make_tuple(weights_dims, weights_strides) =
permuteDimsStrides(weights.GetLengths(), weights.GetStrides());

const TensorDescriptor& out = conv_problem.GetOut();
std::make_tuple(out_dims, out_strides) = permuteDimsStrides(out.GetLengths(), out.GetStrides());
}

MlirConvArgs makeMlirConvArgs(const std::vector<size_t>& in_dims,
const std::vector<size_t>& in_strides,
const std::vector<size_t>& weights_dims,
const std::vector<size_t>& weights_strides,
const std::vector<size_t>& out_dims,
const std::vector<size_t>& out_strides)
{
auto initializeMemRef = [](const std::vector<size_t>& dims,
const std::vector<size_t>& strides,
MemRef4DGeneric& target) {
target.basePtr = nullptr;
target.data = nullptr;
target.offset = 0;
std::copy(dims.cbegin(), dims.cend(), &target.sizes[0]);
std::copy(strides.cbegin(), strides.cend(), &target.strides[0]);
Comment on lines +99 to +100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...while strides and sizes and known in the Factory (outer lambda), and necessary initializations should be performed there. In this case the only thing that Invoker will need to do (before launching the kernel) is initialization of buffer addresses.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jerryyin Whatever design you are going to select, it should be able to satisfy this and this. This is the simplest test of design validity.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's postpone this. #862 created.

};

MemRef4DGeneric filter;
initializeMemRef(weights_dims, weights_strides, filter);
MemRef4DGeneric input;
initializeMemRef(in_dims, in_strides, input);
MemRef4DGeneric output;
initializeMemRef(out_dims, out_strides, output);

return {filter, input, output};
}
} // Anonymous namespace

InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx)
atamazov marked this conversation as resolved.
Show resolved Hide resolved
{
assert((ctx.direction.IsForward()));

std::vector<size_t> in_dims, in_strides;
std::vector<size_t> weights_dims, weights_strides;
std::vector<size_t> out_dims, out_strides;
permuteDimStridesAllDir(ctx.conv_problem,
in_dims,
in_strides,
weights_dims,
weights_strides,
out_dims,
out_strides);

MlirConvArgs args =
makeMlirConvArgs(in_dims, in_strides, weights_dims, weights_strides, out_dims, out_strides);

return [=](const std::vector<Kernel>& kernels) {
return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable {
const auto& data_ctx = primitive_parameters.CastTo<conv::DataInvokeParams>();
atamazov marked this conversation as resolved.
Show resolved Hide resolved
const auto& tensors = data_ctx.tensors;

void* filter = nullptr;
void* input = nullptr;
void* output = nullptr;
#if MIOPEN_BACKEND_OPENCL
clGetMemObjectInfo(tensors.w, CL_MEM_HOST_PTR, sizeof(filter), &filter, nullptr);
clGetMemObjectInfo(tensors.in, CL_MEM_HOST_PTR, sizeof(input), &input, nullptr);
clGetMemObjectInfo(tensors.out, CL_MEM_HOST_PTR, sizeof(output), &output, nullptr);
#elif MIOPEN_BACKEND_HIP
// NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
filter = const_cast<void*>(tensors.w);
// NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
input = const_cast<void*>(tensors.in);
// NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
output = const_cast<void*>(tensors.out);
#endif
atamazov marked this conversation as resolved.
Show resolved Hide resolved

if((filter == nullptr) || (input == nullptr) || (output == nullptr))
MIOPEN_THROW("Invalid device pointers");

args.filter.basePtr = filter;
args.filter.data = filter;
args.input.basePtr = input;
args.input.data = input;
args.output.basePtr = output;
args.output.data = output;

handle.Run(kernels[0])(args);
if(handle.IsProfilingEnabled())
{
float elapsed = 0;
elapsed += handle.GetKernelTime();
handle.ResetKernelTime();
handle.AccumKernelTime(elapsed);
}
atamazov marked this conversation as resolved.
Show resolved Hide resolved
};
};
}

} // namespace conv
} // namespace miopen
2 changes: 1 addition & 1 deletion src/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ Program Handle::LoadProgram(const std::string& program_name,
{
this->impl->set_ctx();

if(!miopen::EndsWith(program_name, ".mlir-cpp"))
if((!miopen::EndsWith(program_name, ".mlir-cpp")) && (!miopen::EndsWith(program_name, ".mlir")))
jerryyin marked this conversation as resolved.
Show resolved Hide resolved
{
params += " -mcpu=" + this->GetTargetProperties().Name();
}
Expand Down
8 changes: 8 additions & 0 deletions src/hipoc/hipoc_program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,12 @@ struct HIPOCProgramImpl
{
hsaco_file = MiirBuildViaHip(dir, filename, src, params, target);
}
else if(miopen::EndsWith(filename, ".mlir"))
{
std::vector<char> buffer;
MiirGenBin(params, buffer);
WriteFile(buffer, hsaco_file);
}
#endif
else
{
Expand Down Expand Up @@ -303,6 +309,8 @@ struct HIPOCProgramImpl
std::string filename = is_kernel_str ? "tinygemm.cl" // Fixed name for miopengemm.
: program;
const auto src = [&]() -> std::string {
if(miopen::EndsWith(filename, ".mlir"))
return {}; // MLIR solutions do not use source code.
if(miopen::EndsWith(filename, ".mlir-cpp"))
return {}; // MLIR solutions do not use source code.
if(!kernel_src.empty())
Expand Down
38 changes: 38 additions & 0 deletions src/include/miopen/conv/invokers/mlir_impl_gemm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <miopen/invoker.hpp>
#include <miopen/conv/context.hpp>

namespace miopen {
namespace conv {

InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx);

} // namespace conv
} // namespace miopen
82 changes: 82 additions & 0 deletions src/include/miopen/memref.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

namespace miopen {

// clang-format off
jerryyin marked this conversation as resolved.
Show resolved Hide resolved
template <int N>
void dropFront(int64_t arr[N], int64_t* res)
{
for (unsigned i = 1; i < N; ++i)
*(res + i - 1) = arr[i];
}

//===----------------------------------------------------------------------===//
// Codegen-compatible structures for StridedMemRef type.
//===----------------------------------------------------------------------===//
/// StridedMemRef descriptor type with static rank.
template <typename T, int N>
struct StridedMemRefType {
T* basePtr;
T* data;
int64_t offset;
int64_t sizes[N];
int64_t strides[N];
jerryyin marked this conversation as resolved.
Show resolved Hide resolved
// This operator[] is extremely slow and only for sugaring purposes.
StridedMemRefType<T, N - 1> operator[](int64_t idx) {
StridedMemRefType<T, N - 1> res;
res.basePtr = basePtr;
res.data = data;
res.offset = offset + idx * strides[0];
dropFront<N>(sizes, res.sizes);
dropFront<N>(strides, res.strides);
return res;
}
};

/// StridedMemRef descriptor type specialized for rank 1.
template <typename T>
struct StridedMemRefType<T, 1> {
T* basePtr;
T* data;
int64_t offset;
int64_t sizes[1];
int64_t strides[1];
T& operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
};

/// StridedMemRef descriptor type specialized for rank 0.
template <typename T>
struct StridedMemRefType<T, 0> {
T* basePtr;
T* data;
int64_t offset;
};
// clang-format on

} // namespace miopen
2 changes: 2 additions & 0 deletions src/include/miopen/mlir_build.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ boost::filesystem::path MiirBuildViaHip(boost::optional<TmpDir>& tmp_dir,
const TargetProperties& target);

void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size);

void MiirGenBin(const std::string& params, std::vector<char>& buffer);
atamazov marked this conversation as resolved.
Show resolved Hide resolved
} // namespace miopen

#endif // MIOPEN_USE_MLIR
Expand Down
7 changes: 7 additions & 0 deletions src/include/miopen/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,13 @@ struct ConvHipImplicitGemmMlirCppFwd : SolverBase<ConvolutionContext>
ConvSolution GetSolution(const ConvolutionContext& ctx) const;
};

struct ConvHipImplicitGemmMlirBinFwd : SolverBase<ConvolutionContext>
{
static std::tuple<int, int, int> CalculateGemmSize(const ConvolutionContext& ctx);
bool IsApplicable(const ConvolutionContext& ctx) const;
ConvSolution GetSolution(const ConvolutionContext& ctx) const;
};

struct PerformanceImplicitGemmV4R4GenXdlopsFwdFp32
: Serializable<PerformanceImplicitGemmV4R4GenXdlopsFwdFp32>
{
Expand Down
14 changes: 13 additions & 1 deletion src/mlir_build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,23 @@ boost::filesystem::path MiirBuildViaHip(boost::optional<TmpDir>& tmp_dir,
void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size)
{
AutoMiirHandle handle(params);
miirLowerInit();
auto status = miirLowerTuningParams(handle());
check_miir_error(status, "miirLowerTuningParams");
miirGetExecutionDims(handle(), &global_size, &local_size);
check_miir_error(status, "miirGetExecutionDims");
}

void MiirGenBin(const std::string& params, std::vector<char>& buffer)
{
AutoMiirHandle handle(params);
atamazov marked this conversation as resolved.
Show resolved Hide resolved
miirLowerBin(handle());

size_t size = 0;
auto status = miirBufferGet(handle(), nullptr, &size);
check_miir_error(status, "miirLowerTuningParams");
atamazov marked this conversation as resolved.
Show resolved Hide resolved
buffer.resize(size);
status = miirBufferGet(handle(), buffer.data(), &size);
check_miir_error(status, "miirBufferGet");
}

} // namespace miopen
1 change: 1 addition & 0 deletions src/mlo_dir_conv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ static auto GetImplicitGemmSolvers()
miopen::solver::ConvHipImplicitGemmV4R1Fwd,
miopen::solver::ConvHipImplicitGemmV4R4Fwd,
miopen::solver::ConvHipImplicitGemmMlirCppFwd,
miopen::solver::ConvHipImplicitGemmMlirBinFwd,
miopen::solver::ConvHipImplicitGemmMlirCppBwd,
miopen::solver::ConvHipImplicitGemmBwdDataV1R1,
miopen::solver::ConvHipImplicitGemmBwdDataV4R1,
Expand Down
Loading