diff --git a/Dockerfile b/Dockerfile index ed17dd92d8..fdd8d15ab8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -97,7 +97,7 @@ RUN if [ "$USE_TARGETID" = "ON" ] ; then export HIPCC_LINK_FLAGS_APPEND='-O3 -pa RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@be26d30d3d7509a414134a45f4a6d49e5da250b8; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@4bfe00a8de61d12862d9fa803b8ea9a981a50f97; fi RUN cd ~ && \ - export MLIR_COMMIT=31d92f4c64ae6fa6b7c5d543f68b69300b4513ce && \ + export MLIR_COMMIT=331e8782b1b4a107cfb1f93bb90ae59bd95a6dad && \ wget https://github.com/ROCmSoftwarePlatform/llvm-project-mlir/archive/$MLIR_COMMIT.tar.gz && \ tar -xvzf $MLIR_COMMIT.tar.gz && \ rm -rf $MLIR_COMMIT.tar.gz && \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f81521900b..f20750c5e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -196,6 +196,7 @@ set( MIOpen_Source conv/invokers/gen_x_w_y_pad.cpp conv/invokers/ocl_wrw_rdc.cpp conv/invokers/impl_gemm.cpp + conv/invokers/mlir_impl_gemm.cpp conv/invokers/impl_gemm_dynamic.cpp invoker_cache.cpp tensor.cpp @@ -232,6 +233,7 @@ set( MIOpen_Source solver/conv_hip_implicit_gemm_mlir_cpp_fwd.cpp solver/conv_hip_implicit_gemm_mlir_cpp_bwd.cpp solver/conv_hip_implicit_gemm_mlir_cpp_wrw.cpp + solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp solver/conv_hip_implicit_gemm_wrw_v4r4.cpp solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp solver/conv_hip_implicit_gemm_xdlops_common.cpp diff --git a/src/conv/invokers/mlir_impl_gemm.cpp b/src/conv/invokers/mlir_impl_gemm.cpp new file mode 100644 index 0000000000..060ba5261a --- /dev/null +++ b/src/conv/invokers/mlir_impl_gemm.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* +* +* MIT License +* +* Copyright (c) 2021 Advanced Micro Devices, Inc. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in all +* copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +*******************************************************************************/ + +#include +#include + +#include +#include +#include +#include + +#include + +namespace miopen { +namespace conv { + +namespace { +using MemRef4DGeneric = StridedMemRefType; + +struct MlirConvArgs +{ + MemRef4DGeneric filter; + MemRef4DGeneric input; + MemRef4DGeneric output; +}; + +// Rearrange strides correctly +// In MLIR: the layout, sizes and strides are coherent. The layout information is not +// embedded into the permutation of strides. +// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1} +// - For NHWC, sizes = {N, H, W, C}; strides = {C*H*W, W*C, C, 1} + +// In MIOpen however, size and strides are not aligned. Permutation of the strides are used to +// infer actual layout +// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1} +// - For NHWC, sizes = {N, C, H, W}; strides = {C*H*W, 1, W*C, C} +auto permuteDimsStrides(const std::vector& dims, const std::vector& strides) +{ + auto sorted_dims = dims; + auto sorted_strides = strides; + auto p = TensorDescriptor::find_permutation(dims, strides); + std::transform(p.begin(), p.end(), sorted_dims.begin(), [&](auto i) { return dims[i]; }); + std::transform(p.begin(), p.end(), sorted_strides.begin(), [&](auto i) { return strides[i]; }); + return std::make_tuple(sorted_dims, sorted_strides); +}; + +void permuteDimStridesAllDir(const conv::ProblemDescription& conv_problem, + std::vector& in_dims, + std::vector& in_strides, + std::vector& weights_dims, + std::vector& weights_strides, + std::vector& out_dims, + std::vector& out_strides) +{ + const TensorDescriptor& in = conv_problem.GetIn(); + std::make_tuple(in_dims, in_strides) = permuteDimsStrides(in.GetLengths(), in.GetStrides()); + + const TensorDescriptor& weights = conv_problem.GetWeights(); + std::make_tuple(weights_dims, weights_strides) = + permuteDimsStrides(weights.GetLengths(), weights.GetStrides()); + + const TensorDescriptor& out = conv_problem.GetOut(); + std::make_tuple(out_dims, out_strides) = permuteDimsStrides(out.GetLengths(), out.GetStrides()); +} + +MlirConvArgs makeMlirConvArgs(const std::vector& in_dims, + const std::vector& in_strides, + const std::vector& weights_dims, + const std::vector& weights_strides, + const std::vector& out_dims, + const std::vector& out_strides) +{ + auto initDimStrides = [](const std::vector& dims, + const std::vector& strides, + MemRef4DGeneric& target) { + std::copy(dims.cbegin(), dims.cend(), &target.sizes[0]); + std::copy(strides.cbegin(), strides.cend(), &target.strides[0]); + }; + + MemRef4DGeneric filter{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}}; + initDimStrides(weights_dims, weights_strides, filter); + MemRef4DGeneric input{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}}; + initDimStrides(in_dims, in_strides, input); + MemRef4DGeneric output{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}}; + initDimStrides(out_dims, out_strides, output); + + return {filter, input, output}; +} +} // Anonymous namespace + +InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx) +{ + assert((ctx.direction.IsForward())); + + std::vector in_dims, in_strides; + std::vector weights_dims, weights_strides; + std::vector out_dims, out_strides; + permuteDimStridesAllDir(ctx.conv_problem, + in_dims, + in_strides, + weights_dims, + weights_strides, + out_dims, + out_strides); + + MlirConvArgs args = + makeMlirConvArgs(in_dims, in_strides, weights_dims, weights_strides, out_dims, out_strides); + + return [=](const std::vector& kernels) mutable { + return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable { + const auto& forward_invoke_params = + primitive_parameters.CastTo(); + const auto& tensors = forward_invoke_params.tensors; + + void* filter = nullptr; + void* input = nullptr; + void* output = nullptr; +#if MIOPEN_BACKEND_OPENCL + clGetMemObjectInfo(tensors.w, CL_MEM_HOST_PTR, sizeof(filter), &filter, nullptr); + clGetMemObjectInfo(tensors.in, CL_MEM_HOST_PTR, sizeof(input), &input, nullptr); + clGetMemObjectInfo(tensors.out, CL_MEM_HOST_PTR, sizeof(output), &output, nullptr); +#elif MIOPEN_BACKEND_HIP + // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast) + filter = const_cast(tensors.w); + // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast) + input = const_cast(tensors.in); + // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast) + output = const_cast(tensors.out); +#endif + + if((filter == nullptr) || (input == nullptr) || (output == nullptr)) + MIOPEN_THROW("Invalid device pointers"); + + args.filter.basePtr = filter; + args.filter.data = filter; + args.input.basePtr = input; + args.input.data = input; + args.output.basePtr = output; + args.output.data = output; + + handle.Run(kernels[0])(args); + }; + }; +} + +} // namespace conv +} // namespace miopen diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index ffd7b3951e..5c2a05a555 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -391,7 +391,7 @@ Program Handle::LoadProgram(const std::string& program_name, { this->impl->set_ctx(); - if(!miopen::EndsWith(program_name, ".mlir-cpp")) + if((!miopen::EndsWith(program_name, ".mlir-cpp")) && (!miopen::EndsWith(program_name, ".mlir"))) { params += " -mcpu=" + this->GetTargetProperties().Name(); } diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index f5f4b52eee..45eeb9027a 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -248,6 +248,12 @@ void HIPOCProgramImpl::BuildCodeObjectInFile(std::string& params, { hsaco_file = MiirBuildViaHip(dir, filename, src, params, target); } + else if(miopen::EndsWith(filename, ".mlir")) + { + std::vector buffer; + MiirGenBin(params, buffer); + WriteFile(buffer, hsaco_file); + } #endif else { @@ -297,6 +303,8 @@ void HIPOCProgramImpl::BuildCodeObject(std::string params, std::string filename = is_kernel_str ? "tinygemm.cl" // Fixed name for miopengemm. : program; const auto src = [&]() -> std::string { + if(miopen::EndsWith(filename, ".mlir")) + return {}; // MLIR solutions do not use source code. if(miopen::EndsWith(filename, ".mlir-cpp")) return {}; // MLIR solutions do not use source code. if(!kernel_src.empty()) diff --git a/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp b/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp new file mode 100644 index 0000000000..fb3fa58dfb --- /dev/null +++ b/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp @@ -0,0 +1,38 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include +#include + +namespace miopen { +namespace conv { + +InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx); + +} // namespace conv +} // namespace miopen diff --git a/src/include/miopen/memref.hpp b/src/include/miopen/memref.hpp new file mode 100644 index 0000000000..093d654ba4 --- /dev/null +++ b/src/include/miopen/memref.hpp @@ -0,0 +1,82 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +namespace miopen { + +// clang-format off +template +void dropFront(int64_t arr[N], int64_t* res) +{ + for (unsigned i = 1; i < N; ++i) + *(res + i - 1) = arr[i]; +} + +//===----------------------------------------------------------------------===// +// Codegen-compatible structures for StridedMemRef type. +//===----------------------------------------------------------------------===// +/// StridedMemRef descriptor type with static rank. +template +struct StridedMemRefType { + T* basePtr; + T* data; + int64_t offset; + int64_t sizes[N]; + int64_t strides[N]; + // This operator[] is extremely slow and only for sugaring purposes. + StridedMemRefType operator[](int64_t idx) { + StridedMemRefType res; + res.basePtr = basePtr; + res.data = data; + res.offset = offset + idx * strides[0]; + dropFront(sizes, res.sizes); + dropFront(strides, res.strides); + return res; + } +}; + +/// StridedMemRef descriptor type specialized for rank 1. +template +struct StridedMemRefType { + T* basePtr; + T* data; + int64_t offset; + int64_t sizes[1]; + int64_t strides[1]; + T& operator[](int64_t idx) { return *(data + offset + idx * strides[0]); } +}; + +/// StridedMemRef descriptor type specialized for rank 0. +template +struct StridedMemRefType { + T* basePtr; + T* data; + int64_t offset; +}; +// clang-format on + +} // namespace miopen diff --git a/src/include/miopen/mlir_build.hpp b/src/include/miopen/mlir_build.hpp index 42522f057a..29ba0be1fe 100644 --- a/src/include/miopen/mlir_build.hpp +++ b/src/include/miopen/mlir_build.hpp @@ -44,6 +44,8 @@ boost::filesystem::path MiirBuildViaHip(boost::optional& tmp_dir, const TargetProperties& target); void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size); + +void MiirGenBin(const std::string& params, std::vector& buffer); } // namespace miopen #endif // MIOPEN_USE_MLIR diff --git a/src/include/miopen/solver.hpp b/src/include/miopen/solver.hpp index 44c97807f8..830d4614c1 100644 --- a/src/include/miopen/solver.hpp +++ b/src/include/miopen/solver.hpp @@ -796,6 +796,12 @@ struct ConvHipImplicitGemmMlirCppFwd : SolverBase ConvSolution GetSolution(const ConvolutionContext& ctx) const; }; +struct ConvHipImplicitGemmMlirBinFwd : SolverBase +{ + bool IsApplicable(const ConvolutionContext& ctx) const; + ConvSolution GetSolution(const ConvolutionContext& ctx) const; +}; + struct PerformanceImplicitGemmV4R4GenXdlopsFwdFp32 : Serializable { diff --git a/src/mlir_build.cpp b/src/mlir_build.cpp index 8cab823c6e..668c86df63 100644 --- a/src/mlir_build.cpp +++ b/src/mlir_build.cpp @@ -32,6 +32,7 @@ #include #include +#include namespace miopen { // Anonymous namespace @@ -43,6 +44,9 @@ class AutoMiirHandle public: AutoMiirHandle(const std::string& options) : handle(miirCreateHandle(options.c_str())) {} + // Explicitly disable copy and assignment of the handle to avoid double-free risk + AutoMiirHandle(const AutoMiirHandle&) = delete; + void operator=(const AutoMiirHandle&) = delete; ~AutoMiirHandle() { miirDestroyHandle(handle); } MiirHandle operator()() { return handle; } }; @@ -138,11 +142,23 @@ boost::filesystem::path MiirBuildViaHip(boost::optional& tmp_dir, void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size) { AutoMiirHandle handle(params); - miirLowerInit(); auto status = miirLowerTuningParams(handle()); check_miir_error(status, "miirLowerTuningParams"); miirGetExecutionDims(handle(), &global_size, &local_size); check_miir_error(status, "miirGetExecutionDims"); } +void MiirGenBin(const std::string& params, std::vector& buffer) +{ + AutoMiirHandle handle(params); + miirLowerBin(handle()); + + size_t size = 0; + auto status = miirBufferGet(handle(), nullptr, &size); + check_miir_error(status, "miirBufferGet"); + buffer.resize(size); + status = miirBufferGet(handle(), buffer.data(), &size); + check_miir_error(status, "miirBufferGet"); +} + } // namespace miopen diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp index ffe6de37de..ce8bd11e7e 100644 --- a/src/mlo_dir_conv.cpp +++ b/src/mlo_dir_conv.cpp @@ -157,6 +157,7 @@ static auto GetImplicitGemmSolvers() miopen::solver::ConvHipImplicitGemmV4R1Fwd, miopen::solver::ConvHipImplicitGemmV4R4Fwd, miopen::solver::ConvHipImplicitGemmMlirCppFwd, + miopen::solver::ConvHipImplicitGemmMlirBinFwd, miopen::solver::ConvHipImplicitGemmMlirCppBwd, miopen::solver::ConvHipImplicitGemmBwdDataV1R1, miopen::solver::ConvHipImplicitGemmBwdDataV4R1, diff --git a/src/nogpu/handle.cpp b/src/nogpu/handle.cpp index 70c058565e..6c46313115 100644 --- a/src/nogpu/handle.cpp +++ b/src/nogpu/handle.cpp @@ -166,7 +166,7 @@ Program Handle::LoadProgram(const std::string& program_name, bool is_kernel_str, const std::string& kernel_src) const { - if(!miopen::EndsWith(program_name, ".mlir-cpp")) + if((!miopen::EndsWith(program_name, ".mlir-cpp")) && (!miopen::EndsWith(program_name, ".mlir"))) { params += " -mcpu=" + this->GetTargetProperties().Name(); } diff --git a/src/solver.cpp b/src/solver.cpp index c08493d4da..d284dbecbc 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -407,6 +407,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) RegisterWithSolver(registry, ++id, GemmBwd1x1_stride1{}, miopenConvolutionAlgoGEMM); RegisterWithSolver(registry, ++id, GemmBwdRest{}, miopenConvolutionAlgoGEMM); + RegisterWithSolver( + registry, ++id, ConvHipImplicitGemmMlirBinFwd{}, miopenConvolutionAlgoImplicitGEMM); // IMPORTANT: New solvers should be added to the end of the function! } diff --git a/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp b/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp new file mode 100644 index 0000000000..282a5cab04 --- /dev/null +++ b/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp @@ -0,0 +1,159 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include +#include +#include +#include +#include + +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_HIP_IMPLICIT_GEMM_MLIR_BIN_FWD) + +namespace miopen { +namespace solver { + +namespace { +#if MIOPEN_USE_MLIR +std::tuple CalculateGemmSize(const ConvolutionContext& ctx) +{ + const size_t n = ConvolutionContextInterpreter::GetBatchN(ctx); + const size_t k = ConvolutionContextInterpreter::GetOutputChannelK(ctx); + const size_t c = ConvolutionContextInterpreter::GetInputChannelC(ctx); + const size_t ho = ConvolutionContextInterpreter::GetOutputHeightHo(ctx); + const size_t wo = ConvolutionContextInterpreter::GetOutputWidthWo(ctx); + const size_t y = ConvolutionContextInterpreter::GetFilterHeightY(ctx); + const size_t x = ConvolutionContextInterpreter::GetFilterWidthX(ctx); + + const auto gemm_m = k; + const auto gemm_n = ctx.Is3d() + ? n * ho * wo * ConvolutionContextInterpreter::GetOutputDepthDo(ctx) + : n * ho * wo; + const auto gemm_k = + ctx.Is3d() ? c * y * x * ConvolutionContextInterpreter::GetFilterDepthZ(ctx) : c * y * x; + + return std::make_tuple(gemm_m, gemm_n, gemm_k); +} +#endif +} // Anonymous namespace + +bool ConvHipImplicitGemmMlirBinFwd::IsApplicable(const ConvolutionContext& ctx) const +{ +#if MIOPEN_USE_MLIR + if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_HIP_IMPLICIT_GEMM_MLIR_BIN_FWD{})) + return false; + // Future: MLIR will support non-default layouts. + if(!ctx.IsLayoutDefault()) + return false; + // Future: MLIR will support 3d convolution + if(!ctx.Is2d()) + return false; + if(!IsComposableKernelSupportedHardware(ctx)) + return false; + if(!ctx.direction.IsForward()) + return false; + if(!ctx.IsFp32()) + return false; + if(ctx.group_counts != 1) + return false; + + int gemm_m = 0; + int gemm_n = 0; + int gemm_k = 0; + + std::tie(gemm_m, gemm_n, gemm_k) = CalculateGemmSize(ctx); + return gemm_m % 32 == 0 && gemm_n % 32 == 0 && gemm_k % 4 == 0; +#else + std::ignore = ctx; + return false; +#endif +} + +ConvSolution ConvHipImplicitGemmMlirBinFwd::GetSolution(const ConvolutionContext& ctx) const +{ +#if MIOPEN_USE_MLIR + ConvSolution result; + KernelInfo construction_parameters; + + std::string version = "_v4r4"; + std::string direction = "_fwd"; + std::string operation = "conv2d"; + + construction_parameters.kernel_name = "mlir_gen_igemm_conv2d" + version + direction; + construction_parameters.kernel_file = construction_parameters.kernel_name + ".mlir"; + + // Arguments for mlir-miopen-driver. + // clang-format off + using CI = ConvolutionContextInterpreter; + construction_parameters.comp_options = + std::string(" --operation ") + operation + + std::string(" --num_cu ") + std::to_string(ctx.GetStream().GetMaxComputeUnits()) + + std::string(" --arch ") + ctx.GetStream().GetDeviceName() + + std::string(" --fil_layout ") + CI::GetFilterLayout(ctx) + + std::string(" --fil_type ") + "fp32" + + std::string(" --in_layout ") + CI::GetInputLayout(ctx) + + std::string(" --in_type ") + "fp32" + + std::string(" --out_layout ") + CI::GetOutputLayout(ctx) + + std::string(" --out_type ") + "fp32" + + std::string(" --batchsize ") + std::to_string(CI::GetBatchN(ctx)) + + std::string(" --in_channels ") + std::to_string(CI::GetInputChannelC(ctx)) + + std::string(" --out_channels ") + std::to_string(CI::GetOutputChannelK(ctx)) + + std::string(" --in_h ") + std::to_string(CI::GetInputHeightHi(ctx)) + + std::string(" --in_w ") + std::to_string(CI::GetInputWidthWi(ctx)) + + std::string(" --out_h ") + std::to_string(CI::GetOutputHeightHo(ctx)) + + std::string(" --out_w ") + std::to_string(CI::GetOutputWidthWo(ctx)) + + std::string(" --fil_h ") + std::to_string(CI::GetFilterHeightY(ctx)) + + std::string(" --fil_w ") + std::to_string(CI::GetFilterWidthX(ctx)) + + std::string(" --dilation_h ") + std::to_string(CI::GetAdjustedConvolutionDilationH(ctx)) + + std::string(" --dilation_w ") + std::to_string(CI::GetAdjustedConvolutionDilationW(ctx)) + + std::string(" --conv_stride_h ") + std::to_string(CI::GetAdjustedConvolutionStrideH(ctx)) + + std::string(" --conv_stride_w ") + std::to_string(CI::GetAdjustedConvolutionStrideW(ctx)) + + std::string(" --padding_h ") + std::to_string(CI::GetInputLeftPadH(ctx)) + + std::string(" --padding_w ") + std::to_string(CI::GetInputLeftPadW(ctx)) + + std::string(" --kernel_name ") + construction_parameters.kernel_name; + // clang-format on + + size_t local_size = 0; + size_t global_size = 0; + MiirGenLaunchParams(construction_parameters.comp_options, local_size, global_size); + + construction_parameters.l_wk.push_back(local_size); + construction_parameters.l_wk.push_back(1); + construction_parameters.l_wk.push_back(1); + + construction_parameters.g_wk.push_back(global_size); + construction_parameters.g_wk.push_back(1); + construction_parameters.g_wk.push_back(1); + + result.invoker_factory = conv::MakeMlirFwdInvokerFactory(ctx); + result.construction_params.push_back(construction_parameters); + return result; +#else + std::ignore = ctx; + return {}; +#endif +} + +} // namespace solver +} // namespace miopen diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2206cdcbc0..efcd8255aa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -314,7 +314,8 @@ endif() if(MIOPEN_TEST_MLIR) set(IMPLICITGEMM_MLIR_ENV_BASE MIOPEN_FIND_MODE=normal) - set(IMPLICITGEMM_MLIR_ENV_F ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppFwd) + set(IMPLICITGEMM_MLIR_ENV_F_CPP ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppFwd) + set(IMPLICITGEMM_MLIR_ENV_F_BIN ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirBinFwd) set(IMPLICITGEMM_MLIR_ENV_B ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppBwd) set(IMPLICITGEMM_MLIR_ENV_W ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppWrW) @@ -323,9 +324,11 @@ if(MIOPEN_TEST_MLIR) set(IMPLICITGEMM_MLIR_ARGS_W ${IMPLICITGEMM_ARGS} --verbose --disable-forward --disable-backward-data) add_custom_test(test_conv_igemm_mlir ALLOW_NONXDLOPS - COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1 - COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 256 56 56 --weights 256 256 1 1 --pads_strides_dilations 0 0 1 1 1 1 - COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58 --weights 128 128 3 3 --pads_strides_dilations 0 0 1 1 1 1 + COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1 + COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 256 56 56 --weights 256 256 1 1 --pads_strides_dilations 0 0 1 1 1 1 + COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58 --weights 128 128 3 3 --pads_strides_dilations 0 0 1 1 1 1 + COMMAND ${IMPLICITGEMM_MLIR_ENV_F_BIN} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58 --weights 128 128 3 3 --pads_strides_dilations 0 0 1 1 1 1 + COMMAND ${IMPLICITGEMM_MLIR_ENV_F_BIN} $ ${IMPLICITGEMM_MLIR_ARGS_F} --input 128 8 32 32 --weights 128 8 3 3 --pads_strides_dilations 0 0 1 1 1 1 COMMAND ${IMPLICITGEMM_MLIR_ENV_B} $ ${IMPLICITGEMM_MLIR_ARGS_B} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1 COMMAND ${IMPLICITGEMM_MLIR_ENV_B} $ ${IMPLICITGEMM_MLIR_ARGS_B} --input 64 256 56 56 --weights 256 256 1 1 --pads_strides_dilations 0 0 1 1 1 1