diff --git a/Dockerfile b/Dockerfile
index ed17dd92d8..fdd8d15ab8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -97,7 +97,7 @@ RUN if [ "$USE_TARGETID" = "ON" ] ; then export HIPCC_LINK_FLAGS_APPEND='-O3 -pa
 RUN if [ "$USE_TARGETID" = "OFF" ] ; then echo "MIOpenTensile is not installed."; elif [ "$MIOTENSILE_VER" = "latest" ] ; then cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@be26d30d3d7509a414134a45f4a6d49e5da250b8; else cget -p $PREFIX install ROCmSoftwarePlatform/MIOpenTensile@4bfe00a8de61d12862d9fa803b8ea9a981a50f97; fi
 
 RUN cd ~ && \
-    export MLIR_COMMIT=31d92f4c64ae6fa6b7c5d543f68b69300b4513ce && \
+    export MLIR_COMMIT=331e8782b1b4a107cfb1f93bb90ae59bd95a6dad && \
     wget https://github.com/ROCmSoftwarePlatform/llvm-project-mlir/archive/$MLIR_COMMIT.tar.gz && \
     tar -xvzf $MLIR_COMMIT.tar.gz && \
     rm -rf $MLIR_COMMIT.tar.gz && \
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f81521900b..f20750c5e1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -196,6 +196,7 @@ set( MIOpen_Source
     conv/invokers/gen_x_w_y_pad.cpp
     conv/invokers/ocl_wrw_rdc.cpp
     conv/invokers/impl_gemm.cpp
+    conv/invokers/mlir_impl_gemm.cpp
     conv/invokers/impl_gemm_dynamic.cpp
     invoker_cache.cpp
     tensor.cpp
@@ -232,6 +233,7 @@ set( MIOpen_Source
     solver/conv_hip_implicit_gemm_mlir_cpp_fwd.cpp
     solver/conv_hip_implicit_gemm_mlir_cpp_bwd.cpp
     solver/conv_hip_implicit_gemm_mlir_cpp_wrw.cpp
+    solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp
     solver/conv_hip_implicit_gemm_wrw_v4r4.cpp
     solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp
     solver/conv_hip_implicit_gemm_xdlops_common.cpp
diff --git a/src/conv/invokers/mlir_impl_gemm.cpp b/src/conv/invokers/mlir_impl_gemm.cpp
new file mode 100644
index 0000000000..060ba5261a
--- /dev/null
+++ b/src/conv/invokers/mlir_impl_gemm.cpp
@@ -0,0 +1,170 @@
+/*******************************************************************************
+*
+* MIT License
+*
+* Copyright (c) 2021 Advanced Micro Devices, Inc.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in all
+* copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+*******************************************************************************/
+
+#include <miopen/conv/invokers/mlir_impl_gemm.hpp>
+#include <miopen/memref.hpp>
+
+#include <miopen/conv/data_invoke_params.hpp>
+#include <miopen/algorithm.hpp>
+#include <miopen/handle.hpp>
+#include <miopen/tensor_ops.hpp>
+
+#include <boost/any.hpp>
+
+namespace miopen {
+namespace conv {
+
+namespace {
+using MemRef4DGeneric = StridedMemRefType<void, 4>;
+
+struct MlirConvArgs
+{
+    MemRef4DGeneric filter;
+    MemRef4DGeneric input;
+    MemRef4DGeneric output;
+};
+
+// Rearrange strides correctly
+// In MLIR: the layout, sizes and strides are coherent. The layout information is not
+// embedded into the permutation of strides.
+// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1}
+// - For NHWC, sizes = {N, H, W, C}; strides = {C*H*W, W*C, C, 1}
+
+// In MIOpen however, size and strides are not aligned. Permutation of the strides are used to
+// infer actual layout
+// - For NCHW, sizes = {N, C, H, W}; strides = {C*H*W, H*W, W, 1}
+// - For NHWC, sizes = {N, C, H, W}; strides = {C*H*W, 1, W*C, C}
+auto permuteDimsStrides(const std::vector<size_t>& dims, const std::vector<size_t>& strides)
+{
+    auto sorted_dims    = dims;
+    auto sorted_strides = strides;
+    auto p              = TensorDescriptor::find_permutation(dims, strides);
+    std::transform(p.begin(), p.end(), sorted_dims.begin(), [&](auto i) { return dims[i]; });
+    std::transform(p.begin(), p.end(), sorted_strides.begin(), [&](auto i) { return strides[i]; });
+    return std::make_tuple(sorted_dims, sorted_strides);
+};
+
+void permuteDimStridesAllDir(const conv::ProblemDescription& conv_problem,
+                             std::vector<size_t>& in_dims,
+                             std::vector<size_t>& in_strides,
+                             std::vector<size_t>& weights_dims,
+                             std::vector<size_t>& weights_strides,
+                             std::vector<size_t>& out_dims,
+                             std::vector<size_t>& out_strides)
+{
+    const TensorDescriptor& in = conv_problem.GetIn();
+    std::make_tuple(in_dims, in_strides) = permuteDimsStrides(in.GetLengths(), in.GetStrides());
+
+    const TensorDescriptor& weights = conv_problem.GetWeights();
+    std::make_tuple(weights_dims, weights_strides) =
+        permuteDimsStrides(weights.GetLengths(), weights.GetStrides());
+
+    const TensorDescriptor& out = conv_problem.GetOut();
+    std::make_tuple(out_dims, out_strides) = permuteDimsStrides(out.GetLengths(), out.GetStrides());
+}
+
+MlirConvArgs makeMlirConvArgs(const std::vector<size_t>& in_dims,
+                              const std::vector<size_t>& in_strides,
+                              const std::vector<size_t>& weights_dims,
+                              const std::vector<size_t>& weights_strides,
+                              const std::vector<size_t>& out_dims,
+                              const std::vector<size_t>& out_strides)
+{
+    auto initDimStrides = [](const std::vector<size_t>& dims,
+                             const std::vector<size_t>& strides,
+                             MemRef4DGeneric& target) {
+        std::copy(dims.cbegin(), dims.cend(), &target.sizes[0]);
+        std::copy(strides.cbegin(), strides.cend(), &target.strides[0]);
+    };
+
+    MemRef4DGeneric filter{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
+    initDimStrides(weights_dims, weights_strides, filter);
+    MemRef4DGeneric input{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
+    initDimStrides(in_dims, in_strides, input);
+    MemRef4DGeneric output{nullptr, nullptr, 0, {0, 0, 0, 0}, {0, 0, 0, 0}};
+    initDimStrides(out_dims, out_strides, output);
+
+    return {filter, input, output};
+}
+} // Anonymous namespace
+
+InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx)
+{
+    assert((ctx.direction.IsForward()));
+
+    std::vector<size_t> in_dims, in_strides;
+    std::vector<size_t> weights_dims, weights_strides;
+    std::vector<size_t> out_dims, out_strides;
+    permuteDimStridesAllDir(ctx.conv_problem,
+                            in_dims,
+                            in_strides,
+                            weights_dims,
+                            weights_strides,
+                            out_dims,
+                            out_strides);
+
+    MlirConvArgs args =
+        makeMlirConvArgs(in_dims, in_strides, weights_dims, weights_strides, out_dims, out_strides);
+
+    return [=](const std::vector<Kernel>& kernels) mutable {
+        return [=](const Handle& handle, const AnyInvokeParams& primitive_parameters) mutable {
+            const auto& forward_invoke_params =
+                primitive_parameters.CastTo<conv::DataInvokeParams>();
+            const auto& tensors = forward_invoke_params.tensors;
+
+            void* filter = nullptr;
+            void* input  = nullptr;
+            void* output = nullptr;
+#if MIOPEN_BACKEND_OPENCL
+            clGetMemObjectInfo(tensors.w, CL_MEM_HOST_PTR, sizeof(filter), &filter, nullptr);
+            clGetMemObjectInfo(tensors.in, CL_MEM_HOST_PTR, sizeof(input), &input, nullptr);
+            clGetMemObjectInfo(tensors.out, CL_MEM_HOST_PTR, sizeof(output), &output, nullptr);
+#elif MIOPEN_BACKEND_HIP
+            // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
+            filter = const_cast<void*>(tensors.w);
+            // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
+            input = const_cast<void*>(tensors.in);
+            // NOLINTNEXTLINE (cppcoreguidelines-pro-type-const-cast)
+            output = const_cast<void*>(tensors.out);
+#endif
+
+            if((filter == nullptr) || (input == nullptr) || (output == nullptr))
+                MIOPEN_THROW("Invalid device pointers");
+
+            args.filter.basePtr = filter;
+            args.filter.data    = filter;
+            args.input.basePtr  = input;
+            args.input.data     = input;
+            args.output.basePtr = output;
+            args.output.data    = output;
+
+            handle.Run(kernels[0])(args);
+        };
+    };
+}
+
+} // namespace conv
+} // namespace miopen
diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp
index ffd7b3951e..5c2a05a555 100644
--- a/src/hip/handlehip.cpp
+++ b/src/hip/handlehip.cpp
@@ -391,7 +391,7 @@ Program Handle::LoadProgram(const std::string& program_name,
 {
     this->impl->set_ctx();
 
-    if(!miopen::EndsWith(program_name, ".mlir-cpp"))
+    if((!miopen::EndsWith(program_name, ".mlir-cpp")) && (!miopen::EndsWith(program_name, ".mlir")))
     {
         params += " -mcpu=" + this->GetTargetProperties().Name();
     }
diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp
index f5f4b52eee..45eeb9027a 100644
--- a/src/hipoc/hipoc_program.cpp
+++ b/src/hipoc/hipoc_program.cpp
@@ -248,6 +248,12 @@ void HIPOCProgramImpl::BuildCodeObjectInFile(std::string& params,
     {
         hsaco_file = MiirBuildViaHip(dir, filename, src, params, target);
     }
+    else if(miopen::EndsWith(filename, ".mlir"))
+    {
+        std::vector<char> buffer;
+        MiirGenBin(params, buffer);
+        WriteFile(buffer, hsaco_file);
+    }
 #endif
     else
     {
@@ -297,6 +303,8 @@ void HIPOCProgramImpl::BuildCodeObject(std::string params,
     std::string filename = is_kernel_str ? "tinygemm.cl" // Fixed name for miopengemm.
                                          : program;
     const auto src = [&]() -> std::string {
+        if(miopen::EndsWith(filename, ".mlir"))
+            return {}; // MLIR solutions do not use source code.
         if(miopen::EndsWith(filename, ".mlir-cpp"))
             return {}; // MLIR solutions do not use source code.
         if(!kernel_src.empty())
diff --git a/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp b/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp
new file mode 100644
index 0000000000..fb3fa58dfb
--- /dev/null
+++ b/src/include/miopen/conv/invokers/mlir_impl_gemm.hpp
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/invoker.hpp>
+#include <miopen/conv/context.hpp>
+
+namespace miopen {
+namespace conv {
+
+InvokerFactory MakeMlirFwdInvokerFactory(const ConvolutionContext& ctx);
+
+} // namespace conv
+} // namespace miopen
diff --git a/src/include/miopen/memref.hpp b/src/include/miopen/memref.hpp
new file mode 100644
index 0000000000..093d654ba4
--- /dev/null
+++ b/src/include/miopen/memref.hpp
@@ -0,0 +1,82 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+namespace miopen {
+
+// clang-format off
+template <int N>
+void dropFront(int64_t arr[N], int64_t* res)
+{
+  for (unsigned i = 1; i < N; ++i)
+    *(res + i - 1) = arr[i];
+}
+
+//===----------------------------------------------------------------------===//
+// Codegen-compatible structures for StridedMemRef type.
+//===----------------------------------------------------------------------===//
+/// StridedMemRef descriptor type with static rank.
+template <typename T, int N>
+struct StridedMemRefType {
+  T* basePtr;
+  T* data;
+  int64_t offset;
+  int64_t sizes[N];
+  int64_t strides[N];
+  // This operator[] is extremely slow and only for sugaring purposes.
+  StridedMemRefType<T, N - 1> operator[](int64_t idx) {
+    StridedMemRefType<T, N - 1> res;
+    res.basePtr = basePtr;
+    res.data = data;
+    res.offset = offset + idx * strides[0];
+    dropFront<N>(sizes, res.sizes);
+    dropFront<N>(strides, res.strides);
+    return res;
+  }
+};
+
+/// StridedMemRef descriptor type specialized for rank 1.
+template <typename T>
+struct StridedMemRefType<T, 1> {
+  T* basePtr;
+  T* data;
+  int64_t offset;
+  int64_t sizes[1];
+  int64_t strides[1];
+  T& operator[](int64_t idx) { return *(data + offset + idx * strides[0]); }
+};
+
+/// StridedMemRef descriptor type specialized for rank 0.
+template <typename T>
+struct StridedMemRefType<T, 0> {
+  T* basePtr;
+  T* data;
+  int64_t offset;
+};
+// clang-format on
+
+} // namespace miopen
diff --git a/src/include/miopen/mlir_build.hpp b/src/include/miopen/mlir_build.hpp
index 42522f057a..29ba0be1fe 100644
--- a/src/include/miopen/mlir_build.hpp
+++ b/src/include/miopen/mlir_build.hpp
@@ -44,6 +44,8 @@ boost::filesystem::path MiirBuildViaHip(boost::optional<TmpDir>& tmp_dir,
                                         const TargetProperties& target);
 
 void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size);
+
+void MiirGenBin(const std::string& params, std::vector<char>& buffer);
 } // namespace miopen
 
 #endif // MIOPEN_USE_MLIR
diff --git a/src/include/miopen/solver.hpp b/src/include/miopen/solver.hpp
index 44c97807f8..830d4614c1 100644
--- a/src/include/miopen/solver.hpp
+++ b/src/include/miopen/solver.hpp
@@ -796,6 +796,12 @@ struct ConvHipImplicitGemmMlirCppFwd : SolverBase<ConvolutionContext>
     ConvSolution GetSolution(const ConvolutionContext& ctx) const;
 };
 
+struct ConvHipImplicitGemmMlirBinFwd : SolverBase<ConvolutionContext>
+{
+    bool IsApplicable(const ConvolutionContext& ctx) const;
+    ConvSolution GetSolution(const ConvolutionContext& ctx) const;
+};
+
 struct PerformanceImplicitGemmV4R4GenXdlopsFwdFp32
     : Serializable<PerformanceImplicitGemmV4R4GenXdlopsFwdFp32>
 {
diff --git a/src/mlir_build.cpp b/src/mlir_build.cpp
index 8cab823c6e..668c86df63 100644
--- a/src/mlir_build.cpp
+++ b/src/mlir_build.cpp
@@ -32,6 +32,7 @@
 #include <Miir.h>
 
 #include <fstream>
+#include <vector>
 
 namespace miopen {
 // Anonymous namespace
@@ -43,6 +44,9 @@ class AutoMiirHandle
 
     public:
     AutoMiirHandle(const std::string& options) : handle(miirCreateHandle(options.c_str())) {}
+    // Explicitly disable copy and assignment of the handle to avoid double-free risk
+    AutoMiirHandle(const AutoMiirHandle&) = delete;
+    void operator=(const AutoMiirHandle&) = delete;
     ~AutoMiirHandle() { miirDestroyHandle(handle); }
     MiirHandle operator()() { return handle; }
 };
@@ -138,11 +142,23 @@ boost::filesystem::path MiirBuildViaHip(boost::optional<TmpDir>& tmp_dir,
 void MiirGenLaunchParams(const std::string& params, size_t& local_size, size_t& global_size)
 {
     AutoMiirHandle handle(params);
-    miirLowerInit();
     auto status = miirLowerTuningParams(handle());
     check_miir_error(status, "miirLowerTuningParams");
     miirGetExecutionDims(handle(), &global_size, &local_size);
     check_miir_error(status, "miirGetExecutionDims");
 }
 
+void MiirGenBin(const std::string& params, std::vector<char>& buffer)
+{
+    AutoMiirHandle handle(params);
+    miirLowerBin(handle());
+
+    size_t size = 0;
+    auto status = miirBufferGet(handle(), nullptr, &size);
+    check_miir_error(status, "miirBufferGet");
+    buffer.resize(size);
+    status = miirBufferGet(handle(), buffer.data(), &size);
+    check_miir_error(status, "miirBufferGet");
+}
+
 } // namespace miopen
diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp
index ffe6de37de..ce8bd11e7e 100644
--- a/src/mlo_dir_conv.cpp
+++ b/src/mlo_dir_conv.cpp
@@ -157,6 +157,7 @@ static auto GetImplicitGemmSolvers()
         miopen::solver::ConvHipImplicitGemmV4R1Fwd,
         miopen::solver::ConvHipImplicitGemmV4R4Fwd,
         miopen::solver::ConvHipImplicitGemmMlirCppFwd,
+        miopen::solver::ConvHipImplicitGemmMlirBinFwd,
         miopen::solver::ConvHipImplicitGemmMlirCppBwd,
         miopen::solver::ConvHipImplicitGemmBwdDataV1R1,
         miopen::solver::ConvHipImplicitGemmBwdDataV4R1,
diff --git a/src/nogpu/handle.cpp b/src/nogpu/handle.cpp
index 70c058565e..6c46313115 100644
--- a/src/nogpu/handle.cpp
+++ b/src/nogpu/handle.cpp
@@ -166,7 +166,7 @@ Program Handle::LoadProgram(const std::string& program_name,
                             bool is_kernel_str,
                             const std::string& kernel_src) const
 {
-    if(!miopen::EndsWith(program_name, ".mlir-cpp"))
+    if((!miopen::EndsWith(program_name, ".mlir-cpp")) && (!miopen::EndsWith(program_name, ".mlir")))
     {
         params += " -mcpu=" + this->GetTargetProperties().Name();
     }
diff --git a/src/solver.cpp b/src/solver.cpp
index c08493d4da..d284dbecbc 100644
--- a/src/solver.cpp
+++ b/src/solver.cpp
@@ -407,6 +407,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry)
     RegisterWithSolver(registry, ++id, GemmBwd1x1_stride1{}, miopenConvolutionAlgoGEMM);
     RegisterWithSolver(registry, ++id, GemmBwdRest{}, miopenConvolutionAlgoGEMM);
 
+    RegisterWithSolver(
+        registry, ++id, ConvHipImplicitGemmMlirBinFwd{}, miopenConvolutionAlgoImplicitGEMM);
     // IMPORTANT: New solvers should be added to the end of the function!
 }
 
diff --git a/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp b/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp
new file mode 100644
index 0000000000..282a5cab04
--- /dev/null
+++ b/src/solver/conv_hip_implicit_gemm_mlir_bin_fwd.cpp
@@ -0,0 +1,159 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include <miopen/mlir_build.hpp>
+#include <miopen/conv/invokers/mlir_impl_gemm.hpp>
+#include <miopen/solver.hpp>
+#include <miopen/solver/implicitgemm_util.hpp>
+#include <miopen/handle.hpp>
+
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_HIP_IMPLICIT_GEMM_MLIR_BIN_FWD)
+
+namespace miopen {
+namespace solver {
+
+namespace {
+#if MIOPEN_USE_MLIR
+std::tuple<int, int, int> CalculateGemmSize(const ConvolutionContext& ctx)
+{
+    const size_t n  = ConvolutionContextInterpreter::GetBatchN(ctx);
+    const size_t k  = ConvolutionContextInterpreter::GetOutputChannelK(ctx);
+    const size_t c  = ConvolutionContextInterpreter::GetInputChannelC(ctx);
+    const size_t ho = ConvolutionContextInterpreter::GetOutputHeightHo(ctx);
+    const size_t wo = ConvolutionContextInterpreter::GetOutputWidthWo(ctx);
+    const size_t y  = ConvolutionContextInterpreter::GetFilterHeightY(ctx);
+    const size_t x  = ConvolutionContextInterpreter::GetFilterWidthX(ctx);
+
+    const auto gemm_m = k;
+    const auto gemm_n = ctx.Is3d()
+                            ? n * ho * wo * ConvolutionContextInterpreter::GetOutputDepthDo(ctx)
+                            : n * ho * wo;
+    const auto gemm_k =
+        ctx.Is3d() ? c * y * x * ConvolutionContextInterpreter::GetFilterDepthZ(ctx) : c * y * x;
+
+    return std::make_tuple(gemm_m, gemm_n, gemm_k);
+}
+#endif
+} // Anonymous namespace
+
+bool ConvHipImplicitGemmMlirBinFwd::IsApplicable(const ConvolutionContext& ctx) const
+{
+#if MIOPEN_USE_MLIR
+    if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_HIP_IMPLICIT_GEMM_MLIR_BIN_FWD{}))
+        return false;
+    // Future: MLIR will support non-default layouts.
+    if(!ctx.IsLayoutDefault())
+        return false;
+    // Future: MLIR will support 3d convolution
+    if(!ctx.Is2d())
+        return false;
+    if(!IsComposableKernelSupportedHardware(ctx))
+        return false;
+    if(!ctx.direction.IsForward())
+        return false;
+    if(!ctx.IsFp32())
+        return false;
+    if(ctx.group_counts != 1)
+        return false;
+
+    int gemm_m = 0;
+    int gemm_n = 0;
+    int gemm_k = 0;
+
+    std::tie(gemm_m, gemm_n, gemm_k) = CalculateGemmSize(ctx);
+    return gemm_m % 32 == 0 && gemm_n % 32 == 0 && gemm_k % 4 == 0;
+#else
+    std::ignore = ctx;
+    return false;
+#endif
+}
+
+ConvSolution ConvHipImplicitGemmMlirBinFwd::GetSolution(const ConvolutionContext& ctx) const
+{
+#if MIOPEN_USE_MLIR
+    ConvSolution result;
+    KernelInfo construction_parameters;
+
+    std::string version   = "_v4r4";
+    std::string direction = "_fwd";
+    std::string operation = "conv2d";
+
+    construction_parameters.kernel_name = "mlir_gen_igemm_conv2d" + version + direction;
+    construction_parameters.kernel_file = construction_parameters.kernel_name + ".mlir";
+
+    // Arguments for mlir-miopen-driver.
+    // clang-format off
+    using CI = ConvolutionContextInterpreter;
+    construction_parameters.comp_options =
+        std::string(" --operation ") + operation +
+        std::string(" --num_cu ") + std::to_string(ctx.GetStream().GetMaxComputeUnits()) +
+        std::string(" --arch ") + ctx.GetStream().GetDeviceName() +
+        std::string(" --fil_layout ") + CI::GetFilterLayout(ctx) +
+        std::string(" --fil_type ") + "fp32" +
+        std::string(" --in_layout ") + CI::GetInputLayout(ctx) +
+        std::string(" --in_type ") + "fp32" +
+        std::string(" --out_layout ") + CI::GetOutputLayout(ctx) +
+        std::string(" --out_type ") + "fp32" +
+        std::string(" --batchsize ") + std::to_string(CI::GetBatchN(ctx)) +
+        std::string(" --in_channels ") + std::to_string(CI::GetInputChannelC(ctx)) +
+        std::string(" --out_channels ") + std::to_string(CI::GetOutputChannelK(ctx)) +
+        std::string(" --in_h ") + std::to_string(CI::GetInputHeightHi(ctx)) +
+        std::string(" --in_w ") + std::to_string(CI::GetInputWidthWi(ctx)) +
+        std::string(" --out_h ") + std::to_string(CI::GetOutputHeightHo(ctx)) +
+        std::string(" --out_w ") + std::to_string(CI::GetOutputWidthWo(ctx)) +
+        std::string(" --fil_h ") + std::to_string(CI::GetFilterHeightY(ctx)) +
+        std::string(" --fil_w ") + std::to_string(CI::GetFilterWidthX(ctx)) +
+        std::string(" --dilation_h ") + std::to_string(CI::GetAdjustedConvolutionDilationH(ctx)) +
+        std::string(" --dilation_w ") + std::to_string(CI::GetAdjustedConvolutionDilationW(ctx)) +
+        std::string(" --conv_stride_h ") + std::to_string(CI::GetAdjustedConvolutionStrideH(ctx)) +
+        std::string(" --conv_stride_w ") + std::to_string(CI::GetAdjustedConvolutionStrideW(ctx)) +
+        std::string(" --padding_h ") + std::to_string(CI::GetInputLeftPadH(ctx)) +
+        std::string(" --padding_w ") + std::to_string(CI::GetInputLeftPadW(ctx)) +
+        std::string(" --kernel_name ") + construction_parameters.kernel_name;
+    // clang-format on
+
+    size_t local_size  = 0;
+    size_t global_size = 0;
+    MiirGenLaunchParams(construction_parameters.comp_options, local_size, global_size);
+
+    construction_parameters.l_wk.push_back(local_size);
+    construction_parameters.l_wk.push_back(1);
+    construction_parameters.l_wk.push_back(1);
+
+    construction_parameters.g_wk.push_back(global_size);
+    construction_parameters.g_wk.push_back(1);
+    construction_parameters.g_wk.push_back(1);
+
+    result.invoker_factory = conv::MakeMlirFwdInvokerFactory(ctx);
+    result.construction_params.push_back(construction_parameters);
+    return result;
+#else
+    std::ignore = ctx;
+    return {};
+#endif
+}
+
+} // namespace solver
+} // namespace miopen
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 2206cdcbc0..efcd8255aa 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -314,7 +314,8 @@ endif()
 
 if(MIOPEN_TEST_MLIR)
     set(IMPLICITGEMM_MLIR_ENV_BASE MIOPEN_FIND_MODE=normal)
-    set(IMPLICITGEMM_MLIR_ENV_F ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppFwd)
+    set(IMPLICITGEMM_MLIR_ENV_F_CPP ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppFwd)
+    set(IMPLICITGEMM_MLIR_ENV_F_BIN ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirBinFwd)
     set(IMPLICITGEMM_MLIR_ENV_B ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppBwd)
     set(IMPLICITGEMM_MLIR_ENV_W ${IMPLICITGEMM_MLIR_ENV_BASE} MIOPEN_DEBUG_FIND_ONLY_SOLVER=ConvHipImplicitGemmMlirCppWrW)
 
@@ -323,9 +324,11 @@ if(MIOPEN_TEST_MLIR)
     set(IMPLICITGEMM_MLIR_ARGS_W ${IMPLICITGEMM_ARGS} --verbose --disable-forward --disable-backward-data)
     
     add_custom_test(test_conv_igemm_mlir ALLOW_NONXDLOPS
-        COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1
-        COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 256 56 56  --weights  256  256 1 1 --pads_strides_dilations 0 0 1 1 1 1
-        COMMAND ${IMPLICITGEMM_MLIR_ENV_F} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58  --weights  128  128 3 3 --pads_strides_dilations 0 0 1 1 1 1
+        COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1
+        COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 256 56 56  --weights  256  256 1 1 --pads_strides_dilations 0 0 1 1 1 1
+        COMMAND ${IMPLICITGEMM_MLIR_ENV_F_CPP} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58  --weights  128  128 3 3 --pads_strides_dilations 0 0 1 1 1 1
+        COMMAND ${IMPLICITGEMM_MLIR_ENV_F_BIN} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 64 128 58 58  --weights  128  128 3 3 --pads_strides_dilations 0 0 1 1 1 1
+        COMMAND ${IMPLICITGEMM_MLIR_ENV_F_BIN} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_F} --input 128 8 32 32  --weights  128  8 3 3 --pads_strides_dilations 0 0 1 1 1 1
 
         COMMAND ${IMPLICITGEMM_MLIR_ENV_B} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_B} --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1
         COMMAND ${IMPLICITGEMM_MLIR_ENV_B} $<TARGET_FILE:test_conv2d> ${IMPLICITGEMM_MLIR_ARGS_B} --input 64 256 56 56  --weights  256  256 1 1 --pads_strides_dilations 0 0 1 1 1 1