From 9a41ddec9d6d302080dc00c9860f3e496aa269c8 Mon Sep 17 00:00:00 2001 From: "M. Amber Hassaan" Date: Tue, 7 Nov 2023 02:29:37 +0000 Subject: [PATCH 01/73] WIP: updatable env vars, first draft --- src/include/miopen/env.hpp | 167 ++++++++++++++++++++++++++++++++----- 1 file changed, 148 insertions(+), 19 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 7e91a2a2c8..b9280ca1c1 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -33,6 +33,124 @@ namespace miopen { +namespace internal { + +/* NOTES AND GOTCHAS (TODO: Remove before committing) + * 1. All env variables that are used with IsDisabled() should be declared with + * default value 'true' using: MIOPEN_DECLARE_ENV_VAR(name, bool, true) + * 2. + */ + +template +struct ParseEnvVal{}; + +template <> +struct ParseEnvVal { + bool go(const char* vp) { + std::string value_env_str{vp}; + + for(auto& c : value_env_str) + { + if(std::isalpha(c) != 0) + { + c = std::tolower(static_cast(c)); + } + } + + if (std::strcmp(value_env_str.c_str(), "disable") == 0 || + std::strcmp(value_env_str.c_str(), "disabled") == 0 || + std::strcmp(value_env_str.c_str(), "0") == 0 || + std::strcmp(value_env_str.c_str(), "no") == 0 || + std::strcmp(value_env_str.c_str(), "off") == 0 || + std::strcmp(value_env_str.c_str(), "false") == 0) + { + return false; + } + else if (std::strcmp(value_env_str.c_str(), "enable") == 0 || + std::strcmp(value_env_str.c_str(), "enabled") == 0 || + std::strcmp(value_env_str.c_str(), "1") == 0 || + std::strcmp(value_env_str.c_str(), "yes") == 0 || + std::strcmp(value_env_str.c_str(), "on") == 0 || + std::strcmp(value_env_str.c_str(), "true") == 0) + { + return true; + } + else + { + MIOPEN_THROW(miopenStatusInvalidValue, "Invalid value for env variable"); + } + + return false; // shouldn't reach here + } +}; + +template <> +struct ParseEnvVal { + uint64_t go(const char* vp) { + return std::strtoull(vp, nullptr, 0); + } +}; + +template <> +struct 
ParseEnvVal { + std::string go(const char* vp) { + return std::string{vp}; + } +}; + +template +struct EnvVar { + + const T& GetValue() const { + return value; + } + + void UpdateValue(const T& val) { + value = val; + } + + explicit EnvVar(const char* const name, const T& def_val) { + const char* vp = std::getenv(name); + if (vp) // a value was provided + { + if constexpr (std::is_same_v) + { + value = ParseEnvVal::go(vp); + } + else if constexpr (std::is_same_v) { + value = ParseEnvVal::go(vp); + } else if constexpr (std::is_same_v) { + value = ParseEnvVal::go(vp); + } else { + value = ParseEnvVal::go(vp); // should cause compile error + } + } + else // no value provided, use default value + { + value = def_val; + } + + private: + T value{}; +}; + + +}// end namespace internal + + +#if 1 +// static inside function hides the variable and provides +// thread-safety/locking +#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ + struct name { \ + using value_type = type; \ + static internal::EnvVar& Ref() { \ + static internal::EnvVar var{#name, default_val}; \ + return var;\ + }\ + }; + +#else /// \todo Rework: Case-insensitive string compare, ODR, (?) move to .cpp // Declare a cached environment variable @@ -121,37 +239,48 @@ inline std::vector GetEnv(const char* name) else return {{p}}; } +#endif + +/// \todo the following functions should be renamed to either include the word Env +/// or put inside a namespace 'env'. 
Right now we have a function named Value() +/// that returns env var value as only 64-bit ints + +template +inline std::string GetStringEnv(EnvVar) +{ + static_assert(std::is_same_v); + return EnvVar::Ref().GetValue(); +} -template -inline const char* GetStringEnv(T) +template +inline bool IsEnabled(EnvVar) { - static const std::vector result = GetEnv(T::value()); - if(result.empty()) - return nullptr; - else - return result.front().c_str(); + static_assert(std::is_same_v); + return EnvVar::Ref().GetValue(); } -template -inline bool IsEnabled(T) +template +inline bool IsDisabled(EnvVar) { - static const bool result = miopen::IsEnvvarValueEnabled(T::value()); - return result; + static_assert(std::is_same_v); + return !EnvVar::Ref().GetValue(); } -template -inline bool IsDisabled(T) +template +inline uint64_t Value(EnvVar) { - static const bool result = miopen::IsEnvvarValueDisabled(T::value()); - return result; + static_assert(std::is_same_v); + return EnvVar::Ref().GetValue(); } -template -inline uint64_t Value(T, uint64_t fallback = 0) +/// updates the cached value of an environment variable +template +void UpdateEnvVar(EnvVar, const ValueType& val) { - static const auto result = miopen::EnvvarValue(T::value(), fallback); - return result; + static_assert(std::is_same_v); + EnvVar::Ref().UpdateValue(val); } + } // namespace miopen #endif From 1e63ea66848f8ef3ac7f925bf22e650e6127e6d3 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 7 Nov 2023 16:48:17 -0600 Subject: [PATCH 02/73] update env var declarations --- src/binary_cache.cpp | 4 +- src/check_numerics.cpp | 2 +- src/comgr.cpp | 26 +- src/conv/solver_finders.cpp | 14 +- src/convolution.cpp | 12 +- src/db_path.cpp.in | 4 +- src/execution_context.cpp | 14 +- src/expanduser.cpp | 6 +- src/find_controls.cpp | 20 +- src/gemm_v2.cpp | 2 +- src/generic_search.cpp | 13 +- src/hip/handlehip.cpp | 2 +- src/hip/hip_build_utils.cpp | 4 +- src/hipoc/hipoc_kernel.cpp | 4 +- src/hipoc/hipoc_program.cpp | 16 +- 
src/include/miopen/convolution.hpp | 28 +- src/include/miopen/env.hpp | 130 +- src/include/miopen/find_db.hpp | 2 +- .../miopen/generic_search_controls.hpp | 14 +- .../miopen/solver/ck_utility_common.hpp | 4 +- .../miopen/solver/implicitgemm_util.hpp | 10 +- src/include/miopen/sqlite_db.hpp | 4 +- src/kernel_cache.cpp | 4 +- src/logger.cpp | 14 +- src/mlo_dir_conv.cpp | 2 +- src/ocl/clhelper.cpp | 2 +- src/ocl/convolutionocl.cpp | 43 +- src/ocl/gcn_asm_utils.cpp | 2 +- src/ocl/rnnocl.cpp | 2 +- src/ocl_kernel.cpp | 4 +- src/reducetensor.cpp | 2 +- src/solver.cpp | 3 +- src/solver/batchnorm/backward_ck.cpp | 2 +- .../backward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_inference_ck.cpp | 2 +- .../batchnorm/forward_inference_fused.cpp | 2 +- .../forward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_training_ck.cpp | 2 +- src/solver/conv_MP_bidirectional_winograd.cpp | 28 +- src/solver/conv_asm_1x1u.cpp | 33 +- src/solver/conv_asm_1x1u_bias_activ_fused.cpp | 2 +- src/solver/conv_asm_1x1u_stride2.cpp | 31 +- src/solver/conv_asm_3x3u.cpp | 29 +- src/solver/conv_asm_5x10u2v2b1.cpp | 2 +- src/solver/conv_asm_5x10u2v2f1.cpp | 2 +- .../conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW1x1.cpp | 31 +- src/solver/conv_asm_dir_BwdWrW3x3.cpp | 31 +- ...onv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp | 2 +- src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp | 1180 ++++++++--------- .../conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp | 4 +- src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 4 +- .../conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp | 4 +- .../conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp | 4 +- .../conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp | 4 +- .../conv_asm_implicit_gemm_v4r1_dynamic.cpp | 4 +- ...m_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp | 2 +- ...onv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp | 2 +- src/solver/conv_bin_wino3x3U.cpp | 2 +- src/solver/conv_bin_winoRxS.cpp | 8 +- src/solver/conv_bin_winoRxS_fused.cpp | 14 +- 
.../conv_ck_igemm_fwd_bias_activ_fused.cpp | 2 +- .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 4 +- src/solver/conv_direct_naive_conv.cpp | 2 +- src/solver/conv_direct_naive_conv_bwd.cpp | 2 +- src/solver/conv_direct_naive_conv_fwd.cpp | 2 +- src/solver/conv_direct_naive_conv_wrw.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 2 +- ...conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_bwd_v1r1.cpp | 4 +- ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 4 +- .../conv_hip_implicit_gemm_bwd_v4r1.cpp | 4 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 33 +- .../conv_hip_implicit_gemm_fwd_v4r1.cpp | 4 +- .../conv_hip_implicit_gemm_fwd_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp | 4 +- ...licit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp | 4 +- ...conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp | 4 +- .../conv_hip_implicit_gemm_fwd_xdlops.cpp | 2 +- ...v_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_wrw_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp | 2 +- ...licit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp | 2 +- src/solver/conv_mlir_igemm_bwd.cpp | 2 +- src/solver/conv_mlir_igemm_bwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_fwd.cpp | 2 +- src/solver/conv_mlir_igemm_fwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_wrw.cpp | 2 +- src/solver/conv_mlir_igemm_wrw_xdlops.cpp | 2 +- src/solver/conv_multipass_wino3x3WrW.cpp | 26 +- src/solver/conv_ocl_dir2D11x11.cpp | 2 +- src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 4 +- src/solver/conv_ocl_dir2D_bwdWrW_2.cpp | 4 +- src/solver/conv_ocl_dir2D_bwdWrW_53.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd1x1.cpp | 4 +- src/solver/conv_ocl_dir2Dfwd_fused.cpp | 2 +- src/solver/conv_ocl_dir2Dfwdgen.cpp | 2 +- src/solver/conv_winoRxS.cpp | 15 +- src/solver/conv_winoRxS_fused.cpp | 2 +- src/solver/conv_wino_fury_RxS.cpp 
| 4 +- src/solver/fft.cpp | 2 +- src/solver/gemm.cpp | 2 +- src/solver/gemm_bwd.cpp | 2 +- src/solver/gemm_wrw.cpp | 2 +- src/target_properties.cpp | 10 +- src/tmp_dir.cpp | 2 +- 109 files changed, 960 insertions(+), 1086 deletions(-) diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index 5e5d2d404a..42db6659e7 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -46,7 +46,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DISABLE_CACHE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DISABLE_CACHE, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_CUSTOM_CACHE_DIR) static boost::filesystem::path ComputeSysCachePath() @@ -66,7 +66,7 @@ static boost::filesystem::path ComputeUserCachePath() /// If MIOPEN_CUSTOM_CACHE_DIR is set in the environment, then /// use exactly that path. const auto custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{}); - if(custom != nullptr && strlen(custom) > 0) + if(!custom.empty() && strlen(custom) > 0) { p = ExpandUser(custom); } diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index 12210069df..5a7e76f1de 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -33,7 +33,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CHECK_NUMERICS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CHECK_NUMERICS, uint64_t, 0) bool CheckNumericsEnabled(const int bitMask) { diff --git a/src/comgr.cpp b/src/comgr.cpp index f492eef1a1..64bdf6b57d 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -61,27 +61,27 @@ /// More info at https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1257. #define WORKAROUND_ISSUE_1257 (HIP_PACKAGE_VERSION_FLAT >= 4003021331ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_CALLS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_CALLS, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES, bool, false) /// 0: Off. /// 1: Logs each option on a separate line. /// 2: Logs all options altogether, on single line. 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_OPTIONS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_OPTIONS, uint64_t, 0) /// Integer, set to max number of first characters /// you would like to log onto console. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT, uint64_t, 0) /// \todo Temporary for debugging: MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT) /// \todo Temporary for debugging: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN, bool, false) /// \todo see issue #1222, PR #1316 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) #ifndef MIOPEN_AMD_COMGR_VERSION_MAJOR #define MIOPEN_AMD_COMGR_VERSION_MAJOR 0 @@ -137,7 +137,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) /// have wavesize != 64 (currently gfx10 with default build settings). 
#define WORKAROUND_ISSUE_1431 PCH_IS_SUPPORTED -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE, bool, true) #define COMPILER_LC 1 @@ -477,7 +477,7 @@ static std::string GetStatusText(const amd_comgr_status_t status, const bool unk static void LogOptions(const char* options[], size_t count) { - static const auto control = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_OPTIONS{}, 0); + static const auto control = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_OPTIONS{}); if(!(control != 0 && miopen::IsLogging(miopen::LoggingLevel::Info))) return; if(control == 2) @@ -626,7 +626,7 @@ class Dataset : ComgrOwner d.SetName(name); d.SetBytes(content); AddData(d); - const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}, 0); + const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}); if(show_first > 0 && miopen::IsLogging(miopen::LoggingLevel::Info) && (type == AMD_COMGR_DATA_KIND_SOURCE || type == AMD_COMGR_DATA_KIND_INCLUDE)) { @@ -764,9 +764,7 @@ static void SetIsaName(const ActionInfo& action, static std::string GetDebugCompilerOptionsInsert() { - const char* p = miopen::GetStringEnv(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT{}); - if(p == nullptr) - p = ""; + const auto p = miopen::GetStringEnv(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT{}); return {p}; } @@ -1245,7 +1243,7 @@ class HiprtcProgram MIOPEN_LOG_I(name << ' ' << content.size() << " bytes"); if(miopen::IsLogging(miopen::LoggingLevel::Info)) { - const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}, 0); + const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}); if(show_first > 0) { const auto text_length = diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index c998fb75a4..f5a2493836 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -36,11 +36,11 @@ namespace miopen { MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) namespace conv { namespace { @@ -209,8 +209,8 @@ static void EvaluateInvokers(Handle& handle, const AnyInvokeParams& invoke_ctx, DbRecord& record) { - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch != nullptr && strlen(arch) > 0) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) return; auto selected = miopen::solver::ConvSolution{miopenStatusUnknownError}; diff --git a/src/convolution.cpp b/src/convolution.cpp index d0f9a64ffb..bb713fb074 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -50,12 +50,12 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK, bool, false) namespace miopen { diff --git a/src/db_path.cpp.in b/src/db_path.cpp.in index 113b5fdcf4..17bba349df 100644 --- 
a/src/db_path.cpp.in +++ b/src/db_path.cpp.in @@ -66,7 +66,7 @@ boost::filesystem::path GetLibPath() std::string GetSystemDbPath() { auto p = GetStringEnv(MIOPEN_SYSTEM_DB_PATH{}); - if(p == nullptr) + if(p.empty()) #if MIOPEN_BUILD_DEV return "${MIOPEN_SYSTEM_DB_PATH}"; #else @@ -86,7 +86,7 @@ boost::filesystem::path PrepareUserDbPath() /// If MIOPEN_USER_DB_PATH is set in the environment, then assume that the user wants /// the library to use exactly that path. const auto p = GetStringEnv(MIOPEN_USER_DB_PATH{}); - if(p != nullptr) + if(!p.empty()) return ExpandUser(p); /// \anchor nfs-detection /// Otherwise, check if the user-db-path denotes a network filesystem. If this is the case, then diff --git a/src/execution_context.cpp b/src/execution_context.cpp index fb64a10c92..e6cad7862f 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -36,12 +36,12 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_KERNELS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_KERNELS, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER, bool, false) static std::ostream& operator<<(std::ostream& os, const rocm_meta_version& rmv) { @@ -201,7 +201,7 @@ bool IsHipKernelsEnabled() #if MIOPEN_USE_HIP_KERNELS return !miopen::IsDisabled(MIOPEN_DEBUG_HIP_KERNELS{}); #else - return miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); + return 
!miopen::IsDefault(MIOPEN_DEBUG_HIP_KERNELS{}) && miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); #endif } diff --git a/src/expanduser.cpp b/src/expanduser.cpp index 20ba3add40..8407877d86 100644 --- a/src/expanduser.cpp +++ b/src/expanduser.cpp @@ -181,10 +181,10 @@ bool IsNetworkedFilesystem(const boost::filesystem::path& path_) namespace { std::string GetHomeDir() { - const char* const p = GetStringEnv(HOME{}); - if(!(p == nullptr || p == std::string("/") || p == std::string(""))) + const auto p = GetStringEnv(HOME{}); + if(!(p.empty() || p == std::string("/"))) { - return {p}; + return p; } // todo: // need to figure out what is the correct thing to do here diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 5c5086d3d7..8c95cf77a1 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -68,10 +68,9 @@ const char* ToCString(const FindEnforceAction mode) FindEnforceAction GetFindEnforceActionImpl() { - const char* const p_asciz = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); - if(p_asciz == nullptr) + const auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); + if(str.empty()) return FindEnforceAction::Default_; - std::string str = p_asciz; for(auto& c : str) c = toupper(static_cast(c)); if(str == "NONE") @@ -87,7 +86,7 @@ FindEnforceAction GetFindEnforceActionImpl() else { // Nop. Fall down & try numerics. 
} - const auto val = static_cast(miopen::Value(MIOPEN_FIND_ENFORCE{})); + const auto val = static_cast(stoul(str)); if(FindEnforceAction::First_ <= val && val <= FindEnforceAction::Last_) return val; MIOPEN_LOG_NQE("Wrong MIOPEN_FIND_ENFORCE, using default."); @@ -103,11 +102,11 @@ FindEnforceAction GetFindEnforceAction() boost::optional> GetEnvFindOnlySolverImpl() { static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); - const char* const p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); + const auto slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); std::vector res; - if(p_asciz != nullptr && strlen(p_asciz) > 0) + if(!slv_str.empty) { - const auto solver_list = miopen::SplitDelim(std::string(p_asciz), ';'); + const auto solver_list = miopen::SplitDelim(slv_str, ';'); for(const auto& kinder : solver_list) { auto numeric_id = std::strtoul(kinder.c_str(), nullptr, 10); @@ -181,10 +180,9 @@ std::ostream& operator<<(std::ostream& os, const FindMode::Values& v) FindMode::Values GetFindModeValueImpl2() { - const char* const p_asciz = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); - if(p_asciz == nullptr) + const auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); + if(str.empty()) return FindMode::Values::Default_; - std::string str = p_asciz; for(auto& c : str) c = toupper(static_cast(c)); if(str == "NORMAL") @@ -198,7 +196,7 @@ FindMode::Values GetFindModeValueImpl2() else { // Nop. Fall down & try numerics. 
} - const auto val = static_cast(miopen::Value(MIOPEN_FIND_MODE{})); + const auto val = static_cast(stoul(str)); if(FindMode::Values::Begin_ <= val && val < FindMode::Values::End_) return val; MIOPEN_LOG_NQE("Wrong MIOPEN_FIND_MODE, using default."); diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 791bd33071..3e2e335a00 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -246,7 +246,7 @@ rocblas_status miopen_rocblas_gemm_strided_batched_ex3(const miopen::Handle& han #endif // MIOPEN_USE_ROCBLAS -MIOPEN_DECLARE_ENV_VAR(MIOPEN_GEMM_ENFORCE_BACKEND) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_GEMM_ENFORCE_BACKEND, uint64_t, 0) namespace miopen { diff --git a/src/generic_search.cpp b/src/generic_search.cpp index f7647b8f95..5fa6912973 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -36,26 +36,19 @@ namespace solver { std::size_t GetTuningIterationsMax() { - return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}, std::numeric_limits::max()); + return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); } std::chrono::milliseconds GetTuningTimeMax() { - static const auto fallback = - std::chrono::duration_cast(std::chrono::hours{2}); static const auto res = - std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{}, fallback.count())}; + std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; return res; } std::size_t GetTuningThreadsMax() { -#if MIOPEN_USE_COMGR - const auto def_max = 1; // COMGR is not parallelizable -#else - const int def_max = std::thread::hardware_concurrency() / 2; -#endif - return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}, def_max); + return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); } } // namespace solver diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index ff6d27d26e..11bed4dff5 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -65,7 +65,7 @@ /// Brute-force W/A: return fixed values. 
#define WORKAROUND_FAULTY_HIPMEMGETINFO_VEGA_NAVI2X (ROCM_FEATURE_DEPRECATED_VEGA_NAVI2X) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_CU) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_CU, uint64_t, 0) namespace miopen { diff --git a/src/hip/hip_build_utils.cpp b/src/hip/hip_build_utils.cpp index 86cf3a7272..932309d649 100644 --- a/src/hip/hip_build_utils.cpp +++ b/src/hip/hip_build_utils.cpp @@ -37,8 +37,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_VERBOSE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_DUMP) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_VERBOSE, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_DUMP, bool, false) namespace miopen { diff --git a/src/hipoc/hipoc_kernel.cpp b/src/hipoc/hipoc_kernel.cpp index 18f5211d0d..e29c591c30 100644 --- a/src/hipoc/hipoc_kernel.cpp +++ b/src/hipoc/hipoc_kernel.cpp @@ -79,8 +79,8 @@ void HIPOCKernelInvoke::run(void* args, std::size_t size) const stop = make_hip_event(); } - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch != nullptr && strlen(arch) > 0) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) { MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); } diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 8a87d0d3f7..63e3eecbc1 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -53,12 +53,12 @@ /// "-Xclang -target-feature -Xclang +code-object-v3" /// 3 - "-mnocode-object-v3" / "-mcode-object-v3" /// 4 - "-mcode-object-version=2/3/4" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_USE_HIPRTC) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_USE_HIPRTC, bool, true) #define MIOPEN_WORKAROUND_ISSUE_1359 1 @@ -190,8 +190,8 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, const std::string& blob) : program(program_name) ///, module(CreateModuleInMem(blob)) { - if(nullptr != - miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{})) /// \todo Finish off this spaghetti eventually. + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) return; module = CreateModuleInMem(blob); } @@ -210,8 +210,8 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, } else { - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch == nullptr) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(arch.empty()) { module = CreateModule(hsaco_file); } diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 28b68a35b0..9e8f32f915 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -48,9 +48,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, -1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, miopenF8RoundingMode_t.miopenF8RoundingModeStochastic) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED) namespace miopen { @@ -71,14 +71,11 @@ struct ConvolutionAttribute { class Gfx90aFp16alt { - int value = -1; friend struct ConvolutionAttribute; // For direct 
r/w. inline int Get() const { - if(nullptr != miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); - return value; + return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); } public: @@ -99,23 +96,21 @@ struct ConvolutionAttribute std::uniform_int_distribution distribution(0, 0xFFFFFFFF); return distribution(gen); } - miopenF8RoundingMode_t rounding_mode = miopenF8RoundingModeStochastic; uint32_t seed = InitSeed(); friend struct ConvolutionAttribute; inline miopenF8RoundingMode_t Get() const { - if(nullptr != miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) - return static_cast( - miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); - return rounding_mode; + return static_cast( + miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); } inline uint32_t GetSeed() const { // assert(rounding_mode == miopenF8RoundingModeStochastic); - if(nullptr != miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); + const auto str = miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); + if(!str.empty()) + return stoul(str); return seed; } @@ -124,15 +119,12 @@ struct ConvolutionAttribute class Deterministic { - int value = 0; friend struct ConvolutionAttribute; public: inline int Get() const { - if(nullptr != miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); - return value; + return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); } operator bool() const { diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index b9280ca1c1..690e002b8f 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -35,12 +35,6 @@ namespace miopen { namespace internal { -/* NOTES AND GOTCHAS (TODO: Remove 
before committing) - * 1. All env variables that are used with IsDisabled() should be declared with - * default value 'true' using: MIOPEN_DECLARE_ENV_VAR(name, bool, true) - * 2. - */ - template struct ParseEnvVal{}; @@ -75,7 +69,7 @@ struct ParseEnvVal { { return true; } - else + else { MIOPEN_THROW(miopenStatusInvalidValue, "Invalid value for env variable"); } @@ -105,7 +99,12 @@ struct EnvVar { return value; } + const bool IsDefault() const { + return is_default; + } + void UpdateValue(const T& val) { + is_default = false; value = val; } @@ -113,7 +112,8 @@ struct EnvVar { const char* vp = std::getenv(name); if (vp) // a value was provided { - if constexpr (std::is_same_v) + is_default = false; + if constexpr (std::is_same_v) { value = ParseEnvVal::go(vp); } @@ -130,17 +130,17 @@ struct EnvVar { value = def_val; } - private: + private: T value{}; + bool is_default = true; }; }// end namespace internal -#if 1 // static inside function hides the variable and provides -// thread-safety/locking +// thread-safety/locking #define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ struct name { \ using value_type = type; \ @@ -148,98 +148,16 @@ struct EnvVar { static internal::EnvVar var{#name, default_val}; \ return var;\ }\ - }; - -#else -/// \todo Rework: Case-insensitive string compare, ODR, (?) move to .cpp - -// Declare a cached environment variable -#define MIOPEN_DECLARE_ENV_VAR(x) \ - struct x \ - { \ - static const char* value() { return #x; } \ - }; + }; -/* - * Returns false if a feature-controlling environment variable is defined - * and set to something which disables a feature. 
- */ -inline bool IsEnvvarValueDisabled(const char* name) -{ - // NOLINTNEXTLINE (concurrency-mt-unsafe) - const auto value_env_p = std::getenv(name); - if(value_env_p == nullptr) - return false; - else - { - std::string value_env_str = value_env_p; - for(auto& c : value_env_str) - { - if(std::isalpha(c) != 0) - { - c = std::tolower(static_cast(c)); - } - } - return (std::strcmp(value_env_str.c_str(), "disable") == 0 || - std::strcmp(value_env_str.c_str(), "disabled") == 0 || - std::strcmp(value_env_str.c_str(), "0") == 0 || - std::strcmp(value_env_str.c_str(), "no") == 0 || - std::strcmp(value_env_str.c_str(), "off") == 0 || - std::strcmp(value_env_str.c_str(), "false") == 0); - } -} - -inline bool IsEnvvarValueEnabled(const char* name) -{ - // NOLINTNEXTLINE (concurrency-mt-unsafe) - const auto value_env_p = std::getenv(name); - if(value_env_p == nullptr) - return false; - else - { - std::string value_env_str = value_env_p; - for(auto& c : value_env_str) - { - if(std::isalpha(c) != 0) - { - c = std::tolower(static_cast(c)); - } - } - return (std::strcmp(value_env_str.c_str(), "enable") == 0 || - std::strcmp(value_env_str.c_str(), "enabled") == 0 || - std::strcmp(value_env_str.c_str(), "1") == 0 || - std::strcmp(value_env_str.c_str(), "yes") == 0 || - std::strcmp(value_env_str.c_str(), "on") == 0 || - std::strcmp(value_env_str.c_str(), "true") == 0); - } -} - -// Return 0 if env is enabled else convert environment var to an int. 
-// Supports hexadecimal with leading 0x or decimal -inline uint64_t EnvvarValue(const char* name, uint64_t fallback = 0) -{ - // NOLINTNEXTLINE (concurrency-mt-unsafe) - const auto value_env_p = std::getenv(name); - if(value_env_p == nullptr) - { - return fallback; - } - else - { - return strtoull(value_env_p, nullptr, 0); - } -} - -inline std::vector GetEnv(const char* name) -{ - // NOLINTNEXTLINE (concurrency-mt-unsafe) - const auto p = std::getenv(name); - if(p == nullptr) - return {}; - else - return {{p}}; -} -#endif +#define MIOPEN_DECLARE_ENV_VAR(name) \ + struct name { \ + using value_type = std::string; \ + static internal::EnvVar& Ref() { \ + static internal::EnvVar var{#name, std::string()}; \ + return var;\ + }\ + }; /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. Right now we have a function named Value() @@ -273,9 +191,15 @@ inline uint64_t Value(EnvVar) return EnvVar::Ref().GetValue(); } +template +inline bool IsDefault(EnvVar) +{ + return EnvVar::Ref().IsDefault(); +} + /// updates the cached value of an environment variable template -void UpdateEnvVar(EnvVar, const ValueType& val) +void UpdateEnvVar(EnvVar, const ValueType& val) { static_assert(std::is_same_v); EnvVar::Ref().UpdateValue(val); diff --git a/src/include/miopen/find_db.hpp b/src/include/miopen/find_db.hpp index 677c4345c7..500c5d6795 100644 --- a/src/include/miopen/find_db.hpp +++ b/src/include/miopen/find_db.hpp @@ -41,7 +41,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_FIND_DB) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_FIND_DB, bool, false) namespace miopen { diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 37d14d0639..17c6522871 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -26,14 +26,20 @@ #pragma once #include +#include namespace miopen { namespace 
solver { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TUNING_TIME_MS_MAX) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, uint64_t, std::numeric_limits::max()) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_TUNING_TIME_MS_MAX, uint64_t, std::chrono::duration_cast(std::chrono::hours{2}).count()) +#if MIOPEN_USE_COMGR + const auto def_max = 1; // COMGR is not parallelizable +#else + const int def_max = std::thread::hardware_concurrency() / 2; +#endif +MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, def_max) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) } // namespace solver } // namespace miopen diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp index 18285d8a09..509e0ffab9 100644 --- a/src/include/miopen/solver/ck_utility_common.hpp +++ b/src/include/miopen/solver/ck_utility_common.hpp @@ -39,8 +39,8 @@ #include "../composable_kernel/host/solver/include/convolution_problem_descriptor.hpp" #include "../composable_kernel/host/solver/include/solver_common.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING, bool, true) namespace miopen { namespace solver { diff --git a/src/include/miopen/solver/implicitgemm_util.hpp b/src/include/miopen/solver/implicitgemm_util.hpp index e634be8d5f..abeb49ce6d 100644 --- a/src/include/miopen/solver/implicitgemm_util.hpp +++ b/src/include/miopen/solver/implicitgemm_util.hpp @@ -34,11 +34,11 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM, bool, true) #define WORKAROUND_SWDEV_229277_227616_229195 1 // workaround for unnecessary VGPA <--> AGRP data movement when using mfma LLVM intrinsic diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index a1beef32e1..db32fe7829 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -59,7 +59,7 @@ class path; } // namespace boost namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_SQL_WAL) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_SQL_WAL, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_PERFDB_OVERRIDE) constexpr bool InMemDb = MIOPEN_EMBED_DB; @@ -444,7 +444,7 @@ class SQLitePerfDb : public SQLiteBase return boost::none; const auto pdb_ovr = miopen::GetStringEnv(MIOPEN_DEBUG_PERFDB_OVERRIDE{}); - if(pdb_ovr != nullptr) + if(!pdb_ovr.empty()) { MIOPEN_LOG_I2("overriding tuning params with: " << pdb_ovr); DbRecord ovr_rec; diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index 0f706d458f..880307f65c 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -124,8 +124,8 @@ Kernel KernelCache::AddKernel(const Handle& h, } Kernel kernel{}; - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch != nullptr && strlen(arch) > 0) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) { 
kernel = Kernel{program, kernel_name}; } diff --git a/src/logger.cpp b/src/logger.cpp index f8045f9ac7..af78cea297 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -41,27 +41,27 @@ namespace miopen { /// Enable logging of the most important function calls. /// Name of envvar in a bit inadequate due to historical reasons. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING, bool, false) /// Prints driver command lines into log. /// Works from any application which uses the library. /// Allows to reproduce library use cases using the driver instead of the actual application. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_CMD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_CMD, bool, false) /// Prefix each log line with information which allows the user /// to uniquiely identify log records printed from different processes /// or threads. Useful for debugging multi-process/multi-threaded apps. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_MPMT) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_MPMT, bool, false) /// Add timestamps to each log line. /// Not useful with multi-process/multi-threaded apps. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME, bool, false) /// See LoggingLevel in the header. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL, uint64_t, LoggingLevel::Default) /// Enable logging of function calls to ROCTX api. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ROCTX) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ROCTX, bool, false) namespace debug { @@ -70,7 +70,7 @@ bool LoggingQuiet = false; // NOLINT (cppcoreguidelines-avoid-non-const-global-v } // namespace debug /// Disable logging quieting. 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE, bool, false) namespace { diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp index 4d90a479e0..33a775b529 100644 --- a/src/mlo_dir_conv.cpp +++ b/src/mlo_dir_conv.cpp @@ -55,7 +55,7 @@ #define WORKAROUND_SWDEV_227826 0 #if WORKAROUND_SWDEV_227826 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS, bool, false) #endif miopen::PerformanceDb miopen::GetDb(const miopen::ExecutionContext& ctx) diff --git a/src/ocl/clhelper.cpp b/src/ocl/clhelper.cpp index 202e539c15..433ca17bb3 100644 --- a/src/ocl/clhelper.cpp +++ b/src/ocl/clhelper.cpp @@ -45,7 +45,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) namespace miopen { diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 29650085d1..8378cebde2 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -58,12 +58,12 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMMED_FALLBACK) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMMED_FALLBACK, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DUMP_TENSOR_PATH) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK, bool, false) static inline bool IsValidFilterChannelNumber(const TensorDescriptor& x, const TensorDescriptor& w, @@ -387,13 +387,12 @@ static void
ConvForwardCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.yDesc, tensors.y); - const char* file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); - if(flag && static_cast(file_name)) + const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + if(flag && !file_name.empty()) { - std::string file_name_str = file_name; - DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name_str + "_x.bin"); - DumpTensorToFileFromDevice(handle, tensors.wDesc, tensors.w, file_name_str + "_w.bin"); - DumpTensorToFileFromDevice(handle, tensors.yDesc, tensors.y, file_name_str + "_y.bin"); + DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name + "_x.bin"); + DumpTensorToFileFromDevice(handle, tensors.wDesc, tensors.w, file_name + "_w.bin"); + DumpTensorToFileFromDevice(handle, tensors.yDesc, tensors.y, file_name + "_y.bin"); } } @@ -913,13 +912,12 @@ static void ConvBwdCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.dxDesc, tensors.dx); - const char* file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); - if(flag && static_cast(file_name)) + const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + if(flag && !file_name.empty()) { - std::string file_name_str = file_name; - DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name_str + "_dy.bin"); - DumpTensorToFileFromDevice(handle, tensors.wDesc, tensors.w, file_name_str + "_w.bin"); - DumpTensorToFileFromDevice(handle, tensors.dxDesc, tensors.dx, file_name_str + "_dx.bin"); + DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); + DumpTensorToFileFromDevice(handle, tensors.wDesc, tensors.w, file_name + "_w.bin"); + DumpTensorToFileFromDevice(handle, tensors.dxDesc, tensors.dx, file_name + "_dx.bin"); } } @@ -1115,13 +1113,12 @@ static void ConvWrwCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, 
tensors.dwDesc, tensors.dw); - const char* file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); - if(flag && static_cast(file_name)) + const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + if(flag && !file_name.empty()) { - std::string file_name_str = file_name; - DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name_str + "_dy.bin"); - DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name_str + "_x.bin"); - DumpTensorToFileFromDevice(handle, tensors.dwDesc, tensors.dw, file_name_str + "_dw.bin"); + DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); + DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name + "_x.bin"); + DumpTensorToFileFromDevice(handle, tensors.dwDesc, tensors.dw, file_name + "_dw.bin"); } } diff --git a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index a6018ba53c..e8ee88c1e4 100644 --- a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -71,7 +71,7 @@ static std::string CleanupPath(const char* p); std::string GetGcnAssemblerPathImpl() { const auto asm_path_env_p = miopen::GetStringEnv(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH{}); - if(asm_path_env_p != nullptr) + if(!asm_path_env_p.empty()) { - return CleanupPath(asm_path_env_p); + return CleanupPath(asm_path_env_p.c_str()); } diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index 8f650ef30d..d2e3c5c98a 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_RNNFWD_exp) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_RNNFWD_exp, bool, true) namespace miopen { diff --git a/src/ocl_kernel.cpp b/src/ocl_kernel.cpp index ff6f3d65ae..c60d0ab076 100644 --- a/src/ocl_kernel.cpp +++ b/src/ocl_kernel.cpp @@ -58,8 +58,8 @@ void OCLKernelInvoke::run() const MIOPEN_HANDLE_LOCK - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch != nullptr && strlen(arch) > 0) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) {
MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); } diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 6ca1067fb8..b148f92077 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -47,7 +47,7 @@ #include <../composable_kernel/composable_kernel/include/utility/data_type_enum.hpp> #include <../composable_kernel/composable_kernel/include/utility/reduction_enums.hpp> -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DYNAMIC_REDUCTION); +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DYNAMIC_REDUCTION, bool, true); #define WORKAROUND_MIOPEN_ISSUE_557 1 diff --git a/src/solver.cpp b/src/solver.cpp index 01835dcb1c..847de11756 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -43,7 +43,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS, bool, false) namespace miopen { namespace solver { @@ -66,7 +66,6 @@ std::vector PrecompileKernels(const Handle& h, const std::vector #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_BACK) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_BACK, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/batchnorm/backward_per_activation_fused.cpp b/src/solver/batchnorm/backward_per_activation_fused.cpp index fc112eeb28..b3a5e90b78 100644 --- a/src/solver/batchnorm/backward_per_activation_fused.cpp +++ b/src/solver/batchnorm/backward_per_activation_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED, bool, true) namespace miopen { diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index 75ab54e973..d6ed0cd558 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -32,7 +32,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_INFER) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_INFER, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/batchnorm/forward_inference_fused.cpp b/src/solver/batchnorm/forward_inference_fused.cpp index 189bc61bf7..ba064f1523 100644 --- a/src/solver/batchnorm/forward_inference_fused.cpp +++ b/src/solver/batchnorm/forward_inference_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED, bool, true) namespace miopen { diff --git a/src/solver/batchnorm/forward_per_activation_fused.cpp b/src/solver/batchnorm/forward_per_activation_fused.cpp index a194b263c2..f49a4c13e4 100644 --- a/src/solver/batchnorm/forward_per_activation_fused.cpp +++ b/src/solver/batchnorm/forward_per_activation_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED, bool, true) namespace miopen { diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index 442ee5e899..49d0323ebc 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -33,7 +33,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index c70ea319bd..2cf2ad9fbc 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -51,7 +51,7 @@ #if WORKAROUND_SWDEV_257202 // Workaround, solver disabled by default. 
-#define IS_DISABLED(expr) !miopen::IsEnabled(expr) +#define IS_DISABLED(expr) (miopen::IsDefault(expr) || !miopen::IsEnabled(expr)) #else // Normal behavior (solver enabled by default). #define IS_DISABLED(expr) miopen::IsDisabled(expr) @@ -63,23 +63,23 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F2X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F3X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F4X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F5X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F2X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F3X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F4X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F5X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX, uint64_t, 0)
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. shader C is batch_size etc and useful for diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index e162059246..37a1c400e1 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -42,9 +42,9 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR, bool, false) namespace miopen { namespace solver { @@ -827,24 +827,19 @@ ConvSolution ConvAsm1x1U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1U fromEnv; { - std::string s; - const auto p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS{}); - if(p_asciz != nullptr) + const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS{}); + if(!s.empty()) // else nothing to parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. 
+ if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp index f4cd160f18..89aa86e9f2 100644 --- a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp +++ b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp @@ -46,7 +46,7 @@ using half_float::half; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index f442a8f410..440fe125fc 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -38,8 +38,8 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2, bool, true) namespace miopen { namespace solver { @@ -609,24 +609,19 @@ ConvSolution ConvAsm1x1UV2::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1UV2 fromEnv; { - std::string s; - const auto p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS{}); - if(p_asciz != nullptr) + const auto s = 
miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS{}); + if(!s.empty()) // else nothing to parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. + if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index 284841e465..a4f4eccbe3 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -41,7 +41,7 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U, bool, true) namespace miopen { namespace solver { @@ -254,24 +254,19 @@ ConvSolution ConvAsm3x3U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm3x3U fromEnv; { - std::string s; - const auto p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS{}); - if(p_asciz != nullptr) + const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS{}); + if(!s.empty()) // else nothing to parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. 
+ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(problem)) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(problem)) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_asm_5x10u2v2b1.cpp b/src/solver/conv_asm_5x10u2v2b1.cpp index a46f7bd749..0a405953af 100644 --- a/src/solver/conv_asm_5x10u2v2b1.cpp +++ b/src/solver/conv_asm_5x10u2v2b1.cpp @@ -31,7 +31,7 @@ #define WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_5x10u2v2f1.cpp b/src/solver/conv_asm_5x10u2v2f1.cpp index ebc77c2490..bb33ac57b3 100644 --- a/src/solver/conv_asm_5x10u2v2f1.cpp +++ b/src/solver/conv_asm_5x10u2v2f1.cpp @@ -32,7 +32,7 @@ #define WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp index 5864e1a92e..cc9cf3e5f8 100644 --- a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp +++ b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp @@ -32,7 +32,7 @@ #define WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 79c3046b83..815f71d24a 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -38,8 +38,8 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1, bool, true) namespace miopen { namespace solver { @@ -740,24 +740,19 @@ ConvSolution ConvAsmBwdWrW1x1::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsmBwdWrW1x1 fromEnv; { - std::string s; - const auto p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS{}); - if(p_asciz != nullptr) + const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS{}); + if(!s.empty()) // else nothing to parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. 
+ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index f6972648cb..7410babb7a 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -42,8 +42,8 @@ #define WORKAROUND_SWDEV_330460 1 // ConvAsmBwdWrw3x3 has precision issues on MI200 MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3, bool, false) namespace miopen { namespace solver { @@ -474,24 +474,19 @@ ConvSolution ConvAsmBwdWrW3x3::GetSolution(const ExecutionContext& ctx, PerformanceConfigAsmDirect3x3WrW fromEnv; { - std::string s; - const auto p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS{}); - if(p_asciz != nullptr) + const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS{}); + if(!s.empty()) // else nothing to parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. 
+ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp index 207df443a6..4d2f2457a7 100644 --- a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp index 3679f632e6..5b704541a4 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS, bool, true) namespace miopen { namespace solver { @@ -166,596 +166,596 @@ GetImplicitGemmGtcDynamicBwdTunablesList(const ProblemDescription& problem) { "bwd", miopenFloat, 1, 1, 4, 64, 16, 4, 64, 1, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0}, { "bwd", miopenFloat, 1, 0, 4, 64, 16, 4, 64, 1, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0}, { "bwd", miopenFloat, 4, 1, 4, 64, 16, 4, 64, 1, 1, 1, 1, 1, {1, 1, 
1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0}, - }; + }; static const std::vector tunables_fp16 = { - { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 
4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 
4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 
2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 
2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 2}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 
1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, 
{1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 
128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, 
{1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", 
miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 
2, 2, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 
1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 
16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, 
{1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", 
miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 16, 16, 
64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, - 
{ "bwd", miopenHalf, 4, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, 
{1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 32, 8, 64, 4, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 32, 8, 64, 4, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", 
miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 
4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 
64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 4}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 4}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 4}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 4}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 
4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 8, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 8, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {8, 1, 1, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {8, 1, 1, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 
32, 32, 8, 1, 2, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { 
"bwd", miopenHalf, 1, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 
16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 256, 8, 4, 64, 4, 2, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 256, 8, 4, 64, 4, 2, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 
1, 256, 16, 16, 64, 4, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 256, 16, 16, 64, 4, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 32, 8, 32, 8, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 32, 8, 32, 8, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 
4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 2, 1, 1}, 
{1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 128, 8, 8, 32, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 128, 8, 8, 32, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 256, 16, 4, 64, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 256, 16, 4, 64, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {2, 1, 8, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {2, 1, 8, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 16, 8, 64, 16, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 
128, 16, 8, 64, 16, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 32, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 32, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { 
"bwd", miopenHalf, 1, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 64, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 64, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 8}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 8}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 8, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 8, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 128, 8, 16, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 8, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 128, 8, 16, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 8, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 16, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 16, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 16, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 16, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 8, 1, 1}, {1, 8, 1, 
32}, {1, 8, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 8, 1, 1}, {1, 8, 1, 32}, {1, 8, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {8, 1, 1, 1}, {1, 8, 1, 32}, {8, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {8, 1, 1, 1}, {1, 8, 1, 32}, {8, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 8, 1, 32}, {1, 4, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 8, 1, 32}, {1, 4, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 8, 1, 32}, {4, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 8, 1, 32}, {4, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 1, 2}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 1, 2}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 
16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {4, 1, 2, 1}, {1, 16, 1, 16}, {4, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {4, 1, 2, 1}, {1, 16, 1, 16}, {4, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 1, 2}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 1, 2}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 16, 1, 16}, {2, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 16, 1, 16}, {2, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 1, 4}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 1, 4}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {2, 1, 4, 1}, {1, 32, 1, 8}, {2, 1, 
4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {2, 1, 4, 1}, {1, 32, 1, 8}, {2, 1, 4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 32, 1, 8}, {1, 1, 4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 32, 1, 8}, {1, 1, 4, 1}, {1, 32, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 64, 1, 4}, {1, 1, 8, 1}, {1, 64, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 64, 1, 4}, {1, 1, 8, 1}, {1, 64, 1, 4}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 8, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 8}, {1, 1, 1, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 8, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 8}, {1, 1, 1, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {2, 
1, 2, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 
1, 1, {2, 1, 2, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 2}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 2}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 2, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 2, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 2, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 1, 1, 1, 
1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 2, 1}, {1, 8, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 4, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 4, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 2}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 2}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 2, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 2, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 2}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 2}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 2, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 2, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 4}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 4}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 
1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 4, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 8, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 8, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 4, 1, 8, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 8}, {1, 1, 8, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 8, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 8}, {1, 1, 8, 1}, {1, 16, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 64, 4, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 4}, {1, 1, 1, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 64, 4, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 4}, {1, 1, 1, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 8, 16, 
16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, - { "bwd", miopenHalf, 4, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, - { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 4, 1, 4, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0 }, - { "bwd", miopenHalf, 1, 1, 4, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", 
miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 
8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 
1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 32, 32, 32, 8, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 
2, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 8, 64, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 64, 32, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 128, 16, 32, 32, 8, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, 
{1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 2}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 
1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, 
{1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 8, 32, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 32, 32, 32, 8, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 256, 16, 32, 64, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", 
miopenHalf, 1, 1, 128, 256, 16, 32, 32, 8, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 
32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 8, 64, 16, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 64, 16, 64, 16, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 1}, {1, 2, 1, 128}, {1, 16, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 2, 1, 128}, {1, 8, 1, 1}, {1, 2, 1, 128}, 0 }, + 
{ "bwd", miopenHalf, 4, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 2, 1, 128}, {1, 4, 1, 1}, {1, 2, 1, 128}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 
16, 2, 2, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 
1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 
16, 16, 2, 2, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 8, 32, 32, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, 
{1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 64, 16, 16, 16, 2, 2, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { 
"bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 32, 32, 8, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 128, 32, 16, 16, 16, 2, 2, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 
1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 4}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 4, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { 
"bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 8, 16, 64, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 32, 32, 32, 8, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 16, 64, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 256, 16, 32, 32, 8, 1, 1, 2, 2, {1, 
1, 4, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 2, 1, 8}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {2, 1, 8, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 32, 8, 64, 4, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 32, 8, 64, 4, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 32, 16, 64, 4, 4, 1, 2, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 16, 1, 2}, {1, 4, 1, 64}, {1, 16, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", 
miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 2}, {1, 4, 1, 64}, {1, 8, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {8, 1, 2, 1}, {1, 4, 1, 64}, {8, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 4, 1, 2}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {4, 1, 2, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 2}, {1, 4, 1, 64}, {1, 2, 
1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {2, 1, 2, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 8, 1, 4}, {1, 8, 1, 32}, {1, 8, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 4}, {1, 8, 1, 32}, {1, 4, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {4, 1, 4, 1}, {1, 8, 1, 32}, {4, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 
4, 1, 2, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 8, 32, 8, 4, 1, 2, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 4}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 1, 4}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 64, 16, 16, 16, 2, 1, 2, 2, {1, 4, 1, 8}, {1, 16, 1, 16}, {1, 4, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 4}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 1, 4}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 2, 1, 8}, {1, 16, 1, 16}, {1, 2, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {2, 1, 8, 1}, {1, 16, 1, 16}, {2, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 16, 32, 8, 4, 1, 2, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 4, 
1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 8, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 64, 32, 16, 16, 16, 2, 1, 2, 2, {1, 1, 16, 1}, {1, 32, 1, 8}, {1, 1, 8, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 8, 1, 1}, {1, 4, 1, 64}, {1, 8, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {8, 1, 1, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {8, 1, 1, 1}, {1, 4, 1, 64}, {8, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 8, 8, 32, 4, 2, 1, 
2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 2}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 2, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 8, 1, 32}, {1, 4, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 8, 1, 32}, {4, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", 
miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 8, 8, 32, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 1, 8}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 16, 1, 16}, {1, 2, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 16, 1, 16}, {2, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 8, 32, 4, 2, 1, 2, 2, {1, 1, 4, 1}, {1, 
16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 16, 32, 32, 8, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 128, 32, 32, 32, 8, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 32, 1, 8}, {1, 1, 16, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 8}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 256, 8, 4, 64, 4, 2, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 256, 8, 4, 64, 4, 2, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 8, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 256, 16, 4, 64, 4, 2, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 256, 16, 16, 64, 4, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 256, 16, 16, 64, 4, 4, 1, 1, 2, 2, {1, 1, 16, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 
128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 2, 1, 4}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {2, 1, 4, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 32, 8, 32, 8, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 32, 8, 32, 8, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 32, 16, 32, 8, 4, 1, 1, 2, 2, {1, 1, 8, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 4, 1, 1}, {1, 4, 1, 64}, {1, 4, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {4, 1, 1, 1}, {1, 4, 1, 64}, {4, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 4, 1, 64}, {1, 2, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 4, 1, 64}, {2, 1, 1, 1}, {1, 4, 1, 64}, 0 
}, + { "bwd", miopenHalf, 4, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 8, 16, 16, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 64, 16, 16, 16, 4, 1, 1, 2, 2, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 4}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 128, 8, 8, 32, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 128, 8, 8, 32, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 8, 1, 
32}, {1, 1, 4, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 128, 16, 8, 32, 4, 1, 1, 2, 2, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 8, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 256, 16, 4, 64, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 256, 16, 4, 64, 4, 1, 1, 2, 2, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 16, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {2, 1, 8, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 2, 1, 8}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {2, 1, 8, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 16, 8, 64, 16, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 16, 8, 64, 16, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 128, 16, 16, 64, 16, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 64, 32, 
16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 32, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 32, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 32, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 2}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 64, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 64, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 2, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", 
miopenHalf, 1, 1, 32, 64, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 8}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 8}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 8, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 8, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 128, 8, 16, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 8, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 128, 8, 16, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 8, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 16, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 128, 16, 16, 64, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 16, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 16, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 16, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 16}, {1, 1, 1, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 8, 1, 1}, {1, 8, 1, 32}, {1, 8, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 8, 1, 1}, {1, 8, 1, 32}, {1, 8, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {8, 1, 1, 1}, {1, 8, 1, 32}, {8, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {8, 1, 1, 1}, {1, 8, 1, 32}, {8, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 8, 1, 
32}, {1, 4, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 8, 1, 32}, {1, 4, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 8, 1, 32}, {4, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 8, 1, 32}, {4, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 32}, {1, 2, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 32}, {2, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 32}, {1, 1, 1, 1}, {1, 8, 1, 32}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 1, 2}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 1, 2}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {4, 1, 2, 1}, {1, 16, 1, 16}, {4, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 4, 1, 2}, {1, 16, 1, 16}, {1, 4, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {4, 1, 2, 1}, {1, 16, 1, 16}, {4, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 
32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 1, 2}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 1, 2}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 16, 1, 16}, {2, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 16, 1, 16}, {1, 2, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 16, 1, 16}, {2, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 16}, {1, 1, 2, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 1, 4}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 1, 4}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {2, 1, 4, 1}, {1, 32, 1, 8}, {2, 1, 4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 2, 1, 4}, {1, 32, 1, 8}, {1, 2, 4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {2, 1, 4, 1}, {1, 32, 1, 8}, {2, 1, 4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 32, 1, 8}, {1, 1, 4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 32, 16, 16, 16, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 32, 1, 8}, {1, 1, 
4, 1}, {1, 32, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 64, 1, 4}, {1, 1, 8, 1}, {1, 64, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 32, 64, 16, 16, 16, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 64, 1, 4}, {1, 1, 8, 1}, {1, 64, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 16}, {1, 1, 4, 1}, {1, 16, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 8, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 8}, {1, 1, 1, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 8, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 8}, {1, 1, 1, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 16}, {1, 2, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 16}, {2, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 16}, {1, 1, 1, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 1, 1, 1, {1, 1, 4, 1}, 
{1, 16, 1, 8}, {1, 1, 2, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 4, 1, 2}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {4, 1, 2, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 2}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {2, 1, 2, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 0, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 4, 
1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 2, 1, 4}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {2, 1, 4, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 8, 32, 8, 4, 1, 2, 1, 1, {1, 1, 4, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 32, 16, 16, 32, 8, 4, 1, 2, 1, 1, {1, 1, 8, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 2}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 8, 1, 16}, {1, 2, 1, 2}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 2, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 8, 1, 16}, {2, 1, 2, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 2, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 8, 1, 16}, {1, 1, 2, 1}, {1, 8, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 4, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 16, 1, 8}, {1, 1, 4, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 2}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 4, 
1, 1}, {1, 4, 1, 16}, {1, 4, 1, 2}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 2, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 2, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 2}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 2}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 2, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 2, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 4}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 4}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 8, 8, 32, 4, 2, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 4, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 8, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 32, 16, 8, 32, 4, 2, 1, 1, 1, {1, 1, 4, 
1}, {1, 16, 1, 4}, {1, 1, 8, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 1, 8, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 8}, {1, 1, 8, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 8, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 8}, {1, 1, 8, 1}, {1, 16, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 64, 4, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 4}, {1, 1, 1, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 64, 4, 16, 64, 4, 4, 1, 1, 1, 1, {1, 1, 16, 1}, {1, 16, 1, 4}, {1, 1, 1, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 4, 1, 1}, {1, 4, 1, 16}, {1, 4, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {4, 1, 1, 1}, {1, 4, 1, 16}, {4, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 1}, {1, 4, 1, 16}, {1, 2, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {2, 1, 1, 1}, {1, 4, 1, 16}, {2, 1, 1, 1}, {1, 4, 1, 16}, 0 }, + { "bwd", miopenHalf, 4, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 0, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 1, 2}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 
1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 2, 1, 2}, {1, 8, 1, 8}, {1, 2, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {2, 1, 2, 1}, {1, 8, 1, 8}, {2, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 8, 16, 16, 4, 1, 1, 1, 1, {1, 1, 2, 1}, {1, 8, 1, 8}, {1, 1, 2, 1}, {1, 8, 1, 8}, 0 }, + { "bwd", miopenHalf, 4, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 16, 16, 16, 16, 16, 4, 1, 1, 1, 1, {1, 1, 4, 1}, {1, 16, 1, 4}, {1, 1, 4, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 4, 1, 4, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0 }, + { "bwd", miopenHalf, 1, 1, 4, 64, 16, 4, 64, 4, 1, 1, 1, 1, {1, 1, 1, 1}, {1, 16, 1, 4}, {1, 1, 16, 1}, {1, 16, 1, 4}, 0 }, }; // clang-format on diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index 426131b99b..853aa8f6c9 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) #define BWD_MAX_GEMM_K_SPLITS 8 // #define DEBUG_IGEMM_ASM_BWD_NHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index 57834af505..38d0143476 100644 --- 
a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -33,7 +33,7 @@ #define WORKAROUND_SWDEV_306318 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS, bool, true) namespace miopen { namespace solver { @@ -1541,7 +1541,7 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext #if WORKAROUND_SWDEV_306318 if((problem.GetWeightsHeight_() == 1) && (problem.GetWeightsWidth_() == 1) && (problem.GetInChannels_() % 8 != 0)) - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) return false; #endif diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp index 8e789ddc0c..dce670c47d 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) // #define DEBUG_IGEMM_ASM_FWD_NCHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp index 219e2b9d78..05a1f3ca94 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) #define FWD_MAX_GEMM_K_SPLITS 8 // #define DEBUG_IGEMM_ASM_FWD_NHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp index 2cfaeeb2ee..a722fdcd9e 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp @@ -35,8 +35,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) #define WRW_MAX_GEMM_K_SPLITS 10 #define WORKAROUND_ISSUE_2496 1 diff --git a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp index 45f611fabf..1e0243f2f3 100644 --- a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp @@ -30,8 +30,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp index 19e8b29282..aee5798041 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp +++ 
b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp @@ -34,7 +34,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index bef7f4841c..59cb10160a 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ -32,7 +32,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_bin_wino3x3U.cpp b/src/solver/conv_bin_wino3x3U.cpp index 6d23f77920..5eda9fc136 100644 --- a/src/solver/conv_bin_wino3x3U.cpp +++ b/src/solver/conv_bin_wino3x3U.cpp @@ -35,7 +35,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_3X3) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_3X3, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_bin_winoRxS.cpp b/src/solver/conv_bin_winoRxS.cpp index 9285108b30..ade87d6814 100644 --- a/src/solver/conv_bin_winoRxS.cpp +++ b/src/solver/conv_bin_winoRxS.cpp @@ -36,12 +36,12 @@ #include /// Global switch -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS, bool, true) /// Sub-switches for testing/debugging -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD, bool, true) /// \todo Detect at runtime and remove this var: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED, bool, false) /// \return v rounded up (towards +inf) to the nearest multiple of m. /// Defined for positive values only. diff --git a/src/solver/conv_bin_winoRxS_fused.cpp b/src/solver/conv_bin_winoRxS_fused.cpp index ac2272fcf8..f4411f88a9 100644 --- a/src/solver/conv_bin_winoRxS_fused.cpp +++ b/src/solver/conv_bin_winoRxS_fused.cpp @@ -36,8 +36,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) /// \return v rounded up (towards +inf) to the nearest multiple of m. /// Defined for positive values only. @@ -122,7 +122,7 @@ bool ConvBinWinogradRxSFused::IsApplicable(const FusionContext& context, // clang-format off return conv_problem.GetKernelStrideH() == conv_problem.GetKernelStrideW() && conv_problem.GetDilationH() == 1 - && conv_problem.GetDilationW() == 1 + && conv_problem.GetDilationW() == 1 && (static_cast(C) * x * y) <= std::pow(2, 28) && (static_cast(K) * x * y) <= std::pow(2, 28) && (static_cast(K) * OH * OW) <= std::pow(2, 28) @@ -134,10 +134,10 @@ bool ConvBinWinogradRxSFused::IsApplicable(const FusionContext& context, && OH <= std::pow(2, 16) && OW <= std::pow(2, 16) && H <= std::pow(2, 16) - && W <= std::pow(2, 16) - && C <= std::pow(2, 16) - && K <= std::pow(2, 16) - && N <= std::pow(2, 16) + && W <= std::pow(2, 16) + && C <= std::pow(2, 16) + && K <= std::pow(2, 16) + && N <= std::pow(2, 16) && group_count == 1; // clang-format on } diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index 1457c7d309..76dc4b5f66 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -37,7 +37,7 @@ #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include #endif 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV, bool, true) #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL // Forward declare CK's function. diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index c4c3398fd9..9f94328c61 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -36,7 +36,7 @@ #define WORKAROUND_SWDEV_411729 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW, bool, true) namespace miopen { namespace solver { @@ -90,7 +90,7 @@ bool ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_411729 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #else if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #endif diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index 443ae6a667..0cc73ab0b9 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -119,7 +119,7 @@ bool IsOutputInt32(const ProblemDescription& problem) problem.GetOutDataType() == miopenInt32; } -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS); +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS, bool, false); std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) { diff --git a/src/solver/conv_direct_naive_conv_bwd.cpp b/src/solver/conv_direct_naive_conv_bwd.cpp index ad96b8badb..0b5fd517b8 100644 --- a/src/solver/conv_direct_naive_conv_bwd.cpp +++ 
b/src/solver/conv_direct_naive_conv_bwd.cpp @@ -29,7 +29,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_direct_naive_conv_fwd.cpp b/src/solver/conv_direct_naive_conv_fwd.cpp index 9fee363c24..afd6132c51 100644 --- a/src/solver/conv_direct_naive_conv_fwd.cpp +++ b/src/solver/conv_direct_naive_conv_fwd.cpp @@ -28,7 +28,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_direct_naive_conv_wrw.cpp b/src/solver/conv_direct_naive_conv_wrw.cpp index eaf7ed7d68..0468565b85 100644 --- a/src/solver/conv_direct_naive_conv_wrw.cpp +++ b/src/solver/conv_direct_naive_conv_wrw.cpp @@ -29,7 +29,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 57d8709c06..8d44dbf0b4 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 54632adc9c..c76e620744 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ 
b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index dc2f0f6218..b75427b808 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 3cda7a1cc1..51e2a92376 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -36,7 +36,7 @@ #endif #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index 241aba33c9..efc0d7e4a2 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -34,7 +34,7 @@ #define WORKAROUND_ISSUE_309 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1, bool, true) namespace miopen { namespace solver { @@ -663,7 +663,7 @@ bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, return false; #if WORKAROUND_ISSUE_309 
if(problem.IsBfp16()) - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) return false; #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index b8f04e2b38..5d0261ec47 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -31,7 +31,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS, bool, true) namespace miopen { namespace solver { @@ -757,7 +757,7 @@ bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ExecutionContext& const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_251757 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp index a58b0df45e..e6ba63b512 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp @@ -32,7 +32,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1, bool, true) namespace miopen { namespace solver { @@ -731,7 +731,7 @@ bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_229277_227616_229195 - 
if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 485bdbbc5b..4569b9d13c 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -36,7 +36,7 @@ #define WORKAROUND_SWDEV_329642 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS) namespace miopen { @@ -816,14 +816,14 @@ bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ExecutionContext& #if WORKAROUND_ISSUE_1206 if(problem.IsFp32()) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif #if WORKAROUND_SWDEV_329642 if(problem.IsBfp16() && ctx.GetStream().GetDeviceName() == "gfx90a") { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif @@ -912,25 +912,20 @@ ConvSolution ConvHipImplicitGemmBwdDataV4R1Xdlops::GetSolution( PerformanceImplicitGemmBwdDataV4R1Xdlops fromEnv; { - std::string s; - const auto p_asciz = + const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS{}); - if(p_asciz != nullptr) + if(!s.empty()) // else nothing to 
parse. { - s = std::string(p_asciz); - if(!s.empty()) // else nothing to parse. + if(!fromEnv.Deserialize(s) || !fromEnv.IsReallyValid(problem)) { - if(!fromEnv.Deserialize(s) || !fromEnv.IsReallyValid(problem)) - { - MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS: " - "Bad format or invalid for the problem config: " - << s); - } - else - { - MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); - pcfg = &fromEnv; - } + MIOPEN_LOG_E("MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS: " + "Bad format or invalid for the problem config: " + << s); + } + else + { + MIOPEN_LOG_I("Overridden from env: " << fromEnv.ToString()); + pcfg = &fromEnv; } } } diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 0a4babf380..5adc5b5078 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -34,8 +34,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp index 2f51ebf819..1e5b10506d 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp @@ -31,7 +31,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp index c9e7e9bcb0..b44cbd0fbd 100644 --- 
a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS, bool, true) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: @@ -39,7 +39,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS) 2. LDS write performance(75% impact) */ MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp index fb94e30441..0cbeeb028a 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp @@ -41,7 +41,7 @@ /// Fatal compiler errors with ROCm 3.7 on some BF16 configs. #define WORKAROUND_MI100_BF16_FATAL_COMPILER_ERRORS (HIP_PACKAGE_VERSION_FLAT <= 3007999999ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS, bool, true) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: @@ -49,7 +49,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_ 2. 
LDS write performance(75% impact) */ MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp index 838e39fdae..c88812dda6 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS, bool, true) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: @@ -39,7 +39,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS) 2. LDS write performance(75% impact) */ MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 9dfad4478d..7f76f088ea 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -36,7 +36,7 @@ #endif #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index 783dabce43..17357df167 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ 
b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -35,7 +35,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp index 8bdfb5a544..6b53b2eb00 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp index f110bc8695..cda8d70875 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp @@ -35,7 +35,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp index 19c320390d..863748744b 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp @@ -38,7 +38,7 @@ /// Fatal compiler errors with ROCm 3.7 on some BF16 configs. 
#define WORKAROUND_MI100_BF16_FATAL_COMPILER_ERRORS (HIP_PACKAGE_VERSION_FLAT <= 3007999999ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_bwd.cpp b/src/solver/conv_mlir_igemm_bwd.cpp index 012f99b304..f69fce78c8 100644 --- a/src/solver/conv_mlir_igemm_bwd.cpp +++ b/src/solver/conv_mlir_igemm_bwd.cpp @@ -32,7 +32,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp index 71c1cb9020..63a9e54fa0 100644 --- a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_fwd.cpp b/src/solver/conv_mlir_igemm_fwd.cpp index a0e1accc7e..ca039800a3 100644 --- a/src/solver/conv_mlir_igemm_fwd.cpp +++ b/src/solver/conv_mlir_igemm_fwd.cpp @@ -32,7 +32,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp index 5c26b30c26..c213829678 100644 --- a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS, bool, true) namespace 
miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_wrw.cpp b/src/solver/conv_mlir_igemm_wrw.cpp index 3a56d7eb7f..658cc544db 100644 --- a/src/solver/conv_mlir_igemm_wrw.cpp +++ b/src/solver/conv_mlir_igemm_wrw.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp index 32fd4a0d00..f34daf9b7f 100644 --- a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp @@ -34,7 +34,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 3bf93a2c48..88a95c7a0c 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -48,16 +48,16 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3, bool, true) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX, uint64_t, 0) // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. shader C is batch_size etc and useful for @@ -396,13 +396,13 @@ bool ConvWinograd3x3MultipassWrW if(problem.IsFp16() && (StartsWith(name, "gfx908") || StartsWith(name, "gfx906"))) { if(wino_data_tile == 3 && wino_filter_tile == 4) - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 5) - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 6) - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) return false; } else diff --git a/src/solver/conv_ocl_dir2D11x11.cpp b/src/solver/conv_ocl_dir2D11x11.cpp index d6706c5403..c86cf7e33b 100644 --- a/src/solver/conv_ocl_dir2D11x11.cpp +++ b/src/solver/conv_ocl_dir2D11x11.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index 4056779ad7..f81351f35e 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1, bool, true) #define TWO_PASSES 1 @@ -48,7 +48,7 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_266868 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 0dbd8a1930..67ee850762 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -36,8 +36,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp index 2549cf11ad..a662df4c49 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53, bool, true) 
namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwd.cpp b/src/solver/conv_ocl_dir2Dfwd.cpp index c07c410ba0..a9f22d0e2d 100644 --- a/src/solver/conv_ocl_dir2Dfwd.cpp +++ b/src/solver/conv_ocl_dir2Dfwd.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index dde0a23467..7abdb541a8 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -33,7 +33,7 @@ #define WORKAROUND_SWDEV_271887 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1, bool, true) namespace miopen { namespace solver { @@ -47,7 +47,7 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_271887 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_ocl_dir2Dfwd_fused.cpp b/src/solver/conv_ocl_dir2Dfwd_fused.cpp index 0cc517be91..09fc368ffd 100644 --- a/src/solver/conv_ocl_dir2Dfwd_fused.cpp +++ b/src/solver/conv_ocl_dir2Dfwd_fused.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwdgen.cpp b/src/solver/conv_ocl_dir2Dfwdgen.cpp index df9b7ab851..8541bed9a3 100644 --- a/src/solver/conv_ocl_dir2Dfwdgen.cpp +++ b/src/solver/conv_ocl_dir2Dfwdgen.cpp @@ -29,7 +29,7 @@ 
#include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 67486a274c..7960e4e575 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -52,11 +52,11 @@ // we will keep ConvBinWinoRxS<2,3> for group convolutions only. #define WORKAROUND_ISSUE_1681 0 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS) #define MAX_CU_LIMIT 512 @@ -720,25 +720,22 @@ GetPerfConfFromEnv(const ExecutionContext& ctx) { PerformanceConfigConvBinWinogradRxS fromEnv; std::string s; - const char* p_asciz = nullptr; const char* env_name; if(IS2X3) { - p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); + s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); env_name = MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS::value(); } else if(IS3X2) { - p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); + s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); env_name = MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS::value(); } - if(p_asciz == nullptr) + if(s.empty()) return {}; - s = std::string(p_asciz); - if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx)) { MIOPEN_LOG_E(env_name << "Tuning config: Bad value or invalid format: `" << s << '\''); diff --git a/src/solver/conv_winoRxS_fused.cpp 
b/src/solver/conv_winoRxS_fused.cpp index 9010a9a61a..2422f0c732 100644 --- a/src/solver/conv_winoRxS_fused.cpp +++ b/src/solver/conv_winoRxS_fused.cpp @@ -43,7 +43,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1, bool, true) #define IS3X2 (Winodata == 3 && Winofilter == 2) diff --git a/src/solver/conv_wino_fury_RxS.cpp b/src/solver/conv_wino_fury_RxS.cpp index cf9e3a0ef6..12db5256be 100644 --- a/src/solver/conv_wino_fury_RxS.cpp +++ b/src/solver/conv_wino_fury_RxS.cpp @@ -30,8 +30,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2, bool, true) namespace miopen { namespace solver { diff --git a/src/solver/fft.cpp b/src/solver/fft.cpp index 4302325dab..4bd0590a52 100644 --- a/src/solver/fft.cpp +++ b/src/solver/fft.cpp @@ -41,7 +41,7 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) static void cgemm_grid(size_t* global_work_size, size_t* local_work_size, diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 3ba7aa5ea7..5da9ca08fa 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -41,7 +41,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) /// MIOpenGEMM issues with ROCm 3.7, most likely related to the /// issues in the OpenCL compiler. Not reproducible in ROCm 4.0. 
diff --git a/src/solver/gemm_bwd.cpp b/src/solver/gemm_bwd.cpp index 2001cad929..7f6beaa308 100644 --- a/src/solver/gemm_bwd.cpp +++ b/src/solver/gemm_bwd.cpp @@ -40,7 +40,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) #define WORKAROUND_MIOPENGEMM_ISSUE_59 1 diff --git a/src/solver/gemm_wrw.cpp b/src/solver/gemm_wrw.cpp index b7a206b171..1bb9377116 100644 --- a/src/solver/gemm_wrw.cpp +++ b/src/solver/gemm_wrw.cpp @@ -9,7 +9,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) // copy from convolution.cpp // Workaround for issue 1430. diff --git a/src/target_properties.cpp b/src/target_properties.cpp index 61965da0aa..05eba954e5 100644 --- a/src/target_properties.cpp +++ b/src/target_properties.cpp @@ -55,9 +55,9 @@ static std::string GetDeviceNameFromMap(const std::string& in) {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, }; - const char* const p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); - if(p_asciz != nullptr && strlen(p_asciz) > 0) - return {p_asciz}; + const auto dev_str = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); + if(!dev_str.empty()) + return dev_str; const auto name = in.substr(0, in.find(':')); // str.substr(0, npos) returns str. 
@@ -76,8 +76,8 @@ const std::size_t TargetProperties::MaxLocalMemorySize = static_cast std::string { - const char* const arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); - if(arch != nullptr && strlen(arch) > 0) + const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + if(!arch.empty()) return arch; return handle->GetDeviceNameImpl(); }(); diff --git a/src/tmp_dir.cpp b/src/tmp_dir.cpp index c90dff2bc2..b2441c184e 100644 --- a/src/tmp_dir.cpp +++ b/src/tmp_dir.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SAVE_TEMP_DIR) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SAVE_TEMP_DIR, bool, false) namespace miopen { From 0222ee67ccc5bfdbc455f41bb37dd51d2f6adad5 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 7 Nov 2023 17:09:20 -0600 Subject: [PATCH 03/73] draw out env string definitions --- src/binary_cache.cpp | 2 +- src/comgr.cpp | 2 +- src/conv/solver_finders.cpp | 2 +- src/db_path.cpp.in | 4 ++-- src/expanduser.cpp | 2 +- src/find_controls.cpp | 6 +++--- src/hipoc/hipoc_kernel.cpp | 2 +- src/hipoc/hipoc_program.cpp | 2 +- src/include/miopen/convolution.hpp | 2 +- src/include/miopen/env.hpp | 9 --------- src/include/miopen/sqlite_db.hpp | 2 +- src/kernel_cache.cpp | 2 +- src/ocl/convolutionocl.cpp | 2 +- src/ocl/gcn_asm_utils.cpp | 2 +- src/ocl_kernel.cpp | 2 +- src/solver/conv_asm_1x1u.cpp | 2 +- src/solver/conv_asm_1x1u_stride2.cpp | 2 +- src/solver/conv_asm_3x3u.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW1x1.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW3x3.cpp | 2 +- src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 2 +- src/solver/conv_winoRxS.cpp | 4 ++-- src/target_properties.cpp | 4 ++-- 23 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index 42db6659e7..a4663ed947 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -47,7 +47,7 @@ namespace miopen { MIOPEN_DECLARE_ENV_VAR(MIOPEN_DISABLE_CACHE, bool, false) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_CUSTOM_CACHE_DIR) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CUSTOM_CACHE_DIR, std::string, "") static boost::filesystem::path ComputeSysCachePath() { diff --git a/src/comgr.cpp b/src/comgr.cpp index 64bdf6b57d..54707c0aab 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -74,7 +74,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_OPTIONS, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT, uint64_t, 0) /// \todo Temporary for debugging: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT, std::string, "") /// \todo Temporary for debugging: MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN, bool, false) diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index f5a2493836..a8789ec7ea 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -34,7 +34,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT, bool, true) diff --git a/src/db_path.cpp.in b/src/db_path.cpp.in index 17bba349df..1f9dcb0550 100644 --- a/src/db_path.cpp.in +++ b/src/db_path.cpp.in @@ -37,8 +37,8 @@ #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_SYSTEM_DB_PATH) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_USER_DB_PATH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_SYSTEM_DB_PATH, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_USER_DB_PATH, std::string, "") namespace miopen { diff --git a/src/expanduser.cpp b/src/expanduser.cpp index 8407877d86..5bd2c7bed2 100644 --- a/src/expanduser.cpp +++ b/src/expanduser.cpp @@ -81,7 +81,7 @@ #endif #endif // __linux__ -MIOPEN_DECLARE_ENV_VAR(HOME) +MIOPEN_DECLARE_ENV_VAR(HOME, std::string, "") namespace miopen { diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 8c95cf77a1..51928477d0 100644 --- a/src/find_controls.cpp +++ 
b/src/find_controls.cpp @@ -39,9 +39,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_ENFORCE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FIND_ONLY_SOLVER) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_MODE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_ENFORCE, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FIND_ONLY_SOLVER, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_MODE, std::string, "") namespace miopen { diff --git a/src/hipoc/hipoc_kernel.cpp b/src/hipoc/hipoc_kernel.cpp index e29c591c30..f5ae383bb6 100644 --- a/src/hipoc/hipoc_kernel.cpp +++ b/src/hipoc/hipoc_kernel.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") namespace miopen { diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 63e3eecbc1..bcf66bb472 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -55,7 +55,7 @@ /// 4 - "-mcode-object-version=2/3/4" MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_USE_HIPRTC, bool, true) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 9e8f32f915..33e2cbe5d5 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -51,7 +51,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, -1) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, miopenF8RoundingMode_t.miopenF8RoundingModeStochastic) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, std::string, "") namespace miopen { diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 690e002b8f..166deb59ea 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -150,15 +150,6 @@ struct EnvVar { }\ }; -#define MIOPEN_DECLARE_ENV_VAR(name) \ - struct name { \ - using value_type = std::string; \ - static internal::EnvVar& Ref() { \ - static internal::EnvVar var{#name, std::string()}; \ - return var;\ - }\ - }; - /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. Right now we have a function named Value() /// that returns env var value as only 64-bit ints diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index db32fe7829..c61fd86a63 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -60,7 +60,7 @@ class path; namespace miopen { MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_SQL_WAL, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_PERFDB_OVERRIDE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_PERFDB_OVERRIDE, std::string, "") constexpr bool InMemDb = MIOPEN_EMBED_DB; #if MIOPEN_ENABLE_SQLITE_BACKOFF diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index 880307f65c..0de3e481c9 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -48,7 +48,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") namespace miopen { diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 8378cebde2..f57c3d597b 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -61,7 +61,7 @@ namespace miopen { MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMMED_FALLBACK, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DUMP_TENSOR_PATH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DUMP_TENSOR_PATH, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK, bool, false) diff --git a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index e8ee88c1e4..a470eb8847 100644 --- a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -60,7 +60,7 @@ bool ValidateGcnAssembler() { return true; } /// \todo Try to assemble AMD GCN source? #define WORKAROUND_SWDEV_233338 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH, std::string, "") static const char option_no_co_v3[] = "-mno-code-object-v3"; diff --git a/src/ocl_kernel.cpp b/src/ocl_kernel.cpp index c60d0ab076..4d7004a8b5 100644 --- a/src/ocl_kernel.cpp +++ b/src/ocl_kernel.cpp @@ -30,7 +30,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") static std::string DimToFormattedString(const size_t* dims, size_t count) { diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index 37a1c400e1..bef53c6909 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -41,7 +41,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR, bool, false) diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index 440fe125fc..9e9601ab9d 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -37,7 +37,7 @@ #include #include 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2, bool, true) diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index a4f4eccbe3..fc538e9fb1 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -40,7 +40,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U, bool, true) namespace miopen { diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 815f71d24a..e7cc003331 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -37,7 +37,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1, bool, true) diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index 7410babb7a..a9d8ceef6e 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -41,7 +41,7 @@ #define MIOPEN_GCN_ASM_DIRECT_3X3WRW_SEARCH_LWC_FIXED 0 #define WORKAROUND_SWDEV_330460 1 // ConvAsmBwdWrw3x3 has precision issues on MI200 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED, bool, true) 
MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3, bool, false) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 4569b9d13c..0a5789fef5 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -37,7 +37,7 @@ #define WORKAROUND_SWDEV_329642 1 MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS, std::string, "") namespace miopen { namespace solver { diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 7960e4e575..221e813faa 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -53,11 +53,11 @@ #define WORKAROUND_ISSUE_1681 0 MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS, std::string, "") MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS, std::string, "") #define MAX_CU_LIMIT 512 diff --git a/src/target_properties.cpp b/src/target_properties.cpp index 05eba954e5..f7a9e1442a 100644 --- a/src/target_properties.cpp +++ b/src/target_properties.cpp @@ -32,8 +32,8 @@ #define WORKAROUND_ISSUE_1204 1 // ROCm may incorrectly report "sramecc-" for gfx900. 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENFORCE_DEVICE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENFORCE_DEVICE, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") namespace miopen { From cd035e44c5cd4a363a4b64a25d59309f89edb1ac Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 00:04:42 +0000 Subject: [PATCH 04/73] compilation fixes --- src/find_controls.cpp | 6 ++-- src/include/miopen/convolution.hpp | 6 ++-- src/include/miopen/env.hpp | 31 ++++++++++--------- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 4 +-- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 51928477d0..fb5cc0bd8b 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -68,7 +68,7 @@ const char* ToCString(const FindEnforceAction mode) FindEnforceAction GetFindEnforceActionImpl() { - const auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); + auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); if(str.empty()) return FindEnforceAction::Default_; for(auto& c : str) @@ -104,7 +104,7 @@ boost::optional> GetEnvFindOnlySolverImpl() static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); const auto slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); std::vector res; - if(!slv_str.empty) + if(!slv_str.empty()) { const auto solver_list = miopen::SplitDelim(slv_str, ';'); for(const auto& kinder : solver_list) @@ -180,7 +180,7 @@ std::ostream& operator<<(std::ostream& os, const FindMode::Values& v) FindMode::Values GetFindModeValueImpl2() { - const auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); + auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); if(str.empty()) return FindMode::Values::Default_; for(auto& c : str) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 33e2cbe5d5..df5b7bf034 100644 --- a/src/include/miopen/convolution.hpp +++ 
b/src/include/miopen/convolution.hpp @@ -49,8 +49,8 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, -1) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, miopenF8RoundingMode_t.miopenF8RoundingModeStochastic) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, miopenF8RoundingMode_t::miopenF8RoundingModeStochastic) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, std::string, "") namespace miopen { @@ -124,7 +124,7 @@ struct ConvolutionAttribute public: inline int Get() const { - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); + return miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); } operator bool() const { diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 166deb59ea..1dcfcf34dc 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -31,6 +31,8 @@ #include #include +#include + namespace miopen { namespace internal { @@ -40,7 +42,7 @@ struct ParseEnvVal{}; template <> struct ParseEnvVal { - bool go(const char* vp) { + static bool go(const char* vp) { std::string value_env_str{vp}; for(auto& c : value_env_str) @@ -80,21 +82,25 @@ struct ParseEnvVal { template <> struct ParseEnvVal { - uint64_t go(const char* vp) { + static uint64_t go(const char* vp) { return std::strtoull(vp, nullptr, 0); } }; template <> struct ParseEnvVal { - std::string go(const char* vp) { + static std::string go(const char* vp) { return std::string{vp}; } }; template struct EnvVar { + private: + T value{}; + bool is_default = true; + public: const T& GetValue() const { return value; } @@ -129,10 +135,7 @@ struct EnvVar { { value = def_val; } - - private: - T value{}; - bool is_default = true; + } }; @@ -144,8 +147,8 @@ struct EnvVar { 
#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ struct name { \ using value_type = type; \ - static internal::EnvVar& Ref() { \ - static internal::EnvVar var{#name, default_val}; \ + static miopen::internal::EnvVar& Ref() { \ + static miopen::internal::EnvVar var{#name, default_val}; \ return var;\ }\ }; @@ -157,28 +160,28 @@ struct EnvVar { template inline std::string GetStringEnv(EnvVar) { - static_assert(std::is_same_v); + static_assert(std::is_same_v); return EnvVar::Ref().GetValue(); } template inline bool IsEnabled(EnvVar) { - static_assert(std::is_same_v); + static_assert(std::is_same_v); return EnvVar::Ref().GetValue(); } template inline bool IsDisabled(EnvVar) { - static_assert(std::is_same_v); + static_assert(std::is_same_v); return !EnvVar::Ref().GetValue(); } template inline uint64_t Value(EnvVar) { - static_assert(std::is_same_v); + static_assert(std::is_same_v); return EnvVar::Ref().GetValue(); } @@ -192,7 +195,7 @@ inline bool IsDefault(EnvVar) template void UpdateEnvVar(EnvVar, const ValueType& val) { - static_assert(std::is_same_v); + static_assert(std::is_same_v); EnvVar::Ref().UpdateValue(val); } diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 0a5789fef5..623c00ec03 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -816,14 +816,14 @@ bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ExecutionContext& #if WORKAROUND_ISSUE_1206 if(problem.IsFp32()) { - if(miopen:IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif #if WORKAROUND_SWDEV_329642 if(problem.IsBfp16() && ctx.GetStream().GetDeviceName() == 
"gfx90a") { - if(miopen:IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif From 79446a45468692562e8bae4b473845413f849d88 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 11:57:31 -0600 Subject: [PATCH 05/73] fix --- src/include/miopen/convolution.hpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index df5b7bf034..434f09e4c9 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -48,10 +48,10 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, -1) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, miopenF8RoundingMode_t::miopenF8RoundingModeStochastic) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, uint64_t, 0) namespace miopen { @@ -71,11 +71,14 @@ struct ConvolutionAttribute { class Gfx90aFp16alt { + int value = -1; friend struct ConvolutionAttribute; // For direct r/w. 
inline int Get() const { - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); + if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{})) + return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); + return value; } public: @@ -96,21 +99,23 @@ struct ConvolutionAttribute std::uniform_int_distribution distribution(0, 0xFFFFFFFF); return distribution(gen); } + miopenF8RoundingMode_t rounding_mode = miopenF8RoundingModeStochastic; uint32_t seed = InitSeed(); friend struct ConvolutionAttribute; inline miopenF8RoundingMode_t Get() const { - return static_cast( - miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); + if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) + return static_cast( + miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); + return rounding_mode; } inline uint32_t GetSeed() const { // assert(rounding_mode == miopenF8RoundingModeStochastic); - const auto str = miopen::GetStringEnv(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); - if(!str.empty()) - return stoul(str); + if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{})) + return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); return seed; } @@ -119,12 +124,15 @@ struct ConvolutionAttribute class Deterministic { + int value = 0; friend struct ConvolutionAttribute; public: inline int Get() const { - return miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); + if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) + return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); + return value; } operator bool() const { From 9d4d4f5eab2f97f8164e748ba910e861bb6b6a24 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 12:06:31 -0600 Subject: [PATCH 06/73] fix --- src/include/miopen/convolution.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/miopen/convolution.hpp 
b/src/include/miopen/convolution.hpp index 434f09e4c9..2973dd80ef 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -49,7 +49,7 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, uint64_t, 0) @@ -131,7 +131,7 @@ struct ConvolutionAttribute inline int Get() const { if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); + return miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); return value; } operator bool() const From 541eea55b12b73873327efea0bb2cd1ef834c801 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 12:11:46 -0600 Subject: [PATCH 07/73] update driver envs --- driver/conv_driver.hpp | 6 +++--- driver/random.hpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 5d627ba1a5..f9a8dfeaa3 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -78,9 +78,9 @@ miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, int f #define WORKAROUND_ISSUE_2176 1 // https://github.com/AMDComputeLibraries/MLOpen/issues/2176 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_PAD_BUFFERS_2M) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_USE_GPU_REFERENCE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_PAD_BUFFERS_2M, bool, false) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_USE_GPU_REFERENCE, bool, true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE, uint64_t, 0) // Support in the library discontinued, but left in the driver // for reference in 
the future. diff --git a/driver/random.hpp b/driver/random.hpp index b3be81f56e..19757c1830 100644 --- a/driver/random.hpp +++ b/driver/random.hpp @@ -5,7 +5,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DRIVER_PRNG_SEED) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DRIVER_PRNG_SEED, uint64_t, 12345678) namespace prng { namespace details { using glibc_gen = std::linear_congruential_engine; @@ -13,7 +13,7 @@ using glibc_gen = std::linear_congruential_engine Date: Wed, 8 Nov 2023 12:26:37 -0600 Subject: [PATCH 08/73] env for test folder --- test/driver.hpp | 8 ++++---- test/gpu_conv.hpp | 2 +- test/gtest/conv_embed_db.cpp | 6 +++--- test/gtest/conv_hip_igemm_xdlops.cpp | 6 +++--- test/gtest/conv_igemm_dynamic.cpp | 8 ++++---- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/test/driver.hpp b/test/driver.hpp index 201e4a367d..6d1ac397a9 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -100,7 +100,7 @@ auto cpu_async(V& v, Ts&&... xs) -> std::future return std::async(std::launch::deferred, [&] { return v.cpu(xs...); }); } -MIOPEN_DECLARE_ENV_VAR(MIOPEN_VERIFY_CACHE_PATH) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_VERIFY_CACHE_PATH, std::string, "") struct test_driver { @@ -151,11 +151,11 @@ struct test_driver static std::string compute_cache_path() { - auto e = miopen::GetStringEnv(MIOPEN_VERIFY_CACHE_PATH{}); - if(e == nullptr) + auto s = miopen::GetStringEnv(MIOPEN_VERIFY_CACHE_PATH{}); + if(s.empty()) return "~/.cache/miopen/tests"; else - return e; + return s; } std::string program_name; diff --git a/test/gpu_conv.hpp b/test/gpu_conv.hpp index 3165ace5fc..c0e1f34bad 100644 --- a/test/gpu_conv.hpp +++ b/test/gpu_conv.hpp @@ -42,7 +42,7 @@ extern bool LoggingQuiet; // NOLINT (cppcoreguidelines-avoid-non-const-gl } // namespace debug } // namespace miopen -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF, bool, false) struct AutoPrepareForGpuReference { diff --git 
a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index 36d1e28123..3c1a8d6ca7 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -32,13 +32,13 @@ #include "../conv2d.hpp" #include "get_handle.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG, std::string, "") static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const char* const p_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); - return (p_envVar != nullptr && std::strcmp(p_envVar, float_arg) == 0); + const auto s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + return (!s_envVar.empty() && std::strcmp(s_envVar.c_str(), float_arg) == 0); } void GetArgs(const std::string& param, std::vector& tokens) diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 14e1738a35..0cd2fefbf2 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -32,13 +32,13 @@ #include "../conv2d.hpp" #include "get_handle.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG, std::string, "") static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const char* const p_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); - return (p_envVar != nullptr && std::strcmp(p_envVar, float_arg) == 0); + const auto s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + return (!s_envVar.empty() && std::strcmp(s_envVar.c_str(), float_arg) == 0); } void GetArgs(const std::string& param, std::vector& tokens) diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 6e2863eb8f..01dc4bb8be 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -33,7 +33,7 @@ using TestCase = std::tuple, std::string>; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_GPU_XNACK_ENABLED) 
+MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_GPU_XNACK_ENABLED, bool, false) static bool SkipTest(void) { return miopen::IsEnabled(MIOPEN_TEST_GPU_XNACK_ENABLED{}); } @@ -143,17 +143,17 @@ std::vector GetTestCases(const std::string& precision) const std::vector test_cases = { // clang-format off -#if CODECOV_TEST +#if CODECOV_TEST TestCase{env, precision + v + " --input 32 32 17 17 --weights 32 32 1 7 --pads_strides_dilations 0 3 1 1 1 1" + dis_bk_data + dis_bk_wei + dis_vali}, TestCase{env_wrw, precision + v + " --input 64 64 28 28 --weights 32 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_data + dis_vali}, TestCase{env_bwd, precision + v + " --input 64 64 28 28 --weights 16 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_wei + dis_vali}, -#else +#else TestCase{env, precision + v + " --input 16 16 56 56 --weights 64 16 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_bk_data + dis_bk_wei}, TestCase{env, precision + v + " --input 16 64 34 34 --weights 64 64 3 3 --pads_strides_dilations 0 0 1 1 1 1" + dis_bk_data + dis_bk_wei}, TestCase{env, precision + v + " --input 32 32 17 17 --weights 32 32 1 7 --pads_strides_dilations 0 3 1 1 1 1" + dis_bk_data + dis_bk_wei}, TestCase{env_1x1, precision + v + " --input 16 384 8 8 --weights 64 384 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_bk_data + dis_bk_wei}, TestCase{env_wrw, precision + v + " --input 64 64 28 28 --weights 32 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_data}, - TestCase{env_wrw, precision + v + " --input 16 128 36 36 --weights 32 128 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_data}, + TestCase{env_wrw, precision + v + " --input 16 128 36 36 --weights 32 128 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_data}, TestCase{env_bwd, precision + v + " --input 64 64 28 28 --weights 16 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_wei}, TestCase{env_bwd, precision + v + " --input 16 128 36 36 --weights 32 128 1 1 
--pads_strides_dilations 0 0 1 1 1 1" + dis_fwd + dis_bk_wei}, #endif From 438aa343f1408ebe9af8d0ea599b15078bfe39dd Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 18:29:12 +0000 Subject: [PATCH 09/73] compilation fixes --- src/binary_cache.cpp | 2 +- src/logger.cpp | 2 +- src/ocl/convolutionocl.cpp | 2 +- src/solver/conv_winoRxS.cpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index a4663ed947..af39aa8118 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -66,7 +66,7 @@ static boost::filesystem::path ComputeUserCachePath() /// If MIOPEN_CUSTOM_CACHE_DIR is set in the environment, then /// use exactly that path. const auto custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{}); - if(!custom.empty() && strlen(custom) > 0) + if(!custom.empty()) { p = ExpandUser(custom); } diff --git a/src/logger.cpp b/src/logger.cpp index af78cea297..7ed0f0e938 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -58,7 +58,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_MPMT, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME, bool, false) /// See LoggingLevel in the header. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL, uint64_t, LoggingLevel::Default) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL, uint64_t, 0) /// Enable logging of function calls to ROCTX api. 
MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ROCTX, bool, false) diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index f57c3d597b..0b8a5b6454 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -58,7 +58,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool true) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMMED_FALLBACK, bool, true) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DUMP_TENSOR_PATH, std::string, "") diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 13187bf5fb..b3cc3362e7 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -756,12 +756,12 @@ GetPerfConfFromEnv(const ExecutionContext& ctx) if(IS2X3) { s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); - env_name = MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS::value(); + env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS"; } else if(IS3X2) { s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); - env_name = MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS::value(); + env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS"; } if(s.empty()) From fb292b368bc7a5844549f8f812fd04245f2195fe Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 20:46:31 +0000 Subject: [PATCH 10/73] format --- src/execution_context.cpp | 3 +- src/generic_search.cpp | 13 +- src/include/miopen/convolution.hpp | 2 +- src/include/miopen/env.hpp | 174 +++++++++--------- .../miopen/generic_search_controls.hpp | 13 +- src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 3 +- .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 3 +- .../conv_hip_implicit_gemm_bwd_v1r1.cpp | 3 +- ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 3 +- .../conv_hip_implicit_gemm_bwd_v4r1.cpp | 3 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 10 +- 
src/solver/conv_multipass_wino3x3WrW.cpp | 9 +- src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 3 +- src/solver/conv_ocl_dir2Dfwd1x1.cpp | 3 +- src/solver/conv_winoRxS.cpp | 4 +- 15 files changed, 134 insertions(+), 115 deletions(-) diff --git a/src/execution_context.cpp b/src/execution_context.cpp index e6cad7862f..296308c431 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -201,7 +201,8 @@ bool IsHipKernelsEnabled() #if MIOPEN_USE_HIP_KERNELS return !miopen::IsDisabled(MIOPEN_DEBUG_HIP_KERNELS{}); #else - return !miopen::IsDefault(MIOPEN_DEBUG_HIP_KERNELS{}) && miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); + return !miopen::IsDefault(MIOPEN_DEBUG_HIP_KERNELS{}) && + miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); #endif } diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 5fa6912973..4bdc047b34 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -34,22 +34,15 @@ namespace miopen { namespace solver { -std::size_t GetTuningIterationsMax() -{ - return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); -} +std::size_t GetTuningIterationsMax() { return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); } std::chrono::milliseconds GetTuningTimeMax() { - static const auto res = - std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; + static const auto res = std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; return res; } -std::size_t GetTuningThreadsMax() -{ - return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); -} +std::size_t GetTuningThreadsMax() { return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); } } // namespace solver } // namespace miopen diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 2973dd80ef..79b0a624ca 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -107,7 +107,7 @@ struct ConvolutionAttribute { if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) return static_cast( - 
miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); + miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); return rounding_mode; } diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 1dcfcf34dc..6193b84d2c 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -38,120 +38,126 @@ namespace miopen { namespace internal { template -struct ParseEnvVal{}; +struct ParseEnvVal +{ +}; template <> -struct ParseEnvVal { - static bool go(const char* vp) { - std::string value_env_str{vp}; - - for(auto& c : value_env_str) +struct ParseEnvVal +{ + static bool go(const char* vp) { - if(std::isalpha(c) != 0) + std::string value_env_str{vp}; + + for(auto& c : value_env_str) { - c = std::tolower(static_cast(c)); + if(std::isalpha(c) != 0) + { + c = std::tolower(static_cast(c)); + } } - } - if (std::strcmp(value_env_str.c_str(), "disable") == 0 || - std::strcmp(value_env_str.c_str(), "disabled") == 0 || - std::strcmp(value_env_str.c_str(), "0") == 0 || - std::strcmp(value_env_str.c_str(), "no") == 0 || - std::strcmp(value_env_str.c_str(), "off") == 0 || - std::strcmp(value_env_str.c_str(), "false") == 0) - { - return false; - } - else if (std::strcmp(value_env_str.c_str(), "enable") == 0 || - std::strcmp(value_env_str.c_str(), "enabled") == 0 || - std::strcmp(value_env_str.c_str(), "1") == 0 || - std::strcmp(value_env_str.c_str(), "yes") == 0 || - std::strcmp(value_env_str.c_str(), "on") == 0 || - std::strcmp(value_env_str.c_str(), "true") == 0) - { - return true; - } - else - { - MIOPEN_THROW(miopenStatusInvalidValue, "Invalid value for env variable"); - } + if(std::strcmp(value_env_str.c_str(), "disable") == 0 || + std::strcmp(value_env_str.c_str(), "disabled") == 0 || + std::strcmp(value_env_str.c_str(), "0") == 0 || + std::strcmp(value_env_str.c_str(), "no") == 0 || + std::strcmp(value_env_str.c_str(), "off") == 0 || + std::strcmp(value_env_str.c_str(), "false") == 0) + { + return false; + } + else 
if(std::strcmp(value_env_str.c_str(), "enable") == 0 || + std::strcmp(value_env_str.c_str(), "enabled") == 0 || + std::strcmp(value_env_str.c_str(), "1") == 0 || + std::strcmp(value_env_str.c_str(), "yes") == 0 || + std::strcmp(value_env_str.c_str(), "on") == 0 || + std::strcmp(value_env_str.c_str(), "true") == 0) + { + return true; + } + else + { + MIOPEN_THROW(miopenStatusInvalidValue, "Invalid value for env variable"); + } - return false; // shouldn't reach here - } + return false; // shouldn't reach here + } }; template <> -struct ParseEnvVal { - static uint64_t go(const char* vp) { - return std::strtoull(vp, nullptr, 0); - } +struct ParseEnvVal +{ + static uint64_t go(const char* vp) { return std::strtoull(vp, nullptr, 0); } }; template <> -struct ParseEnvVal { - static std::string go(const char* vp) { - return std::string{vp}; - } +struct ParseEnvVal +{ + static std::string go(const char* vp) { return std::string{vp}; } }; template -struct EnvVar { - private: +struct EnvVar +{ +private: T value{}; bool is_default = true; - public: - const T& GetValue() const { - return value; - } - - const bool IsDefault() const { - return is_default; - } +public: + const T& GetValue() const { return value; } - void UpdateValue(const T& val) { - is_default = false; - value = val; - } + const bool IsDefault() const { return is_default; } - explicit EnvVar(const char* const name, const T& def_val) { - const char* vp = std::getenv(name); - if (vp) // a value was provided + void UpdateValue(const T& val) { - is_default = false; - if constexpr (std::is_same_v) - { - value = ParseEnvVal::go(vp); - } - else if constexpr (std::is_same_v) { - value = ParseEnvVal::go(vp); - } else if constexpr (std::is_same_v) { - value = ParseEnvVal::go(vp); - } else { - value = ParseEnvVal::go(vp); // should cause compile error - } + is_default = false; + value = val; } - else // no value provided, use default value + + explicit EnvVar(const char* const name, const T& def_val) { - value = def_val; + 
const char* vp = std::getenv(name); + if(vp) // a value was provided + { + is_default = false; + if constexpr(std::is_same_v) + { + value = ParseEnvVal::go(vp); + } + else if constexpr(std::is_same_v) + { + value = ParseEnvVal::go(vp); + } + else if constexpr(std::is_same_v) + { + value = ParseEnvVal::go(vp); + } + else + { + value = ParseEnvVal::go(vp); // should cause compile error + } + } + else // no value provided, use default value + { + value = def_val; + } } - } }; - -}// end namespace internal - +} // end namespace internal // static inside function hides the variable and provides // thread-safety/locking -#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ - struct name { \ - using value_type = type; \ - static miopen::internal::EnvVar& Ref() { \ - static miopen::internal::EnvVar var{#name, default_val}; \ - return var;\ - }\ - }; +#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ + struct name \ + { \ + using value_type = type; \ + static miopen::internal::EnvVar& Ref() \ + { \ + static miopen::internal::EnvVar var{#name, default_val}; \ + return var; \ + } \ + }; /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. 
Right now we have a function named Value() diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 17c6522871..282674139b 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -31,12 +31,17 @@ namespace miopen { namespace solver { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, uint64_t, std::numeric_limits::max()) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TUNING_TIME_MS_MAX, uint64_t, std::chrono::duration_cast(std::chrono::hours{2}).count()) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, + uint64_t, + std::numeric_limits::max()) +MIOPEN_DECLARE_ENV_VAR( + MIOPEN_TUNING_TIME_MS_MAX, + uint64_t, + std::chrono::duration_cast(std::chrono::hours{2}).count()) #if MIOPEN_USE_COMGR - const auto def_max = 1; // COMGR is not parallelizable +const auto def_max = 1; // COMGR is not parallelizable #else - const int def_max = std::thread::hardware_concurrency() / 2; +const int def_max = std::thread::hardware_concurrency() / 2; #endif MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, def_max) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index 38d0143476..7d13fee721 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -1541,7 +1541,8 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext #if WORKAROUND_SWDEV_306318 if((problem.GetWeightsHeight_() == 1) && (problem.GetWeightsWidth_() == 1) && (problem.GetInChannels_() % 8 != 0)) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{}) || + 
!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) return false; #endif diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index 9f94328c61..c149fa1dd0 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -90,7 +90,8 @@ bool ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_411729 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #else if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index efc0d7e4a2..7ee6c0ab92 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -663,7 +663,8 @@ bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, return false; #if WORKAROUND_ISSUE_309 if(problem.IsBfp16()) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) return false; #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index 5d0261ec47..84961dc7dc 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -757,7 +757,8 @@ bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ExecutionContext& 
const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_251757 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp index e6ba63b512..781b8a3a5d 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp @@ -731,7 +731,8 @@ bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_229277_227616_229195 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 623c00ec03..c6bd85505b 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -37,7 +37,9 @@ #define WORKAROUND_SWDEV_329642 1 MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS, std::string, "") +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS, + std::string, + "") namespace miopen { namespace solver { @@ -816,14 +818,16 @@ bool 
ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ExecutionContext& #if WORKAROUND_ISSUE_1206 if(problem.IsFp32()) { - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif #if WORKAROUND_SWDEV_329642 if(problem.IsBfp16() && ctx.GetStream().GetDeviceName() == "gfx90a") { - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 88a95c7a0c..48de70e20d 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -396,13 +396,16 @@ bool ConvWinograd3x3MultipassWrW if(problem.IsFp16() && (StartsWith(name, "gfx908") || StartsWith(name, "gfx906"))) { if(wino_data_tile == 3 && wino_filter_tile == 4) - if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 5) - if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 6) - 
if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{}) || !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) + if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) return false; } else diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index f81351f35e..a0adb34794 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -48,7 +48,8 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_266868 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index 7abdb541a8..92e020efb0 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -47,7 +47,8 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_271887 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{}) || !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) + if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{}) || + !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index b3cc3362e7..51de839c03 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -755,12 
+755,12 @@ GetPerfConfFromEnv(const ExecutionContext& ctx) if(IS2X3) { - s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); + s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS"; } else if(IS3X2) { - s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); + s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS"; } From 44a174fd515be4ca2861f9beabed8732fc233663 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 22:12:05 +0000 Subject: [PATCH 11/73] tidy --- src/include/miopen/convolution.hpp | 3 ++- src/include/miopen/env.hpp | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 79b0a624ca..aaf0c1ba4b 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -131,7 +131,8 @@ struct ConvolutionAttribute inline int Get() const { if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) - return miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{}); + return static_cast( + miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})); return value; } operator bool() const diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 6193b84d2c..6bfc5beed7 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -106,7 +106,7 @@ struct EnvVar public: const T& GetValue() const { return value; } - const bool IsDefault() const { return is_default; } + bool IsDefault() const { return is_default; } void UpdateValue(const T& val) { @@ -116,8 +116,9 @@ struct EnvVar explicit EnvVar(const char* const name, const T& def_val) { + // NOLINTNEXTLINE (concurrency-mt-unsafe) const char* vp = std::getenv(name); - if(vp) // a value was provided + if(vp != nullptr) // a value was provided { is_default = 
false; if constexpr(std::is_same_v) From 473b17ceaf8a6d6f9be43af4bbf24cce6f9f0665 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 22:52:52 +0000 Subject: [PATCH 12/73] quality improvements for env.h --- src/include/miopen/env.hpp | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 6bfc5beed7..d27fca31e5 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -121,22 +121,7 @@ struct EnvVar if(vp != nullptr) // a value was provided { is_default = false; - if constexpr(std::is_same_v) - { - value = ParseEnvVal::go(vp); - } - else if constexpr(std::is_same_v) - { - value = ParseEnvVal::go(vp); - } - else if constexpr(std::is_same_v) - { - value = ParseEnvVal::go(vp); - } - else - { - value = ParseEnvVal::go(vp); // should cause compile error - } + value = ParseEnvVal::go(vp); } else // no value provided, use default value { @@ -165,7 +150,7 @@ struct EnvVar /// that returns env var value as only 64-bit ints template -inline std::string GetStringEnv(EnvVar) +inline const std::string& GetStringEnv(EnvVar) { static_assert(std::is_same_v); return EnvVar::Ref().GetValue(); From d33b9a94422453e3beb6164f191d6d0068245dc9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 17:07:37 -0600 Subject: [PATCH 13/73] convert to const string reference --- src/binary_cache.cpp | 2 +- src/comgr.cpp | 2 +- src/conv/solver_finders.cpp | 2 +- src/find_controls.cpp | 2 +- src/hipoc/hipoc_kernel.cpp | 2 +- src/hipoc/hipoc_program.cpp | 4 ++-- src/include/miopen/sqlite_db.hpp | 2 +- src/kernel_cache.cpp | 2 +- src/ocl/convolutionocl.cpp | 6 +++--- src/ocl/gcn_asm_utils.cpp | 2 +- src/ocl_kernel.cpp | 2 +- src/solver/conv_asm_1x1u.cpp | 2 +- src/solver/conv_asm_1x1u_stride2.cpp | 2 +- src/solver/conv_asm_3x3u.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW1x1.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW3x3.cpp | 2 +- 
src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 2 +- src/solver/conv_winoRxS.cpp | 2 +- src/target_properties.cpp | 4 ++-- 19 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index af39aa8118..17b9b0ab46 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -65,7 +65,7 @@ static boost::filesystem::path ComputeUserCachePath() boost::filesystem::path p; /// If MIOPEN_CUSTOM_CACHE_DIR is set in the environment, then /// use exactly that path. - const auto custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{}); + const auto& custom = miopen::GetStringEnv(MIOPEN_CUSTOM_CACHE_DIR{}); if(!custom.empty()) { p = ExpandUser(custom); diff --git a/src/comgr.cpp b/src/comgr.cpp index 54707c0aab..fddfc2a167 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -764,7 +764,7 @@ static void SetIsaName(const ActionInfo& action, static std::string GetDebugCompilerOptionsInsert() { - const auto p = miopen::GetStringEnv(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT{}); + const auto& p = miopen::GetStringEnv(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT{}); return {p}; } diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index a8789ec7ea..ec75e7b642 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -209,7 +209,7 @@ static void EvaluateInvokers(Handle& handle, const AnyInvokeParams& invoke_ctx, DbRecord& record) { - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) return; diff --git a/src/find_controls.cpp b/src/find_controls.cpp index fb5cc0bd8b..a90fe1d80b 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -102,7 +102,7 @@ FindEnforceAction GetFindEnforceAction() boost::optional> GetEnvFindOnlySolverImpl() { static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); - const auto slv_str = 
miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); + const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); std::vector res; if(!slv_str.empty()) { diff --git a/src/hipoc/hipoc_kernel.cpp b/src/hipoc/hipoc_kernel.cpp index f5ae383bb6..1ed7278e5b 100644 --- a/src/hipoc/hipoc_kernel.cpp +++ b/src/hipoc/hipoc_kernel.cpp @@ -79,7 +79,7 @@ void HIPOCKernelInvoke::run(void* args, std::size_t size) const stop = make_hip_event(); } - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) { MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index bcf66bb472..2e829e0aaf 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -190,7 +190,7 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, const std::string& blob) : program(program_name) ///, module(CreateModuleInMem(blob)) { - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) return; module = CreateModuleInMem(blob); @@ -210,7 +210,7 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, } else { - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(arch.empty()) { module = CreateModule(hsaco_file); diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index c61fd86a63..2794d7fdf6 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -443,7 +443,7 @@ class SQLitePerfDb : public SQLiteBase if(dbInvalid) return boost::none; - const auto pdb_ovr = miopen::GetStringEnv(MIOPEN_DEBUG_PERFDB_OVERRIDE{}); + const auto& pdb_ovr = 
miopen::GetStringEnv(MIOPEN_DEBUG_PERFDB_OVERRIDE{}); if(!pdb_ovr.empty()) { MIOPEN_LOG_I2("overriding tuning params with: " << pdb_ovr); diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index 0de3e481c9..a171320ad9 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -124,7 +124,7 @@ Kernel KernelCache::AddKernel(const Handle& h, } Kernel kernel{}; - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) { kernel = Kernel{program, kernel_name}; diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 0b8a5b6454..3c3727323d 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -387,7 +387,7 @@ static void ConvForwardCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.yDesc, tensors.y); - const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name + "_x.bin"); @@ -912,7 +912,7 @@ static void ConvBwdCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.dxDesc, tensors.dx); - const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); @@ -1113,7 +1113,7 @@ static void ConvWrwCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.dwDesc, tensors.dw); - const auto file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); diff --git 
a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index a470eb8847..dfbdc58988 100644 --- a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -70,7 +70,7 @@ static std::string CleanupPath(const char* p); std::string GetGcnAssemblerPathImpl() { - const auto asm_path_env_p = miopen::GetStringEnv(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH{}); + const auto& asm_path_env_p = miopen::GetStringEnv(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH{}); if(!asm_path_env_p.empty()) { return CleanupPath(asm_path_env_p); diff --git a/src/ocl_kernel.cpp b/src/ocl_kernel.cpp index 4d7004a8b5..0d9b523d4f 100644 --- a/src/ocl_kernel.cpp +++ b/src/ocl_kernel.cpp @@ -58,7 +58,7 @@ void OCLKernelInvoke::run() const MIOPEN_HANDLE_LOCK - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) { MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index bef53c6909..df970ce5ca 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -827,7 +827,7 @@ ConvSolution ConvAsm1x1U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1U fromEnv; { - const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS{}); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index 9e9601ab9d..56ef938bed 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -609,7 +609,7 @@ ConvSolution ConvAsm1x1UV2::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1UV2 fromEnv; { - const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS{}); if(!s.empty()) // else nothing to parse. { if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index fc538e9fb1..35518c1cd6 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -254,7 +254,7 @@ ConvSolution ConvAsm3x3U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm3x3U fromEnv; { - const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS{}); if(!s.empty()) // else nothing to parse. { if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(problem)) diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index e7cc003331..054e757c8e 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -740,7 +740,7 @@ ConvSolution ConvAsmBwdWrW1x1::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsmBwdWrW1x1 fromEnv; { - const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS{}); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index a9d8ceef6e..4fa440f299 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -474,7 +474,7 @@ ConvSolution ConvAsmBwdWrW3x3::GetSolution(const ExecutionContext& ctx, PerformanceConfigAsmDirect3x3WrW fromEnv; { - const auto s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS{}); if(!s.empty()) // else nothing to parse. { if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index c6bd85505b..2606c91b6f 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -916,7 +916,7 @@ ConvSolution ConvHipImplicitGemmBwdDataV4R1Xdlops::GetSolution( PerformanceImplicitGemmBwdDataV4R1Xdlops fromEnv; { - const auto s = + const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS{}); if(!s.empty()) // else nothing to parse. 
{ diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 51de839c03..1471aebd8e 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -750,7 +750,7 @@ static inline boost::optional GetPerfConfFromEnv(const ExecutionContext& ctx) { PerformanceConfigConvBinWinogradRxS fromEnv; - std::string s; + const std::string& s; const char* env_name; if(IS2X3) diff --git a/src/target_properties.cpp b/src/target_properties.cpp index f7a9e1442a..b27d2d9de3 100644 --- a/src/target_properties.cpp +++ b/src/target_properties.cpp @@ -55,7 +55,7 @@ static std::string GetDeviceNameFromMap(const std::string& in) {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, }; - const auto dev_str = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); + const auto& dev_str = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); if(!dev_str.empty()) return dev_str; @@ -76,7 +76,7 @@ const std::size_t TargetProperties::MaxLocalMemorySize = static_cast std::string { - const auto arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); if(!arch.empty()) return arch; return handle->GetDeviceNameImpl(); From 694a079415fcd72b9a679b71af17d046f4e4d5ea Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 8 Nov 2023 17:31:08 -0600 Subject: [PATCH 14/73] fix --- src/solver/conv_winoRxS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 1471aebd8e..eb553a88f1 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -750,7 +750,7 @@ static inline boost::optional GetPerfConfFromEnv(const ExecutionContext& ctx) { PerformanceConfigConvBinWinogradRxS fromEnv; - const std::string& s; + const std::string s; const char* env_name; if(IS2X3) From 9c12c2fbbac92ea7331b21ebb043f35e3ef8f14d Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 02:03:14 +0000 Subject: [PATCH 15/73] fix string definition --- 
src/solver/conv_winoRxS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index eb553a88f1..51de839c03 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -750,7 +750,7 @@ static inline boost::optional GetPerfConfFromEnv(const ExecutionContext& ctx) { PerformanceConfigConvBinWinogradRxS fromEnv; - const std::string s; + std::string s; const char* env_name; if(IS2X3) From a3605ab6bd8748c33a134a7d0d3966a6fb695f82 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 03:22:14 +0000 Subject: [PATCH 16/73] formatting --- src/include/miopen/generic_search_controls.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 282674139b..cea10a05d2 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -39,11 +39,13 @@ MIOPEN_DECLARE_ENV_VAR( uint64_t, std::chrono::duration_cast(std::chrono::hours{2}).count()) #if MIOPEN_USE_COMGR -const auto def_max = 1; // COMGR is not parallelizable +// COMGR is not parallelizable +MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, 1) #else -const int def_max = std::thread::hardware_concurrency() / 2; +MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, + uint64_t, + std::thread::hardware_concurrency() / 2) #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, def_max) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) } // namespace solver From 688919cdacdffa1deec423a7c57d056736a45ab4 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 05:30:13 +0000 Subject: [PATCH 17/73] fix --- src/ocl/gcn_asm_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index dfbdc58988..6bdb261bc9 100644 --- 
a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -73,7 +73,7 @@ std::string GetGcnAssemblerPathImpl() const auto& asm_path_env_p = miopen::GetStringEnv(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH{}); if(!asm_path_env_p.empty()) { - return CleanupPath(asm_path_env_p); + return CleanupPath(asm_path_env_p.c_str()); } #ifdef MIOPEN_AMDGCN_ASSEMBLER // string literal generated by CMake return CleanupPath(MIOPEN_AMDGCN_ASSEMBLER); From 5ea88a6fe0975b998149aa44304dfb9764ab8f68 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 14:13:11 -0600 Subject: [PATCH 18/73] use std::string::compare --- src/include/miopen/env.hpp | 24 ++++++++++++------------ test/gtest/conv_embed_db.cpp | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index d27fca31e5..0c62175034 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -57,21 +57,21 @@ struct ParseEnvVal } } - if(std::strcmp(value_env_str.c_str(), "disable") == 0 || - std::strcmp(value_env_str.c_str(), "disabled") == 0 || - std::strcmp(value_env_str.c_str(), "0") == 0 || - std::strcmp(value_env_str.c_str(), "no") == 0 || - std::strcmp(value_env_str.c_str(), "off") == 0 || - std::strcmp(value_env_str.c_str(), "false") == 0) + if(value_env_str.compare("disable") == 0 || + value_env_str.compare("disabled") == 0 || + value_env_str.compare("0") == 0 || + value_env_str.compare("no") == 0 || + value_env_str.compare("off") == 0 || + value_env_str.compare("false") == 0) { return false; } - else if(std::strcmp(value_env_str.c_str(), "enable") == 0 || - std::strcmp(value_env_str.c_str(), "enabled") == 0 || - std::strcmp(value_env_str.c_str(), "1") == 0 || - std::strcmp(value_env_str.c_str(), "yes") == 0 || - std::strcmp(value_env_str.c_str(), "on") == 0 || - std::strcmp(value_env_str.c_str(), "true") == 0) + else if(value_env_str.compare("enable") == 0 || + value_env_str.compare("enabled") == 0 || + value_env_str.compare("1") 
== 0 || + value_env_str.compare("yes") == 0 || + value_env_str.compare("on") == 0 || + value_env_str.compare("true") == 0) { return true; } diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index 3c1a8d6ca7..beadd48cf1 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -38,7 +38,7 @@ static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); const auto s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); - return (!s_envVar.empty() && std::strcmp(s_envVar.c_str(), float_arg) == 0); + return (s_envVar.compare(float_arg) == 0); } void GetArgs(const std::string& param, std::vector& tokens) From 434f15412b97d5293feb15bc77a722ce7f99870d Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 15:12:52 -0600 Subject: [PATCH 19/73] rename IsDefault to IsUnset, check unset in disabled/enabled check --- src/execution_context.cpp | 3 +- src/include/miopen/convolution.hpp | 8 ++--- src/include/miopen/env.hpp | 29 ++++++++++++------- src/solver/conv_MP_bidirectional_winograd.cpp | 2 +- src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 3 +- .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 3 +- .../conv_hip_implicit_gemm_bwd_v1r1.cpp | 3 +- ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 3 +- .../conv_hip_implicit_gemm_bwd_v4r1.cpp | 3 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 6 ++-- src/solver/conv_multipass_wino3x3WrW.cpp | 9 ++---- src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 3 +- src/solver/conv_ocl_dir2Dfwd1x1.cpp | 3 +- 13 files changed, 37 insertions(+), 41 deletions(-) diff --git a/src/execution_context.cpp b/src/execution_context.cpp index 296308c431..0eb8695f30 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -201,8 +201,7 @@ bool IsHipKernelsEnabled() #if MIOPEN_USE_HIP_KERNELS return !miopen::IsDisabled(MIOPEN_DEBUG_HIP_KERNELS{}); #else - return !miopen::IsDefault(MIOPEN_DEBUG_HIP_KERNELS{}) && - miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); + return 
miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); #endif } diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index aaf0c1ba4b..12a89e174e 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -76,7 +76,7 @@ struct ConvolutionAttribute inline int Get() const { - if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{})) + if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{})) return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); return value; } @@ -105,7 +105,7 @@ struct ConvolutionAttribute inline miopenF8RoundingMode_t Get() const { - if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) + if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) return static_cast( miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); return rounding_mode; @@ -114,7 +114,7 @@ struct ConvolutionAttribute inline uint32_t GetSeed() const { // assert(rounding_mode == miopenF8RoundingModeStochastic); - if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{})) + if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{})) return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); return seed; } @@ -130,7 +130,7 @@ struct ConvolutionAttribute public: inline int Get() const { - if(!miopen::IsDefault(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) + if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) return static_cast( miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})); return value; diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 0c62175034..c0e79764e4 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -101,17 +101,17 @@ struct EnvVar { private: T value{}; - bool is_default = true; + bool is_unset = true; public: const T& GetValue() const { return value; } - bool IsDefault() const { return is_default; } + 
bool IsUnset() const { return is_unset; } void UpdateValue(const T& val) { - is_default = false; - value = val; + is_unset = false; + value = val; } explicit EnvVar(const char* const name, const T& def_val) @@ -120,8 +120,8 @@ struct EnvVar const char* vp = std::getenv(name); if(vp != nullptr) // a value was provided { - is_default = false; - value = ParseEnvVal::go(vp); + is_unset = false; + value = ParseEnvVal::go(vp); } else // no value provided, use default value { @@ -145,6 +145,15 @@ struct EnvVar } \ }; +#define MIOPEN_DECLARE_ENV_VAR_BOOL(name) \ + MIOPEN_DECLARE_ENV_VAR(#name, bool, false) + +#define MIOPEN_DECLARE_ENV_VAR_UINT64(name) \ + MIOPEN_DECLARE_ENV_VAR(#name, uint64_t, 0) + +#define MIOPEN_DECLARE_ENV_VAR_STR(name) \ + MIOPEN_DECLARE_ENV_VAR(#name, std::string, "") + /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. Right now we have a function named Value() /// that returns env var value as only 64-bit ints @@ -160,14 +169,14 @@ template inline bool IsEnabled(EnvVar) { static_assert(std::is_same_v); - return EnvVar::Ref().GetValue(); + return !EnvVar::Ref().IsUnset() && EnvVar::Ref().GetValue(); } template inline bool IsDisabled(EnvVar) { static_assert(std::is_same_v); - return !EnvVar::Ref().GetValue(); + return !EnvVar::Ref().IsUnset() && !EnvVar::Ref().GetValue(); } template @@ -178,9 +187,9 @@ inline uint64_t Value(EnvVar) } template -inline bool IsDefault(EnvVar) +inline bool IsUnset(EnvVar) { - return EnvVar::Ref().IsDefault(); + return EnvVar::Ref().IsUnset(); } /// updates the cached value of an environment variable diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index 2cf2ad9fbc..1f724bfc3a 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -51,7 +51,7 @@ #if WORKAROUND_SWDEV_257202 // Workaround, solver disabled by default. 
-#define IS_DISABLED(expr) miopen::IsDefault(expr) || !miopen::IsEnabled(expr) +#define IS_DISABLED(expr) !miopen::IsEnabled(expr) #else // Normal behavior (solver enabled by default). #define IS_DISABLED(expr) miopen::IsDisabled(expr) diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index 7d13fee721..ad4fff3e85 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -1541,8 +1541,7 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext #if WORKAROUND_SWDEV_306318 if((problem.GetWeightsHeight_() == 1) && (problem.GetWeightsWidth_() == 1) && (problem.GetInChannels_() % 8 != 0)) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) return false; #endif diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index c149fa1dd0..f72eae5c19 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -90,8 +90,7 @@ bool ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_411729 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #else if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index 7ee6c0ab92..920f8a4f9b 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -663,8 +663,7 @@ bool 
ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, return false; #if WORKAROUND_ISSUE_309 if(problem.IsBfp16()) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) return false; #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index 84961dc7dc..9d6b78900b 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -757,8 +757,7 @@ bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ExecutionContext& const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_251757 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp index 781b8a3a5d..f0672589e9 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp @@ -731,8 +731,7 @@ bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_229277_227616_229195 - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp 
b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 2606c91b6f..a9e24537a6 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -818,16 +818,14 @@ bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ExecutionContext& #if WORKAROUND_ISSUE_1206 if(problem.IsFp32()) { - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif #if WORKAROUND_SWDEV_329642 if(problem.IsBfp16() && ctx.GetStream().GetDeviceName() == "gfx90a") { - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) return false; } #endif diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 48de70e20d..4f6a3574f6 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -396,16 +396,13 @@ bool ConvWinograd3x3MultipassWrW if(problem.IsFp16() && (StartsWith(name, "gfx908") || StartsWith(name, "gfx906"))) { if(wino_data_tile == 3 && wino_filter_tile == 4) - if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 5) - if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) return false; if(wino_data_tile == 3 && wino_filter_tile == 6) - if(miopen::IsDefault(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{}) || - 
!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) return false; } else diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index a0adb34794..a6b97a0e34 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -48,8 +48,7 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_266868 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) return false; #endif if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index 92e020efb0..08e06baab8 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -47,8 +47,7 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_SWDEV_271887 if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) - if(miopen::IsDefault(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{}) || - !miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) + if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) From 55dfe4d0e989054249b53af2052dc3ecac85684d Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 21:42:12 +0000 Subject: [PATCH 20/73] macro for bool + string envs --- driver/conv_driver.hpp | 4 +-- src/binary_cache.cpp | 4 +-- src/comgr.cpp | 14 +++++----- src/conv/solver_finders.cpp | 12 ++++----- src/convolution.cpp | 12 ++++----- src/db_path.cpp.in | 4 +-- src/execution_context.cpp | 10 +++---- 
src/expanduser.cpp | 2 +- src/find_controls.cpp | 6 ++--- src/hip/hip_build_utils.cpp | 4 +-- src/hipoc/hipoc_kernel.cpp | 2 +- src/hipoc/hipoc_program.cpp | 6 ++--- src/include/miopen/convolution.hpp | 2 +- src/include/miopen/env.hpp | 4 +-- src/include/miopen/find_db.hpp | 2 +- .../miopen/generic_search_controls.hpp | 2 +- .../miopen/solver/ck_utility_common.hpp | 4 +-- .../miopen/solver/implicitgemm_util.hpp | 10 +++---- src/include/miopen/sqlite_db.hpp | 4 +-- src/kernel_cache.cpp | 2 +- src/logger.cpp | 12 ++++----- src/mlo_dir_conv.cpp | 2 +- src/ocl/clhelper.cpp | 2 +- src/ocl/convolutionocl.cpp | 12 ++++----- src/ocl/gcn_asm_utils.cpp | 2 +- src/ocl/rnnocl.cpp | 2 +- src/ocl_kernel.cpp | 2 +- src/reducetensor.cpp | 2 +- src/solver.cpp | 2 +- src/solver/batchnorm/backward_ck.cpp | 2 +- .../backward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_inference_ck.cpp | 2 +- .../batchnorm/forward_inference_fused.cpp | 2 +- .../forward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_training_ck.cpp | 2 +- src/solver/conv_MP_bidirectional_winograd.cpp | 26 +++++++++---------- src/solver/conv_asm_1x1u.cpp | 8 +++--- src/solver/conv_asm_1x1u_bias_activ_fused.cpp | 2 +- src/solver/conv_asm_1x1u_stride2.cpp | 6 ++--- src/solver/conv_asm_3x3u.cpp | 4 +-- src/solver/conv_asm_5x10u2v2b1.cpp | 2 +- src/solver/conv_asm_5x10u2v2f1.cpp | 2 +- .../conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW1x1.cpp | 6 ++--- src/solver/conv_asm_dir_BwdWrW3x3.cpp | 6 ++--- ...onv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp | 2 +- src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp | 2 +- .../conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp | 4 +-- src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 2 +- .../conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp | 4 +-- .../conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp | 4 +-- .../conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp | 4 +-- .../conv_asm_implicit_gemm_v4r1_dynamic.cpp | 4 +-- ...m_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp | 2 +- 
...onv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp | 2 +- src/solver/conv_bin_wino3x3U.cpp | 2 +- src/solver/conv_bin_winoRxS.cpp | 8 +++--- src/solver/conv_bin_winoRxS_fused.cpp | 4 +-- .../conv_ck_igemm_fwd_bias_activ_fused.cpp | 2 +- .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 2 +- src/solver/conv_direct_naive_conv.cpp | 2 +- src/solver/conv_direct_naive_conv_bwd.cpp | 2 +- src/solver/conv_direct_naive_conv_fwd.cpp | 2 +- src/solver/conv_direct_naive_conv_wrw.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 2 +- ...conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_bwd_v1r1.cpp | 2 +- ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_bwd_v4r1.cpp | 2 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_fwd_v4r1.cpp | 4 +-- .../conv_hip_implicit_gemm_fwd_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp | 2 +- ...licit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp | 2 +- ...conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_fwd_xdlops.cpp | 2 +- ...v_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_wrw_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp | 2 +- ...licit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp | 2 +- src/solver/conv_mlir_igemm_bwd.cpp | 2 +- src/solver/conv_mlir_igemm_bwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_fwd.cpp | 2 +- src/solver/conv_mlir_igemm_fwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_wrw.cpp | 2 +- src/solver/conv_mlir_igemm_wrw_xdlops.cpp | 2 +- src/solver/conv_multipass_wino3x3WrW.cpp | 18 ++++++------- src/solver/conv_ocl_dir2D11x11.cpp | 2 +- src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 2 +- src/solver/conv_ocl_dir2D_bwdWrW_2.cpp | 4 +-- src/solver/conv_ocl_dir2D_bwdWrW_53.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd1x1.cpp | 2 +- 
src/solver/conv_ocl_dir2Dfwd_fused.cpp | 2 +- src/solver/conv_ocl_dir2Dfwdgen.cpp | 2 +- src/solver/conv_winoRxS.cpp | 12 ++++----- src/solver/conv_winoRxS_fused.cpp | 2 +- src/solver/conv_wino_fury_RxS.cpp | 4 +-- src/solver/fft.cpp | 2 +- src/solver/gemm.cpp | 2 +- src/solver/gemm_bwd.cpp | 2 +- src/solver/gemm_wrw.cpp | 2 +- src/target_properties.cpp | 4 +-- src/tmp_dir.cpp | 2 +- test/driver.hpp | 2 +- test/gpu_conv.hpp | 2 +- test/gtest/conv_embed_db.cpp | 2 +- test/gtest/conv_hip_igemm_xdlops.cpp | 2 +- test/gtest/conv_igemm_dynamic.cpp | 2 +- 111 files changed, 204 insertions(+), 204 deletions(-) diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index f9a8dfeaa3..748f68ac8d 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -78,8 +78,8 @@ miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, int f #define WORKAROUND_ISSUE_2176 1 // https://github.com/AMDComputeLibraries/MLOpen/issues/2176 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_PAD_BUFFERS_2M, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_USE_GPU_REFERENCE, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DRIVER_PAD_BUFFERS_2M) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DRIVER_USE_GPU_REFERENCE) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE, uint64_t, 0) // Support in the library discontinued, but left in the driver diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index 17b9b0ab46..1ab5f4aff4 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -46,8 +46,8 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DISABLE_CACHE, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CUSTOM_CACHE_DIR, std::string, "") +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DISABLE_CACHE) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_CUSTOM_CACHE_DIR) static boost::filesystem::path ComputeSysCachePath() { diff --git a/src/comgr.cpp b/src/comgr.cpp index fddfc2a167..1905fbb171 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -61,8 +61,8 @@ /// More info at 
https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1257. #define WORKAROUND_ISSUE_1257 (HIP_PACKAGE_VERSION_FLAT >= 4003021331ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_CALLS, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_LOG_CALLS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES) /// 0: Off. /// 1: Logs each option on a separate line. @@ -74,14 +74,14 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_OPTIONS, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT, uint64_t, 0) /// \todo Temporary for debugging: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT) /// \todo Temporary for debugging: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN) /// \todo see issue #1222, PR #1316 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_SRAM_EDC_DISABLED) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) #ifndef MIOPEN_AMD_COMGR_VERSION_MAJOR #define MIOPEN_AMD_COMGR_VERSION_MAJOR 0 @@ -137,7 +137,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) /// have wavesize != 64 (currently gfx10 with default build settings). 
#define WORKAROUND_ISSUE_1431 PCH_IS_SUPPORTED -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE) #define COMPILER_LC 1 diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index ec75e7b642..c1c2b42282 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -34,13 +34,13 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_GEMM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_WINOGRAD) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) namespace conv { namespace { diff --git a/src/convolution.cpp b/src/convolution.cpp index bb713fb074..88e681cf35 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -50,12 +50,12 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_WINOGRAD, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_GEMM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_WINOGRAD) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_GEMM) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) namespace miopen { diff --git a/src/db_path.cpp.in b/src/db_path.cpp.in index 1f9dcb0550..0f84feedf7 100644 --- a/src/db_path.cpp.in +++ b/src/db_path.cpp.in @@ -37,8 +37,8 @@ #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_SYSTEM_DB_PATH, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_USER_DB_PATH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_SYSTEM_DB_PATH) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_USER_DB_PATH) namespace miopen { diff --git a/src/execution_context.cpp b/src/execution_context.cpp index 0eb8695f30..45f7047e04 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -36,12 +36,12 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_KERNELS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_GCN_ASM_KERNELS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_HIP_KERNELS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER) static std::ostream& operator<<(std::ostream& os, const rocm_meta_version& rmv) { diff --git a/src/expanduser.cpp b/src/expanduser.cpp index 5bd2c7bed2..1182d9248c 100644 --- a/src/expanduser.cpp +++ b/src/expanduser.cpp @@ -81,7 +81,7 @@ #endif #endif // __linux__ -MIOPEN_DECLARE_ENV_VAR(HOME, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(HOME) namespace miopen { diff --git a/src/find_controls.cpp b/src/find_controls.cpp index a90fe1d80b..04bc4b1779 100644 --- 
a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -39,9 +39,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_ENFORCE, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FIND_ONLY_SOLVER, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_FIND_MODE, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_FIND_ENFORCE) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_FIND_ONLY_SOLVER) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_FIND_MODE) namespace miopen { diff --git a/src/hip/hip_build_utils.cpp b/src/hip/hip_build_utils.cpp index 932309d649..a7ec36345a 100644 --- a/src/hip/hip_build_utils.cpp +++ b/src/hip/hip_build_utils.cpp @@ -37,8 +37,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_VERBOSE, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_HIP_DUMP, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_HIP_VERBOSE) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_HIP_DUMP) namespace miopen { diff --git a/src/hipoc/hipoc_kernel.cpp b/src/hipoc/hipoc_kernel.cpp index 1ed7278e5b..e3876ed134 100644 --- a/src/hipoc/hipoc_kernel.cpp +++ b/src/hipoc/hipoc_kernel.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) namespace miopen { diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 2e829e0aaf..3b28bccd05 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -55,10 +55,10 @@ /// 4 - "-mcode-object-version=2/3/4" MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_USE_HIPRTC, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_USE_HIPRTC) #define MIOPEN_WORKAROUND_ISSUE_1359 1 diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 12a89e174e..1ae5b7caf9 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -49,7 +49,7 @@ #include MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, 0) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, uint64_t, 0) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index c0e79764e4..29840fbc26 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -146,13 +146,13 @@ struct EnvVar }; #define MIOPEN_DECLARE_ENV_VAR_BOOL(name) \ - MIOPEN_DECLARE_ENV_VAR(#name, bool, false) + MIOPEN_DECLARE_ENV_VAR_BOOL(#name) #define MIOPEN_DECLARE_ENV_VAR_UINT64(name) \ MIOPEN_DECLARE_ENV_VAR(#name, uint64_t, 0) #define MIOPEN_DECLARE_ENV_VAR_STR(name) \ - MIOPEN_DECLARE_ENV_VAR(#name, std::string, "") + MIOPEN_DECLARE_ENV_VAR_STR(#name) /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. 
Right now we have a function named Value() diff --git a/src/include/miopen/find_db.hpp b/src/include/miopen/find_db.hpp index 500c5d6795..becdd014c8 100644 --- a/src/include/miopen/find_db.hpp +++ b/src/include/miopen/find_db.hpp @@ -41,7 +41,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_FIND_DB, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_DISABLE_FIND_DB) namespace miopen { diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index cea10a05d2..37980762e0 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -46,7 +46,7 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, std::thread::hardware_concurrency() / 2) #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) } // namespace solver } // namespace miopen diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp index 509e0ffab9..cd48631460 100644 --- a/src/include/miopen/solver/ck_utility_common.hpp +++ b/src/include/miopen/solver/ck_utility_common.hpp @@ -39,8 +39,8 @@ #include "../composable_kernel/host/solver/include/convolution_problem_descriptor.hpp" #include "../composable_kernel/host/solver/include/solver_common.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING) namespace miopen { namespace solver { diff --git a/src/include/miopen/solver/implicitgemm_util.hpp b/src/include/miopen/solver/implicitgemm_util.hpp index abeb49ce6d..39b9046bd2 100644 --- a/src/include/miopen/solver/implicitgemm_util.hpp +++ b/src/include/miopen/solver/implicitgemm_util.hpp @@ 
-34,11 +34,11 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM) #define WORKAROUND_SWDEV_229277_227616_229195 1 // workaround for unnecessary VGPA <--> AGRP data movement when using mfma LLVM intrinsic diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index 2794d7fdf6..7ba40310ff 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -59,8 +59,8 @@ class path; } // namespace boost namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DISABLE_SQL_WAL, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_PERFDB_OVERRIDE, std::string, "") +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_DISABLE_SQL_WAL) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_PERFDB_OVERRIDE) constexpr bool InMemDb = MIOPEN_EMBED_DB; #if MIOPEN_ENABLE_SQLITE_BACKOFF diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index a171320ad9..1fc7a88605 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -48,7 +48,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) namespace miopen { diff --git a/src/logger.cpp b/src/logger.cpp index 7ed0f0e938..07096829b7 100644 --- 
a/src/logger.cpp +++ b/src/logger.cpp @@ -41,27 +41,27 @@ namespace miopen { /// Enable logging of the most important function calls. /// Name of envvar in a bit inadequate due to historical reasons. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING) /// Prints driver command lines into log. /// Works from any application which uses the library. /// Allows to reproduce library use cases using the driver instead of the actual application. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_CMD, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_CMD) /// Prefix each log line with information which allows the user /// to uniquiely identify log records printed from different processes /// or threads. Useful for debugging multi-process/multi-threaded apps. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_MPMT, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_MPMT) /// Add timestamps to each log line. /// Not useful with multi-process/multi-threaded apps. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME) /// See LoggingLevel in the header. MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL, uint64_t, 0) /// Enable logging of function calls to ROCTX api. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_ENABLE_LOGGING_ROCTX, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_ROCTX) namespace debug { @@ -70,7 +70,7 @@ bool LoggingQuiet = false; // NOLINT (cppcoreguidelines-avoid-non-const-global-v } // namespace debug /// Disable logging quieting. 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE) namespace { diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp index 33a775b529..a205738048 100644 --- a/src/mlo_dir_conv.cpp +++ b/src/mlo_dir_conv.cpp @@ -55,7 +55,7 @@ #define WORKAROUND_SWDEV_227826 0 #if WORKAROUND_SWDEV_227826 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS) #endif miopen::PerformanceDb miopen::GetDb(const miopen::ExecutionContext& ctx) diff --git a/src/ocl/clhelper.cpp b/src/ocl/clhelper.cpp index 433ca17bb3..a47e1d2203 100644 --- a/src/ocl/clhelper.cpp +++ b/src/ocl/clhelper.cpp @@ -45,7 +45,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) namespace miopen { diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 3c3727323d..1cb47db420 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -58,12 +58,12 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMMED_FALLBACK, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMPILE_ONLY, bool, false) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DUMP_TENSOR_PATH, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMMED_FALLBACK) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DUMP_TENSOR_PATH) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) static inline bool 
IsValidFilterChannelNumber(const TensorDescriptor& x, const TensorDescriptor& w, diff --git a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index 6bdb261bc9..fbf82229ce 100644 --- a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -60,7 +60,7 @@ bool ValidateGcnAssembler() { return true; } /// \todo Try to assemble AMD GCN source? #define WORKAROUND_SWDEV_233338 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH) static const char option_no_co_v3[] = "-mno-code-object-v3"; diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index d2e3c5c98a..61b69ab75f 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_RNNFWD_exp, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_RNNFWD_exp) namespace miopen { diff --git a/src/ocl_kernel.cpp b/src/ocl_kernel.cpp index 0d9b523d4f..e34c42f170 100644 --- a/src/ocl_kernel.cpp +++ b/src/ocl_kernel.cpp @@ -30,7 +30,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) static std::string DimToFormattedString(const size_t* dims, size_t count) { diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index b148f92077..82dd1aa5db 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -47,7 +47,7 @@ #include <../composable_kernel/composable_kernel/include/utility/data_type_enum.hpp> #include <../composable_kernel/composable_kernel/include/utility/reduction_enums.hpp> -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_DYNAMIC_REDUCTION, bool, true); +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_DYNAMIC_REDUCTION); #define WORKAROUND_MIOPEN_ISSUE_557 1 diff --git a/src/solver.cpp b/src/solver.cpp index 847de11756..a70ed6b816 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -43,7 +43,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS, bool, false) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS) namespace miopen { namespace solver { diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index 6a2b507ec9..b553e5ead7 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -33,7 +33,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_BACK, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_CK_BN_BACK) namespace miopen { namespace solver { diff --git a/src/solver/batchnorm/backward_per_activation_fused.cpp b/src/solver/batchnorm/backward_per_activation_fused.cpp index b3a5e90b78..32f66eb94c 100644 --- a/src/solver/batchnorm/backward_per_activation_fused.cpp +++ b/src/solver/batchnorm/backward_per_activation_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED) namespace miopen { diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index d6ed0cd558..b9afd73ed5 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -32,7 +32,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_INFER, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_CK_BN_INFER) namespace miopen { namespace solver { diff --git a/src/solver/batchnorm/forward_inference_fused.cpp b/src/solver/batchnorm/forward_inference_fused.cpp index ba064f1523..a057e1995a 100644 --- a/src/solver/batchnorm/forward_inference_fused.cpp +++ b/src/solver/batchnorm/forward_inference_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED) namespace miopen { diff --git a/src/solver/batchnorm/forward_per_activation_fused.cpp 
b/src/solver/batchnorm/forward_per_activation_fused.cpp index f49a4c13e4..189315b913 100644 --- a/src/solver/batchnorm/forward_per_activation_fused.cpp +++ b/src/solver/batchnorm/forward_per_activation_fused.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED) namespace miopen { diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index 49d0323ebc..09fe5adf21 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -33,7 +33,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING) namespace miopen { namespace solver { diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index 1f724bfc3a..7af559e237 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -63,23 +63,23 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3, bool, true) - -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F2X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F3X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F4X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F5X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3, bool, true) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3) + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F2X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F3X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F4X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F5X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. 
shader C is batch_size etc and useful for diff --git a/src/solver/conv_asm_1x1u.cpp b/src/solver/conv_asm_1x1u.cpp index df970ce5ca..aed7db93eb 100644 --- a/src/solver/conv_asm_1x1u.cpp +++ b/src/solver/conv_asm_1x1u.cpp @@ -41,10 +41,10 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR, bool, false) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp index 89aa86e9f2..822e5b90f9 100644 --- a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp +++ b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp @@ -46,7 +46,7 @@ using half_float::half; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_GCN_ASM_KERNELS) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index 56ef938bed..e87ac91334 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -37,9 +37,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2, bool, true) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index 35518c1cd6..081c828aa1 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -40,8 +40,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U, bool, true) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_5x10u2v2b1.cpp b/src/solver/conv_asm_5x10u2v2b1.cpp index 0a405953af..6c64ea1cf4 100644 --- a/src/solver/conv_asm_5x10u2v2b1.cpp +++ b/src/solver/conv_asm_5x10u2v2b1.cpp @@ -31,7 +31,7 @@ #define WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_5x10u2v2f1.cpp b/src/solver/conv_asm_5x10u2v2f1.cpp index bb33ac57b3..10ea56aa86 100644 --- a/src/solver/conv_asm_5x10u2v2f1.cpp +++ b/src/solver/conv_asm_5x10u2v2f1.cpp @@ -32,7 +32,7 @@ #define WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp index cc9cf3e5f8..25adbed185 100644 --- a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp +++ b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp @@ -32,7 +32,7 @@ #define 
WORKAROUND_ISSUE_1146 1 // check asm solver applicability for gfx90a -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 054e757c8e..8f7d880ce0 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -37,9 +37,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1, bool, true) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index 4fa440f299..0b6b08d1e4 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -41,9 +41,9 @@ #define MIOPEN_GCN_ASM_DIRECT_3X3WRW_SEARCH_LWC_FIXED 0 #define WORKAROUND_SWDEV_330460 1 // ConvAsmBwdWrw3x3 has precision issues on MI200 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3, bool, false) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp 
b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp index 4d2f2457a7..0e67743f6a 100644 --- a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp index 5b704541a4..8b51de6057 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index 853aa8f6c9..2a81aab49c 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) #define BWD_MAX_GEMM_K_SPLITS 8 // #define DEBUG_IGEMM_ASM_BWD_NHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index ad4fff3e85..0e3c2918da 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -33,7 +33,7 @@ #define WORKAROUND_SWDEV_306318 1 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp index dce670c47d..e1c14a520e 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) // #define DEBUG_IGEMM_ASM_FWD_NCHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp index 05a1f3ca94..23f66fa95d 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp @@ -32,8 +32,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) #define FWD_MAX_GEMM_K_SPLITS 8 // #define DEBUG_IGEMM_ASM_FWD_NHWC_CHECK_VALID_TILE_LIST diff --git a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp index a722fdcd9e..69057b66e4 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp @@ -35,8 +35,8 @@ #include #include 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16) #define WRW_MAX_GEMM_K_SPLITS 10 #define WORKAROUND_ISSUE_2496 1 diff --git a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp index 1e0243f2f3..e0ef5c1e2a 100644 --- a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp @@ -30,8 +30,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp index aee5798041..70805c767e 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp @@ -34,7 +34,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index 59cb10160a..4ddfdd2700 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ -32,7 +32,7 @@ #include #include 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1) namespace miopen { namespace solver { diff --git a/src/solver/conv_bin_wino3x3U.cpp b/src/solver/conv_bin_wino3x3U.cpp index 5eda9fc136..50d4f7d1ab 100644 --- a/src/solver/conv_bin_wino3x3U.cpp +++ b/src/solver/conv_bin_wino3x3U.cpp @@ -35,7 +35,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_3X3, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_3X3) namespace miopen { namespace solver { diff --git a/src/solver/conv_bin_winoRxS.cpp b/src/solver/conv_bin_winoRxS.cpp index ade87d6814..f9f83ea38b 100644 --- a/src/solver/conv_bin_winoRxS.cpp +++ b/src/solver/conv_bin_winoRxS.cpp @@ -36,12 +36,12 @@ #include /// Global switch -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS) /// Sub-switches for testing/debugging -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD) /// \todo Detect at runtime and remove this var: -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SRAM_EDC_DISABLED, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_SRAM_EDC_DISABLED) /// \return v rounded up (towards +inf) to the nearest multiple of m. /// Defined for positive values only. 
diff --git a/src/solver/conv_bin_winoRxS_fused.cpp b/src/solver/conv_bin_winoRxS_fused.cpp index f4411f88a9..b42c2a21ec 100644 --- a/src/solver/conv_bin_winoRxS_fused.cpp +++ b/src/solver/conv_bin_winoRxS_fused.cpp @@ -36,8 +36,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GCN_ASM_KERNELS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_GCN_ASM_KERNELS) /// \return v rounded up (towards +inf) to the nearest multiple of m. /// Defined for positive values only. diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index 76dc4b5f66..4658b9d7cf 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -37,7 +37,7 @@ #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV) #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL // Forward declare CK's function. 
diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index f72eae5c19..d512f52a2d 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -36,7 +36,7 @@ #define WORKAROUND_SWDEV_411729 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW) namespace miopen { namespace solver { diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index 0cc73ab0b9..d6be34677f 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -119,7 +119,7 @@ bool IsOutputInt32(const ProblemDescription& problem) problem.GetOutDataType() == miopenInt32; } -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS, bool, false); +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS); std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) { diff --git a/src/solver/conv_direct_naive_conv_bwd.cpp b/src/solver/conv_direct_naive_conv_bwd.cpp index 0b5fd517b8..ec88136d44 100644 --- a/src/solver/conv_direct_naive_conv_bwd.cpp +++ b/src/solver/conv_direct_naive_conv_bwd.cpp @@ -29,7 +29,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD) namespace miopen { namespace solver { diff --git a/src/solver/conv_direct_naive_conv_fwd.cpp b/src/solver/conv_direct_naive_conv_fwd.cpp index afd6132c51..d9fd9ffbc2 100644 --- a/src/solver/conv_direct_naive_conv_fwd.cpp +++ b/src/solver/conv_direct_naive_conv_fwd.cpp @@ -28,7 +28,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD) namespace miopen { namespace solver { diff 
--git a/src/solver/conv_direct_naive_conv_wrw.cpp b/src/solver/conv_direct_naive_conv_wrw.cpp index 0468565b85..4b6e7fafe0 100644 --- a/src/solver/conv_direct_naive_conv_wrw.cpp +++ b/src/solver/conv_direct_naive_conv_wrw.cpp @@ -29,7 +29,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 8d44dbf0b4..c58d0000fb 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index c76e620744..64f7b8eedc 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index b75427b808..4f48c4913b 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -36,7 +36,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS, bool, true) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 51e2a92376..91c7fc0855 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -36,7 +36,7 @@ #endif #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index 920f8a4f9b..664660b8b7 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -34,7 +34,7 @@ #define WORKAROUND_ISSUE_309 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index 9d6b78900b..47602b6f23 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -31,7 +31,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp index f0672589e9..7ddfe5cbb8 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp @@ -32,7 +32,7 @@ #include #include 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index a9e24537a6..3a23613577 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -36,7 +36,7 @@ #define WORKAROUND_SWDEV_329642 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS, std::string, "") diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 5adc5b5078..c10f5cb819 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -34,8 +34,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp index 1e5b10506d..cacba33291 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp @@ -31,7 +31,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp 
b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp index b44cbd0fbd..6edc8a8d29 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp index 0cbeeb028a..05dc1b7783 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp @@ -41,7 +41,7 @@ /// Fatal compiler errors with ROCm 3.7 on some BF16 configs. #define WORKAROUND_MI100_BF16_FATAL_COMPILER_ERRORS (HIP_PACKAGE_VERSION_FLAT <= 3007999999ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp index c88812dda6..ccce73be32 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS) /* this fix is for fp16 xdlops vectorizable kernels due to followings, we may revisit this fix after compiler fix: diff --git 
a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 7f76f088ea..3ad6e80617 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -36,7 +36,7 @@ #endif #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index 17357df167..0fddd42a29 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -35,7 +35,7 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp index 6b53b2eb00..8918137b73 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp index cda8d70875..6a7cb0b668 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp @@ -35,7 +35,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS, bool, true) 
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp index 863748744b..22a5beab5b 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp @@ -38,7 +38,7 @@ /// Fatal compiler errors with ROCm 3.7 on some BF16 configs. #define WORKAROUND_MI100_BF16_FATAL_COMPILER_ERRORS (HIP_PACKAGE_VERSION_FLAT <= 3007999999ULL) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_bwd.cpp b/src/solver/conv_mlir_igemm_bwd.cpp index f69fce78c8..e1d3df1bc4 100644 --- a/src/solver/conv_mlir_igemm_bwd.cpp +++ b/src/solver/conv_mlir_igemm_bwd.cpp @@ -32,7 +32,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp index 63a9e54fa0..7c507933fd 100644 --- a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_fwd.cpp b/src/solver/conv_mlir_igemm_fwd.cpp index ca039800a3..8d58d20bfe 100644 --- a/src/solver/conv_mlir_igemm_fwd.cpp +++ b/src/solver/conv_mlir_igemm_fwd.cpp @@ -32,7 +32,7 @@ #include #include 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp index c213829678..2d39a7ec9d 100644 --- a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_wrw.cpp b/src/solver/conv_mlir_igemm_wrw.cpp index 658cc544db..2635e9b039 100644 --- a/src/solver/conv_mlir_igemm_wrw.cpp +++ b/src/solver/conv_mlir_igemm_wrw.cpp @@ -33,7 +33,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW) namespace miopen { namespace solver { diff --git a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp index f34daf9b7f..e1381793e7 100644 --- a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp @@ -34,7 +34,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS) namespace miopen { namespace solver { diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index 4f6a3574f6..d84fd69b14 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -48,15 +48,15 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3, bool, true) 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4) MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX, uint64_t, 0) // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. 
diff --git a/src/solver/conv_ocl_dir2D11x11.cpp b/src/solver/conv_ocl_dir2D11x11.cpp index c86cf7e33b..50119be43f 100644 --- a/src/solver/conv_ocl_dir2D11x11.cpp +++ b/src/solver/conv_ocl_dir2D11x11.cpp @@ -31,7 +31,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index a6b97a0e34..a0de6c7e76 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1) #define TWO_PASSES 1 diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 67ee850762..6a2545cbd0 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -36,8 +36,8 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp index a662df4c49..4f45f1869e 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwd.cpp b/src/solver/conv_ocl_dir2Dfwd.cpp index a9f22d0e2d..00fe1e7526 100644 --- 
a/src/solver/conv_ocl_dir2Dfwd.cpp +++ b/src/solver/conv_ocl_dir2Dfwd.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index 08e06baab8..79c32322e3 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -33,7 +33,7 @@ #define WORKAROUND_SWDEV_271887 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwd_fused.cpp b/src/solver/conv_ocl_dir2Dfwd_fused.cpp index 09fc368ffd..4aa6e677e0 100644 --- a/src/solver/conv_ocl_dir2Dfwd_fused.cpp +++ b/src/solver/conv_ocl_dir2Dfwd_fused.cpp @@ -36,7 +36,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD) namespace miopen { namespace solver { diff --git a/src/solver/conv_ocl_dir2Dfwdgen.cpp b/src/solver/conv_ocl_dir2Dfwdgen.cpp index 8541bed9a3..0ff0df4be8 100644 --- a/src/solver/conv_ocl_dir2Dfwdgen.cpp +++ b/src/solver/conv_ocl_dir2Dfwdgen.cpp @@ -29,7 +29,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN) namespace miopen { namespace solver { diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 51de839c03..4e478fa9ff 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -66,14 +66,14 @@ /// we can disable Winograd without any performance implications. 
#define WORKAROUND_ISSUE_2493 1 -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_WORKAROUND_ISSUE_2493, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_WORKAROUND_ISSUE_2493) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS, std::string, "") +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS) #define MAX_CU_LIMIT 512 diff --git a/src/solver/conv_winoRxS_fused.cpp b/src/solver/conv_winoRxS_fused.cpp index 2422f0c732..05b013dac9 100644 --- a/src/solver/conv_winoRxS_fused.cpp +++ b/src/solver/conv_winoRxS_fused.cpp @@ -43,7 +43,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1) #define IS3X2 (Winodata == 3 && Winofilter == 2) diff --git a/src/solver/conv_wino_fury_RxS.cpp b/src/solver/conv_wino_fury_RxS.cpp index 12db5256be..1978c2d976 100644 --- a/src/solver/conv_wino_fury_RxS.cpp +++ b/src/solver/conv_wino_fury_RxS.cpp @@ -30,8 +30,8 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3, bool, true) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2) namespace miopen { namespace solver { diff --git a/src/solver/fft.cpp b/src/solver/fft.cpp index 4bd0590a52..bdfcfcd5ca 
100644 --- a/src/solver/fft.cpp +++ b/src/solver/fft.cpp @@ -41,7 +41,7 @@ namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_FFT, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) static void cgemm_grid(size_t* global_work_size, size_t* local_work_size, diff --git a/src/solver/gemm.cpp b/src/solver/gemm.cpp index 5da9ca08fa..78c075d779 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -41,7 +41,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) /// MIOpenGEMM issues with ROCm 3.7, most likely related to the /// issues in the OpenCL compiler. Not reproducible in ROCm 4.0. diff --git a/src/solver/gemm_bwd.cpp b/src/solver/gemm_bwd.cpp index 7f6beaa308..cf231c8965 100644 --- a/src/solver/gemm_bwd.cpp +++ b/src/solver/gemm_bwd.cpp @@ -40,7 +40,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) #define WORKAROUND_MIOPENGEMM_ISSUE_59 1 diff --git a/src/solver/gemm_wrw.cpp b/src/solver/gemm_wrw.cpp index 1bb9377116..483a7d5ee9 100644 --- a/src/solver/gemm_wrw.cpp +++ b/src/solver/gemm_wrw.cpp @@ -9,7 +9,7 @@ #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING, bool, true) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) // copy from convolution.cpp // Workaround for issue 1430. diff --git a/src/target_properties.cpp b/src/target_properties.cpp index b27d2d9de3..d5f763694e 100644 --- a/src/target_properties.cpp +++ b/src/target_properties.cpp @@ -32,8 +32,8 @@ #define WORKAROUND_ISSUE_1204 1 // ROCm may incorrectly report "sramecc-" for gfx900. 
-MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_ENFORCE_DEVICE, std::string, "") -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_ARCH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_ENFORCE_DEVICE) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) namespace miopen { diff --git a/src/tmp_dir.cpp b/src/tmp_dir.cpp index b2441c184e..c4770b9db7 100644 --- a/src/tmp_dir.cpp +++ b/src/tmp_dir.cpp @@ -30,7 +30,7 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_SAVE_TEMP_DIR, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_SAVE_TEMP_DIR) namespace miopen { diff --git a/test/driver.hpp b/test/driver.hpp index 6d1ac397a9..fe6a472b3d 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -100,7 +100,7 @@ auto cpu_async(V& v, Ts&&... xs) -> std::future return std::async(std::launch::deferred, [&] { return v.cpu(xs...); }); } -MIOPEN_DECLARE_ENV_VAR(MIOPEN_VERIFY_CACHE_PATH, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_VERIFY_CACHE_PATH) struct test_driver { diff --git a/test/gpu_conv.hpp b/test/gpu_conv.hpp index c0e1f34bad..491b0aaecc 100644 --- a/test/gpu_conv.hpp +++ b/test/gpu_conv.hpp @@ -42,7 +42,7 @@ extern bool LoggingQuiet; // NOLINT (cppcoreguidelines-avoid-non-const-gl } // namespace debug } // namespace miopen -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF) struct AutoPrepareForGpuReference { diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index beadd48cf1..2f1b6ffcb4 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -32,7 +32,7 @@ #include "../conv2d.hpp" #include "get_handle.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) static bool IsTestRunWith(const char* float_arg) { diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 0cd2fefbf2..77361a8cea 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ 
b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -32,7 +32,7 @@ #include "../conv2d.hpp" #include "get_handle.hpp" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_FLOAT_ARG, std::string, "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) static bool IsTestRunWith(const char* float_arg) { diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 01dc4bb8be..03a6ea3257 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -33,7 +33,7 @@ using TestCase = std::tuple, std::string>; -MIOPEN_DECLARE_ENV_VAR(MIOPEN_TEST_GPU_XNACK_ENABLED, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_GPU_XNACK_ENABLED) static bool SkipTest(void) { return miopen::IsEnabled(MIOPEN_TEST_GPU_XNACK_ENABLED{}); } From c1a257a5e0d57b1fd2aba133e7cb79d8f549b100 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 21:56:43 +0000 Subject: [PATCH 21/73] fix macros --- src/include/miopen/env.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 29840fbc26..2da7ea7384 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -146,13 +146,13 @@ struct EnvVar }; #define MIOPEN_DECLARE_ENV_VAR_BOOL(name) \ - MIOPEN_DECLARE_ENV_VAR_BOOL(#name) + MIOPEN_DECLARE_ENV_VAR(name, bool, false) #define MIOPEN_DECLARE_ENV_VAR_UINT64(name) \ - MIOPEN_DECLARE_ENV_VAR(#name, uint64_t, 0) + MIOPEN_DECLARE_ENV_VAR(name, uint64_t, 0) #define MIOPEN_DECLARE_ENV_VAR_STR(name) \ - MIOPEN_DECLARE_ENV_VAR_STR(#name) + MIOPEN_DECLARE_ENV_VAR(name, std::string, "") /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. 
Right now we have a function named Value() From e21d7da32daa67c16f856dbdf18c4eff0111faf9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 17:13:11 -0600 Subject: [PATCH 22/73] relabel uint64 envs --- driver/conv_driver.hpp | 2 +- src/check_numerics.cpp | 2 +- src/comgr.cpp | 4 +-- src/execution_context.cpp | 2 +- src/gemm_v2.cpp | 2 +- src/generic_search.cpp | 26 ++++++++++++++++--- src/hip/handlehip.cpp | 2 +- src/hipoc/hipoc_program.cpp | 4 +-- src/include/miopen/convolution.hpp | 6 ++--- .../miopen/generic_search_controls.hpp | 18 +++---------- src/logger.cpp | 2 +- src/solver/conv_MP_bidirectional_winograd.cpp | 2 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 4 +-- ...conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp | 4 +-- ...licit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp | 4 +-- ...conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp | 4 +-- src/solver/conv_multipass_wino3x3WrW.cpp | 2 +- 17 files changed, 48 insertions(+), 42 deletions(-) diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index 748f68ac8d..6c95f2508e 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -80,7 +80,7 @@ miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, int f MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DRIVER_PAD_BUFFERS_2M) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DRIVER_USE_GPU_REFERENCE) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DRIVER_SUBNORM_PERCENTAGE, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DRIVER_SUBNORM_PERCENTAGE) // Support in the library discontinued, but left in the driver // for reference in the future. 
diff --git a/src/check_numerics.cpp b/src/check_numerics.cpp index 5a7e76f1de..5802f984aa 100644 --- a/src/check_numerics.cpp +++ b/src/check_numerics.cpp @@ -33,7 +33,7 @@ namespace miopen { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_CHECK_NUMERICS, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_CHECK_NUMERICS) bool CheckNumericsEnabled(const int bitMask) { diff --git a/src/comgr.cpp b/src/comgr.cpp index 1905fbb171..cd9fecb1f7 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -67,11 +67,11 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES) /// 0: Off. /// 1: Logs each option on a separate line. /// 2: Logs all options altogether, on single line. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_OPTIONS, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_COMGR_LOG_OPTIONS) /// Integer, set to max number of first characters /// you would like to log onto console. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT) /// \todo Temporary for debugging: MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT) diff --git a/src/execution_context.cpp b/src/execution_context.cpp index 45f7047e04..e25d1946c6 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -40,7 +40,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_GCN_ASM_KERNELS) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_HIP_KERNELS) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER) static std::ostream& operator<<(std::ostream& os, const rocm_meta_version& rmv) diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index 3e2e335a00..7f33636844 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ 
-246,7 +246,7 @@ rocblas_status miopen_rocblas_gemm_strided_batched_ex3(const miopen::Handle& han #endif // MIOPEN_USE_ROCBLAS -MIOPEN_DECLARE_ENV_VAR(MIOPEN_GEMM_ENFORCE_BACKEND, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_GEMM_ENFORCE_BACKEND) namespace miopen { diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 4bdc047b34..31b9bf9550 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -34,15 +34,35 @@ namespace miopen { namespace solver { -std::size_t GetTuningIterationsMax() { return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); } +std::size_t GetTuningIterationsMax() +{ + return !isUnset(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) ? + Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) : + std::numeric_limits::max(); +} std::chrono::milliseconds GetTuningTimeMax() { - static const auto res = std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; + static const auto fallback = + std::chrono::duration_cast(std::chrono::hours{2}); + static const auto res = + !IsUnset(MIOPEN_TUNING_TIME_MS_MAX{}) ? + std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})} : + std::chrono::milliseconds{fallback.count()}; return res; } -std::size_t GetTuningThreadsMax() { return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); } +std::size_t GetTuningThreadsMax() +{ +#if MIOPEN_USE_COMGR + const auto def_max = 1; // COMGR is not parallelizable +#else + const int def_max = std::thread::hardware_concurrency() / 2; +#endif + return !IsUnset(MIOPEN_COMPILE_PARALLEL_LEVEL{}) ? + Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}) : + def_max; +} } // namespace solver } // namespace miopen diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index 11bed4dff5..f0bda6a75e 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -65,7 +65,7 @@ /// Brute-force W/A: return fixed values. 
#define WORKAROUND_FAULTY_HIPMEMGETINFO_VEGA_NAVI2X (ROCM_FEATURE_DEPRECATED_VEGA_NAVI2X) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEVICE_CU, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEVICE_CU) namespace miopen { diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 3b28bccd05..3b871b7b6f 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -53,8 +53,8 @@ /// "-Xclang -target-feature -Xclang +code-object-v3" /// 3 - "-mnocode-object-v3" / "-mcode-object-v3" /// 4 - "-mcode-object-version=2/3/4" -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION) MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 1ae5b7caf9..3351df54a0 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -48,10 +48,10 @@ #include #include -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE, uint64_t, 0) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED) namespace miopen { diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 37980762e0..87ad1d63e2 100644 --- 
a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -31,21 +31,9 @@ namespace miopen { namespace solver { -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, - uint64_t, - std::numeric_limits::max()) -MIOPEN_DECLARE_ENV_VAR( - MIOPEN_TUNING_TIME_MS_MAX, - uint64_t, - std::chrono::duration_cast(std::chrono::hours{2}).count()) -#if MIOPEN_USE_COMGR -// COMGR is not parallelizable -MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, 1) -#else -MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, - uint64_t, - std::thread::hardware_concurrency() / 2) -#endif +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_TUNING_TIME_MS_MAX) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_COMPILE_PARALLEL_LEVEL) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) } // namespace solver diff --git a/src/logger.cpp b/src/logger.cpp index 07096829b7..d57eacf331 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -58,7 +58,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_MPMT) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME) /// See LoggingLevel in the header. -MIOPEN_DECLARE_ENV_VAR(MIOPEN_LOG_LEVEL, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_LOG_LEVEL) /// Enable logging of function calls to ROCTX api. 
MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_ROCTX) diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index 7af559e237..5c8f4b010c 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -75,7 +75,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F4X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F5X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_XDLOPS_WINOGRAD_F6X3) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM) diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 3a23613577..15ffdf7a86 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -37,9 +37,7 @@ #define WORKAROUND_SWDEV_329642 1 MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS, - std::string, - "") +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp index 6edc8a8d29..63d702990e 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp @@ -38,8 +38,8 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS) 1. compiler issues(25% impact) 2. 
LDS write performance(75% impact) */ -MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp index 05dc1b7783..c54f135b76 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp @@ -48,8 +48,8 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_ 1. compiler issues(25% impact) 2. LDS write performance(75% impact) */ -MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) namespace miopen { namespace solver { diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp index ccce73be32..c07e1a5fb2 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp @@ -38,8 +38,8 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS) 1. compiler issues(25% impact) 2. 
LDS write performance(75% impact) */ -MIOPEN_DECLARE_ENV_VAR( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM, bool, false) +MIOPEN_DECLARE_ENV_VAR_BOOL( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM) namespace miopen { namespace solver { diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index d84fd69b14..cab1804de1 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -57,7 +57,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4) -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX, uint64_t, 0) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX) // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. shader C is batch_size etc and useful for From f037fe9971d839b027cc971edad961d1663051eb Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 9 Nov 2023 23:18:29 +0000 Subject: [PATCH 23/73] typo fix + format --- src/generic_search.cpp | 18 ++++++++---------- src/include/miopen/env.hpp | 27 +++++++++------------------ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 31b9bf9550..1c8c29f433 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -36,19 +36,18 @@ namespace solver { std::size_t GetTuningIterationsMax() { - return !isUnset(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) ? - Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) : - std::numeric_limits::max(); + return !IsUnset(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) + ? 
Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) + : std::numeric_limits::max(); } std::chrono::milliseconds GetTuningTimeMax() { static const auto fallback = std::chrono::duration_cast(std::chrono::hours{2}); - static const auto res = - !IsUnset(MIOPEN_TUNING_TIME_MS_MAX{}) ? - std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})} : - std::chrono::milliseconds{fallback.count()}; + static const auto res = !IsUnset(MIOPEN_TUNING_TIME_MS_MAX{}) + ? std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})} + : std::chrono::milliseconds{fallback.count()}; return res; } @@ -59,9 +58,8 @@ std::size_t GetTuningThreadsMax() #else const int def_max = std::thread::hardware_concurrency() / 2; #endif - return !IsUnset(MIOPEN_COMPILE_PARALLEL_LEVEL{}) ? - Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}) : - def_max; + return !IsUnset(MIOPEN_COMPILE_PARALLEL_LEVEL{}) ? Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}) + : def_max; } } // namespace solver diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 2da7ea7384..fa57b5d2ec 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -57,21 +57,15 @@ struct ParseEnvVal } } - if(value_env_str.compare("disable") == 0 || - value_env_str.compare("disabled") == 0 || - value_env_str.compare("0") == 0 || - value_env_str.compare("no") == 0 || - value_env_str.compare("off") == 0 || - value_env_str.compare("false") == 0) + if(value_env_str.compare("disable") == 0 || value_env_str.compare("disabled") == 0 || + value_env_str.compare("0") == 0 || value_env_str.compare("no") == 0 || + value_env_str.compare("off") == 0 || value_env_str.compare("false") == 0) { return false; } - else if(value_env_str.compare("enable") == 0 || - value_env_str.compare("enabled") == 0 || - value_env_str.compare("1") == 0 || - value_env_str.compare("yes") == 0 || - value_env_str.compare("on") == 0 || - value_env_str.compare("true") == 0) + else if(value_env_str.compare("enable") == 0 || value_env_str.compare("enabled") == 0 || + 
value_env_str.compare("1") == 0 || value_env_str.compare("yes") == 0 || + value_env_str.compare("on") == 0 || value_env_str.compare("true") == 0) { return true; } @@ -145,14 +139,11 @@ struct EnvVar } \ }; -#define MIOPEN_DECLARE_ENV_VAR_BOOL(name) \ - MIOPEN_DECLARE_ENV_VAR(name, bool, false) +#define MIOPEN_DECLARE_ENV_VAR_BOOL(name) MIOPEN_DECLARE_ENV_VAR(name, bool, false) -#define MIOPEN_DECLARE_ENV_VAR_UINT64(name) \ - MIOPEN_DECLARE_ENV_VAR(name, uint64_t, 0) +#define MIOPEN_DECLARE_ENV_VAR_UINT64(name) MIOPEN_DECLARE_ENV_VAR(name, uint64_t, 0) -#define MIOPEN_DECLARE_ENV_VAR_STR(name) \ - MIOPEN_DECLARE_ENV_VAR(name, std::string, "") +#define MIOPEN_DECLARE_ENV_VAR_STR(name) MIOPEN_DECLARE_ENV_VAR(name, std::string, "") /// \todo the following functions should be renamed to either include the word Env /// or put inside a namespace 'env'. Right now we have a function named Value() From 051538c06de86e66022a96838398debf9a69b5ef Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 10 Nov 2023 01:49:49 +0000 Subject: [PATCH 24/73] tidy --- src/include/miopen/env.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index fa57b5d2ec..8181f0444f 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -57,15 +57,13 @@ struct ParseEnvVal } } - if(value_env_str.compare("disable") == 0 || value_env_str.compare("disabled") == 0 || - value_env_str.compare("0") == 0 || value_env_str.compare("no") == 0 || - value_env_str.compare("off") == 0 || value_env_str.compare("false") == 0) + if(value_env_str == "disable" || value_env_str == "disabled" || value_env_str == "0" || + value_env_str == "no" || value_env_str == "off" || value_env_str == "false") { return false; } - else if(value_env_str.compare("enable") == 0 || value_env_str.compare("enabled") == 0 || - value_env_str.compare("1") == 0 || value_env_str.compare("yes") == 0 || - value_env_str.compare("on") == 0 || 
value_env_str.compare("true") == 0) + else if(value_env_str == "enable" || value_env_str == "enabled" || value_env_str == "1" || + value_env_str == "yes" || value_env_str == "on" || value_env_str == "true") { return true; } From fbc855801bad7d41acebf2494cde81ed98e4ebe8 Mon Sep 17 00:00:00 2001 From: Chris Erb Date: Fri, 10 Nov 2023 10:34:24 -0600 Subject: [PATCH 25/73] Update src/include/miopen/env.hpp Co-authored-by: Artem Tamazov --- src/include/miopen/env.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 8181f0444f..140dc9d46d 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -76,6 +76,8 @@ struct ParseEnvVal } }; +// Supports hexadecimals (with leading "0x"), octals (if prefix is "0") and decimals (default). +// Returns 0 if environment variable is in wrong format (strtoull fails to parse the string). template <> struct ParseEnvVal { From 9074f83af2a8c259971137de465bd8d30f6c8216 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 10 Nov 2023 16:35:52 +0000 Subject: [PATCH 26/73] update tests with env syntax --- test/gtest/conv_hip_igemm_xdlops.cpp | 6 ++++-- test/gtest/conv_igemm_mlir.cpp | 20 ++++++++++++-------- test/gtest/conv_igemm_mlir_xdlops.cpp | 16 ++++++++++------ test/gtest/conv_trans.cpp | 4 +++- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 77361a8cea..87c9fc5119 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -33,6 +33,8 @@ #include "get_handle.hpp" MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_COMPOSABLEKERNEL) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) static bool IsTestRunWith(const char* float_arg) { @@ -110,8 +112,8 @@ TEST_P(ConfigWithInt8, Int8Test) #else // MIOPEN_BACKEND_HIP, OCL_DISABLED const auto& handle = get_handle(); 
if(IsTestSupportedForDevice(handle) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_COMPOSABLEKERNEL") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && IsTestRunWith("--int8")) + miopen::IsEnabled(MIOPEN_TEST_COMPOSABLEKERNEL{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && IsTestRunWith("--int8")) { Run2dDriver(miopenInt8); } diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index c99d0a03c4..74423adf16 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -32,16 +32,20 @@ #include "../conv2d.hpp" #include "get_handle.hpp" +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_MLIR) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - static const auto tmp = miopen::GetEnv("MIOPEN_TEST_FLOAT_ARG"); + const auto tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); if(tmp.empty()) { return ""; } - return tmp.front(); + return tmp; }; void GetArgs(const TestCase& param, std::vector& tokens) @@ -116,8 +120,8 @@ TEST_P(ConfigWithFloat, FloatTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_MLIR") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && GetFloatArg() == "--float") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--float") { Run2dDriver(miopenFloat); } @@ -138,8 +142,8 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_MLIR") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && GetFloatArg() == "--half") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == 
"--half") { Run2dDriver(miopenHalf); } @@ -160,8 +164,8 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_MLIR") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && GetFloatArg() == "--int8") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); } diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index ea14e3b68e..90bc87a9ec 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -7,16 +7,20 @@ #include "conv_2d.hpp" #include "get_handle.hpp" +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_MLIR) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - static const auto tmp = miopen::GetEnv("MIOPEN_TEST_FLOAT_ARG"); + const auto tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); if(tmp.empty()) { return ""; } - return tmp.front(); + return tmp; }; void GetArgs(const TestCase& param, std::vector& tokens) @@ -89,8 +93,8 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_MLIR") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && GetFloatArg() == "--half") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--half") { Run2dDriver(miopenHalf); } @@ -111,8 +115,8 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - 
miopen::IsEnvvarValueEnabled("MIOPEN_TEST_MLIR") && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && GetFloatArg() == "--int8") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && + miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); } diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp index a5266d9f9c..7be11dffda 100644 --- a/test/gtest/conv_trans.cpp +++ b/test/gtest/conv_trans.cpp @@ -30,6 +30,8 @@ #include "../conv2d.hpp" #include "get_handle.hpp" +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + void GetArgs(const std::string& param, std::vector& tokens) { std::stringstream ss(param); @@ -95,7 +97,7 @@ bool IsTestSupportedForDevice(const miopen::Handle& handle) TEST_P(ConfigWithFloat, FloatTest) { const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL")) + if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(MIOPEN_TEST_ALL{})) { Run2dDriver(miopenFloat); } From 57a80f51deb521ed04e7a2d010eeb7b7020a20bd Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 10 Nov 2023 17:07:52 +0000 Subject: [PATCH 27/73] format --- test/gtest/conv_hip_igemm_xdlops.cpp | 3 +-- test/gtest/conv_igemm_mlir.cpp | 12 ++++++------ test/gtest/conv_igemm_mlir_xdlops.cpp | 8 ++++---- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index 87c9fc5119..eb2d455d9f 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -111,8 +111,7 @@ TEST_P(ConfigWithInt8, Int8Test) #else // MIOPEN_BACKEND_HIP, OCL_DISABLED const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && - miopen::IsEnabled(MIOPEN_TEST_COMPOSABLEKERNEL{}) && + if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(MIOPEN_TEST_COMPOSABLEKERNEL{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && IsTestRunWith("--int8")) { Run2dDriver(miopenInt8); diff --git 
a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index 74423adf16..fdd642ce87 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -120,8 +120,8 @@ TEST_P(ConfigWithFloat, FloatTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--float") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + GetFloatArg() == "--float") { Run2dDriver(miopenFloat); } @@ -142,8 +142,8 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--half") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + GetFloatArg() == "--half") { Run2dDriver(miopenHalf); } @@ -164,8 +164,8 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--int8") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); } diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index 90bc87a9ec..7e22e86201 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -93,8 +93,8 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - 
miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--half") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + GetFloatArg() == "--half") { Run2dDriver(miopenHalf); } @@ -115,8 +115,8 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && GetFloatArg() == "--int8") + miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); } From 86c61c7417ad89b83d2c14bd14c44b8b9b079e01 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 13 Nov 2023 11:10:09 -0600 Subject: [PATCH 28/73] string references --- test/gtest/conv_embed_db.cpp | 2 +- test/gtest/conv_hip_igemm_xdlops.cpp | 2 +- test/gtest/conv_igemm_mlir.cpp | 6 +++--- test/gtest/conv_igemm_mlir_xdlops.cpp | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index 2f1b6ffcb4..ea2e29f6a1 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -37,7 +37,7 @@ MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const auto s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); return (s_envVar.compare(float_arg) == 0); } diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index eb2d455d9f..a2e34598b6 100644 --- a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -39,7 +39,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const auto s_envVar = 
miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); return (!s_envVar.empty() && std::strcmp(s_envVar.c_str(), float_arg) == 0); } diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index fdd642ce87..92a0582b67 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -40,7 +40,7 @@ using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - const auto tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); if(tmp.empty()) { return ""; @@ -221,13 +221,13 @@ std::vector GetTestCases(const std::string& precision) TestCase{igemm_bwd, precision + flags_bwd + " --input 128 512 7 7 --weights 512 512 3 3 --pads_strides_dilations 1 1 1 1 1 1" + layout}, TestCase{igemm_bwd, precision + flags_bwd + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, TestCase{igemm_bwd, precision + flags_bwd + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, - + TestCase{igemm_wrw, precision + flags_wrw + " --input 64 1024 14 14 --weights 256 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, TestCase{igemm_wrw, precision + flags_wrw + " --input 64 1024 14 14 --weights 256 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{igemm_wrw, precision + flags_wrw + " --input 256 256 14 14 --weights 256 256 3 3 --pads_strides_dilations 0 0 2 2 1 1"}, TestCase{igemm_wrw, precision + flags_wrw + " --input 256 256 14 14 --weights 256 256 3 3 --pads_strides_dilations 0 0 2 2 1 1" + layout}, TestCase{igemm_wrw, precision + flags_wrw + " --input 128 2048 7 7 --weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, - TestCase{igemm_wrw, precision + flags_wrw + " --input 128 2048 7 7 --weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, + TestCase{igemm_wrw, precision + flags_wrw + " --input 128 2048 7 7 
--weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{igemm_wrw, precision + flags_wrw + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{igemm_wrw, precision + flags_wrw + " --input 256 1024 14 14 --weights 1024 32 1 1 --pads_strides_dilations 0 0 1 1 1 1" + groupCount_32} // clang-format on diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index 7e22e86201..e81f2a88ac 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -15,7 +15,7 @@ using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - const auto tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); if(tmp.empty()) { return ""; @@ -172,13 +172,13 @@ std::vector GetTestCases(const std::string& precision) TestCase{bwd, precision + flags_bwd + " --input 128 512 7 7 --weights 512 512 3 3 --pads_strides_dilations 1 1 1 1 1 1" + layout}, TestCase{bwd, precision + flags_bwd + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, TestCase{bwd, precision + flags_bwd + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, - + TestCase{wrw, precision + flags_wrw + " --input 64 1024 14 14 --weights 256 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, TestCase{wrw, precision + flags_wrw + " --input 64 1024 14 14 --weights 256 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{wrw, precision + flags_wrw + " --input 256 256 14 14 --weights 256 256 3 3 --pads_strides_dilations 0 0 2 2 1 1"}, TestCase{wrw, precision + flags_wrw + " --input 256 256 14 14 --weights 256 256 3 3 --pads_strides_dilations 0 0 2 2 1 1" + layout}, TestCase{wrw, precision + flags_wrw + " --input 128 2048 7 7 --weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1"}, - TestCase{wrw, precision + flags_wrw + " --input 128 
2048 7 7 --weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, + TestCase{wrw, precision + flags_wrw + " --input 128 2048 7 7 --weights 512 2048 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{wrw, precision + flags_wrw + " --input 128 64 56 56 --weights 64 64 1 1 --pads_strides_dilations 0 0 1 1 1 1" + layout}, TestCase{wrw, precision + flags_wrw + " --input 256 1024 14 14 --weights 1024 32 1 1 --pads_strides_dilations 0 0 1 1 1 1" + groupCount_32}, TestCase{wrw, precision + flags_wrw + " --input 64 1024 14 14 --weights 1024 1024 1 1 --pads_strides_dilations 0 0 1 1 1 1"} From 312a321d547d6de515b4b5c51ed21e19a1ecbbf4 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 13 Nov 2023 21:38:43 +0000 Subject: [PATCH 29/73] move generic_search env defaults into header --- src/generic_search.cpp | 25 +++---------------- .../miopen/generic_search_controls.hpp | 17 ++++++++++--- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 1c8c29f433..60c7033eb3 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -34,33 +34,14 @@ namespace miopen { namespace solver { -std::size_t GetTuningIterationsMax() -{ - return !IsUnset(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) - ? Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}) - : std::numeric_limits::max(); -} +std::size_t GetTuningIterationsMax() { return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); } std::chrono::milliseconds GetTuningTimeMax() { - static const auto fallback = - std::chrono::duration_cast(std::chrono::hours{2}); - static const auto res = !IsUnset(MIOPEN_TUNING_TIME_MS_MAX{}) - ? 
std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})} - : std::chrono::milliseconds{fallback.count()}; - return res; + return std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; } -std::size_t GetTuningThreadsMax() -{ -#if MIOPEN_USE_COMGR - const auto def_max = 1; // COMGR is not parallelizable -#else - const int def_max = std::thread::hardware_concurrency() / 2; -#endif - return !IsUnset(MIOPEN_COMPILE_PARALLEL_LEVEL{}) ? Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}) - : def_max; -} +std::size_t GetTuningThreadsMax() { return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); } } // namespace solver } // namespace miopen diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 87ad1d63e2..669e1ba121 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -31,9 +31,20 @@ namespace miopen { namespace solver { -MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX) -MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_TUNING_TIME_MS_MAX) -MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_COMPILE_PARALLEL_LEVEL) +MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, + uint64_t, + std::numeric_limits::max()) +MIOPEN_DECLARE_ENV_VAR( + MIOPEN_TUNING_TIME_MS_MAX, + uint64_t, + (std::chrono::duration_cast(std::chrono::hours{2})).count()) +#if MIOPEN_USE_COMGR +MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, uint64_t, 1) // COMGR is not parallelizable +#else +MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, + uint64_t, + std::thread::hardware_concurrency() / 2) +#endif MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) } // namespace solver From cae82dc613a46b5df0704002a400b68c44da818d Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 15 Nov 2023 10:14:39 -0600 Subject: [PATCH 30/73] mergefix --- src/solver/norm/forward_layernorm2d_ck.cpp | 2 +- src/solver/norm/forward_layernorm4d_ck.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) 
diff --git a/src/solver/norm/forward_layernorm2d_ck.cpp b/src/solver/norm/forward_layernorm2d_ck.cpp index 7b14c77429..ac3fbec273 100644 --- a/src/solver/norm/forward_layernorm2d_ck.cpp +++ b/src/solver/norm/forward_layernorm2d_ck.cpp @@ -31,7 +31,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_LAYERNORM2DCKFORWARD_CONV_CK_LN) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_LAYERNORM2DCKFORWARD_CONV_CK_LN) namespace miopen { namespace solver { diff --git a/src/solver/norm/forward_layernorm4d_ck.cpp b/src/solver/norm/forward_layernorm4d_ck.cpp index 29d706cd2c..88c6fec48f 100644 --- a/src/solver/norm/forward_layernorm4d_ck.cpp +++ b/src/solver/norm/forward_layernorm4d_ck.cpp @@ -31,7 +31,7 @@ #include #include #endif -MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_LAYERNORM4DCKFORWARD_CONV_CK_LN) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_LAYERNORM4DCKFORWARD_CONV_CK_LN) namespace miopen { namespace solver { From 2692bb8e905a843f2df27fdd96d3c0d91cf3ce38 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 16 Nov 2023 10:25:42 -0600 Subject: [PATCH 31/73] mergefix --- test/gtest/layernorm.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/gtest/layernorm.cpp b/test/gtest/layernorm.cpp index 56db7be0a6..753278510f 100644 --- a/test/gtest/layernorm.cpp +++ b/test/gtest/layernorm.cpp @@ -25,9 +25,12 @@ *******************************************************************************/ #include "layernorm.hpp" +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + std::string GetFloatArg() { - static const auto tmp = miopen::GetEnv("MIOPEN_TEST_FLOAT_ARG"); + const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); if(tmp.empty()) { return ""; @@ -41,7 +44,7 @@ struct LayerNormTestFloat : LayerNormTest TEST_P(LayerNormTestFloat, LayerNormTestFw) { - if(!(miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL")) && (GetFloatArg() != "--float")) + if(!(miopen::IsEnabled(MIOPEN_TEST_ALL{})) || 
(GetFloatArg() != "--float")) { GTEST_SKIP(); } From 4f46583f16c0077d35cd1e756e5a971a550f6ffc Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 16 Nov 2023 18:32:12 +0000 Subject: [PATCH 32/73] lstm gtest --- test/CMakeLists.txt | 108 +++++++++++++------------- test/gtest/lstm.cpp | 184 ++++++++++++++++++++++++++++++++++++++++++++ test/lstm.cpp | 88 --------------------- 3 files changed, 238 insertions(+), 142 deletions(-) create mode 100644 test/gtest/lstm.cpp delete mode 100644 test/lstm.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d89a8bdffb..496ce73db3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -961,28 +961,28 @@ if(MIOPEN_TEST_DEEPBENCH) COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 64 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 128 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 
--in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 
8 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - COMMAND $ --verbose --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 
--rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 50 
--vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill + #COMMAND $ --verbose --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 1500 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 750 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 375 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill @@ -1054,38 +1054,38 @@ COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-se COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-hy --no-dhx ) -add_custom_test(test_lstm_extra SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 
--bias-mode 0 -dir-mode 0 --no-dhy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-cx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx --no-dcy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-cx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 
--hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx --no-dcy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-dhx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-cy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy --no-dcx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-dhx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy -COMMAND $ --verbose --batch-size 32 --seq-len 
3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-cy -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx -COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx -) +#add_custom_test(test_lstm_extra SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-cx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 
--in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx --no-dcy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-cx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx --no-dcy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-dhx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 
32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-cy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy --no-dcx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-dhx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-cy +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy --no-cx 
--no-dcy --no-hy --no-dhx --no-cy --no-dcx +#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx +#) add_custom_test(test_conv_extra SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp new file mode 100644 index 0000000000..0a42aa92c7 --- /dev/null +++ b/test/gtest/lstm.cpp @@ -0,0 +1,184 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include "lstm_common.hpp" +#include "get_handle.hpp" +#include + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_LSTM_ENABLED) + +using TestCase = std::string; +struct LSTMTest : public testing::TestWithParam> +{}; + +template +struct lstm_driver : lstm_basic_driver +{ + lstm_driver() : lstm_basic_driver() + { + std::vector modes(2, 0); + modes[1] = 1; + std::vector defaultBS(1); + + this->add(this->batchSize, "batch-size", this->generate_data(get_lstm_batchSize(), {17})); + this->add(this->seqLength, "seq-len", this->generate_data(get_lstm_seq_len(), {2})); + this->add(this->inVecLen, "vector-len", this->generate_data(get_lstm_vector_len())); + this->add(this->hiddenSize, "hidden-size", this->generate_data(get_lstm_hidden_size())); + this->add(this->numLayers, "num-layers", this->generate_data(get_lstm_num_layers())); + this->add(this->nohx, "no-hx", this->flag()); + this->add(this->nodhy, "no-dhy", this->flag()); + this->add(this->nocx, "no-cx", this->flag()); + this->add(this->nodcy, "no-dcy", this->flag()); + this->add(this->nohy, "no-hy", this->flag()); + this->add(this->nodhx, "no-dhx", this->flag()); + this->add(this->nocy, "no-cy", this->flag()); + this->add(this->nodcx, "no-dcx", this->flag()); + this->add(this->flatBatchFill, "flat-batch-fill", this->flag()); + this->add(this->useDropout, "use-dropout", this->generate_data({0})); + this->add(this->usePadding, "use-padding", this->generate_data({false, true})); + +#if(MIO_LSTM_TEST_DEBUG == 3) + this->biasMode = 0; + this->dirMode = 0; + this->inputMode = 0; + this->algoMode = 0; +#else + this->add(this->inputMode, "in-mode", this->generate_data(modes)); + this->add(this->biasMode, "bias-mode", this->generate_data(modes)); + this->add(this->dirMode, "dir-mode", this->generate_data(modes)); + this->add(this->algoMode, "algo-mode", this->generate_data(modes)); +#endif + this->add( 
+ this->batchSeq, + "batch-seq", + this->lazy_generate_data( + [=] { return generate_batchSeq(this->batchSize, this->seqLength); }, defaultBS)); + } +}; + +int RunLSTMDriver(std::string cmd)//int argc, const char* argv[]) +{ + atd::vector ptrs; + boost::split(ptrs, cmd, " "); + ptrs.insert(ptrs.begin(), "test_lstm"); +#if(MIO_RNN_TIME_EVERYTHING > 0) + auto t_start = std::chrono::high_resolution_clock::now(); +#endif + test_drive(ptrs.size(), ptrs.data()); + +#if(MIO_RNN_TIME_EVERYTHING > 0) + auto t_end = std::chrono::high_resolution_clock::now(); + + std::cout << "Wall clock: RNN test pass time: " + << std::chrono::duration(t_end - t_start).count() << " seconds." << std::endl; +#endif + exit(0); // NOLINT (concurrency-mt-unsafe) +} + + +TEST_P(LSTMTest, test_lstm_deepbench_rnn) +{ + if(miopen::IsEnabled(MIOPEN_TEST_ALL{})) + { + RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 1024 
--hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 
1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + RunLSTMDriver("--verbose --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + } + else + { + GTEST_SKIP() + } +} + +TEST_P(LSTMTest, test_lstm_extra) +{ + if(miopen::IsEnabled(MIOPEN_TEST_ALL{})) + { + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-cx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 
--no-dcy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx --no-dcy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-cx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx --no-dcy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-dhx"); + 
RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-cy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy --no-dcx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-dhx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-cy"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx"); + RunLSTMDriver("--verbose 
--batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); + RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); + } + else + { + GTEST_SKIP() + } +} + +int main(int argc, char **argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + +//const auto& handle = get_handle(); +//std::string devName = handle.getDeviceName(); +//if(miopen::StartsWith(devName,"gfx94") || miopen::StartsWith(devName, "gfx103") || miopen::StartsWith(devName, "gfx110")) diff --git a/test/lstm.cpp b/test/lstm.cpp deleted file mode 100644 index 5f51649e8f..0000000000 --- a/test/lstm.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2017 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#include "lstm_common.hpp" - -template -struct lstm_driver : lstm_basic_driver -{ - lstm_driver() : lstm_basic_driver() - { - std::vector modes(2, 0); - modes[1] = 1; - std::vector defaultBS(1); - - this->add(this->batchSize, "batch-size", this->generate_data(get_lstm_batchSize(), {17})); - this->add(this->seqLength, "seq-len", this->generate_data(get_lstm_seq_len(), {2})); - this->add(this->inVecLen, "vector-len", this->generate_data(get_lstm_vector_len())); - this->add(this->hiddenSize, "hidden-size", this->generate_data(get_lstm_hidden_size())); - this->add(this->numLayers, "num-layers", this->generate_data(get_lstm_num_layers())); - this->add(this->nohx, "no-hx", this->flag()); - this->add(this->nodhy, "no-dhy", this->flag()); - this->add(this->nocx, "no-cx", this->flag()); - this->add(this->nodcy, "no-dcy", this->flag()); - this->add(this->nohy, "no-hy", this->flag()); - this->add(this->nodhx, "no-dhx", this->flag()); - this->add(this->nocy, "no-cy", this->flag()); - this->add(this->nodcx, "no-dcx", this->flag()); - this->add(this->flatBatchFill, "flat-batch-fill", this->flag()); - this->add(this->useDropout, "use-dropout", this->generate_data({0})); - this->add(this->usePadding, "use-padding", this->generate_data({false, true})); - -#if(MIO_LSTM_TEST_DEBUG == 3) - this->biasMode = 0; - this->dirMode = 0; - this->inputMode = 0; - this->algoMode = 0; -#else - this->add(this->inputMode, "in-mode", this->generate_data(modes)); - this->add(this->biasMode, "bias-mode", this->generate_data(modes)); - this->add(this->dirMode, "dir-mode", this->generate_data(modes)); - this->add(this->algoMode, "algo-mode", 
this->generate_data(modes)); -#endif - this->add( - this->batchSeq, - "batch-seq", - this->lazy_generate_data( - [=] { return generate_batchSeq(this->batchSize, this->seqLength); }, defaultBS)); - } -}; - -int main(int argc, const char* argv[]) -{ -#if(MIO_RNN_TIME_EVERYTHING > 0) - auto t_start = std::chrono::high_resolution_clock::now(); -#endif - test_drive(argc, argv); - -#if(MIO_RNN_TIME_EVERYTHING > 0) - auto t_end = std::chrono::high_resolution_clock::now(); - - std::cout << "Wall clock: RNN test pass time: " - << std::chrono::duration(t_end - t_start).count() << " seconds." << std::endl; -#endif - exit(0); // NOLINT (concurrency-mt-unsafe) -} From 3cfee151eadbd0072668f308c5527e11f28605a8 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 16 Nov 2023 12:55:52 -0600 Subject: [PATCH 33/73] missing include --- test/gtest/layernorm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/gtest/layernorm.cpp b/test/gtest/layernorm.cpp index 753278510f..01a10c7d22 100644 --- a/test/gtest/layernorm.cpp +++ b/test/gtest/layernorm.cpp @@ -23,6 +23,7 @@ * SOFTWARE. 
* *******************************************************************************/ +#include #include "layernorm.hpp" MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) From 65f0f89c65c277ec04acd8d537f0ee9c282baa16 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 16 Nov 2023 14:02:49 -0600 Subject: [PATCH 34/73] string fix --- test/gtest/layernorm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/layernorm.cpp b/test/gtest/layernorm.cpp index 01a10c7d22..4ceea982f8 100644 --- a/test/gtest/layernorm.cpp +++ b/test/gtest/layernorm.cpp @@ -36,7 +36,7 @@ std::string GetFloatArg() { return ""; } - return tmp.front(); + return tmp; } struct LayerNormTestFloat : LayerNormTest From f8afcc8192f03fdb77c5c6ee10fc47813e8d5ae1 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Thu, 16 Nov 2023 23:15:53 +0000 Subject: [PATCH 35/73] fixup lstm gtest --- test/gtest/lstm.cpp | 53 +++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp index 0a42aa92c7..d6e68ab493 100644 --- a/test/gtest/lstm.cpp +++ b/test/gtest/lstm.cpp @@ -26,14 +26,17 @@ #include "lstm_common.hpp" #include "get_handle.hpp" -#include +#include +#include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_LSTM_ENABLED) using TestCase = std::string; struct LSTMTest : public testing::TestWithParam> -{}; +{ +}; template struct lstm_driver : lstm_basic_driver @@ -80,15 +83,21 @@ struct lstm_driver : lstm_basic_driver } }; -int RunLSTMDriver(std::string cmd)//int argc, const char* argv[]) +int RunLSTMDriver(std::string cmd) { - atd::vector ptrs; - boost::split(ptrs, cmd, " "); + std::vector ptrs; + boost::split(ptrs, cmd, boost::is_any_of(" \t"), boost::token_compress_on); ptrs.insert(ptrs.begin(), "test_lstm"); + std::vector char_ptrs; + for(const auto& elem : ptrs) + { + 
char_ptrs.push_back(elem.c_str()); + } + #if(MIO_RNN_TIME_EVERYTHING > 0) auto t_start = std::chrono::high_resolution_clock::now(); #endif - test_drive(ptrs.size(), ptrs.data()); + test_drive(char_ptrs.size(), char_ptrs.data()); #if(MIO_RNN_TIME_EVERYTHING > 0) auto t_end = std::chrono::high_resolution_clock::now(); @@ -96,14 +105,18 @@ int RunLSTMDriver(std::string cmd)//int argc, const char* argv[]) std::cout << "Wall clock: RNN test pass time: " << std::chrono::duration(t_end - t_start).count() << " seconds." << std::endl; #endif + + auto capture = testing::internal::GetCapturedStderr(); + std::cout << capture; + exit(0); // NOLINT (concurrency-mt-unsafe) } - TEST_P(LSTMTest, test_lstm_deepbench_rnn) { - if(miopen::IsEnabled(MIOPEN_TEST_ALL{})) + if(miopen::IsEnabled(MIOPEN_TEST_DEEPBENCH{})) { + // clang-format off RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); @@ -126,10 +139,11 @@ TEST_P(LSTMTest, test_lstm_deepbench_rnn) RunLSTMDriver("--verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); RunLSTMDriver("--verbose --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); + // clang-format on } else { - GTEST_SKIP() 
+ GTEST_SKIP(); } } @@ -137,6 +151,7 @@ TEST_P(LSTMTest, test_lstm_extra) { if(miopen::IsEnabled(MIOPEN_TEST_ALL{})) { + // clang-format off RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy"); @@ -167,18 +182,24 @@ TEST_P(LSTMTest, test_lstm_extra) RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); + // clang-format on } else { - GTEST_SKIP() + GTEST_SKIP(); } } -int main(int argc, char **argv) { - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); +std::vector GetTestCases() +{ + std::vector test_cases; + return test_cases; } -//const auto& handle = get_handle(); -//std::string devName = handle.getDeviceName(); -//if(miopen::StartsWith(devName,"gfx94") || miopen::StartsWith(devName, "gfx103") || miopen::StartsWith(devName, "gfx110")) +INSTANTIATE_TEST_SUITE_P(LSTM, LSTMTest, testing::Values(GetTestCases())); + +int main(int argc, char** argv) +{ + testing::InitGoogleTest(&argc, argv); + return 
RUN_ALL_TESTS(); +} From 9a6697692d02762bddad30dd18386cf8a051e307 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 17 Nov 2023 01:32:16 +0000 Subject: [PATCH 36/73] tidy --- test/gtest/lstm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp index d6e68ab493..adac6f2c0b 100644 --- a/test/gtest/lstm.cpp +++ b/test/gtest/lstm.cpp @@ -26,6 +26,7 @@ #include "lstm_common.hpp" #include "get_handle.hpp" +#include #include #include From b422b577b737f42e3daf1eda50db173b96c5e9f9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 17 Nov 2023 16:30:10 +0000 Subject: [PATCH 37/73] tidy --- test/gtest/lstm.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp index adac6f2c0b..786f0f6bb7 100644 --- a/test/gtest/lstm.cpp +++ b/test/gtest/lstm.cpp @@ -90,10 +90,9 @@ int RunLSTMDriver(std::string cmd) boost::split(ptrs, cmd, boost::is_any_of(" \t"), boost::token_compress_on); ptrs.insert(ptrs.begin(), "test_lstm"); std::vector char_ptrs; - for(const auto& elem : ptrs) - { - char_ptrs.push_back(elem.c_str()); - } + std::transform(ptrs.begin(), ptrs.end(), std::back_inserter(char_ptrs), [](const auto& str) { + return str.c_str(); + }); #if(MIO_RNN_TIME_EVERYTHING > 0) auto t_start = std::chrono::high_resolution_clock::now(); From 3a42c21accbc7017560d40ebf08ae18c4026dfcf Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 17 Nov 2023 16:38:31 +0000 Subject: [PATCH 38/73] remove commented lines --- test/CMakeLists.txt | 56 --------------------------------------------- 1 file changed, 56 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 496ce73db3..3860d261d4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -961,28 +961,6 @@ if(MIOPEN_TEST_DEEPBENCH) COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill 
COMMAND $ --verbose --batch-size 64 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 128 --seq-len 50 --vector-len 2560 --hidden-size 2560 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 
--in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill - #COMMAND $ --verbose 
--batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 1500 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 750 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill COMMAND $ --verbose --batch-size 32 --seq-len 375 --vector-len 2816 --hidden-size 2816 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill @@ -1054,40 +1032,6 @@ COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-se COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-hy --no-dhx ) -#add_custom_test(test_lstm_extra SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-cx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 
--num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx --no-dcy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-cx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx --no-dcy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-dhx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 
--batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-cy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy --no-dcx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-dhx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-cy -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx 
--no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx -#COMMAND $ --verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx -#) - - add_custom_test(test_conv_extra SKIP_UNLESS_ALL GFX94X_ENABLED GFX103X_ENABLED GFX110X_ENABLED # COMMAND $ --verbose --input 1 1 1 1 --weights 1 1 2 2 --pads_strides_dilations 0 0 3 3 1 1 COMMAND $ --verbose --input 4 1 161 700 --weights 4 1 5 20 --pads_strides_dilations 0 0 2 2 1 1 From 479d97e5f4485a402701b7f03098edf68dddd46e Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Fri, 17 Nov 2023 12:04:01 -0600 Subject: [PATCH 39/73] mergefix --- src/include/miopen/convolution.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 6d080b2d3a..3351df54a0 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -108,7 +108,6 @@ struct ConvolutionAttribute if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) return static_cast( miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); - } return rounding_mode; } From 6665114a8dc401d7b8952f34737e2d5ba6a591ed Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 20 Nov 2023 02:45:40 -0600 Subject: [PATCH 40/73] address comments --- src/find_controls.cpp | 6 +++--- src/generic_search.cpp | 1 - src/include/miopen/generic_search_controls.hpp | 2 ++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 6286cb62e4..8bbf3e37c5 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -105,7 +105,7 @@ FindEnforceAction GetFindEnforceActionImpl() FindEnforceAction GetFindEnforceAction() { - static const FindEnforceAction val = GetFindEnforceActionImpl(); + const FindEnforceAction val = GetFindEnforceActionImpl(); return val; } 
@@ -167,7 +167,7 @@ std::ostream& operator<<(std::ostream& os, const FindEnforce& val) boost::optional> GetEnvFindOnlySolver() { - static const auto once = GetEnvFindOnlySolverImpl(); + const auto once = GetEnvFindOnlySolverImpl(); return once; } @@ -234,7 +234,7 @@ FindMode::Values GetFindModeValueImpl() FindMode::Values GetFindModeValue() { - static const FindMode::Values val = GetFindModeValueImpl(); + const FindMode::Values val = GetFindModeValueImpl(); return val; } diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 60c7033eb3..14443d829f 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -28,7 +28,6 @@ #include #include -#include #include namespace miopen { diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 669e1ba121..0351fdb99e 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -26,7 +26,9 @@ #pragma once #include +#include #include +#include namespace miopen { namespace solver { From 6be69d4fdea0452713d34dd6b1237570992d0218 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 20 Nov 2023 11:09:10 -0600 Subject: [PATCH 41/73] small performance remedy for find controls, remove log pollutants --- src/find_controls.cpp | 45 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 8bbf3e37c5..7dcecd518f 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -66,9 +66,8 @@ const char* ToCString(const FindEnforceAction mode) return ""; } -FindEnforceAction GetFindEnforceActionImpl() +FindEnforceAction GetFindEnforceActionImpl(std::string str) { - auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); if(str.empty()) return FindEnforceAction::Default_; for(auto& c : str) @@ -105,14 +104,21 @@ FindEnforceAction GetFindEnforceActionImpl() FindEnforceAction GetFindEnforceAction() { - const 
FindEnforceAction val = GetFindEnforceActionImpl(); + static FindEnforceAction val = FindEnforceAction::Default_; + static std::string prev_env_str; + const auto& str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); + if(prev_env_str != str) + { + val = GetFindEnforceActionImpl(str); + prev_env_str = str; + } + return val; } -boost::optional> GetEnvFindOnlySolverImpl() +boost::optional> GetEnvFindOnlySolverImpl(const std::string& slv_str) { static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); - const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); std::vector res; if(!slv_str.empty()) { @@ -167,8 +173,16 @@ std::ostream& operator<<(std::ostream& os, const FindEnforce& val) boost::optional> GetEnvFindOnlySolver() { - const auto once = GetEnvFindOnlySolverImpl(); - return once; + static boost::optional> val = boost::none; + static std::string prev_env_str; + const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); + if(prev_env_str != slv_str) + { + val = GetEnvFindOnlySolverImpl(slv_str); + prev_env_str = slv_str; + } + + return val; } namespace { @@ -192,9 +206,8 @@ std::ostream& operator<<(std::ostream& os, const FindMode::Values& v) return os << ToCString(v) << "(" << static_cast(v) << ')'; } -FindMode::Values GetFindModeValueImpl2() +FindMode::Values GetFindModeValueImpl2(std::string str) { - auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); if(str.empty()) return FindMode::Values::Default_; for(auto& c : str) @@ -225,16 +238,24 @@ FindMode::Values GetFindModeValueImpl2() return FindMode::Values::Default_; } -FindMode::Values GetFindModeValueImpl() +FindMode::Values GetFindModeValueImpl(const std::string& env_str) { - auto rv = GetFindModeValueImpl2(); + auto rv = GetFindModeValueImpl2(env_str); MIOPEN_LOG_NQI("MIOPEN_FIND_MODE = " << rv); return rv; } FindMode::Values GetFindModeValue() { - const FindMode::Values val = GetFindModeValueImpl(); + static FindMode::Values val = 
FindMode::Values::Default_; + static std::string prev_env_str; + const auto& str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); + if(prev_env_str != str) + { + val = GetFindModeValueImpl(str); + prev_env_str = slv_str; + } + return val; } From 18cc17a45d1a928e46ca19e7f70cb0d8e9fcc2fa Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 20 Nov 2023 17:12:46 +0000 Subject: [PATCH 42/73] clang format --- src/find_controls.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 7dcecd518f..5c5cd1efa1 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -109,7 +109,7 @@ FindEnforceAction GetFindEnforceAction() const auto& str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); if(prev_env_str != str) { - val = GetFindEnforceActionImpl(str); + val = GetFindEnforceActionImpl(str); prev_env_str = str; } @@ -178,7 +178,7 @@ boost::optional> GetEnvFindOnlySolver() const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); if(prev_env_str != slv_str) { - val = GetEnvFindOnlySolverImpl(slv_str); + val = GetEnvFindOnlySolverImpl(slv_str); prev_env_str = slv_str; } @@ -252,7 +252,7 @@ FindMode::Values GetFindModeValue() const auto& str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); if(prev_env_str != str) { - val = GetFindModeValueImpl(str); + val = GetFindModeValueImpl(str); prev_env_str = slv_str; } From a7b434448693be214fc81423aef41216995fbf3b Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 20 Nov 2023 19:48:01 +0000 Subject: [PATCH 43/73] revert find_controls changes --- src/find_controls.cpp | 45 ++++++++++++------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 5c5cd1efa1..6286cb62e4 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -66,8 +66,9 @@ const char* ToCString(const FindEnforceAction mode) return ""; } -FindEnforceAction 
GetFindEnforceActionImpl(std::string str) +FindEnforceAction GetFindEnforceActionImpl() { + auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); if(str.empty()) return FindEnforceAction::Default_; for(auto& c : str) @@ -104,21 +105,14 @@ FindEnforceAction GetFindEnforceActionImpl(std::string str) FindEnforceAction GetFindEnforceAction() { - static FindEnforceAction val = FindEnforceAction::Default_; - static std::string prev_env_str; - const auto& str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); - if(prev_env_str != str) - { - val = GetFindEnforceActionImpl(str); - prev_env_str = str; - } - + static const FindEnforceAction val = GetFindEnforceActionImpl(); return val; } -boost::optional> GetEnvFindOnlySolverImpl(const std::string& slv_str) +boost::optional> GetEnvFindOnlySolverImpl() { static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); + const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); std::vector res; if(!slv_str.empty()) { @@ -173,16 +167,8 @@ std::ostream& operator<<(std::ostream& os, const FindEnforce& val) boost::optional> GetEnvFindOnlySolver() { - static boost::optional> val = boost::none; - static std::string prev_env_str; - const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); - if(prev_env_str != slv_str) - { - val = GetEnvFindOnlySolverImpl(slv_str); - prev_env_str = slv_str; - } - - return val; + static const auto once = GetEnvFindOnlySolverImpl(); + return once; } namespace { @@ -206,8 +192,9 @@ std::ostream& operator<<(std::ostream& os, const FindMode::Values& v) return os << ToCString(v) << "(" << static_cast(v) << ')'; } -FindMode::Values GetFindModeValueImpl2(std::string str) +FindMode::Values GetFindModeValueImpl2() { + auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); if(str.empty()) return FindMode::Values::Default_; for(auto& c : str) @@ -238,24 +225,16 @@ FindMode::Values GetFindModeValueImpl2(std::string str) return 
FindMode::Values::Default_; } -FindMode::Values GetFindModeValueImpl(const std::string& env_str) +FindMode::Values GetFindModeValueImpl() { - auto rv = GetFindModeValueImpl2(env_str); + auto rv = GetFindModeValueImpl2(); MIOPEN_LOG_NQI("MIOPEN_FIND_MODE = " << rv); return rv; } FindMode::Values GetFindModeValue() { - static FindMode::Values val = FindMode::Values::Default_; - static std::string prev_env_str; - const auto& str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); - if(prev_env_str != str) - { - val = GetFindModeValueImpl(str); - prev_env_str = slv_str; - } - + static const FindMode::Values val = GetFindModeValueImpl(); return val; } From a06e4643430ee87e211ca7ced5fd756cd5876dc9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 21 Nov 2023 00:02:13 +0000 Subject: [PATCH 44/73] env namespace --- driver/CBAInferFusion_driver.hpp | 2 +- driver/conv_driver.hpp | 8 ++-- driver/random.hpp | 2 +- src/binary_cache.cpp | 4 +- src/check_numerics.cpp | 10 ++-- src/comgr.cpp | 28 +++++------ src/conv/solver_finders.cpp | 26 +++++----- src/convolution.cpp | 20 ++++---- src/db_path.cpp.in | 4 +- src/execution_context.cpp | 14 +++--- src/expanduser.cpp | 2 +- src/find_controls.cpp | 6 +-- src/gemm_v2.cpp | 2 +- src/generic_search.cpp | 6 +-- src/hip/handlehip.cpp | 2 +- src/hip/hip_build_utils.cpp | 4 +- src/hipoc/hipoc_kernel.cpp | 2 +- src/hipoc/hipoc_program.cpp | 12 ++--- src/include/miopen/convolution.hpp | 16 +++---- src/include/miopen/env.hpp | 10 +++- src/include/miopen/find_db.hpp | 4 +- src/include/miopen/generic_search.hpp | 2 +- .../miopen/generic_search_controls.hpp | 6 --- .../miopen/solver/ck_utility_common.hpp | 8 ++-- .../miopen/solver/implicitgemm_util.hpp | 8 ++-- src/include/miopen/sqlite_db.hpp | 9 ++-- src/kernel_cache.cpp | 2 +- src/logger.cpp | 24 +++++----- src/mlo_dir_conv.cpp | 8 ++-- src/ocl/clhelper.cpp | 2 +- src/ocl/convolutionocl.cpp | 18 +++---- src/ocl/gcn_asm_utils.cpp | 2 +- src/ocl/rnnocl.cpp | 2 +- src/ocl_kernel.cpp | 2 +- 
src/reducetensor.cpp | 8 ++-- src/solver.cpp | 2 +- src/solver/batchnorm/backward_ck.cpp | 2 +- .../backward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_inference_ck.cpp | 2 +- .../batchnorm/forward_inference_fused.cpp | 2 +- .../forward_per_activation_fused.cpp | 2 +- src/solver/batchnorm/forward_training_ck.cpp | 2 +- src/solver/conv_MP_bidirectional_winograd.cpp | 48 +++++++++---------- src/solver/conv_asm_1x1u.cpp | 10 ++-- src/solver/conv_asm_1x1u_bias_activ_fused.cpp | 2 +- src/solver/conv_asm_1x1u_stride2.cpp | 8 ++-- src/solver/conv_asm_3x3u.cpp | 4 +- src/solver/conv_asm_5x10u2v2b1.cpp | 2 +- src/solver/conv_asm_5x10u2v2f1.cpp | 2 +- .../conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp | 2 +- src/solver/conv_asm_dir_BwdWrW1x1.cpp | 6 +-- src/solver/conv_asm_dir_BwdWrW3x3.cpp | 8 ++-- ...onv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp | 2 +- src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp | 2 +- .../conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp | 6 +-- src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp | 4 +- .../conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp | 2 +- .../conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp | 6 +-- .../conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp | 6 +-- .../conv_asm_implicit_gemm_v4r1_dynamic.cpp | 4 +- ...m_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp | 2 +- ...onv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp | 2 +- src/solver/conv_bin_wino3x3U.cpp | 2 +- src/solver/conv_bin_winoRxS.cpp | 8 ++-- src/solver/conv_bin_winoRxS_fused.cpp | 4 +- .../conv_ck_igemm_fwd_bias_activ_fused.cpp | 2 +- .../conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp | 4 +- src/solver/conv_direct_naive_conv.cpp | 4 +- src/solver/conv_direct_naive_conv_bwd.cpp | 2 +- src/solver/conv_direct_naive_conv_fwd.cpp | 2 +- src/solver/conv_direct_naive_conv_wrw.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 4 +- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 4 +- ...conv_hip_implicit_gemm_bwd_data_xdlops.cpp | 2 +- 
.../conv_hip_implicit_gemm_bwd_v1r1.cpp | 4 +- ...conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp | 8 ++-- .../conv_hip_implicit_gemm_bwd_v4r1.cpp | 4 +- ...conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp | 14 +++--- .../conv_hip_implicit_gemm_fwd_v4r1.cpp | 4 +- .../conv_hip_implicit_gemm_fwd_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp | 18 +++---- ...licit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp | 18 +++---- ...conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp | 18 +++---- .../conv_hip_implicit_gemm_fwd_xdlops.cpp | 2 +- ...v_hip_implicit_gemm_grouped_fwd_xdlops.cpp | 2 +- .../conv_hip_implicit_gemm_wrw_v4r4.cpp | 2 +- ...conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp | 6 +-- ...licit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp | 6 +-- src/solver/conv_mlir_igemm_bwd.cpp | 2 +- src/solver/conv_mlir_igemm_bwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_fwd.cpp | 2 +- src/solver/conv_mlir_igemm_fwd_xdlops.cpp | 2 +- src/solver/conv_mlir_igemm_wrw.cpp | 2 +- src/solver/conv_mlir_igemm_wrw_xdlops.cpp | 2 +- src/solver/conv_multipass_wino3x3WrW.cpp | 38 +++++++-------- src/solver/conv_ocl_dir2D11x11.cpp | 2 +- src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp | 4 +- src/solver/conv_ocl_dir2D_bwdWrW_2.cpp | 4 +- src/solver/conv_ocl_dir2D_bwdWrW_53.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd.cpp | 2 +- src/solver/conv_ocl_dir2Dfwd1x1.cpp | 4 +- src/solver/conv_ocl_dir2Dfwdgen.cpp | 2 +- src/solver/conv_winoRxS.cpp | 13 ++--- src/solver/conv_winoRxS_fused.cpp | 2 +- src/solver/conv_wino_fury_RxS.cpp | 4 +- src/solver/fft.cpp | 4 +- src/solver/gemm.cpp | 10 ++-- src/solver/gemm_bwd.cpp | 6 +-- src/solver/gemm_wrw.cpp | 4 +- src/solver/norm/forward_layernorm2d_ck.cpp | 2 +- src/solver/norm/forward_layernorm4d_ck.cpp | 2 +- src/target_properties.cpp | 4 +- src/tmp_dir.cpp | 4 +- test/driver.hpp | 2 +- test/gpu_conv.hpp | 6 +-- test/gtest/conv_embed_db.cpp | 2 +- test/gtest/conv_hip_igemm_xdlops.cpp | 6 +-- test/gtest/conv_igemm_dynamic.cpp | 2 +- test/gtest/conv_igemm_mlir.cpp | 8 ++-- 
test/gtest/conv_igemm_mlir_xdlops.cpp | 6 +-- test/gtest/conv_trans.cpp | 2 +- test/gtest/layernorm.cpp | 2 +- 123 files changed, 372 insertions(+), 366 deletions(-) diff --git a/driver/CBAInferFusion_driver.hpp b/driver/CBAInferFusion_driver.hpp index c50464f108..b24692c6cd 100644 --- a/driver/CBAInferFusion_driver.hpp +++ b/driver/CBAInferFusion_driver.hpp @@ -608,7 +608,7 @@ int CBAInferFusionDriver::AllocateBuffersAndCopy() else out_sz = in_sz; // This is for N+A so the output is the same as the input size - if(miopen::IsEnabled(MIOPEN_DRIVER_PAD_BUFFERS_2M{})) + if(miopen::IsEnabled(ENV(MIOPEN_DRIVER_PAD_BUFFERS_2M))) { PadBufferSize(wei_sz, sizeof(Tgpu)); } diff --git a/driver/conv_driver.hpp b/driver/conv_driver.hpp index e971605293..9c2a3dee19 100644 --- a/driver/conv_driver.hpp +++ b/driver/conv_driver.hpp @@ -1243,12 +1243,12 @@ int ConvDriver::AllocateBuffersAndCopy() size_t in_sz = GetTensorSize(inputTensor); size_t wei_sz = GetTensorSize(weightTensor); size_t out_sz = GetTensorSize(outputTensor); - auto subnorm_percentage = miopen::Value(MIOPEN_DRIVER_SUBNORM_PERCENTAGE{}); + auto subnorm_percentage = miopen::Value(ENV(MIOPEN_DRIVER_SUBNORM_PERCENTAGE)); if(subnorm_percentage != 0) std::cout << "MIOPEN_DRIVER_SUBNORM_PERCENTAGE = " << subnorm_percentage << std::endl; // Workaround: Pad buffers allocations to be a multiple of 2M - if(miopen::IsEnabled(MIOPEN_DRIVER_PAD_BUFFERS_2M{})) + if(miopen::IsEnabled(ENV(MIOPEN_DRIVER_PAD_BUFFERS_2M))) { // PadBufferSize(in_sz, sizeof(Tgpu)); PadBufferSize(wei_sz, sizeof(Tgpu)); @@ -1274,7 +1274,7 @@ int ConvDriver::AllocateBuffersAndCopy() size_t warmup_in_sz = GetTensorSize(warmupInputTensor); size_t warmup_wei_sz = GetTensorSize(warmupWeightTensor); size_t warmup_out_sz = GetTensorSize(warmupOutputTensor); - if(miopen::IsEnabled(MIOPEN_DRIVER_PAD_BUFFERS_2M{})) + if(miopen::IsEnabled(ENV(MIOPEN_DRIVER_PAD_BUFFERS_2M))) { PadBufferSize(warmup_wei_sz, sizeof(warmup_Tgpu)); PadBufferSize(warmup_out_sz, 
sizeof(warmup_Tgpu)); @@ -1607,7 +1607,7 @@ int ConvDriver::AllocateBuffersAndCopy() template bool ConvDriver::UseGPUReference() { - if(!miopen::IsDisabled(MIOPEN_DRIVER_USE_GPU_REFERENCE{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DRIVER_USE_GPU_REFERENCE))) { if((miopen_type{} == miopenFloat && (miopen_type{} == miopenFloat || miopen_type{} == miopenHalf || diff --git a/driver/random.hpp b/driver/random.hpp index 19757c1830..66bbfaab80 100644 --- a/driver/random.hpp +++ b/driver/random.hpp @@ -13,7 +13,7 @@ using glibc_gen = std::linear_congruential_engine #include -namespace miopen { - MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_CHECK_NUMERICS) +namespace miopen { + bool CheckNumericsEnabled(const int bitMask) { - return (miopen::Value(MIOPEN_CHECK_NUMERICS{}) & bitMask) != 0; + return (miopen::Value(ENV(MIOPEN_CHECK_NUMERICS)) & bitMask) != 0; } // Must keep this structure synchronized with one in MIOpenCheckNumerics @@ -140,7 +140,7 @@ bool checkNumericsImpl( bool checkNumericsInput(const Handle& handle, const TensorDescriptor& dDesc, ConstData_t data) { return checkNumericsImpl( - handle, static_cast(miopen::Value(MIOPEN_CHECK_NUMERICS{})), dDesc, data, true); + handle, static_cast(miopen::Value(ENV(MIOPEN_CHECK_NUMERICS))), dDesc, data, true); } // Synchronizes to wait for kernel to finish, then checks data for output: @@ -150,7 +150,7 @@ bool checkNumericsOutput(const Handle& handle, const TensorDescriptor& dDesc, Co handle.Finish(); return checkNumericsImpl( - handle, static_cast(miopen::Value(MIOPEN_CHECK_NUMERICS{})), dDesc, data, false); + handle, static_cast(miopen::Value(ENV(MIOPEN_CHECK_NUMERICS))), dDesc, data, false); } } // namespace miopen diff --git a/src/comgr.cpp b/src/comgr.cpp index 9f29542425..4ee5f635be 100644 --- a/src/comgr.cpp +++ b/src/comgr.cpp @@ -151,7 +151,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE) << GetStatusText(status)); \ (action); \ } \ - else if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_CALLS{})) \ + else 
if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_LOG_CALLS))) \ MIOPEN_LOG_I("Ok \'" #comgrcall "\' " << to_string(info)); \ } while(false) @@ -227,7 +227,7 @@ static void AddCompilerOptions(OptionList& list, const miopen::TargetProperties& #endif list.push_back("-mllvm"); list.push_back("-amdgpu-prelink"); - if(miopen::IsEnabled(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP))) { list.push_back("-mwavefrontsize64"); list.push_back("-mcumode"); @@ -268,14 +268,14 @@ static void RemoveOptionsUnwanted(OptionList& list) namespace hip { #if PCH_IS_SUPPORTED -static bool IsPchEnabled() { return !miopen::IsDisabled(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE{}); } +static bool IsPchEnabled() { return !miopen::IsDisabled(ENV(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE)); } #endif static std::string GetPchEnableStatus() { #if PCH_IS_SUPPORTED auto rv = std::string{IsPchEnabled() ? "1" : "0"}; - if(miopen::IsDisabled(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_COMGR_HIP_PCH_ENFORCE))) return rv += " (enforced)"; return rv; #else @@ -322,7 +322,7 @@ static void RemoveCompilerOptionsUnwanted(OptionList& list) list.erase(remove_if(list.begin(), list.end(), [&](const auto& option) { // clang-format off - return (!miopen::IsEnabled(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN{}) + return (!miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN)) && (IsLinkerOption(option))); // clang-format on }), list.end()); @@ -477,7 +477,7 @@ static std::string GetStatusText(const amd_comgr_status_t status, const bool unk static void LogOptions(const char* options[], size_t count) { - static const auto control = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_OPTIONS{}); + static const auto control = miopen::Value(ENV(MIOPEN_DEBUG_COMGR_LOG_OPTIONS)); if(!(control != 0 && miopen::IsLogging(miopen::LoggingLevel::Info))) return; if(control == 2) @@ -621,12 +621,12 @@ class Dataset : ComgrOwner const amd_comgr_data_kind_t type) const { const Data 
d(type); - if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES))) MIOPEN_LOG_I(name << ' ' << content.size() << " bytes"); d.SetName(name); d.SetBytes(content); AddData(d); - const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}); + const auto show_first = miopen::Value(ENV(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT)); if(show_first > 0 && miopen::IsLogging(miopen::LoggingLevel::Info) && (type == AMD_COMGR_DATA_KIND_SOURCE || type == AMD_COMGR_DATA_KIND_INCLUDE)) { @@ -640,7 +640,7 @@ class Dataset : ComgrOwner { const char name[] = "hip.pch"; const Data d(AMD_COMGR_DATA_KIND_PRECOMPILED_HEADER); - if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES))) { MIOPEN_LOG_I(name << ' ' << size << " bytes, ptr = " << static_cast(content)); @@ -766,7 +766,7 @@ static void SetIsaName(const ActionInfo& action, static std::string GetDebugCompilerOptionsInsert() { - const auto& p = miopen::GetStringEnv(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT{}); + const auto& p = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_COMGR_COMPILER_OPTIONS_INSERT)); return {p}; } @@ -813,7 +813,7 @@ void BuildHip(const std::string& name, action.SetLogging(true); const Dataset exe; - if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_HIP_BUILD_FATBIN))) { auto raw = options // + " " + GetDebugCompilerOptionsInsert() // @@ -1106,7 +1106,7 @@ static std::string GetStatusText(const hiprtcResult status) MIOPEN_LOG_E("\'" #call "\' " << to_string(info) << ": " << GetStatusText(status)); \ (action); \ } \ - else if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_CALLS{})) \ + else if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_LOG_CALLS))) \ MIOPEN_LOG_I("Ok \'" #call "\' " << to_string(info)); \ } while(false) @@ -1241,11 +1241,11 @@ class HiprtcProgram private: void LogInputFile(const std::string& name, const 
std::string& content) { - if(miopen::IsEnabled(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_COMGR_LOG_SOURCE_NAMES))) MIOPEN_LOG_I(name << ' ' << content.size() << " bytes"); if(miopen::IsLogging(miopen::LoggingLevel::Info)) { - const auto show_first = miopen::Value(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT{}); + const auto show_first = miopen::Value(ENV(MIOPEN_DEBUG_COMGR_LOG_SOURCE_TEXT)); if(show_first > 0) { const auto text_length = diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index 3229f9e700..9eeb31a025 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -32,8 +32,6 @@ #include #include -namespace miopen { - MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEVICE_ARCH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_GEMM) @@ -42,6 +40,8 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_WINOGRAD) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) +namespace miopen { + namespace conv { namespace { @@ -58,7 +58,7 @@ class DirectSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, @@ -85,7 +85,7 @@ class ImplicitGemmSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, @@ -114,7 +114,7 @@ class FftSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, @@ -139,7 +139,7 @@ class GemmSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, @@ -164,7 +164,7 @@ class WinogradSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, @@ -209,7 +209,7 @@ static void EvaluateInvokers(Handle& handle, const AnyInvokeParams& invoke_ctx, DbRecord& record) { - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) return; @@ -325,15 +325,15 @@ bool IsAlgorithmDisabled(miopenConvAlgorithm_t algo) switch(algo) { // clang-format off case 
miopenConvolutionAlgoGEMM: - return !MIOPEN_USE_GEMM || miopen::IsDisabled(MIOPEN_DEBUG_CONV_GEMM{}); + return !MIOPEN_USE_GEMM || miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_GEMM)); case miopenConvolutionAlgoDirect: - return miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT{}); + return miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT)); case miopenConvolutionAlgoFFT: - return miopen::IsDisabled(MIOPEN_DEBUG_CONV_FFT{}); + return miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_FFT)); case miopenConvolutionAlgoWinograd: - return miopen::IsDisabled(MIOPEN_DEBUG_CONV_WINOGRAD{}); + return miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_WINOGRAD)); case miopenConvolutionAlgoImplicitGEMM: - return miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM{}); + return miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM)); default: // Disable future algos by default to enforce explicit handling: return true; } // clang-format on diff --git a/src/convolution.cpp b/src/convolution.cpp index 4507b23ab1..5653477fe3 100644 --- a/src/convolution.cpp +++ b/src/convolution.cpp @@ -79,7 +79,7 @@ std::size_t GetWorkSpaceSizeGEMM(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { #if MIOPEN_USE_GEMM - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_GEMM{}) || + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_GEMM)) || miopen::any_of(problem.GetConv().GetConvDilations(), [](auto v) { return v > 1; })) return 0; @@ -94,7 +94,7 @@ std::size_t GetWorkSpaceSizeGEMM(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeImplicitGemm(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM))) return 0; return GetMaxWorkSpaceSize(FindAllImplicitGemmWorkspaceSizes(ctx, problem)); } @@ -102,7 +102,7 @@ std::size_t GetWorkSpaceSizeImplicitGemm(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeDirect(const miopen::ExecutionContext& ctx, 
const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT))) return 0; return GetMaxWorkSpaceSize(AllDirectForwardBackwardDataWorkspaceSize(ctx, problem)); } @@ -110,7 +110,7 @@ std::size_t GetWorkSpaceSizeDirect(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeFFT(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_FFT{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_FFT))) return 0; return GetMaxWorkSpaceSize(AllFFTForwardBackwardDataWorkspaceSize(ctx, problem)); } @@ -118,7 +118,7 @@ std::size_t GetWorkSpaceSizeFFT(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeWinograd(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_WINOGRAD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_WINOGRAD))) return 0; return GetMaxWorkSpaceSize(FindAllWinogradWorkspaceSizes(ctx, problem)); } @@ -126,7 +126,7 @@ std::size_t GetWorkSpaceSizeWinograd(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeDirectWrW(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT))) return 0; return GetMaxWorkSpaceSize(AllDirectBwdWrW2DWorkspaceSize(ctx, problem)); } @@ -134,7 +134,7 @@ std::size_t GetWorkSpaceSizeDirectWrW(const miopen::ExecutionContext& ctx, std::size_t GetWorkSpaceSizeWinogradWrW(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_WINOGRAD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_WINOGRAD))) return 0; return GetMaxWorkSpaceSize(FindWinogradWrWWorkspaceSizes(ctx, problem)); } @@ -142,7 +142,7 @@ std::size_t GetWorkSpaceSizeWinogradWrW(const miopen::ExecutionContext& ctx, std::size_t 
GetWorkSpaceSizeImplicitGemmWrW(const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM))) return 0; return GetMaxWorkSpaceSize(FindImplicitGemmWrWWorkspaceSizes(ctx, problem)); } @@ -385,7 +385,7 @@ TensorDescriptor ConvolutionDescriptor::GetForwardOutputTensor(const TensorDescr bool ConvolutionDescriptor::IsWinograd3x3SupportedAndFast( const miopen::ExecutionContext& ctx, const conv::ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_WINOGRAD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_WINOGRAD))) return false; // Disable this performance optimization when we want to run some specific Solver. @@ -424,7 +424,7 @@ std::size_t ConvolutionDescriptor::GetWorkSpaceSize(ExecutionContext ctx, auto fallback = bool{}; const auto solutions = GetSolutions(ctx, problem, 1, &fallback); if(solutions.empty() || ((findMode.IsHybrid(ctx) && fallback) && - !miopen::IsEnabled(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK{}))) + !miopen::IsEnabled(ENV(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK)))) { ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(ctx); break; // Fall down to Normal Find. diff --git a/src/db_path.cpp.in b/src/db_path.cpp.in index 2526e0e456..53a3886d67 100644 --- a/src/db_path.cpp.in +++ b/src/db_path.cpp.in @@ -65,7 +65,7 @@ boost::filesystem::path GetLibPath() std::string GetSystemDbPath() { - auto p = GetStringEnv(MIOPEN_SYSTEM_DB_PATH{}); + auto p = GetStringEnv(ENV(MIOPEN_SYSTEM_DB_PATH)); if(p.empty()) #if MIOPEN_BUILD_DEV { @@ -89,7 +89,7 @@ boost::filesystem::path PrepareUserDbPath() { /// If MIOPEN_USER_DB_PATH is set in the environment, then assume that the user wants /// the library to use exactly that path. 
- const auto p = GetStringEnv(MIOPEN_USER_DB_PATH{}); + const auto p = GetStringEnv(ENV(MIOPEN_USER_DB_PATH)); if(!p.empty()) return ExpandUser(p); /// \anchor nfs-detection diff --git a/src/execution_context.cpp b/src/execution_context.cpp index fec509e6af..bfecefc56e 100644 --- a/src/execution_context.cpp +++ b/src/execution_context.cpp @@ -85,7 +85,7 @@ static std::ostream& operator<<(std::ostream& os, const rocm_meta_version& rmv) bool rocm_meta_version::UseV3() const { if(val == AMDHSA_COv2_COv3) - return !miopen::IsEnabled(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER{}); + return !miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_ROCM_METADATA_PREFER_OLDER)); return (val == AMDHSA_COv3); } @@ -137,7 +137,7 @@ static bool CalculateIsAmdRocmOpencl(const miopen::ExecutionContext& context) static rocm_meta_version AmdRocmMetadataVersionGetEnv() { const rocm_meta_version val( - static_cast(miopen::Value(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE{}))); + static_cast(miopen::Value(ENV(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE)))); if(!val.IsValid()) { MIOPEN_LOG_W("Incorrect MIOPEN_DEBUG_AMD_ROCM_ENFORCE_MDVERSION = " << val.getValue() @@ -207,9 +207,9 @@ static bool IsAmdRocmOpencl(miopen::ExecutionContext& context) bool IsHipKernelsEnabled() { #if MIOPEN_USE_HIP_KERNELS - return !miopen::IsDisabled(MIOPEN_DEBUG_HIP_KERNELS{}); + return !miopen::IsDisabled(ENV(MIOPEN_DEBUG_HIP_KERNELS)); #else - return miopen::IsEnabled(MIOPEN_DEBUG_HIP_KERNELS{}); + return miopen::IsEnabled(ENV(MIOPEN_DEBUG_HIP_KERNELS)); #endif } @@ -218,13 +218,13 @@ void ExecutionContext::DetectRocm() use_binaries = false; use_asm_kernels = false; use_hip_kernels = IsHipKernelsEnabled(); - use_opencl_convolutions = !IsDisabled(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS{}); + use_opencl_convolutions = !IsDisabled(ENV(MIOPEN_DEBUG_OPENCL_CONVOLUTIONS)); rmv = rocm_meta_version::Default; if(IsAmdRocmOpencl(*this)) { - use_asm_kernels = !IsDisabled(MIOPEN_DEBUG_GCN_ASM_KERNELS{}) && ValidateGcnAssembler(); + use_asm_kernels = 
!IsDisabled(ENV(MIOPEN_DEBUG_GCN_ASM_KERNELS)) && ValidateGcnAssembler(); #ifndef HIP_OC_FINALIZER - use_binaries = !IsDisabled(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES{}); + use_binaries = !IsDisabled(ENV(MIOPEN_DEBUG_AMD_ROCM_PRECOMPILED_BINARIES)); #endif } } diff --git a/src/expanduser.cpp b/src/expanduser.cpp index 1182d9248c..4cdbd2d43e 100644 --- a/src/expanduser.cpp +++ b/src/expanduser.cpp @@ -181,7 +181,7 @@ bool IsNetworkedFilesystem(const boost::filesystem::path& path_) namespace { std::string GetHomeDir() { - const auto p = GetStringEnv(HOME{}); + const auto p = GetStringEnv(ENV(HOME)); if(!(p.empty() || p == std::string("/"))) { return p; diff --git a/src/find_controls.cpp b/src/find_controls.cpp index 6286cb62e4..c9a651f5a7 100644 --- a/src/find_controls.cpp +++ b/src/find_controls.cpp @@ -68,7 +68,7 @@ const char* ToCString(const FindEnforceAction mode) FindEnforceAction GetFindEnforceActionImpl() { - auto str = miopen::GetStringEnv(MIOPEN_FIND_ENFORCE{}); + auto str = miopen::GetStringEnv(ENV(MIOPEN_FIND_ENFORCE)); if(str.empty()) return FindEnforceAction::Default_; for(auto& c : str) @@ -112,7 +112,7 @@ FindEnforceAction GetFindEnforceAction() boost::optional> GetEnvFindOnlySolverImpl() { static_assert(miopen::solver::Id::invalid_value == 0, "miopen::solver::Id::invalid_value == 0"); - const auto& slv_str = miopen::GetStringEnv(MIOPEN_DEBUG_FIND_ONLY_SOLVER{}); + const auto& slv_str = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_FIND_ONLY_SOLVER)); std::vector res; if(!slv_str.empty()) { @@ -194,7 +194,7 @@ std::ostream& operator<<(std::ostream& os, const FindMode::Values& v) FindMode::Values GetFindModeValueImpl2() { - auto str = miopen::GetStringEnv(MIOPEN_FIND_MODE{}); + auto str = miopen::GetStringEnv(ENV(MIOPEN_FIND_MODE)); if(str.empty()) return FindMode::Values::Default_; for(auto& c : str) diff --git a/src/gemm_v2.cpp b/src/gemm_v2.cpp index c973aa1f93..fa1969bfb3 100644 --- a/src/gemm_v2.cpp +++ b/src/gemm_v2.cpp @@ -339,7 +339,7 @@ static 
GemmBackend_t enforce_gemm_backend(miopenDataType_t data_type, // enforce backend based on env variable // I have left the commented lines here to preserve values for the enforce and hint at why are // they 1 and 3 - switch(Value(MIOPEN_GEMM_ENFORCE_BACKEND{})) + switch(Value(ENV(MIOPEN_GEMM_ENFORCE_BACKEND))) { case 1: gemm_backend_env = GemmBackend_t::rocblas; break; // case 2: gemm_backend_env = GemmBackend_t::miopengemm; break; diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 14443d829f..7a67ab99f6 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -33,14 +33,14 @@ namespace miopen { namespace solver { -std::size_t GetTuningIterationsMax() { return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}); } +std::size_t GetTuningIterationsMax() { return Value(ENV(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX)); } std::chrono::milliseconds GetTuningTimeMax() { - return std::chrono::milliseconds{Value(MIOPEN_TUNING_TIME_MS_MAX{})}; + return std::chrono::milliseconds{Value(ENV(MIOPEN_TUNING_TIME_MS_MAX))}; } -std::size_t GetTuningThreadsMax() { return Value(MIOPEN_COMPILE_PARALLEL_LEVEL{}); } +std::size_t GetTuningThreadsMax() { return Value(ENV(MIOPEN_COMPILE_PARALLEL_LEVEL)); } } // namespace solver } // namespace miopen diff --git a/src/hip/handlehip.cpp b/src/hip/handlehip.cpp index e5582f4125..e071782463 100644 --- a/src/hip/handlehip.cpp +++ b/src/hip/handlehip.cpp @@ -642,7 +642,7 @@ std::size_t Handle::GetGlobalMemorySize() const std::size_t Handle::GetMaxComputeUnits() const { - const std::size_t num_cu = Value(MIOPEN_DEVICE_CU{}); + const std::size_t num_cu = Value(ENV(MIOPEN_DEVICE_CU)); if(num_cu > 0) return num_cu; diff --git a/src/hip/hip_build_utils.cpp b/src/hip/hip_build_utils.cpp index a7ec36345a..552618f1ae 100644 --- a/src/hip/hip_build_utils.cpp +++ b/src/hip/hip_build_utils.cpp @@ -91,11 +91,11 @@ static boost::filesystem::path HipBuildImpl(boost::optional& tmp_dir, #endif #if MIOPEN_BUILD_DEV - 
if(miopen::IsEnabled(MIOPEN_DEBUG_HIP_VERBOSE{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_HIP_VERBOSE))) { params += " -v"; } - if(miopen::IsEnabled(MIOPEN_DEBUG_HIP_DUMP{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_HIP_DUMP))) { params += " -gline-tables-only"; params += " -save-temps"; diff --git a/src/hipoc/hipoc_kernel.cpp b/src/hipoc/hipoc_kernel.cpp index e3876ed134..1b72ebfc7c 100644 --- a/src/hipoc/hipoc_kernel.cpp +++ b/src/hipoc/hipoc_kernel.cpp @@ -79,7 +79,7 @@ void HIPOCKernelInvoke::run(void* args, std::size_t size) const stop = make_hip_event(); } - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) { MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); diff --git a/src/hipoc/hipoc_program.cpp b/src/hipoc/hipoc_program.cpp index 1af5ecc33a..b66a848966 100644 --- a/src/hipoc/hipoc_program.cpp +++ b/src/hipoc/hipoc_program.cpp @@ -73,7 +73,7 @@ namespace { int DetectCodeObjectOptionSyntax() { - auto syntax = miopen::Value(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION{}); + auto syntax = miopen::Value(ENV(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION)); if(syntax > 4) { MIOPEN_LOG_E("Bad MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_OPTION, using default"); @@ -94,7 +94,7 @@ int DetectCodeObjectOptionSyntax() int DetectCodeObjectVersion() { - auto co_version = miopen::Value(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION{}); + auto co_version = miopen::Value(ENV(MIOPEN_DEBUG_OPENCL_ENFORCE_CODE_OBJECT_VERSION)); // Very basic syntax check: if(co_version == 1 || co_version > 4) { @@ -190,7 +190,7 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, const std::string& blob) : program(program_name) ///, module(CreateModuleInMem(blob)) { - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = 
miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) return; module = CreateModuleInMem(blob); @@ -210,7 +210,7 @@ HIPOCProgramImpl::HIPOCProgramImpl(const std::string& program_name, } else { - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(arch.empty()) { module = CreateModule(hsaco_file); @@ -251,7 +251,7 @@ void HIPOCProgramImpl::BuildCodeObjectInFile(std::string& params, else { params += " " + GetCodeObjectVersionOption(); - if(miopen::IsEnabled(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP))) params += " -mwavefrontsize64 -mcumode"; WriteFile(src, dir->path / filename); params += " -target amdgcn-amd-amdhsa -x cl -D__AMD__=1 -O3"; @@ -285,7 +285,7 @@ void HIPOCProgramImpl::BuildCodeObjectInMemory(const std::string& params, if(miopen::EndsWith(filename, ".cpp")) { #if MIOPEN_USE_HIPRTC - if(!miopen::IsDisabled(MIOPEN_DEBUG_USE_HIPRTC{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_USE_HIPRTC))) hiprtc::BuildHip(filename, src, params, target, binary); else #endif // MIOPEN_USE_HIPRTC diff --git a/src/include/miopen/convolution.hpp b/src/include/miopen/convolution.hpp index 3351df54a0..340414d605 100644 --- a/src/include/miopen/convolution.hpp +++ b/src/include/miopen/convolution.hpp @@ -76,8 +76,8 @@ struct ConvolutionAttribute inline int Get() const { - if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL{}); + if(!miopen::IsUnset(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL))) + return miopen::Value(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP16_ALT_IMPL)); return value; } @@ -105,17 +105,17 @@ struct ConvolutionAttribute inline miopenF8RoundingMode_t Get() const { - if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})) + if(!miopen::IsUnset(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE))) return 
static_cast( - miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE{})); + miopen::Value(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE))); return rounding_mode; } inline uint32_t GetSeed() const { // assert(rounding_mode == miopenF8RoundingModeStochastic); - if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{})) - return miopen::Value(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED{}); + if(!miopen::IsUnset(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED))) + return miopen::Value(ENV(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED)); return seed; } @@ -130,9 +130,9 @@ struct ConvolutionAttribute public: inline int Get() const { - if(!miopen::IsUnset(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) + if(!miopen::IsUnset(ENV(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC))) return static_cast( - miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})); + miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC))); return value; } operator bool() const diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 140dc9d46d..c8e28e1b9d 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -128,16 +128,21 @@ struct EnvVar // static inside function hides the variable and provides // thread-safety/locking +// declare in global namespace #define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ + namespace miopen::env { \ struct name \ { \ + static_assert(std::is_same_v, \ + "must be in miopen::env and must be unique"); \ using value_type = type; \ static miopen::internal::EnvVar& Ref() \ { \ static miopen::internal::EnvVar var{#name, default_val}; \ return var; \ } \ - }; + }; \ + } #define MIOPEN_DECLARE_ENV_VAR_BOOL(name) MIOPEN_DECLARE_ENV_VAR(name, bool, false) @@ -145,6 +150,9 @@ struct EnvVar #define MIOPEN_DECLARE_ENV_VAR_STR(name) MIOPEN_DECLARE_ENV_VAR(name, std::string, "") +#define ENV(name) \ + miopen::env::name {} + /// \todo the following functions should be renamed to either 
include the word Env /// or put inside a namespace 'env'. Right now we have a function named Value() /// that returns env var value as only 64-bit ints diff --git a/src/include/miopen/find_db.hpp b/src/include/miopen/find_db.hpp index becdd014c8..70c8b7b13a 100644 --- a/src/include/miopen/find_db.hpp +++ b/src/include/miopen/find_db.hpp @@ -97,7 +97,7 @@ class FindDbRecord_t ? *debug::testing_find_db_path_override() : GetInstalledPath(handle, path_suffix)), db(boost::make_optional>(debug::testing_find_db_enabled && - !IsEnabled(MIOPEN_DEBUG_DISABLE_FIND_DB{}), + !IsEnabled(ENV(MIOPEN_DEBUG_DISABLE_FIND_DB)), DbTimer{installed_path, path})) { if(!db.is_initialized()) @@ -118,7 +118,7 @@ class FindDbRecord_t db(boost::optional>{}) #else db(boost::make_optional>(debug::testing_find_db_enabled && - !IsEnabled(MIOPEN_DEBUG_DISABLE_FIND_DB{}), + !IsEnabled(ENV(MIOPEN_DEBUG_DISABLE_FIND_DB)), DbTimer{path, false})) #endif { diff --git a/src/include/miopen/generic_search.hpp b/src/include/miopen/generic_search.hpp index 4eed74b037..bcb22f8fc2 100644 --- a/src/include/miopen/generic_search.hpp +++ b/src/include/miopen/generic_search.hpp @@ -427,7 +427,7 @@ auto GenericSearch(const Solver s, std::ref(solution_queue)); } - if(!IsEnabled(MIOPEN_DEBUG_COMPILE_ONLY{})) + if(!IsEnabled(ENV(MIOPEN_DEBUG_COMPILE_ONLY))) { size_t n_current = 0; auto threads_remaining = total_threads; diff --git a/src/include/miopen/generic_search_controls.hpp b/src/include/miopen/generic_search_controls.hpp index 0351fdb99e..97092e3981 100644 --- a/src/include/miopen/generic_search_controls.hpp +++ b/src/include/miopen/generic_search_controls.hpp @@ -30,9 +30,6 @@ #include #include -namespace miopen { -namespace solver { - MIOPEN_DECLARE_ENV_VAR(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX, uint64_t, std::numeric_limits::max()) @@ -48,6 +45,3 @@ MIOPEN_DECLARE_ENV_VAR(MIOPEN_COMPILE_PARALLEL_LEVEL, std::thread::hardware_concurrency() / 2) #endif MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) - -} // 
namespace solver -} // namespace miopen diff --git a/src/include/miopen/solver/ck_utility_common.hpp b/src/include/miopen/solver/ck_utility_common.hpp index 42af5b60ad..9ef01376ea 100644 --- a/src/include/miopen/solver/ck_utility_common.hpp +++ b/src/include/miopen/solver/ck_utility_common.hpp @@ -130,12 +130,14 @@ static inline auto get_ck_common_compiler_flag(const Handle& handle) // sync LDS compiler_flag << " -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=" - << (miopen::IsDisabled(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM{}) ? '0' - : '1'); + << (miopen::IsDisabled(ENV(MIOPEN_DEBUG_CK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM)) + ? '0' + : '1'); // buffer addressing compiler_flag << " -DCK_USE_AMD_BUFFER_ADDRESSING=" - << (miopen::IsDisabled(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING{}) ? '0' : '1'); + << (miopen::IsDisabled(ENV(MIOPEN_DEBUG_CK_USE_AMD_BUFFER_ADDRESSING)) ? '0' + : '1'); return compiler_flag.str(); } diff --git a/src/include/miopen/solver/implicitgemm_util.hpp b/src/include/miopen/solver/implicitgemm_util.hpp index b38803407a..2077332fe8 100644 --- a/src/include/miopen/solver/implicitgemm_util.hpp +++ b/src/include/miopen/solver/implicitgemm_util.hpp @@ -206,7 +206,7 @@ inline static bool NextFlag(bool& v) static inline bool IsXdlopsSupport(const ExecutionContext& ctx) { - if(miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE))) return true; // disable xdlops kernels by default due to possible failures: @@ -215,7 +215,7 @@ static inline bool IsXdlopsSupport(const ExecutionContext& ctx) const bool is_xdlops_supported = StartsWith(ctx.GetStream().GetDeviceName(), "gfx908") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx90a") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx94"); - return is_xdlops_supported && !miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS{}); + return is_xdlops_supported && 
!miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS)); } ///\todo remove @@ -444,7 +444,7 @@ static inline bool use_amd_inline_asm(const ExecutionContext& ctx, problem.IsFp16()) return false; - return !miopen::IsDisabled(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM{}); + return !miopen::IsDisabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_NON_XDLOPS_INLINE_ASM)); } static inline bool is_use_amd_buffer_load_store(const ExecutionContext& ctx) @@ -553,7 +553,7 @@ static inline auto get_static_ck_common_compiler_flag(const ExecutionContext& ct // LDS sync compiler_flag += std::string(" -DCK_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM=") + - (miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM{}) + (miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM)) ? '0' : '1'); diff --git a/src/include/miopen/sqlite_db.hpp b/src/include/miopen/sqlite_db.hpp index e10863d502..4a628ed621 100644 --- a/src/include/miopen/sqlite_db.hpp +++ b/src/include/miopen/sqlite_db.hpp @@ -52,6 +52,9 @@ #include #include +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_DISABLE_SQL_WAL) +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_PERFDB_OVERRIDE) + namespace boost { namespace filesystem { class path; @@ -59,8 +62,6 @@ class path; } // namespace boost namespace miopen { -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_DISABLE_SQL_WAL) -MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DEBUG_PERFDB_OVERRIDE) constexpr bool InMemDb = MIOPEN_EMBED_DB; #if MIOPEN_ENABLE_SQLITE_BACKOFF @@ -286,7 +287,7 @@ class SQLiteBase else { dbInvalid = false; - if(!is_system && !miopen::IsEnabled(MIOPEN_DEBUG_DISABLE_SQL_WAL{})) + if(!is_system && !miopen::IsEnabled(ENV(MIOPEN_DEBUG_DISABLE_SQL_WAL))) { auto res = sql.Exec("PRAGMA journal_mode=WAL;"); if(res.empty() || res[0]["journal_mode"] != "wal") @@ -451,7 +452,7 @@ class SQLitePerfDb : public SQLiteBase if(dbInvalid) return boost::none; - const auto& pdb_ovr = miopen::GetStringEnv(MIOPEN_DEBUG_PERFDB_OVERRIDE{}); + const auto& 
pdb_ovr = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_PERFDB_OVERRIDE)); if(!pdb_ovr.empty()) { MIOPEN_LOG_I2("overriding tuning params with: " << pdb_ovr); diff --git a/src/kernel_cache.cpp b/src/kernel_cache.cpp index 908b007228..88e119bdcd 100644 --- a/src/kernel_cache.cpp +++ b/src/kernel_cache.cpp @@ -126,7 +126,7 @@ Kernel KernelCache::AddKernel(const Handle& h, } Kernel kernel{}; - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) { kernel = Kernel{program, kernel_name}; diff --git a/src/logger.cpp b/src/logger.cpp index 413b6e0c5d..0ff18acf45 100644 --- a/src/logger.cpp +++ b/src/logger.cpp @@ -37,8 +37,6 @@ #include /* For SYS_xxx definitions */ #endif -namespace miopen { - /// Enable logging of the most important function calls. /// Name of envvar in a bit inadequate due to historical reasons. MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING) @@ -63,15 +61,17 @@ MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_LOG_LEVEL) /// Enable logging of function calls to ROCTX api. MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_ENABLE_LOGGING_ROCTX) +/// Disable logging quieting. +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE) + +namespace miopen { + namespace debug { bool LoggingQuiet = false; // NOLINT (cppcoreguidelines-avoid-non-const-global-variables) } // namespace debug -/// Disable logging quieting. 
-MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE) - namespace { inline bool operator!=(const int& lhs, const LoggingLevel& rhs) @@ -118,22 +118,22 @@ inline float GetTimeDiff() bool IsLoggingDebugQuiet() { - return debug::LoggingQuiet && !miopen::IsEnabled(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE{}); + return debug::LoggingQuiet && !miopen::IsEnabled(ENV(MIOPEN_DEBUG_LOGGING_QUIETING_DISABLE)); } bool IsLoggingFunctionCalls() { - return miopen::IsEnabled(MIOPEN_ENABLE_LOGGING{}) && !IsLoggingDebugQuiet(); + return miopen::IsEnabled(ENV(MIOPEN_ENABLE_LOGGING)) && !IsLoggingDebugQuiet(); } bool IsLoggingToRoctx() { - return miopen::IsEnabled(MIOPEN_ENABLE_LOGGING_ROCTX{}) && !IsLoggingDebugQuiet(); + return miopen::IsEnabled(ENV(MIOPEN_ENABLE_LOGGING_ROCTX)) && !IsLoggingDebugQuiet(); } bool IsLogging(const LoggingLevel level, const bool disableQuieting) { - auto enabled_level = miopen::Value(MIOPEN_LOG_LEVEL{}); + auto enabled_level = miopen::Value(ENV(MIOPEN_LOG_LEVEL)); if(IsLoggingDebugQuiet() && !disableQuieting) { // Disable all levels higher than fatal. 
@@ -166,13 +166,13 @@ const char* LoggingLevelToCString(const LoggingLevel level) } bool IsLoggingCmd() { - return miopen::IsEnabled(MIOPEN_ENABLE_LOGGING_CMD{}) && !IsLoggingDebugQuiet(); + return miopen::IsEnabled(ENV(MIOPEN_ENABLE_LOGGING_CMD)) && !IsLoggingDebugQuiet(); } std::string LoggingPrefix() { std::stringstream ss; - if(miopen::IsEnabled(MIOPEN_ENABLE_LOGGING_MPMT{})) + if(miopen::IsEnabled(ENV(MIOPEN_ENABLE_LOGGING_MPMT))) { ss << GetProcessAndThreadId() << ' '; } @@ -182,7 +182,7 @@ std::string LoggingPrefix() #elif MIOPEN_BACKEND_HIP ss << "(HIP)"; #endif - if(miopen::IsEnabled(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME{})) + if(miopen::IsEnabled(ENV(MIOPEN_ENABLE_LOGGING_ELAPSED_TIME))) { ss << std::fixed << std::setprecision(3) << std::setw(8) << GetTimeDiff(); } diff --git a/src/mlo_dir_conv.cpp b/src/mlo_dir_conv.cpp index a205738048..5ad7f5f84c 100644 --- a/src/mlo_dir_conv.cpp +++ b/src/mlo_dir_conv.cpp @@ -256,7 +256,7 @@ FindAllImplicitGemmWorkspaceSizes(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { #if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) return GetImplicitGemmSolvers().GetWorkspaceSizes(ctx, problem); else return GetImplicitGemmSolvers().GetWorkspaceSizes(ctx, problem, 1); @@ -271,7 +271,7 @@ FindAllImplicitGemmSolutions(const miopen::ExecutionContext& ctx, const miopen::AnyInvokeParams& invoke_ctx) { #if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) return GetImplicitGemmSolvers().SearchForAllSolutions(ctx, problem, GetDb(ctx), invoke_ctx); else return GetImplicitGemmSolvers().SearchForAllSolutions( @@ -309,7 +309,7 @@ FindImplicitGemmWrWWorkspaceSizes(const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { #if 
WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) return GetImplicitGemmWrWSolvers().GetWorkspaceSizes(ctx, problem); else return GetImplicitGemmWrWSolvers().GetWorkspaceSizes(ctx, problem, 1); @@ -324,7 +324,7 @@ FindImplicitGemmWrWAllSolutions(const miopen::ExecutionContext& ctx, const miopen::AnyInvokeParams& invoke_ctx) { #if WORKAROUND_SWDEV_227826 - if(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_FIND_ALL_SOLUTIONS))) return GetImplicitGemmWrWSolvers().SearchForAllSolutions( ctx, problem, GetDb(ctx), invoke_ctx); else diff --git a/src/ocl/clhelper.cpp b/src/ocl/clhelper.cpp index a47e1d2203..40fea57b15 100644 --- a/src/ocl/clhelper.cpp +++ b/src/ocl/clhelper.cpp @@ -211,7 +211,7 @@ ClProgramPtr LoadProgram(cl_context ctx, else // OpenCL programs. { ClProgramPtr result{CreateProgram(ctx, source.data(), source.size())}; - if(miopen::IsEnabled(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP))) params += " -Wf,-mwavefrontsize64 -Wf,-mcumode"; #if MIOPEN_BUILD_DEV params += " -Werror"; diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 8dee5a0c8c..0185717712 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -56,8 +56,6 @@ #include -namespace miopen { - MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMMED_FALLBACK) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) @@ -65,6 +63,8 @@ MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DUMP_TENSOR_PATH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) +namespace miopen { + static inline bool IsValidFilterChannelNumber(const TensorDescriptor& x, const TensorDescriptor& w, const 
miopenTensorLayout_t layout, @@ -204,7 +204,7 @@ static inline std::vector FindConvolution(const ExecutionContext& ctx auto sols = conv.GetSolutions(ctx, problem, 1, &fallback); // override the normal find with immed mode with env var if(!sols.empty() && (!(findMode.IsHybrid(ctx) && fallback) || - miopen::IsEnabled(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK{}))) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK)))) sol = sols.front(); // In Hybrid Find mode, we use Normal Find instead of Immediate fallback kernels. } @@ -230,7 +230,7 @@ static inline std::vector FindConvolution(const ExecutionContext& ctx }); } - if(IsEnabled(MIOPEN_DEBUG_COMPILE_ONLY{})) + if(IsEnabled(ENV(MIOPEN_DEBUG_COMPILE_ONLY))) { MIOPEN_THROW( miopenStatusGpuOperationsSkipped, @@ -391,7 +391,7 @@ static void ConvForwardCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.yDesc, tensors.y); - const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(ENV(MIOPEN_DUMP_TENSOR_PATH)); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.xDesc, tensors.x, file_name + "_x.bin"); @@ -591,7 +591,7 @@ ConvolutionDescriptor::GetSolutionsFallback(const ExecutionContext& ctx, const conv::ProblemDescription& problem, const size_t maxSolutionCount) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMMED_FALLBACK{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMMED_FALLBACK))) { MIOPEN_LOG_I("Disabled via environment"); return {}; @@ -609,7 +609,7 @@ ConvolutionDescriptor::GetSolutionsFallback(const ExecutionContext& ctx, // TunaNet Fallback #if MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK - if(!miopen::IsDisabled(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK))) { const static std::string arch = ctx.GetStream().GetDeviceName(); auto solvers = ai::immed_mode::PredictSolver(problem, ctx, arch); @@ -920,7 +920,7 @@ 
static void ConvBwdCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.dxDesc, tensors.dx); - const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(ENV(MIOPEN_DUMP_TENSOR_PATH)); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); @@ -1127,7 +1127,7 @@ static void ConvWrwCheckNumerics(const Handle& handle, flag |= miopen::checkNumericsOutput(handle, tensors.dwDesc, tensors.dw); - const auto& file_name = miopen::GetStringEnv(MIOPEN_DUMP_TENSOR_PATH{}); + const auto& file_name = miopen::GetStringEnv(ENV(MIOPEN_DUMP_TENSOR_PATH)); if(flag && !file_name.empty()) { DumpTensorToFileFromDevice(handle, tensors.dyDesc, tensors.dy, file_name + "_dy.bin"); diff --git a/src/ocl/gcn_asm_utils.cpp b/src/ocl/gcn_asm_utils.cpp index fbf82229ce..23020f2278 100644 --- a/src/ocl/gcn_asm_utils.cpp +++ b/src/ocl/gcn_asm_utils.cpp @@ -70,7 +70,7 @@ static std::string CleanupPath(const char* p); std::string GetGcnAssemblerPathImpl() { - const auto& asm_path_env_p = miopen::GetStringEnv(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH{}); + const auto& asm_path_env_p = miopen::GetStringEnv(ENV(MIOPEN_EXPERIMENTAL_GCN_ASM_PATH)); if(!asm_path_env_p.empty()) { return CleanupPath(asm_path_env_p.c_str()); diff --git a/src/ocl/rnnocl.cpp b/src/ocl/rnnocl.cpp index c7206c67a6..131d69db46 100644 --- a/src/ocl/rnnocl.cpp +++ b/src/ocl/rnnocl.cpp @@ -2500,7 +2500,7 @@ void RNNDescriptor::RNNForwardTrainingPackedTensors( if(rnnMode == miopenLSTM && algoMode == miopenRNNdefault && !use_dropout && nLayers > 1 && dirMode == miopenRNNunidirection && inputMode != miopenRNNskip && - !(miopen::IsDisabled(MIOPEN_RNNFWD_exp{})) && xDesc[0].GetType() == miopenFloat && + !(miopen::IsDisabled(ENV(MIOPEN_RNNFWD_exp))) && xDesc[0].GetType() == miopenFloat && seqLen >= 32) { RNNForwardTraining_MS(handle, diff --git a/src/ocl_kernel.cpp b/src/ocl_kernel.cpp index 
e34c42f170..da047071aa 100644 --- a/src/ocl_kernel.cpp +++ b/src/ocl_kernel.cpp @@ -58,7 +58,7 @@ void OCLKernelInvoke::run() const MIOPEN_HANDLE_LOCK - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) { MIOPEN_THROW("MIOPEN_DEVICE_ARCH used, escaping launching kernel"); diff --git a/src/reducetensor.cpp b/src/reducetensor.cpp index 5b6217a14e..eb1ce895b1 100644 --- a/src/reducetensor.cpp +++ b/src/reducetensor.cpp @@ -546,7 +546,7 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, int blockSize; - if(!miopen::IsDisabled(MIOPEN_DEBUG_DYNAMIC_REDUCTION{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_DYNAMIC_REDUCTION))) { const tunable_generic_reduction* tunable = &default_tunable_generic_reduction; blockSize = tunable->BlockSize; @@ -571,7 +571,7 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, 64 + sizeof(int); // dynamic reduction use one additional page for storing tensor descriptors - if(!miopen::IsDisabled(MIOPEN_DEBUG_DYNAMIC_REDUCTION{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_DYNAMIC_REDUCTION))) wsSizeInBytes += 4096; return (wsSizeInBytes); @@ -638,7 +638,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, const tunable_generic_reduction* tunable = &default_tunable_generic_reduction; const int blockSize = - !miopen::IsDisabled(MIOPEN_DEBUG_DYNAMIC_REDUCTION{}) ? tunable->BlockSize : 256; + !miopen::IsDisabled(ENV(MIOPEN_DEBUG_DYNAMIC_REDUCTION)) ? tunable->BlockSize : 256; detail::ReductionKernelConfigurator configurator(blockSize, handle.GetWavefrontWidth()); const bool need_indices = @@ -721,7 +721,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, ? 
static_cast(*reinterpret_cast(beta)) : *reinterpret_cast(beta); - if(miopen::IsDisabled(MIOPEN_DEBUG_DYNAMIC_REDUCTION{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_DYNAMIC_REDUCTION))) { // use static reduction std::vector invariantLengths; std::vector invariantStrides; diff --git a/src/solver.cpp b/src/solver.cpp index 5aefe0486a..66810637b1 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -616,7 +616,7 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) bool ThisSolverIsDeprecatedStatic::IsDisabled(const ExecutionContext& ctx) { static const bool device_is_allowed = [&]() { - if(miopen::IsEnabled(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_ENABLE_DEPRECATED_SOLVERS))) return true; const auto device = ctx.GetStream().GetTargetProperties().Name(); return device == "gfx803" // Fiji diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index b553e5ead7..36e5cc0cdc 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -191,7 +191,7 @@ bool BnCKBwdBackward::IsApplicable( [[maybe_unused]] const miopen::batchnorm::ProblemDescription& bn_problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_BN_BACK{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_CK_BN_BACK))) return false; if(!bn_problem.IsLayoutNHWC()) return false; diff --git a/src/solver/batchnorm/backward_per_activation_fused.cpp b/src/solver/batchnorm/backward_per_activation_fused.cpp index 32f66eb94c..e2e1a98e9a 100644 --- a/src/solver/batchnorm/backward_per_activation_fused.cpp +++ b/src/solver/batchnorm/backward_per_activation_fused.cpp @@ -47,7 +47,7 @@ bool BnBwdTrgActivationFused::IsApplicable(const FusionContext& /*context*/, const auto& desc = *problem.fusion_plan_desc; if(desc.op_map.empty()) MIOPEN_THROW(""); - if(miopen::IsDisabled(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED{})) + 
if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_BN_BWDTRG_ACTIV_FUSED))) return false; if(desc.op_map.size() != 2) return false; diff --git a/src/solver/batchnorm/forward_inference_ck.cpp b/src/solver/batchnorm/forward_inference_ck.cpp index b9afd73ed5..a15e60dac2 100644 --- a/src/solver/batchnorm/forward_inference_ck.cpp +++ b/src/solver/batchnorm/forward_inference_ck.cpp @@ -180,7 +180,7 @@ bool BnCKFwdInference::IsApplicable( [[maybe_unused]] const miopen::batchnorm::ProblemDescription& bn_problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_BN_INFER{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_CK_BN_INFER))) return false; if(!bn_problem.IsLayoutNHWC()) return false; diff --git a/src/solver/batchnorm/forward_inference_fused.cpp b/src/solver/batchnorm/forward_inference_fused.cpp index a057e1995a..57a49049e2 100644 --- a/src/solver/batchnorm/forward_inference_fused.cpp +++ b/src/solver/batchnorm/forward_inference_fused.cpp @@ -47,7 +47,7 @@ bool BnFwdInferActivationFused::IsApplicable(const FusionContext& /*context*/, const auto& desc = *problem.fusion_plan_desc; if(desc.op_map.empty()) MIOPEN_THROW(""); - if(miopen::IsDisabled(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_BN_FWDINFER_ACTIV_FUSED))) return false; if(desc.op_map.size() != 2) return false; diff --git a/src/solver/batchnorm/forward_per_activation_fused.cpp b/src/solver/batchnorm/forward_per_activation_fused.cpp index 189315b913..9b5c3f97ca 100644 --- a/src/solver/batchnorm/forward_per_activation_fused.cpp +++ b/src/solver/batchnorm/forward_per_activation_fused.cpp @@ -46,7 +46,7 @@ bool BnFwdTrgActivationFused::IsApplicable(const FusionContext& /*context*/, const auto& desc = *problem.fusion_plan_desc; if(desc.op_map.empty()) MIOPEN_THROW(""); - if(miopen::IsDisabled(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_BN_FWDTRG_ACTIV_FUSED))) return false; if(desc.op_map.size() 
!= 2) return false; diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index 09fe5adf21..f63a65ae16 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -183,7 +183,7 @@ bool BnCKFwdTraining::IsApplicable( [[maybe_unused]] const miopen::batchnorm::ProblemDescription& bn_problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_CK_BN_FWD_TRAINING))) return false; if(!bn_problem.IsLayoutNHWC()) return false; diff --git a/src/solver/conv_MP_bidirectional_winograd.cpp b/src/solver/conv_MP_bidirectional_winograd.cpp index d979dce041..dd1bdb1975 100644 --- a/src/solver/conv_MP_bidirectional_winograd.cpp +++ b/src/solver/conv_MP_bidirectional_winograd.cpp @@ -57,12 +57,6 @@ #define IS_DISABLED(expr) miopen::IsDisabled(expr) #endif -namespace miopen { -namespace solver { -namespace conv { - -using ProblemDescription = miopen::conv::ProblemDescription; - MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3) @@ -81,6 +75,12 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TR MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) +namespace miopen { +namespace solver { +namespace conv { + +using ProblemDescription = miopen::conv::ProblemDescription; + // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. shader C is batch_size etc and useful for // programming. 
@@ -160,7 +160,7 @@ static bool IsApplicableGEMM(const ProblemDescription& problem) #if(MIOPEN_BACKEND_HIP && MIOPEN_USE_ROCBLAS) const miopenDataType_t transform_data_type = - miopen::IsEnabled(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM{}) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM)) ? problem.GetInDataType() : miopenFloat; @@ -207,7 +207,7 @@ static bool IsApplicableTransform(const ExecutionContext& ctx, const ProblemDesc #endif { - std::size_t limit = miopen::Value(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX{}); + std::size_t limit = miopen::Value(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_WORKSPACE_MAX)); #if WORKAROUND_SWDEV_203031 if(limit == 0) { @@ -246,7 +246,7 @@ static bool IsApplicableTransform(const ExecutionContext& ctx, const ProblemDesc DEFINE_SHADER_ALIASES(problem) { const miopenDataType_t transform_data_type = - miopen::IsEnabled(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM{}) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM)) ? 
problem.GetInDataType() : miopenFloat; @@ -341,27 +341,27 @@ bool ConvMPBidirectWinograd::IsA if(wino_data_tile == 6 && wino_filter_tile == 3) { - if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3{})) + if(IS_DISABLED(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F6X3))) return false; } if(wino_data_tile == 5 && wino_filter_tile == 3) { - if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3{})) + if(IS_DISABLED(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F5X3))) return false; } if(wino_data_tile == 4 && wino_filter_tile == 3) { - if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3{})) + if(IS_DISABLED(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F4X3))) return false; } if(wino_data_tile == 3 && wino_filter_tile == 3) { - if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3{})) + if(IS_DISABLED(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F3X3))) return false; } if(wino_data_tile == 2 && wino_filter_tile == 3) { - if(IS_DISABLED(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3{})) + if(IS_DISABLED(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_F2X3))) return false; } @@ -373,7 +373,7 @@ size_t ConvMPBidirectWinograd::G const ExecutionContext&, const ProblemDescription& problem) const { const miopenDataType_t transform_data_type = - miopen::IsEnabled(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM{}) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM)) ? problem.GetInDataType() : miopenFloat; @@ -428,7 +428,7 @@ static InvokerFactory MakeWinogradInvokerFactory(const ExecutionContext& ctx, GetTypeSize(problem.GetWeightsDataType())); const miopenDataType_t transform_data_type = - miopen::IsEnabled(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM{}) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM)) ? 
problem.GetInDataType() : miopenFloat; auto wino_in = GetWinoBuffer( @@ -664,7 +664,7 @@ ConvSolution ConvMPBidirectWinograd g_wk{g_wk_0, 1, 1}; const miopenDataType_t transform_data_type = - miopen::IsEnabled(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM{}) + miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_MP_BD_WINOGRAD_EXPEREMENTAL_FP16_TRANSFORM)) ? problem.GetInDataType() : miopenFloat; std::ostringstream options_in; @@ -756,7 +756,7 @@ ProblemDescription ConvMPBidirectWinograd_xdlops @@ -866,27 +866,27 @@ bool ConvMPBidirectWinograd_xdlops(read_size)) break; - if(!miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED))) { /// Narrow search space in optimized mode. if(use_spare_set ? !Next_1_4(k_mult) : !NextTwoPower<8, 32>(k_mult)) @@ -205,7 +205,7 @@ bool PerformanceConfigConvAsm1x1U::SetNextValue(const ProblemDescription&) PerformanceConfigConvAsm1x1U::PerformanceConfigConvAsm1x1U(bool spare) : PerformanceConfigConvAsm1x1U(1, 1, 1, 1, 1, 1, 1, 1, spare) { - if(!miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_SEARCH_OPTIMIZED))) { k_mult = spare ? 1 : 8; chunk_size = spare ? 
1 : 16; @@ -391,7 +391,7 @@ bool PerformanceConfigConvAsm1x1U::ModelApplyToken(int index, static bool IsModelApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_AI_HEUR))) return false; if(ctx.GetStream().GetDeviceName() != "gfx908") return false; @@ -522,7 +522,7 @@ bool ConvAsm1x1U::IsValidPerformanceConfig(const ExecutionContext&, bool ConvAsm1x1U::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -827,7 +827,7 @@ ConvSolution ConvAsm1x1U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1U fromEnv; { - const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1U_PERF_VALS)); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) diff --git a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp index b3f0b0f77b..4532b7ffa0 100644 --- a/src/solver/conv_asm_1x1u_bias_activ_fused.cpp +++ b/src/solver/conv_asm_1x1u_bias_activ_fused.cpp @@ -222,7 +222,7 @@ bool ConvBiasActivAsm1x1U::IsApplicable(const FusionContext& context, { MIOPEN_THROW(""); } - if(miopen::IsDisabled(MIOPEN_DEBUG_GCN_ASM_KERNELS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_GCN_ASM_KERNELS))) return false; // check the sequence of prims if(desc.op_map.size() > 3) diff --git a/src/solver/conv_asm_1x1u_stride2.cpp b/src/solver/conv_asm_1x1u_stride2.cpp index e87ac91334..605d638264 100644 --- a/src/solver/conv_asm_1x1u_stride2.cpp +++ b/src/solver/conv_asm_1x1u_stride2.cpp @@ -202,7 +202,7 @@ bool PerformanceConfigConvAsm1x1UV2::SetNextValue(const ProblemDescription&) // Increment with wrap-around: do { - if(!miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED))) { if(!IncPack<16, 32, 64>(chunk_size)) break; @@ -263,7 +263,7 @@ bool PerformanceConfigConvAsm1x1UV2::SetNextValue(const ProblemDescription&) PerformanceConfigConvAsm1x1UV2::PerformanceConfigConvAsm1x1UV2(bool spare) : PerformanceConfigConvAsm1x1UV2(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, spare) { - if(!miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED{})) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_SEARCH_OPTIMIZED))) { k_mult = spare ? 
1 : 8; chunk_size = 16; @@ -482,7 +482,7 @@ bool ConvAsm1x1UV2::IsValidPerformanceConfig(const ExecutionContext&, bool ConvAsm1x1UV2::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -609,7 +609,7 @@ ConvSolution ConvAsm1x1UV2::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm1x1UV2 fromEnv; { - const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_1X1UV2_PERF_VALS)); if(!s.empty()) // else nothing to parse. { if(!fromEnv.Deserialize(s) || !fromEnv.IsValidValue()) diff --git a/src/solver/conv_asm_3x3u.cpp b/src/solver/conv_asm_3x3u.cpp index 081c828aa1..5c1f6a9045 100644 --- a/src/solver/conv_asm_3x3u.cpp +++ b/src/solver/conv_asm_3x3u.cpp @@ -171,7 +171,7 @@ bool ConvAsm3x3U::IsValidPerformanceConfig(const ExecutionContext&, bool ConvAsm3x3U::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -254,7 +254,7 @@ ConvSolution ConvAsm3x3U::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsm3x3U fromEnv; { - const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_3X3U_PERF_VALS)); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(problem)) diff --git a/src/solver/conv_asm_5x10u2v2b1.cpp b/src/solver/conv_asm_5x10u2v2b1.cpp index 6c64ea1cf4..7f85c3d341 100644 --- a/src/solver/conv_asm_5x10u2v2b1.cpp +++ b/src/solver/conv_asm_5x10u2v2b1.cpp @@ -42,7 +42,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvAsm5x10u2v2b1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2))) return false; if(!ctx.use_asm_kernels) return false; diff --git a/src/solver/conv_asm_5x10u2v2f1.cpp b/src/solver/conv_asm_5x10u2v2f1.cpp index 10ea56aa86..e500fa3648 100644 --- a/src/solver/conv_asm_5x10u2v2f1.cpp +++ b/src/solver/conv_asm_5x10u2v2f1.cpp @@ -43,7 +43,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvAsm5x10u2v2f1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_5X10U2V2))) return false; if(!ctx.use_asm_kernels) return false; diff --git a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp index 25adbed185..019ac834e9 100644 --- a/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp +++ b/src/solver/conv_asm_7x7c3h224w224k64u2v2p3q3f1.cpp @@ -43,7 +43,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvAsm7x7c3h224w224k64u2v2p3q3f1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_7X7C3H224W224))) return false; if(!ctx.use_asm_kernels) return false; diff --git a/src/solver/conv_asm_dir_BwdWrW1x1.cpp b/src/solver/conv_asm_dir_BwdWrW1x1.cpp index 
41330674bc..0e8d66eb9d 100644 --- a/src/solver/conv_asm_dir_BwdWrW1x1.cpp +++ b/src/solver/conv_asm_dir_BwdWrW1x1.cpp @@ -149,7 +149,7 @@ bool PerformanceConfigConvAsmBwdWrW1x1::SetNextValue(const ProblemDescription&) { // Increment with wrap-around: // select fast or full method - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_SEARCH_OPTIMIZED))) { do { @@ -475,7 +475,7 @@ bool ConvAsmBwdWrW1x1::IsValidPerformanceConfig( bool ConvAsmBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -744,7 +744,7 @@ ConvSolution ConvAsmBwdWrW1x1::GetSolution(const ExecutionContext& ctx, PerformanceConfigConvAsmBwdWrW1x1 fromEnv; { - const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW1X1_PERF_VALS)); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) diff --git a/src/solver/conv_asm_dir_BwdWrW3x3.cpp b/src/solver/conv_asm_dir_BwdWrW3x3.cpp index 17831f5ddd..d1d0ecc867 100644 --- a/src/solver/conv_asm_dir_BwdWrW3x3.cpp +++ b/src/solver/conv_asm_dir_BwdWrW3x3.cpp @@ -71,7 +71,7 @@ bool PerformanceConfigAsmDirect3x3WrW::SetNextValue(const ProblemDescription&) do { #if MIOPEN_GCN_ASM_DIRECT_3X3WRW_SEARCH_LWC_FIXED == 0 - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_SEARCH_OPTIMIZED))) { // (0 <= limit_wave_cnt && limit_wave_cnt <= 9) if(++limit_wave_cnt <= 9) @@ -387,7 +387,7 @@ bool ConvAsmBwdWrW3x3::IsValidPerformanceConfig( bool ConvAsmBwdWrW3x3::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -425,7 +425,7 @@ bool ConvAsmBwdWrW3x3::IsApplicable(const ExecutionContext& ctx, return false; #if WORKAROUND_SWDEV_330460 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3{}) && name == "gfx90a" && + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3)) && name == "gfx90a" && problem.IsFp32()) return false; #endif @@ -506,7 +506,7 @@ ConvSolution ConvAsmBwdWrW3x3::GetSolution(const ExecutionContext& ctx, PerformanceConfigAsmDirect3x3WrW fromEnv; { - const auto& s = miopen::GetStringEnv(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS{}); + const auto& s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_CONV_DIRECT_ASM_WRW3X3_PERF_VALS)); if(!s.empty()) // else nothing to parse. 
{ if(!fromEnv.Deserialize(s) || !fromEnv.IsValid(ctx, problem)) diff --git a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp index 0e67743f6a..d8f42ee47f 100644 --- a/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_bwd_v4r1_dynamic.cpp @@ -133,7 +133,7 @@ static inline bool FindImplicitGemmDynamicKernelBwd(const ProblemDescription& pr bool ConvAsmImplicitGemmV4R1DynamicBwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_V4R1))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp index 8b51de6057..9ab08bdf59 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd.cpp @@ -979,7 +979,7 @@ FindImplicitGemmGtcDynamicBwdKernel(const ProblemDescription& problem) bool ConvAsmImplicitGemmGTCDynamicBwdXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp index 403039b5a0..e142437df5 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_bwd_nhwc.cpp @@ -733,7 +733,7 @@ void PerformanceConfigAsmImplicitGemmGTCBwdXdlopsNHWC::HeuristicInit( if(need_k_split) { if(miopen::IsDisabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + 
ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) { if(problem.IsFp16() && gks > 0) vector_store = 1; @@ -809,7 +809,7 @@ bool PerformanceConfigAsmImplicitGemmGTCBwdXdlopsNHWC::IsValid( return false; } - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) { if(problem.IsFp16() && gemm_k_global_split != 0 && vector_store != 1) return false; @@ -928,7 +928,7 @@ ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::Search(const ExecutionContext& ctx, bool ConvAsmImplicitGemmGTCDynamicBwdXdlopsNHWC::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_BWD_GTC_XDLOPS_NHWC))) return false; if(problem.GetConv().attribute.deterministic) diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp index f971452fed..7836390a51 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd.cpp @@ -1504,7 +1504,7 @@ FindImplicitGemmGtcDynamicFwdKernel(const ProblemDescription& problem) bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); @@ -1542,7 +1542,7 @@ bool ConvAsmImplicitGemmGTCDynamicFwdXdlops::IsApplicable(const ExecutionContext if((problem.GetWeightsHeight_() == 1) && (problem.GetWeightsWidth_() == 1) && (problem.GetInChannels_() % 8 != 0)) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS{})) + 
if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS))) return false; } #endif diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp index e1c14a520e..a1a3b4130a 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nchwc.cpp @@ -550,7 +550,7 @@ ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC::Search(const ExecutionContext& ctx, bool ConvAsmImplicitGemmGTCDynamicFwdDlopsNCHWC::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_DLOPS_NCHWC))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); diff --git a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp index dd133a87f0..d8fdcb462d 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_fwd_nhwc.cpp @@ -605,7 +605,7 @@ void PerformanceConfigAsmImplicitGemmGTCFwdXdlopsNHWC::HeuristicInit( if(need_k_split) { if(miopen::IsDisabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) { if(problem.IsFp16() && gks > 0) vector_store = 1; @@ -681,7 +681,7 @@ bool PerformanceConfigAsmImplicitGemmGTCFwdXdlopsNHWC::IsValid( return false; } - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) { if(problem.IsFp16() && gemm_k_global_split != 0 && vector_store != 1) return false; @@ -864,7 +864,7 @@ size_t ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::GetWorkspaceSize( bool ConvAsmImplicitGemmGTCDynamicFwdXdlopsNHWC::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) 
const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_GTC_XDLOPS_NHWC))) return false; if(problem.GetConv().attribute.deterministic) diff --git a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp index 48a69a53a7..0322fce750 100644 --- a/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp +++ b/src/solver/conv_asm_implicit_gemm_gtc_wrw_nhwc.cpp @@ -446,7 +446,7 @@ void PerformanceConfigAsmImplicitGemmGTCWrwXdlopsNHWC::SetParamsForKSplit( if(problem.IsFp16()) { if(tensor_b_thread_lengths[3] == 1 || - miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) vector_store = 1; } else if(problem.IsBfp16() && tensor_b_thread_lengths[3] == 1) @@ -765,7 +765,7 @@ bool PerformanceConfigAsmImplicitGemmGTCWrwXdlopsNHWC::IsValid( return false; } - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_PK_ATOMIC_ADD_FP16))) { if(problem.IsFp16() && tensor_b_thread_lengths[3] != 1 && gemm_k_global_split != 0 && vector_store != 1) @@ -854,7 +854,7 @@ ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::Search(const ExecutionContext& ctx, bool ConvAsmImplicitGemmGTCDynamicWrwXdlopsNHWC::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS_NHWC))) return false; if(problem.GetConv().attribute.deterministic) diff --git a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp index cab71d5d1c..817b79391b 100644 --- a/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp +++ 
b/src/solver/conv_asm_implicit_gemm_v4r1_dynamic.cpp @@ -283,7 +283,7 @@ bool TunableImplicitGemmV4R1Dynamic::IsValid(const ExecutionContext& ctx, bool ConvAsmImplicitGemmV4R1DynamicFwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); @@ -329,7 +329,7 @@ bool ConvAsmImplicitGemmV4R1DynamicFwd::IsApplicable(const ExecutionContext& ctx bool ConvAsmImplicitGemmV4R1DynamicFwd_1x1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_FWD_V4R1_1X1))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); diff --git a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp index 855129012d..1f0dbde947 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_gtc_dynamic_xdlops.cpp @@ -821,7 +821,7 @@ ConvAsmImplicitGemmGTCDynamicWrwXdlops::GetWorkspaceSize(const ExecutionContext& bool ConvAsmImplicitGemmGTCDynamicWrwXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_GTC_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) diff --git a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp index 2a95a0d5f0..6ebd7fef96 100644 --- a/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp +++ b/src/solver/conv_asm_implicit_gemm_wrw_v4r1_dynamic.cpp @@ 
-298,7 +298,7 @@ static int GetGemmkGroups(const ProblemDescription& problem) bool ConvAsmImplicitGemmV4R1DynamicWrw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_ASM_WRW_V4R1))) return false; const auto device_name = ctx.GetStream().GetDeviceName(); diff --git a/src/solver/conv_bin_wino3x3U.cpp b/src/solver/conv_bin_wino3x3U.cpp index 50d4f7d1ab..f28d98f2ae 100644 --- a/src/solver/conv_bin_wino3x3U.cpp +++ b/src/solver/conv_bin_wino3x3U.cpp @@ -46,7 +46,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvBinWinograd3x3U::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_3X3{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_3X3))) return false; if(!problem.Is2d()) return false; diff --git a/src/solver/conv_bin_winoRxS.cpp b/src/solver/conv_bin_winoRxS.cpp index f9f83ea38b..a4dcbd3d1e 100644 --- a/src/solver/conv_bin_winoRxS.cpp +++ b/src/solver/conv_bin_winoRxS.cpp @@ -227,11 +227,11 @@ bool ConvBinWinogradRxS::IsApplicable(const ExecutionContext& ctx, return false; if(problem.IsTensorsCasted()) return false; - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS))) return false; if(problem.IsDirectionBackwardWrW()) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_WRW))) return false; if(!(problem.IsFp32() && problem.GetKernelStrideW() == 1 && problem.GetKernelStrideH() == 1)) @@ -239,7 +239,7 @@ bool ConvBinWinogradRxS::IsApplicable(const ExecutionContext& ctx, } else { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_FWD_BWD))) return false; } if(!ctx.use_asm_kernels) @@ 
-345,7 +345,7 @@ ConvSolution ConvBinWinogradRxS::GetSolution(const ExecutionContext& ctx, { kernel.kernel_name = "miopenSp3AsmConvRxSU"; kernel.kernel_file = "Conv_Winograd_"; - if(miopen::IsEnabled(MIOPEN_DEBUG_SRAM_EDC_DISABLED{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_SRAM_EDC_DISABLED))) kernel.kernel_file += "v13_3_12"; else kernel.kernel_file += "v14_3_3"; diff --git a/src/solver/conv_bin_winoRxS_fused.cpp b/src/solver/conv_bin_winoRxS_fused.cpp index b0baa3dee3..24ba8820ac 100644 --- a/src/solver/conv_bin_winoRxS_fused.cpp +++ b/src/solver/conv_bin_winoRxS_fused.cpp @@ -58,9 +58,9 @@ namespace fusion { bool ConvBinWinogradRxSFused::IsApplicable(const FusionContext& context, const FusionDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_FUSED_WINOGRAD))) return false; - if(miopen::IsDisabled(MIOPEN_DEBUG_GCN_ASM_KERNELS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_GCN_ASM_KERNELS))) return false; if(!WinoCommonIsApplicable(context, problem)) return false; diff --git a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp index 4658b9d7cf..ecb413e7ff 100644 --- a/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp +++ b/src/solver/conv_ck_igemm_fwd_bias_activ_fused.cpp @@ -404,7 +404,7 @@ bool ConvCKIgemmFwdBiasActivFused::IsApplicable(const FusionContext& ctx, { MIOPEN_THROW(miopenStatusInternalError, "desc.op_map.empty()"); } - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_BIAS_ACTIV))) return false; // check the sequence of prims if(desc.op_map.size() != 3) diff --git a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp index 43c02c3bf2..5fc93147aa 100644 --- a/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp +++ b/src/solver/conv_ck_igemm_fwd_v6r1_dlops_nchw.cpp @@ -90,9 +90,9 @@ bool 
ConvCkIgemmFwdV6r1DlopsNchw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_411729 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW))) #else - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_CK_IGEMM_FWD_V6R1_DLOPS_NCHW))) #endif { return false; diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index 97f8ef2547..467184b35b 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -34,6 +34,8 @@ #include #include +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS); + namespace miopen { namespace debug { @@ -119,8 +121,6 @@ bool IsOutputInt32(const ProblemDescription& problem) problem.GetOutDataType() == miopenInt32; } -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS); - std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) { std::ostringstream kernel_name; diff --git a/src/solver/conv_direct_naive_conv_bwd.cpp b/src/solver/conv_direct_naive_conv_bwd.cpp index 9922d916ba..40e1002f52 100644 --- a/src/solver/conv_direct_naive_conv_bwd.cpp +++ b/src/solver/conv_direct_naive_conv_bwd.cpp @@ -41,7 +41,7 @@ bool ConvDirectNaiveConvBwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { if(!miopen::debug::AlwaysEnableConvDirectNaive && - miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD{})) + miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_BWD))) return false; if(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem)) diff --git a/src/solver/conv_direct_naive_conv_fwd.cpp b/src/solver/conv_direct_naive_conv_fwd.cpp index d9fd9ffbc2..59128d2e32 100644 --- a/src/solver/conv_direct_naive_conv_fwd.cpp +++ 
b/src/solver/conv_direct_naive_conv_fwd.cpp @@ -40,7 +40,7 @@ bool ConvDirectNaiveConvFwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { if(!miopen::debug::AlwaysEnableConvDirectNaive && - miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD{})) + miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_FWD))) return false; if(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem)) diff --git a/src/solver/conv_direct_naive_conv_wrw.cpp b/src/solver/conv_direct_naive_conv_wrw.cpp index 4b6e7fafe0..6edea56e49 100644 --- a/src/solver/conv_direct_naive_conv_wrw.cpp +++ b/src/solver/conv_direct_naive_conv_wrw.cpp @@ -41,7 +41,7 @@ bool ConvDirectNaiveConvWrw::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { if(!miopen::debug::AlwaysEnableConvDirectNaive && - miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW{})) + miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_CONV_WRW))) return false; if(!ConvDirectNaiveConvIsApplicableByKernelType(ctx, problem)) diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index c58d0000fb..1a04669b33 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -307,9 +307,9 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS))) return false; - if(miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC))) return false; if(problem.HasMixedDataTypes()) return false; diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp 
b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 64f7b8eedc..ded1f3bdff 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -308,7 +308,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 4f48c4913b..543bb45592 100644 --- a/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -303,9 +303,9 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS))) return false; - if(miopen::IsEnabled(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC))) return false; if(problem.HasMixedDataTypes()) return false; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp index 91c7fc0855..708beb4a18 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_data_xdlops.cpp @@ -259,7 +259,7 @@ bool ConvHipImplicitGemmBwdXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - 
if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp index 8c4aa5ea91..f4b49887fc 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1.cpp @@ -635,7 +635,7 @@ size_t ConvHipImplicitGemmBwdDataV1R1::GetWorkspaceSize(const ExecutionContext&, bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -666,7 +666,7 @@ bool ConvHipImplicitGemmBwdDataV1R1::IsApplicable(const ExecutionContext& ctx, #if WORKAROUND_ISSUE_309 if(problem.IsBfp16()) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1))) return false; } #endif diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp index b9f16d4afb..c7a908ef47 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v1r1_xdlops.cpp @@ -759,10 +759,10 @@ bool ConvHipImplicitGemmBwdDataV1R1Xdlops::IsApplicable(const ExecutionContext& const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_251757 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS))) return false; #endif - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS{})) + 
if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V1R1_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) @@ -927,8 +927,8 @@ ConvSolution ConvHipImplicitGemmBwdDataV1R1Xdlops::GetSolution( std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_N=") + std::to_string(GemmBBlockCopySrcDataPerRead_GemmN) + std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_KPACK=") + std::to_string(GemmBBlockCopyDstDataPerWrite_GemmKPack) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? 
'1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp index 7ddfe5cbb8..b950c9e1ee 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1.cpp @@ -731,13 +731,13 @@ bool ConvHipImplicitGemmBwdDataV4R1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if WORKAROUND_SWDEV_229277_227616_229195 - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1))) return false; #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp index 15ffdf7a86..4f4c362bf3 100644 --- a/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_bwd_v4r1_xdlops.cpp @@ -816,18 +816,18 @@ bool ConvHipImplicitGemmBwdDataV4R1Xdlops::IsApplicable(const ExecutionContext& #if WORKAROUND_ISSUE_1206 if(problem.IsFp32()) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS))) return false; } #endif #if WORKAROUND_SWDEV_329642 if(problem.IsBfp16() && ctx.GetStream().GetDeviceName() == "gfx90a") { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS))) return false; } #endif - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS{})) + 
if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -912,8 +912,8 @@ ConvSolution ConvHipImplicitGemmBwdDataV4R1Xdlops::GetSolution( PerformanceImplicitGemmBwdDataV4R1Xdlops fromEnv; { - const auto& s = - miopen::GetStringEnv(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS{}); + const auto& s = miopen::GetStringEnv( + ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_BWD_V4R1_XDLOPS_PERF_VALS)); if(!s.empty()) // else nothing to parse. { if(!fromEnv.Deserialize(s) || !fromEnv.IsReallyValid(problem)) @@ -1047,8 +1047,8 @@ ConvSolution ConvHipImplicitGemmBwdDataV4R1Xdlops::GetSolution( std::string(" -DCK_PARAM_TUNABLE_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_N=") + std::to_string(GemmBBlockCopySrcDataPerRead_GemmN) + std::string(" -DCK_PARAM_DEPENDENT_GRID_SIZE=") + std::to_string(grid_size) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? 
'1' : '0') + std::string(" -DCK_PARAM_GEMM_ID=") + std::to_string(gemm_id) + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp index 61b4d62f82..5f2db21f17 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r1.cpp @@ -46,7 +46,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R1))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; @@ -94,7 +94,7 @@ bool ConvHipImplicitGemmV4R1Fwd::IsApplicable(const ExecutionContext& ctx, bool ConvHipImplicitGemmV4R1WrW::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R1))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp index cacba33291..639fc473a3 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4.cpp @@ -578,7 +578,7 @@ ConvHipImplicitGemmV4R4Fwd::CalculateGemmSize(const ProblemDescription& problem) bool ConvHipImplicitGemmV4R4Fwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git 
a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp index 63d702990e..020193b04e 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops.cpp @@ -97,8 +97,8 @@ bool PerformanceImplicitGemmForwardV4R4Xdlops::SetNextValue(const ProblemDescrip { // list performance parameters in reverse order, in order for tuning to iterate over the // range in normal order - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(!NextTwoPower<1, 8>(GemmBThreadDataPerRead_GemmN)) break; @@ -458,8 +458,8 @@ PerformanceImplicitGemmForwardV4R4Xdlops::CalculateGemmBBlockCopyPerformancePara // calculate threadwise copy size auto data_per_thread_copy = std::max(1, (GemmKPerBlock * GemmNPerBlock * GemmKPack) / block_size); - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { @@ -778,8 +778,8 @@ bool PerformanceImplicitGemmForwardV4R4Xdlops::IsFastToBeUsedForTuning( // DstDataPerWrite_GemmKPack should not be too small, otherwise too many ds_write instruction // would cause bad performance { - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { @@ -958,8 +958,8 @@ ConvSolution ConvHipImplicitGemmForwardV4R4Xdlops::GetSolution( std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_N=") + std::to_string(GemmBBlockCopySrcDataPerRead_GemmN) + std::string(" 
-DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_KPACK=") + std::to_string(GemmBBlockCopyDstDataPerWrite_GemmKPack) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? '1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; // clang-format on @@ -972,7 +972,7 @@ ConvSolution ConvHipImplicitGemmForwardV4R4Xdlops::GetSolution( bool ConvHipImplicitGemmForwardV4R4Xdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp index d8f8721948..7222aca80c 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r4_xdlops_padded_gemm.cpp @@ -118,8 +118,8 @@ bool PerformanceImplicitGemmForwardV4R4Xdlops_Padded_Gemm::SetNextValue(const Pr { // List performance parameters in reverse order, in order for tuning to iterate over the // range in normal order. 
- if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(!NextTwoPower<1, 8>(GemmBThreadDataPerRead_GemmN)) break; @@ -500,8 +500,8 @@ PerformanceImplicitGemmForwardV4R4Xdlops_Padded_Gemm::CalculateGemmBBlockCopyPer // calculate threadwise copy size auto data_per_thread_copy = std::max(1, (GemmKPerBlock * GemmNPerBlock * GemmKPack) / block_size); - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { @@ -819,8 +819,8 @@ bool PerformanceImplicitGemmForwardV4R4Xdlops_Padded_Gemm::IsFastToBeUsedForTuni // DstDataPerWrite_GemmKPack should not be too small, otherwise too many ds_write instruction // would cause bad performance. { - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R4_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { @@ -1026,8 +1026,8 @@ ConvSolution ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm::GetSolution( std::string(" -DCK_GEMM_N_PAD=") + std::to_string(gemm_n_extra) + std::string(" -DCK_GEMM_K_PAD=") + std::to_string(gemm_k_extra) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 
1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? '1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; // clang-format on @@ -1040,7 +1040,7 @@ ConvSolution ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm::GetSolution( bool ConvHipImplicitGemmForwardV4R4Xdlops_Padded_Gemm::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R4_PADDED_GEMM_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp index c07e1a5fb2..3fb66c6962 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_v4r5_xdlops.cpp @@ -128,8 +128,8 @@ bool PerformanceImplicitGemmForwardV4R5Xdlops::SetNextValue(const ProblemDescrip { // list performance parameters in reverse order, in order for tuning to iterate over the // range in normal order - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(!NextTwoPower<1, 8>(GemmBThreadDataPerRead_GemmN)) break; @@ -494,8 +494,8 @@ PerformanceImplicitGemmForwardV4R5Xdlops::CalculateGemmBBlockCopyPerformancePara // calculate threadwise copy size auto data_per_thread_copy = std::max(1, (GemmKPerBlock * NWaves * BPerBlock * GemmKPack) / block_size); - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { 
@@ -826,8 +826,8 @@ bool PerformanceImplicitGemmForwardV4R5Xdlops::IsFastToBeUsedForTuning( // DstDataPerWrite_GemmKPack should not be too small, otherwise too many ds_write instruction // would cause bad performance { - if(miopen::IsEnabled( - MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM{})) + if(miopen::IsEnabled(ENV( + MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_FWD_V4R5_XDLOPS_ADD_VECTOR_LOAD_GEMMN_TUNE_PARAM))) { if(problem.IsFp16()) { @@ -988,8 +988,8 @@ ConvSolution ConvHipImplicitGemmForwardV4R5Xdlops::GetSolution( std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_N=") + std::to_string(GemmBBlockCopySrcDataPerRead_GemmN) + std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_KPACK=") + std::to_string(GemmBBlockCopyDstDataPerWrite_GemmKPack) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? 
'1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; // clang-format on @@ -1002,7 +1002,7 @@ ConvSolution ConvHipImplicitGemmForwardV4R5Xdlops::GetSolution( bool ConvHipImplicitGemmForwardV4R5Xdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_V4R5_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp index 3ad6e80617..727d300789 100644 --- a/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_fwd_xdlops.cpp @@ -260,7 +260,7 @@ bool ConvHipImplicitGemmFwdXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp index 0fddd42a29..e94e8af8d4 100644 --- a/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_grouped_fwd_xdlops.cpp @@ -288,7 +288,7 @@ bool ConvHipImplicitGemmGroupFwdXdlops::IsApplicable( [[maybe_unused]] const ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_GROUP_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS))) return false; if(problem.HasNonPackedTensors()) return false; diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp 
b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp index 8918137b73..a87a58a091 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4.cpp @@ -581,7 +581,7 @@ ConvHipImplicitGemmV4R4WrW::CalculateGemmSize(const ProblemDescription& problem) bool ConvHipImplicitGemmV4R4WrW::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp index 1cf5e4d8f5..ca5cc1afcd 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops.cpp @@ -976,8 +976,8 @@ ConvSolution ConvHipImplicitGemmWrwV4R4Xdlops::GetSolution( std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_SRC_DATA_PER_READ_GEMM_KPACK=") + std::to_string(GemmBBlockCopySrcDataPerRead_GemmKPack) + std::string(" -DCK_PARAM_DEPENDENT_GEMM_B_BLOCK_COPY_DST_DATA_PER_WRITE_GEMM_KPACK=") + std::to_string(GemmBBlockCopyDstDataPerWrite_GemmKPack) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? 
'1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; // clang-format on @@ -1042,7 +1042,7 @@ ConvSolution ConvHipImplicitGemmWrwV4R4Xdlops::GetSolution( bool ConvHipImplicitGemmWrwV4R4Xdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp index f66f47600b..1ed8899057 100644 --- a/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp +++ b/src/solver/conv_hip_implicit_gemm_wrw_v4r4_xdlops_padded_gemm.cpp @@ -1042,8 +1042,8 @@ ConvSolution ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm::GetSolution( std::string(" -DCK_GEMM_N_PAD=") + std::to_string(GemmNPad) + std::string(" -DCK_GEMM_K_TOTAL_PAD=") + std::to_string(GemmKTotalPad) + std::string(" -DCK_USE_AMD_XDLOPS=") + std::to_string(IsXdlopsSupport(ctx) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM{}) ? 1 : 0) + - std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE{}) ? '1' : '0') + + std::string(" -DCK_USE_AMD_XDLOPS_INLINE_ASM=") + std::to_string(miopen::IsEnabled(ENV(MIOPEN_DEBUG_IMPLICIT_GEMM_XDLOPS_INLINE_ASM)) ? 1 : 0) + + std::string(" -DCK_USE_AMD_XDLOPS_EMULATE=") + (miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_XDLOPS_EMULATE)) ? 
'1' : '0') + get_static_ck_common_compiler_flag(ctx) + ctx.general_compile_options; // clang-format on @@ -1108,7 +1108,7 @@ ConvSolution ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm::GetSolution( bool ConvHipImplicitGemmWrwV4R4Xdlops_Padded_Gemm::IsApplicable( const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_IMPLICIT_GEMM_HIP_WRW_V4R4_PADDED_GEMM_XDLOPS))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) diff --git a/src/solver/conv_mlir_igemm_bwd.cpp b/src/solver/conv_mlir_igemm_bwd.cpp index e1d3df1bc4..b6d7d5a40b 100644 --- a/src/solver/conv_mlir_igemm_bwd.cpp +++ b/src/solver/conv_mlir_igemm_bwd.cpp @@ -44,7 +44,7 @@ bool ConvMlirIgemmBwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp index 7c507933fd..3c401373cc 100644 --- a/src/solver/conv_mlir_igemm_bwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_bwd_xdlops.cpp @@ -45,7 +45,7 @@ bool ConvMlirIgemmBwdXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_BWD_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_mlir_igemm_fwd.cpp b/src/solver/conv_mlir_igemm_fwd.cpp index 8d58d20bfe..ccc27ccbd0 100644 --- a/src/solver/conv_mlir_igemm_fwd.cpp +++ b/src/solver/conv_mlir_igemm_fwd.cpp @@ -164,7 +164,7 @@ bool ConvMlirIgemmFwd::IsApplicable(const 
ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp index 2d39a7ec9d..78b8408431 100644 --- a/src/solver/conv_mlir_igemm_fwd_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_fwd_xdlops.cpp @@ -59,7 +59,7 @@ bool ConvMlirIgemmFwdXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_FWD_XDLOPS))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_mlir_igemm_wrw.cpp b/src/solver/conv_mlir_igemm_wrw.cpp index 2635e9b039..1c8a303388 100644 --- a/src/solver/conv_mlir_igemm_wrw.cpp +++ b/src/solver/conv_mlir_igemm_wrw.cpp @@ -45,7 +45,7 @@ bool ConvMlirIgemmWrW::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW))) return false; if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp index e1381793e7..3c97f9f0ab 100644 --- a/src/solver/conv_mlir_igemm_wrw_xdlops.cpp +++ b/src/solver/conv_mlir_igemm_wrw_xdlops.cpp @@ -46,7 +46,7 @@ bool ConvMlirIgemmWrWXdlops::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { #if MIOPEN_USE_MLIR - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_MLIR_IGEMM_WRW_XDLOPS))) return false; 
if(problem.GetConv().attribute.deterministic) return false; diff --git a/src/solver/conv_multipass_wino3x3WrW.cpp b/src/solver/conv_multipass_wino3x3WrW.cpp index e0caf25745..5f1b2dc1de 100644 --- a/src/solver/conv_multipass_wino3x3WrW.cpp +++ b/src/solver/conv_multipass_wino3x3WrW.cpp @@ -42,12 +42,6 @@ #define WORKAROUND_SWDEV_234193 1 #endif -namespace miopen { -namespace solver { -namespace conv { - -using ProblemDescription = miopen::conv::ProblemDescription; - MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4) @@ -59,6 +53,12 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4) MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX) +namespace miopen { +namespace solver { +namespace conv { + +using ProblemDescription = miopen::conv::ProblemDescription; + // Introduces a number of shader-specific aliases (names) in the current scope at zero cost. // These names represent shader parameters, e.g. shader C is batch_size etc and useful for // programming. 
@@ -384,7 +384,7 @@ bool ConvWinograd3x3MultipassWrW if(wino_data_tile == 3 && wino_filter_tile == 2) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2{}) || + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X2)) || problem.GetKernelStrideH() == 1) { return false; @@ -392,7 +392,7 @@ bool ConvWinograd3x3MultipassWrW } if(wino_data_tile == 3 && wino_filter_tile == 3) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3{}) || + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X3)) || problem.GetKernelStrideH() == 1) { return false; @@ -405,17 +405,17 @@ bool ConvWinograd3x3MultipassWrW { if(wino_data_tile == 3 && wino_filter_tile == 4) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4))) return false; } if(wino_data_tile == 3 && wino_filter_tile == 5) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5))) return false; } if(wino_data_tile == 3 && wino_filter_tile == 6) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6))) return false; } } @@ -424,39 +424,39 @@ bool ConvWinograd3x3MultipassWrW { if(wino_data_tile == 3 && wino_filter_tile == 4) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X4))) return false; } if(wino_data_tile == 3 && wino_filter_tile == 5) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X5))) return false; } if(wino_data_tile == 3 && wino_filter_tile == 6) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F3X6))) return false; } } if(wino_data_tile == 7 && wino_filter_tile == 2) { - 
if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X2))) return false; } if(wino_data_tile == 7 && wino_filter_tile == 3) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F7X3))) return false; } if(wino_data_tile == 5 && wino_filter_tile == 3) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X3))) return false; } if(wino_data_tile == 5 && wino_filter_tile == 4) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_F5X4))) return false; } if(!ctx.use_asm_kernels) @@ -492,7 +492,7 @@ bool ConvWinograd3x3MultipassWrW return false; { - std::size_t limit = miopen::Value(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX{}); + std::size_t limit = miopen::Value(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_MPASS_WORKSPACE_MAX)); #if WORKAROUND_SWDEV_203031 if(limit == 0) { diff --git a/src/solver/conv_ocl_dir2D11x11.cpp b/src/solver/conv_ocl_dir2D11x11.cpp index 50119be43f..63fc1366e5 100644 --- a/src/solver/conv_ocl_dir2D11x11.cpp +++ b/src/solver/conv_ocl_dir2D11x11.cpp @@ -42,7 +42,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvOclDirectFwd11x11::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD11X11))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp index 4226b8006f..d98dd55d62 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_1x1.cpp @@ -49,11 +49,11 @@ bool ConvOclBwdWrW1x1::IsApplicable(const ExecutionContext& ctx, 
if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1))) return false; } #endif - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW1X1))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp index 66469f298f..10dfe96a20 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_2.cpp @@ -175,7 +175,7 @@ template bool PerformanceConfigConvOclBwdWrw2::SetNextValue(const ProblemDescription&) { // Increment with wrap-around: - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2_SEARCH_OPTIMIZED))) { do { @@ -454,7 +454,7 @@ template bool ConvOclBwdWrW2::IsApplicableBase(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW2))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp index 4f45f1869e..32340118cb 100644 --- a/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp +++ b/src/solver/conv_ocl_dir2D_bwdWrW_53.cpp @@ -46,7 +46,7 @@ static bool WorkaroundSwdev168168() { return true; } bool ConvOclBwdWrW53::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_WRW53))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git 
a/src/solver/conv_ocl_dir2Dfwd.cpp b/src/solver/conv_ocl_dir2Dfwd.cpp index 00fe1e7526..ee30d953a1 100644 --- a/src/solver/conv_ocl_dir2Dfwd.cpp +++ b/src/solver/conv_ocl_dir2Dfwd.cpp @@ -41,7 +41,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvOclDirectFwd::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; diff --git a/src/solver/conv_ocl_dir2Dfwd1x1.cpp b/src/solver/conv_ocl_dir2Dfwd1x1.cpp index 122c4c9285..097395a7cc 100644 --- a/src/solver/conv_ocl_dir2Dfwd1x1.cpp +++ b/src/solver/conv_ocl_dir2Dfwd1x1.cpp @@ -48,13 +48,13 @@ bool ConvOclDirectFwd1x1::IsApplicable(const ExecutionContext& ctx, if(StartsWith(ctx.GetStream().GetDeviceName(), "gfx10") || StartsWith(ctx.GetStream().GetDeviceName(), "gfx11")) { - if(!miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1))) return false; } #endif if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWD1X1))) return false; if(!ctx.use_opencl_convolutions) return false; diff --git a/src/solver/conv_ocl_dir2Dfwdgen.cpp b/src/solver/conv_ocl_dir2Dfwdgen.cpp index 0ff0df4be8..ded5ec5453 100644 --- a/src/solver/conv_ocl_dir2Dfwdgen.cpp +++ b/src/solver/conv_ocl_dir2Dfwdgen.cpp @@ -40,7 +40,7 @@ using ProblemDescription = miopen::conv::ProblemDescription; bool ConvOclDirectFwdGen::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_OCL_FWDGEN))) return false; if(ThisSolverIsDeprecatedStatic::IsDisabled(ctx)) return false; 
diff --git a/src/solver/conv_winoRxS.cpp b/src/solver/conv_winoRxS.cpp index 2df05de09b..5d0e1b24d4 100644 --- a/src/solver/conv_winoRxS.cpp +++ b/src/solver/conv_winoRxS.cpp @@ -682,7 +682,8 @@ static bool IsApplicableBase(const ExecutionContext& ctx, const ProblemDescripti // clang-format on #if WORKAROUND_ISSUE_2493 - if(!miopen::IsDisabled(MIOPEN_DEBUG_WORKAROUND_ISSUE_2493{}) && !miopen::debug::IsWarmupOngoing) + if(!miopen::IsDisabled(ENV(MIOPEN_DEBUG_WORKAROUND_ISSUE_2493)) && + !miopen::debug::IsWarmupOngoing) { constexpr double max_perf_drop_due_to_granularity = 200; // Times. const auto gl = ShaderModel(ctx, problem, Winodata, Winofilter).GetGranularityLoss(); @@ -735,7 +736,7 @@ bool ConvBinWinoRxS::IsApplicable(const ExecutionContext& { if(IS2X3) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3))) return false; #if !WORKAROUND_ISSUE_1681 if(problem.GetGroupCount() == 1 && !problem.IsDirectionBackwardWrW()) @@ -744,7 +745,7 @@ bool ConvBinWinoRxS::IsApplicable(const ExecutionContext& } if(IS3X2) { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2))) return false; } return IsApplicableBase(ctx, problem); @@ -760,12 +761,12 @@ GetPerfConfFromEnv(const ExecutionContext& ctx) if(IS2X3) { - s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS{}); + s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS)); env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_PERF_VALS"; } else if(IS3X2) { - s = miopen::GetStringEnv(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS{}); + s = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS)); env_name = "MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F3X2_PERF_VALS"; } @@ -1113,7 +1114,7 @@ ConvSolution ConvBinWinoRxS::GetSolution( bool ConvBinWinogradRxSf2x3g1::IsApplicable(const ExecutionContext& ctx, const ProblemDescription& problem) const { - 
if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1))) return false; return IsApplicableBase<2, 3>(ctx, problem) && problem.GetGroupCount() == 1; } diff --git a/src/solver/conv_winoRxS_fused.cpp b/src/solver/conv_winoRxS_fused.cpp index 05b013dac9..0893ca1afb 100644 --- a/src/solver/conv_winoRxS_fused.cpp +++ b/src/solver/conv_winoRxS_fused.cpp @@ -152,7 +152,7 @@ namespace fusion { bool ConvBinWinogradRxSf2x3g1Fused::IsApplicable(const FusionContext& context, const FusionDescription& problem) const { - if(miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_RXS_F2X3_G1))) return false; if(!WinoCommonIsApplicable(context, problem)) return false; diff --git a/src/solver/conv_wino_fury_RxS.cpp b/src/solver/conv_wino_fury_RxS.cpp index d3a087cbd8..d61781a5f8 100644 --- a/src/solver/conv_wino_fury_RxS.cpp +++ b/src/solver/conv_wino_fury_RxS.cpp @@ -177,10 +177,10 @@ bool ConvWinoFuryRxS::IsApplicable(const ExecutionContext& if(problem.HasNonPackedTensors()) return false; - if(is2x3() && miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3{})) + if(is2x3() && miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F2X3))) return false; - if(is3x2() && miopen::IsDisabled(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2{})) + if(is3x2() && miopen::IsDisabled(ENV(MIOPEN_DEBUG_AMD_WINOGRAD_FURY_RXS_F3X2))) return false; if(!ctx.use_asm_kernels) diff --git a/src/solver/fft.cpp b/src/solver/fft.cpp index 56bf50917c..d6d922e918 100644 --- a/src/solver/fft.cpp +++ b/src/solver/fft.cpp @@ -35,14 +35,14 @@ #include +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) + namespace miopen { namespace solver { namespace conv { using ProblemDescription = miopen::conv::ProblemDescription; -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_FFT) - static void cgemm_grid(size_t* global_work_size, size_t* local_work_size, int cgemm_choice, diff --git 
a/src/solver/gemm.cpp b/src/solver/gemm.cpp index a18cdfe157..76898a1bdc 100644 --- a/src/solver/gemm.cpp +++ b/src/solver/gemm.cpp @@ -352,7 +352,7 @@ ConvSolution GemmFwd1x1_0_2::GetSolution(const ExecutionContext& context, out_spatial.begin(), out_spatial.end(), std::size_t(1), std::multiplies()); const bool time_precision = context.GetStream().IsProfilingEnabled() && - (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { float time_gemm = 0; @@ -638,7 +638,7 @@ ConvSolution GemmFwd1x1_0_1_int8::GetSolution(const ExecutionContext& context, out_spatial.begin(), out_spatial.end(), std::size_t(1), std::multiplies()); const bool time_precision = context.GetStream().IsProfilingEnabled() && - (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); @@ -812,7 +812,7 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, solution.invoker_factory = [=](const std::vector&) { const bool time_precision = context.GetStream().IsProfilingEnabled() && - (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); MIOPEN_LOG_FUNCTION("groupconv, 1x1"); @@ -914,7 +914,7 @@ ConvSolution GemmFwd1x1_0_1::GetSolution(const ExecutionContext& context, MIOPEN_LOG_FUNCTION("convolution, 1x1"); const bool time_precision = context.GetStream().IsProfilingEnabled() && - (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { float time = 0; @@ -1134,7 +1134,7 @@ ConvSolution GemmFwdRest::GetSolution(const ExecutionContext& context, const auto wei_spatial_size = std::accumulate( 
wei_spatial.begin(), wei_spatial.end(), std::size_t(1), std::multiplies()); - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { float time_gemm = 0; diff --git a/src/solver/gemm_bwd.cpp b/src/solver/gemm_bwd.cpp index 307d44ccd9..e3bbce86c0 100644 --- a/src/solver/gemm_bwd.cpp +++ b/src/solver/gemm_bwd.cpp @@ -314,7 +314,7 @@ ConvSolution GemmBwd1x1_stride2::GetSolution(const ExecutionContext& context, solution.workspace_sz = workspace_req; solution.invoker_factory = [=](const std::vector&) { - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); @@ -510,7 +510,7 @@ ConvSolution GemmBwd1x1_stride1::GetSolution(const ExecutionContext&, solution.workspace_sz = 0; solution.invoker_factory = [=](const std::vector&) { - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); @@ -761,7 +761,7 @@ ConvSolution GemmBwdRest::GetSolution(const ExecutionContext& context, solution.workspace_sz = workspace_req; solution.invoker_factory = [=](const std::vector&) { - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); diff --git a/src/solver/gemm_wrw.cpp 
b/src/solver/gemm_wrw.cpp index 483a7d5ee9..b58d71f104 100644 --- a/src/solver/gemm_wrw.cpp +++ b/src/solver/gemm_wrw.cpp @@ -244,7 +244,7 @@ ConvSolution GemmWrw1x1_stride1::GetSolution(const ExecutionContext&, auto solution = ConvSolution{miopenStatusSuccess}; solution.invoker_factory = [=](const std::vector&) { - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); @@ -461,7 +461,7 @@ ConvSolution GemmWrwUniversal::GetSolution(const ExecutionContext& context, solution.workspace_sz = workspace_req; solution.invoker_factory = [=](const std::vector&) { - const bool time_precision = (!IsDisabled(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING{})); + const bool time_precision = (!IsDisabled(ENV(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING))); return [=](const Handle& handle, const AnyInvokeParams& primitive_params) { const auto& conv_params = primitive_params.CastTo(); diff --git a/src/solver/norm/forward_layernorm2d_ck.cpp b/src/solver/norm/forward_layernorm2d_ck.cpp index ac3fbec273..6c1d99af19 100644 --- a/src/solver/norm/forward_layernorm2d_ck.cpp +++ b/src/solver/norm/forward_layernorm2d_ck.cpp @@ -216,7 +216,7 @@ bool Layernorm2DCKForward::IsApplicable( [[maybe_unused]] const miopen::norm::ProblemDescription& problem) const { #if MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_LAYERNORM2DCKFORWARD_CONV_CK_LN{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_LAYERNORM2DCKFORWARD_CONV_CK_LN))) return false; if(!problem.IsSameType()) return false; diff --git a/src/solver/norm/forward_layernorm4d_ck.cpp b/src/solver/norm/forward_layernorm4d_ck.cpp index 88c6fec48f..4313a62ca4 100644 --- a/src/solver/norm/forward_layernorm4d_ck.cpp +++ b/src/solver/norm/forward_layernorm4d_ck.cpp @@ -224,7 +224,7 @@ bool 
Layernorm4DCKForward::IsApplicable( [[maybe_unused]] const miopen::norm::ProblemDescription& problem) const { #if MIOPEN_USE_COMPOSABLEKERNEL - if(miopen::IsDisabled(MIOPEN_DEBUG_LAYERNORM4DCKFORWARD_CONV_CK_LN{})) + if(miopen::IsDisabled(ENV(MIOPEN_DEBUG_LAYERNORM4DCKFORWARD_CONV_CK_LN))) return false; if(!problem.IsSameType()) return false; diff --git a/src/target_properties.cpp b/src/target_properties.cpp index d5f763694e..ece99618be 100644 --- a/src/target_properties.cpp +++ b/src/target_properties.cpp @@ -55,7 +55,7 @@ static std::string GetDeviceNameFromMap(const std::string& in) {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, }; - const auto& dev_str = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); + const auto& dev_str = miopen::GetStringEnv(ENV(MIOPEN_DEBUG_ENFORCE_DEVICE)); if(!dev_str.empty()) return dev_str; @@ -76,7 +76,7 @@ const std::size_t TargetProperties::MaxLocalMemorySize = static_cast std::string { - const auto& arch = miopen::GetStringEnv(MIOPEN_DEVICE_ARCH{}); + const auto& arch = miopen::GetStringEnv(ENV(MIOPEN_DEVICE_ARCH)); if(!arch.empty()) return arch; return handle->GetDeviceNameImpl(); diff --git a/src/tmp_dir.cpp b/src/tmp_dir.cpp index c4770b9db7..87e9356fbf 100644 --- a/src/tmp_dir.cpp +++ b/src/tmp_dir.cpp @@ -62,7 +62,7 @@ TmpDir& TmpDir::operator=(TmpDir&& other) noexcept void TmpDir::Execute(std::string exe, std::string args) const { - if(miopen::IsEnabled(MIOPEN_DEBUG_SAVE_TEMP_DIR{})) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_SAVE_TEMP_DIR))) { MIOPEN_LOG_I2(this->path.string()); } @@ -73,7 +73,7 @@ void TmpDir::Execute(std::string exe, std::string args) const TmpDir::~TmpDir() { - if(!miopen::IsEnabled(MIOPEN_DEBUG_SAVE_TEMP_DIR{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_SAVE_TEMP_DIR))) { if(!this->path.empty()) boost::filesystem::remove_all(this->path); diff --git a/test/driver.hpp b/test/driver.hpp index 58128430a4..f29e6efa4d 100644 --- a/test/driver.hpp +++ b/test/driver.hpp @@ -151,7 +151,7 @@ struct test_driver static 
std::string compute_cache_path() { - auto s = miopen::GetStringEnv(MIOPEN_VERIFY_CACHE_PATH{}); + auto s = miopen::GetStringEnv(ENV(MIOPEN_VERIFY_CACHE_PATH)); if(s.empty()) return "~/.cache/miopen/tests"; else diff --git a/test/gpu_conv.hpp b/test/gpu_conv.hpp index 491b0aaecc..cf9dbea15f 100644 --- a/test/gpu_conv.hpp +++ b/test/gpu_conv.hpp @@ -75,7 +75,7 @@ bool gpu_ref_convolution_fwd(const tensor& input, miopen::ConvolutionDescriptor filter) { bool gpu_ref_used = false; - if(!miopen::IsEnabled(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF))) { const AutoPrepareForGpuReference guard; auto&& handle = get_handle(); @@ -111,7 +111,7 @@ bool gpu_ref_convolution_bwd(tensor& input, miopen::ConvolutionDescriptor filter) { bool gpu_ref_used = false; - if(!miopen::IsEnabled(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF))) { const AutoPrepareForGpuReference guard; auto&& handle = get_handle(); @@ -147,7 +147,7 @@ bool gpu_ref_convolution_wrw(const tensor& input, miopen::ConvolutionDescriptor filter) { bool gpu_ref_used = false; - if(!miopen::IsEnabled(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF{})) + if(!miopen::IsEnabled(ENV(MIOPEN_DEBUG_TEST_DISABLE_GPU_REF))) { const AutoPrepareForGpuReference guard; auto&& handle = get_handle(); diff --git a/test/gtest/conv_embed_db.cpp b/test/gtest/conv_embed_db.cpp index ea2e29f6a1..d20a4d2956 100644 --- a/test/gtest/conv_embed_db.cpp +++ b/test/gtest/conv_embed_db.cpp @@ -37,7 +37,7 @@ MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const auto& s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& s_envVar = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); return (s_envVar.compare(float_arg) == 0); } diff --git a/test/gtest/conv_hip_igemm_xdlops.cpp b/test/gtest/conv_hip_igemm_xdlops.cpp index a2e34598b6..26c51ba7e1 100644 --- 
a/test/gtest/conv_hip_igemm_xdlops.cpp +++ b/test/gtest/conv_hip_igemm_xdlops.cpp @@ -39,7 +39,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) static bool IsTestRunWith(const char* float_arg) { assert(float_arg != nullptr); - const auto& s_envVar = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& s_envVar = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); return (!s_envVar.empty() && std::strcmp(s_envVar.c_str(), float_arg) == 0); } @@ -111,8 +111,8 @@ TEST_P(ConfigWithInt8, Int8Test) #else // MIOPEN_BACKEND_HIP, OCL_DISABLED const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(MIOPEN_TEST_COMPOSABLEKERNEL{}) && - miopen::IsEnabled(MIOPEN_TEST_ALL{}) && IsTestRunWith("--int8")) + if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(ENV(MIOPEN_TEST_COMPOSABLEKERNEL)) && + miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && IsTestRunWith("--int8")) { Run2dDriver(miopenInt8); } diff --git a/test/gtest/conv_igemm_dynamic.cpp b/test/gtest/conv_igemm_dynamic.cpp index 03a6ea3257..4decf80c2b 100644 --- a/test/gtest/conv_igemm_dynamic.cpp +++ b/test/gtest/conv_igemm_dynamic.cpp @@ -35,7 +35,7 @@ using TestCase = std::tuple, std::string>; MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_GPU_XNACK_ENABLED) -static bool SkipTest(void) { return miopen::IsEnabled(MIOPEN_TEST_GPU_XNACK_ENABLED{}); } +static bool SkipTest(void) { return miopen::IsEnabled(ENV(MIOPEN_TEST_GPU_XNACK_ENABLED)); } void GetArgs(const TestCase& param, std::vector& tokens) { diff --git a/test/gtest/conv_igemm_mlir.cpp b/test/gtest/conv_igemm_mlir.cpp index 92a0582b67..23ea66efc1 100644 --- a/test/gtest/conv_igemm_mlir.cpp +++ b/test/gtest/conv_igemm_mlir.cpp @@ -40,7 +40,7 @@ using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& tmp = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); if(tmp.empty()) { return ""; @@ -120,7 +120,7 @@ TEST_P(ConfigWithFloat, FloatTest) 
const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + miopen::IsEnabled(ENV(MIOPEN_TEST_MLIR)) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && GetFloatArg() == "--float") { Run2dDriver(miopenFloat); @@ -142,7 +142,7 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + miopen::IsEnabled(ENV(MIOPEN_TEST_MLIR)) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && GetFloatArg() == "--half") { Run2dDriver(miopenHalf); @@ -164,7 +164,7 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx103") || miopen::StartsWith(handle.GetDeviceName(), "gfx906")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + miopen::IsEnabled(ENV(MIOPEN_TEST_MLIR)) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); diff --git a/test/gtest/conv_igemm_mlir_xdlops.cpp b/test/gtest/conv_igemm_mlir_xdlops.cpp index e81f2a88ac..bcc9495204 100644 --- a/test/gtest/conv_igemm_mlir_xdlops.cpp +++ b/test/gtest/conv_igemm_mlir_xdlops.cpp @@ -15,7 +15,7 @@ using TestCase = std::tuple, std::string>; std::string GetFloatArg() { - const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& tmp = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); if(tmp.empty()) { return ""; @@ -93,7 +93,7 @@ TEST_P(ConfigWithHalf, HalfTest) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + 
miopen::IsEnabled(ENV(MIOPEN_TEST_MLIR)) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && GetFloatArg() == "--half") { Run2dDriver(miopenHalf); @@ -115,7 +115,7 @@ TEST_P(ConfigWithInt8, Int8Test) const auto& handle = get_handle(); if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a")) && - miopen::IsEnabled(MIOPEN_TEST_MLIR{}) && miopen::IsEnabled(MIOPEN_TEST_ALL{}) && + miopen::IsEnabled(ENV(MIOPEN_TEST_MLIR)) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && GetFloatArg() == "--int8") { Run2dDriver(miopenInt8); diff --git a/test/gtest/conv_trans.cpp b/test/gtest/conv_trans.cpp index 7be11dffda..1c887fe24c 100644 --- a/test/gtest/conv_trans.cpp +++ b/test/gtest/conv_trans.cpp @@ -97,7 +97,7 @@ bool IsTestSupportedForDevice(const miopen::Handle& handle) TEST_P(ConfigWithFloat, FloatTest) { const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(MIOPEN_TEST_ALL{})) + if(IsTestSupportedForDevice(handle) && miopen::IsEnabled(ENV(MIOPEN_TEST_ALL))) { Run2dDriver(miopenFloat); } diff --git a/test/gtest/layernorm.cpp b/test/gtest/layernorm.cpp index e66863e77a..61d5bfb74e 100644 --- a/test/gtest/layernorm.cpp +++ b/test/gtest/layernorm.cpp @@ -31,7 +31,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) std::string GetFloatArg() { - const auto& tmp = miopen::GetStringEnv(MIOPEN_TEST_FLOAT_ARG{}); + const auto& tmp = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); if(tmp.empty()) { return ""; From 88bd707f15b225a0c55def04e360b94fe8d20abb Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 21 Nov 2023 00:39:09 +0000 Subject: [PATCH 45/73] fix --- src/binary_cache.cpp | 4 ++-- src/ocl/convolutionocl.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/binary_cache.cpp b/src/binary_cache.cpp index 8768cbf79b..37ef2c18df 100644 --- a/src/binary_cache.cpp +++ b/src/binary_cache.cpp @@ -44,11 +44,11 @@ #include #include -namespace miopen { - 
MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DISABLE_CACHE) MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_CUSTOM_CACHE_DIR) +namespace miopen { + static boost::filesystem::path ComputeSysCachePath() { const std::string cache_dir = GetSystemDbPath(); diff --git a/src/ocl/convolutionocl.cpp b/src/ocl/convolutionocl.cpp index 0185717712..73e17380ae 100644 --- a/src/ocl/convolutionocl.cpp +++ b/src/ocl/convolutionocl.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -58,7 +59,6 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_CONV_PRECISE_ROCBLAS_TIMING) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONV_IMMED_FALLBACK) -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY) MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_DUMP_TENSOR_PATH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_ENABLE_AI_IMMED_MODE_FALLBACK) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_FORCE_IMMED_MODE_FALLBACK) From dec15326f4a768f2df5329fda3759bfb215995ed Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 20 Nov 2023 18:50:33 -0600 Subject: [PATCH 46/73] env mergefix --- test/gtest/lstm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp index 786f0f6bb7..597d3eace3 100644 --- a/test/gtest/lstm.cpp +++ b/test/gtest/lstm.cpp @@ -114,7 +114,7 @@ int RunLSTMDriver(std::string cmd) TEST_P(LSTMTest, test_lstm_deepbench_rnn) { - if(miopen::IsEnabled(MIOPEN_TEST_DEEPBENCH{})) + if(miopen::IsEnabled(ENV(MIOPEN_TEST_DEEPBENCH))) { // clang-format off RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); @@ -149,7 +149,7 @@ TEST_P(LSTMTest, test_lstm_deepbench_rnn) TEST_P(LSTMTest, test_lstm_extra) { - if(miopen::IsEnabled(MIOPEN_TEST_ALL{})) + if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL))) { // clang-format off RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 
--in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx"); From eb46c35b62224e5281aab914509c677aa04408a6 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 21 Nov 2023 03:52:25 +0000 Subject: [PATCH 47/73] fix --- src/solver/conv_direct_naive_conv.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/solver/conv_direct_naive_conv.cpp b/src/solver/conv_direct_naive_conv.cpp index 467184b35b..fb206ceb3e 100644 --- a/src/solver/conv_direct_naive_conv.cpp +++ b/src/solver/conv_direct_naive_conv.cpp @@ -127,7 +127,7 @@ std::string ConvDirectNaiveConvKernelName(const ProblemDescription& problem) /// \todo remove packed reference convolution kernels --amberhassaan #ifndef NDEBUG // enable in debug mode only - if(miopen::IsEnabled(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS())) + if(miopen::IsEnabled(ENV(MIOPEN_DEBUG_CONV_DIRECT_NAIVE_USE_PACKED_KERNELS))) { kernel_name << "naive_conv_packed_"; } From 263917e43f51bac19dc5ba34653f7a26d1b60b44 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Tue, 21 Nov 2023 19:10:28 -0600 Subject: [PATCH 48/73] mergefix --- test/gtest/layernorm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/layernorm.cpp b/test/gtest/layernorm.cpp index 61d5bfb74e..8827aec465 100644 --- a/test/gtest/layernorm.cpp +++ b/test/gtest/layernorm.cpp @@ -49,7 +49,7 @@ TEST_P(LayerNormTestFloat, LayerNormTestFw) if((miopen::StartsWith(handle.GetDeviceName(), "gfx908") || miopen::StartsWith(handle.GetDeviceName(), "gfx90a") || miopen::StartsWith(handle.GetDeviceName(), "gfx94")) && - miopen::IsEnvvarValueEnabled("MIOPEN_TEST_ALL") && (GetFloatArg() == "--float")) + miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--float")) { RunTest(); Verify(); From 2be776ca44031e2dd45555a2a919c0723cd2bee5 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 22 Nov 2023 16:41:09 -0600 Subject: [PATCH 49/73] add Unset method for env, update assert string --- src/include/miopen/env.hpp | 36 
++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index c8e28e1b9d..15be1f3dc6 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -102,6 +102,8 @@ struct EnvVar bool IsUnset() const { return is_unset; } + void Unset() const { is_unset = true; } + void UpdateValue(const T& val) { is_unset = false; @@ -128,20 +130,20 @@ struct EnvVar // static inside function hides the variable and provides // thread-safety/locking -// declare in global namespace -#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ - namespace miopen::env { \ - struct name \ - { \ - static_assert(std::is_same_v, \ - "must be in miopen::env and must be unique"); \ - using value_type = type; \ - static miopen::internal::EnvVar& Ref() \ - { \ - static miopen::internal::EnvVar var{#name, default_val}; \ - return var; \ - } \ - }; \ +// Used in global namespace +#define MIOPEN_DECLARE_ENV_VAR(name, type, default_val) \ + namespace miopen::env { \ + struct name \ + { \ + static_assert(std::is_same_v, \ + "MIOPEN_DECLARE_ENV* must be used in the global namespace"); \ + using value_type = type; \ + static miopen::internal::EnvVar& Ref() \ + { \ + static miopen::internal::EnvVar var{#name, default_val}; \ + return var; \ + } \ + }; \ } #define MIOPEN_DECLARE_ENV_VAR_BOOL(name) MIOPEN_DECLARE_ENV_VAR(name, bool, false) @@ -191,6 +193,12 @@ inline bool IsUnset(EnvVar) return EnvVar::Ref().IsUnset(); } +template +void Unset(EnvVar) +{ + EnvVar::Ref().Unset(); +} + /// updates the cached value of an environment variable template void UpdateEnvVar(EnvVar, const ValueType& val) From b597b813a399fdb9a042a2e825d909c1337eb643 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 22 Nov 2023 16:57:08 -0600 Subject: [PATCH 50/73] fix --- src/include/miopen/env.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/miopen/env.hpp 
b/src/include/miopen/env.hpp index 15be1f3dc6..188189f5c0 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -102,7 +102,7 @@ struct EnvVar bool IsUnset() const { return is_unset; } - void Unset() const { is_unset = true; } + void Unset() { is_unset = true; } void UpdateValue(const T& val) { From ecb6fbedc643974957b6e61c2d262731dc988bf9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 4 Dec 2023 17:37:35 +0000 Subject: [PATCH 51/73] add restructuring for lstm gtest --- test/gtest/deepbench_lstm.cpp | 154 ++++++++++++++++++++++++++++++++++ test/lstm.hpp | 74 ++++++++++++++++ 2 files changed, 228 insertions(+) create mode 100644 test/gtest/deepbench_lstm.cpp create mode 100644 test/lstm.hpp diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp new file mode 100644 index 0000000000..639dc689c9 --- /dev/null +++ b/test/gtest/deepbench_lstm.cpp @@ -0,0 +1,154 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +//#include + +#include "lstm.hpp" +#include "get_handle.hpp" +#include +#include +#include + +//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) +//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) +//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_LSTM_ENABLED) + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) + +static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_DEEPBENCH)); } + +void GetArgs(const std::string& param, std::vector& tokens) +{ + std::stringstream ss(param); + std::istream_iterator begin(ss); + std::istream_iterator end; + while(begin != end) + tokens.push_back(*begin++); +} + +class ConfigWithFloat : public testing::TestWithParam> +{ +}; + +void Run2dDriver(miopenDataType_t prec) +{ + + std::vector params; + switch(prec) + { + case miopenFloat: params = ConfigWithFloat::GetParam(); break; + case miopenHalf: + case miopenFloat8: + case miopenBFloat8: + case miopenInt8: + case miopenBFloat16: + case miopenInt32: + case miopenDouble: + FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " + "data type not supported by " + "deepbench_lstm test"; + + default: params = ConfigWithFloat::GetParam(); + } + + for(const auto& test_value : params) + { + std::vector tokens; + GetArgs(test_value, tokens); + std::vector ptrs; + + std::transform(tokens.begin(), tokens.end(), std::back_inserter(ptrs), [](const auto& str) { + return str.data(); + }); + + testing::internal::CaptureStderr(); + test_drive(ptrs.size(), ptrs.data()); + auto capture = testing::internal::GetCapturedStderr(); + std::cout << capture; + } +}; + +bool IsTestSupportedForDevice(const miopen::Handle& 
handle) +{ + std::string devName = handle.GetDeviceName(); + if(devName == "gfx900" || devName == "gfx906" || devName == "gfx908" || devName == "gfx90a" || + miopen::StartsWith(devName, "gfx94") || miopen::StartsWith(devName, "gfx103") || + miopen::StartsWith(devName, "gfx110")) + return true; + else + return false; +} + +TEST_P(ConfigWithFloat, FloatTest) +{ + const auto& handle = get_handle(); + if(IsTestSupportedForDevice(handle) && !SkipTest()) + { + Run2dDriver(miopenFloat); + } + else + { + GTEST_SKIP(); + } +}; + +std::vector GetTestCases(void) +{ + std::string flags = " --verbose"; + std::string commonFlags = + " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; + + const std::vector test_cases = { + // clang-format off + {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 
4096" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} + // clang-format on + }; + + return test_cases; +} + +INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); + diff --git a/test/lstm.hpp b/test/lstm.hpp new file mode 100644 index 0000000000..154fdf47a5 --- /dev/null +++ b/test/lstm.hpp @@ -0,0 +1,74 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include "lstm_common.hpp" + +template +struct lstm_driver : lstm_basic_driver +{ + lstm_driver() : lstm_basic_driver() + { + std::vector modes(2, 0); + modes[1] = 1; + std::vector defaultBS(1); + + this->add(this->batchSize, "batch-size", this->generate_data(get_lstm_batchSize(), {17})); + this->add(this->seqLength, "seq-len", this->generate_data(get_lstm_seq_len(), {2})); + this->add(this->inVecLen, "vector-len", this->generate_data(get_lstm_vector_len())); + this->add(this->hiddenSize, "hidden-size", this->generate_data(get_lstm_hidden_size())); + this->add(this->numLayers, "num-layers", this->generate_data(get_lstm_num_layers())); + this->add(this->nohx, "no-hx", this->flag()); + this->add(this->nodhy, "no-dhy", this->flag()); + this->add(this->nocx, "no-cx", this->flag()); + this->add(this->nodcy, "no-dcy", this->flag()); + this->add(this->nohy, "no-hy", this->flag()); + this->add(this->nodhx, "no-dhx", this->flag()); + this->add(this->nocy, "no-cy", this->flag()); + this->add(this->nodcx, "no-dcx", this->flag()); + this->add(this->flatBatchFill, "flat-batch-fill", this->flag()); + this->add(this->useDropout, "use-dropout", this->generate_data({0})); + this->add(this->usePadding, "use-padding", this->generate_data({false, true})); + +#if(MIO_LSTM_TEST_DEBUG == 3) + this->biasMode = 0; + this->dirMode = 0; + this->inputMode = 0; + this->algoMode = 0; +#else + 
this->add(this->inputMode, "in-mode", this->generate_data(modes)); + this->add(this->biasMode, "bias-mode", this->generate_data(modes)); + this->add(this->dirMode, "dir-mode", this->generate_data(modes)); + this->add(this->algoMode, "algo-mode", this->generate_data(modes)); +#endif + this->add( + this->batchSeq, + "batch-seq", + this->lazy_generate_data( + [=] { return generate_batchSeq(this->batchSize, this->seqLength); }, defaultBS)); + } +}; From 6b5d1f9e35f64a23e4f996046182f9a8eeac8cb9 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 4 Dec 2023 19:29:37 +0000 Subject: [PATCH 52/73] convert lstm gtest format --- test/gtest/deepbench_lstm.cpp | 7 +- test/gtest/lstm.cpp | 205 ---------------------------------- test/gtest/lstm_extra.cpp | 158 ++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 211 deletions(-) delete mode 100644 test/gtest/lstm.cpp create mode 100644 test/gtest/lstm_extra.cpp diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index 639dc689c9..1ce4cecea7 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -23,7 +23,6 @@ * SOFTWARE. 
* *******************************************************************************/ -//#include #include "lstm.hpp" #include "get_handle.hpp" @@ -31,10 +30,6 @@ #include #include -//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) -//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) -//MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_LSTM_ENABLED) - MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_DEEPBENCH)); } @@ -116,7 +111,7 @@ TEST_P(ConfigWithFloat, FloatTest) std::vector GetTestCases(void) { - std::string flags = " --verbose"; + std::string flags = "test_lstm --verbose"; std::string commonFlags = " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; diff --git a/test/gtest/lstm.cpp b/test/gtest/lstm.cpp deleted file mode 100644 index 597d3eace3..0000000000 --- a/test/gtest/lstm.cpp +++ /dev/null @@ -1,205 +0,0 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (c) 2023 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ - -#include "lstm_common.hpp" -#include "get_handle.hpp" -#include -#include -#include - -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_LSTM_ENABLED) - -using TestCase = std::string; -struct LSTMTest : public testing::TestWithParam> -{ -}; - -template -struct lstm_driver : lstm_basic_driver -{ - lstm_driver() : lstm_basic_driver() - { - std::vector modes(2, 0); - modes[1] = 1; - std::vector defaultBS(1); - - this->add(this->batchSize, "batch-size", this->generate_data(get_lstm_batchSize(), {17})); - this->add(this->seqLength, "seq-len", this->generate_data(get_lstm_seq_len(), {2})); - this->add(this->inVecLen, "vector-len", this->generate_data(get_lstm_vector_len())); - this->add(this->hiddenSize, "hidden-size", this->generate_data(get_lstm_hidden_size())); - this->add(this->numLayers, "num-layers", this->generate_data(get_lstm_num_layers())); - this->add(this->nohx, "no-hx", this->flag()); - this->add(this->nodhy, "no-dhy", this->flag()); - this->add(this->nocx, "no-cx", this->flag()); - this->add(this->nodcy, "no-dcy", this->flag()); - this->add(this->nohy, "no-hy", this->flag()); - this->add(this->nodhx, "no-dhx", this->flag()); - this->add(this->nocy, "no-cy", this->flag()); - this->add(this->nodcx, "no-dcx", this->flag()); - this->add(this->flatBatchFill, "flat-batch-fill", this->flag()); - this->add(this->useDropout, "use-dropout", this->generate_data({0})); - this->add(this->usePadding, "use-padding", this->generate_data({false, true})); - -#if(MIO_LSTM_TEST_DEBUG == 3) - this->biasMode = 0; - this->dirMode = 0; 
- this->inputMode = 0; - this->algoMode = 0; -#else - this->add(this->inputMode, "in-mode", this->generate_data(modes)); - this->add(this->biasMode, "bias-mode", this->generate_data(modes)); - this->add(this->dirMode, "dir-mode", this->generate_data(modes)); - this->add(this->algoMode, "algo-mode", this->generate_data(modes)); -#endif - this->add( - this->batchSeq, - "batch-seq", - this->lazy_generate_data( - [=] { return generate_batchSeq(this->batchSize, this->seqLength); }, defaultBS)); - } -}; - -int RunLSTMDriver(std::string cmd) -{ - std::vector ptrs; - boost::split(ptrs, cmd, boost::is_any_of(" \t"), boost::token_compress_on); - ptrs.insert(ptrs.begin(), "test_lstm"); - std::vector char_ptrs; - std::transform(ptrs.begin(), ptrs.end(), std::back_inserter(char_ptrs), [](const auto& str) { - return str.c_str(); - }); - -#if(MIO_RNN_TIME_EVERYTHING > 0) - auto t_start = std::chrono::high_resolution_clock::now(); -#endif - test_drive(char_ptrs.size(), char_ptrs.data()); - -#if(MIO_RNN_TIME_EVERYTHING > 0) - auto t_end = std::chrono::high_resolution_clock::now(); - - std::cout << "Wall clock: RNN test pass time: " - << std::chrono::duration(t_end - t_start).count() << " seconds." 
<< std::endl; -#endif - - auto capture = testing::internal::GetCapturedStderr(); - std::cout << capture; - - exit(0); // NOLINT (concurrency-mt-unsafe) -} - -TEST_P(LSTMTest, test_lstm_deepbench_rnn) -{ - if(miopen::IsEnabled(ENV(MIOPEN_TEST_DEEPBENCH))) - { - // clang-format off - RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048 
--num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 
--in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - RunLSTMDriver("--verbose --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256 --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"); - // clang-format on - } - else - { - GTEST_SKIP(); - } -} - -TEST_P(LSTMTest, test_lstm_extra) -{ - if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL))) - { - // clang-format off - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-cx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cx --no-dcy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 
0 -dir-mode 1 --no-dhy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-cx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cx --no-dcy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dhx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-dhx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hy --no-cy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-dcx"); - 
RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-cy --no-dcx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dhx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-dhx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hy --no-cy"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-dcx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-cy --no-dcx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); - RunLSTMDriver("--verbose --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0 -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"); - // clang-format on - } - else - { - GTEST_SKIP(); - } -} - -std::vector GetTestCases() -{ - 
std::vector test_cases; - return test_cases; -} - -INSTANTIATE_TEST_SUITE_P(LSTM, LSTMTest, testing::Values(GetTestCases())); - -int main(int argc, char** argv) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/test/gtest/lstm_extra.cpp b/test/gtest/lstm_extra.cpp new file mode 100644 index 0000000000..b8ff96e739 --- /dev/null +++ b/test/gtest/lstm_extra.cpp @@ -0,0 +1,158 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include "lstm.hpp" +#include "get_handle.hpp" +#include +#include +#include + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) + +static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_DEEPBENCH)); } + +void GetArgs(const std::string& param, std::vector& tokens) +{ + std::stringstream ss(param); + std::istream_iterator begin(ss); + std::istream_iterator end; + while(begin != end) + tokens.push_back(*begin++); +} + +class ConfigWithFloat : public testing::TestWithParam> +{ +}; + +void Run2dDriver(miopenDataType_t prec) +{ + + std::vector params; + switch(prec) + { + case miopenFloat: params = ConfigWithFloat::GetParam(); break; + case miopenHalf: + case miopenFloat8: + case miopenBFloat8: + case miopenInt8: + case miopenBFloat16: + case miopenInt32: + case miopenDouble: + FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " + "data type not supported by " + "lstm_extra test"; + + default: params = ConfigWithFloat::GetParam(); + } + + for(const auto& test_value : params) + { + std::vector tokens; + GetArgs(test_value, tokens); + std::vector ptrs; + + std::transform(tokens.begin(), tokens.end(), std::back_inserter(ptrs), [](const auto& str) { + return str.data(); + }); + + testing::internal::CaptureStderr(); + test_drive(ptrs.size(), ptrs.data()); + auto capture = testing::internal::GetCapturedStderr(); + std::cout << capture; + } +}; + +bool IsTestSupportedForDevice(const miopen::Handle& handle) +{ + std::string devName = handle.GetDeviceName(); + if(devName == "gfx900" || devName == "gfx906" || devName == "gfx908" || devName == "gfx90a" || + miopen::StartsWith(devName, "gfx94") || miopen::StartsWith(devName, "gfx103") || + miopen::StartsWith(devName, "gfx110")) + return true; + else + return false; +} + +TEST_P(ConfigWithFloat, FloatTest) +{ + const auto& handle = get_handle(); + if(IsTestSupportedForDevice(handle) && !SkipTest()) + 
{ + Run2dDriver(miopenFloat); + } + else + { + GTEST_SKIP(); + } +}; + +std::vector GetTestCases(void) +{ + std::string flags = "test_lstm --verbose "; + std::string commonFlags = + "--batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0"; + + const std::vector test_cases = { + // clang-format off + {flags + commonFlags + " -dir-mode 0 --no-hx"}, + {flags + commonFlags + " -dir-mode 0 --no-dhy"}, + {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy"}, + {flags + commonFlags + " -dir-mode 0 --no-cx"}, + {flags + commonFlags + " -dir-mode 0 --no-hx --no-cx"}, + {flags + commonFlags + " -dir-mode 0 --no-dcy"}, + {flags + commonFlags + " -dir-mode 0 --no-cx --no-dcy"}, + {flags + commonFlags + " -dir-mode 1 --no-hx"}, + {flags + commonFlags + " -dir-mode 1 --no-dhy"}, + {flags + commonFlags + " -dir-mode 1 --no-hx --no-dhy"}, + {flags + commonFlags + " -dir-mode 1 --no-cx"}, + {flags + commonFlags + " -dir-mode 1 --no-hx --no-cx"}, + {flags + commonFlags + " -dir-mode 1 --no-dcy"}, + {flags + commonFlags + " -dir-mode 1 --no-cx --no-dcy"}, + {flags + commonFlags + " -dir-mode 0 --no-hy"}, + {flags + commonFlags + " -dir-mode 0 --no-dhx"}, + {flags + commonFlags + " -dir-mode 0 --no-hy --no-dhx"}, + {flags + commonFlags + " -dir-mode 0 --no-cy"}, + {flags + commonFlags + " -dir-mode 0 --no-hy --no-cy"}, + {flags + commonFlags + " -dir-mode 0 --no-dcx"}, + {flags + commonFlags + " -dir-mode 0 --no-cy --no-dcx"}, + {flags + commonFlags + " -dir-mode 1 --no-hy"}, + {flags + commonFlags + " -dir-mode 1 --no-dhx"}, + {flags + commonFlags + " -dir-mode 1 --no-hy --no-dhx"}, + {flags + commonFlags + " -dir-mode 1 --no-cy"}, + {flags + commonFlags + " -dir-mode 1 --no-hy --no-cy"}, + {flags + commonFlags + " -dir-mode 1 --no-dcx"}, + {flags + commonFlags + " -dir-mode 1 --no-cy --no-dcx"}, + {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"}, + {flags 
+ commonFlags + " -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"} + // clang-format on + }; + + return test_cases; +} + +INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); + + From ab8f2c8756027cffd1641341a9b326cce11d4af5 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 4 Dec 2023 19:41:59 +0000 Subject: [PATCH 53/73] update lstm_extra skip condition --- test/gtest/lstm_extra.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/gtest/lstm_extra.cpp b/test/gtest/lstm_extra.cpp index b8ff96e739..8a0d273019 100644 --- a/test/gtest/lstm_extra.cpp +++ b/test/gtest/lstm_extra.cpp @@ -30,9 +30,9 @@ #include #include -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) -static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_DEEPBENCH)); } +static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_ALL)); } void GetArgs(const std::string& param, std::vector& tokens) { From 91e7bdaa8fdb476a4a748021af14150084f87fa6 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 4 Dec 2023 20:21:18 +0000 Subject: [PATCH 54/73] clang format --- test/gtest/deepbench_lstm.cpp | 5 ++--- test/gtest/lstm_extra.cpp | 12 +++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index 1ce4cecea7..b58477e782 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -116,7 +116,7 @@ std::vector GetTestCases(void) " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; const std::vector test_cases = { - // clang-format off + // clang-format off {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, {flags + " --batch-size 64 --seq-len 25 --vector-len 512 
--hidden-size 512" + commonFlags}, @@ -139,11 +139,10 @@ std::vector GetTestCases(void) {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} - // clang-format on + // clang-format on }; return test_cases; } INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); - diff --git a/test/gtest/lstm_extra.cpp b/test/gtest/lstm_extra.cpp index 8a0d273019..d8d6028010 100644 --- a/test/gtest/lstm_extra.cpp +++ b/test/gtest/lstm_extra.cpp @@ -111,12 +111,12 @@ TEST_P(ConfigWithFloat, FloatTest) std::vector GetTestCases(void) { - std::string flags = "test_lstm --verbose "; - std::string commonFlags = - "--batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 --hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0"; + std::string flags = "test_lstm --verbose "; + std::string commonFlags = "--batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 " + "--hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0"; const std::vector test_cases = { - // clang-format off + // clang-format off {flags + commonFlags + " -dir-mode 0 --no-hx"}, {flags + commonFlags + " -dir-mode 0 --no-dhy"}, {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy"}, @@ -147,12 +147,10 @@ std::vector GetTestCases(void) {flags + commonFlags + " -dir-mode 1 --no-cy --no-dcx"}, {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"}, {flags + commonFlags + " -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"} - // clang-format on + // clang-format on }; return test_cases; } INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); - - From f1b6df2321efb557136a7cf6f084428e3166a5f7 Mon Sep 17 00:00:00 2001 From: Christopher Erb 
Date: Mon, 11 Dec 2023 09:46:33 -0600 Subject: [PATCH 55/73] rm unused header --- test/gtest/deepbench_lstm.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index b58477e782..a0208b0864 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -28,7 +28,6 @@ #include "get_handle.hpp" #include #include -#include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) From e6f57fd9f02902d38436119a6992be9493d4d799 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 11 Dec 2023 23:31:21 +0000 Subject: [PATCH 56/73] rename lstm tests, add half test, add test flags --- test/gtest/deepbench_lstm.cpp | 111 +++++++++++++++++++------------- test/gtest/lstm_extra.cpp | 116 +++++++++++++++++++--------------- 2 files changed, 132 insertions(+), 95 deletions(-) diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index a0208b0864..83d749f520 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -30,8 +30,10 @@ #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_FLOAT) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_HALF) -static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_DEEPBENCH)); } +static bool SkipTest(void) { return !miopen::IsEnabled(ENV(MIOPEN_TEST_DEEPBENCH)); } void GetArgs(const std::string& param, std::vector& tokens) { @@ -46,6 +48,10 @@ class ConfigWithFloat : public testing::TestWithParam> { }; +class ConfigWithHalf : public testing::TestWithParam> +{ +}; + void Run2dDriver(miopenDataType_t prec) { @@ -53,15 +59,16 @@ void Run2dDriver(miopenDataType_t prec) switch(prec) { case miopenFloat: params = ConfigWithFloat::GetParam(); break; - case miopenHalf: + case miopenHalf: params = ConfigWithHalf::GetParam(); break; case miopenFloat8: case miopenBFloat8: case miopenInt8: case miopenBFloat16: case miopenInt32: case miopenDouble: - FAIL() << "miopenHalf, miopenInt8, 
miopenBFloat16, miopenInt32, miopenDouble " - "data type not supported by " + FAIL() << "miopenInt8, miopenBFloat16, miopenInt32, " + "miopenDouble, miopenFloat8, miopenBFloat8 " + "data types not supported by " "deepbench_lstm test"; default: params = ConfigWithFloat::GetParam(); @@ -84,21 +91,12 @@ void Run2dDriver(miopenDataType_t prec) } }; -bool IsTestSupportedForDevice(const miopen::Handle& handle) -{ - std::string devName = handle.GetDeviceName(); - if(devName == "gfx900" || devName == "gfx906" || devName == "gfx908" || devName == "gfx90a" || - miopen::StartsWith(devName, "gfx94") || miopen::StartsWith(devName, "gfx103") || - miopen::StartsWith(devName, "gfx110")) - return true; - else - return false; -} +bool IsTestSupportedForDevice(const miopen::Handle& handle) { return true; } TEST_P(ConfigWithFloat, FloatTest) { const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && !SkipTest()) + if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_FLOAT))) { Run2dDriver(miopenFloat); } @@ -108,40 +106,67 @@ TEST_P(ConfigWithFloat, FloatTest) } }; -std::vector GetTestCases(void) +TEST_P(ConfigWithHalf, HalfTest) { - std::string flags = "test_lstm --verbose"; + const auto& handle = get_handle(); + if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_HALF))) + { + Run2dDriver(miopenHalf); + } + else + { + GTEST_SKIP(); + } +}; + +std::vector GetTestCases(std::string precision) +{ + std::string flags = "test_lstm --verbose " + precision; std::string commonFlags = " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; const std::vector test_cases = { - // clang-format off - {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - 
{flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} - // clang-format on - }; + {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 512 
--hidden-size 512" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + + commonFlags}, + {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + + commonFlags}, + {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + + commonFlags}, + {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + + commonFlags}}; return 
test_cases; } -INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); +INSTANTIATE_TEST_SUITE_P(DeepbenchLstm, ConfigWithFloat, testing::Values(GetTestCases("--float"))); + +INSTANTIATE_TEST_SUITE_P(DeepbenchLstm, ConfigWithHalf, testing::Values(GetTestCases("--half"))); diff --git a/test/gtest/lstm_extra.cpp b/test/gtest/lstm_extra.cpp index d8d6028010..0e3ca158c3 100644 --- a/test/gtest/lstm_extra.cpp +++ b/test/gtest/lstm_extra.cpp @@ -31,8 +31,10 @@ #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_FLOAT) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_HALF) -static bool SkipTest(void) { return miopen::IsDisabled(ENV(MIOPEN_TEST_ALL)); } +static bool SkipTest(void) { return !miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)); } void GetArgs(const std::string& param, std::vector& tokens) { @@ -47,6 +49,10 @@ class ConfigWithFloat : public testing::TestWithParam> { }; +class ConfigWithHalf : public testing::TestWithParam> +{ +}; + void Run2dDriver(miopenDataType_t prec) { @@ -54,15 +60,16 @@ void Run2dDriver(miopenDataType_t prec) switch(prec) { case miopenFloat: params = ConfigWithFloat::GetParam(); break; - case miopenHalf: + case miopenHalf: params = ConfigWithHalf::GetParam(); break; case miopenFloat8: case miopenBFloat8: case miopenInt8: case miopenBFloat16: case miopenInt32: case miopenDouble: - FAIL() << "miopenHalf, miopenInt8, miopenBFloat16, miopenInt32, miopenDouble " - "data type not supported by " + FAIL() << "miopenInt8, miopenBFloat16, miopenInt32, " + "miopenDouble, miopenFloat8, miopenBFloat8 " + "data types not supported by " "lstm_extra test"; default: params = ConfigWithFloat::GetParam(); @@ -85,21 +92,12 @@ void Run2dDriver(miopenDataType_t prec) } }; -bool IsTestSupportedForDevice(const miopen::Handle& handle) -{ - std::string devName = handle.GetDeviceName(); - if(devName == "gfx900" || devName == "gfx906" || devName == "gfx908" || devName == "gfx90a" || - 
miopen::StartsWith(devName, "gfx94") || miopen::StartsWith(devName, "gfx103") || - miopen::StartsWith(devName, "gfx110")) - return true; - else - return false; -} +bool IsTestSupportedForDevice(const miopen::Handle& handle) { return true; } TEST_P(ConfigWithFloat, FloatTest) { const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && !SkipTest()) + if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_FLOAT))) { Run2dDriver(miopenFloat); } @@ -109,48 +107,62 @@ TEST_P(ConfigWithFloat, FloatTest) } }; -std::vector GetTestCases(void) +TEST_P(ConfigWithHalf, HalfTest) { - std::string flags = "test_lstm --verbose "; - std::string commonFlags = "--batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 " + const auto& handle = get_handle(); + if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_HALF))) + { + Run2dDriver(miopenHalf); + } + else + { + GTEST_SKIP(); + } +}; + +std::vector GetTestCases(std::string precision) +{ + std::string flags = "test_lstm --verbose " + precision; + std::string commonFlags = " --batch-size 32 --seq-len 3 --batch-seq 32 32 32 --vector-len 128 " "--hidden-size 128 --num-layers 1 --in-mode 0 --bias-mode 0"; const std::vector test_cases = { - // clang-format off - {flags + commonFlags + " -dir-mode 0 --no-hx"}, - {flags + commonFlags + " -dir-mode 0 --no-dhy"}, - {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy"}, - {flags + commonFlags + " -dir-mode 0 --no-cx"}, - {flags + commonFlags + " -dir-mode 0 --no-hx --no-cx"}, - {flags + commonFlags + " -dir-mode 0 --no-dcy"}, - {flags + commonFlags + " -dir-mode 0 --no-cx --no-dcy"}, - {flags + commonFlags + " -dir-mode 1 --no-hx"}, - {flags + commonFlags + " -dir-mode 1 --no-dhy"}, - {flags + commonFlags + " -dir-mode 1 --no-hx --no-dhy"}, - {flags + commonFlags + " -dir-mode 1 --no-cx"}, - {flags + commonFlags + " -dir-mode 1 --no-hx --no-cx"}, - {flags + commonFlags + " -dir-mode 1 --no-dcy"}, - 
{flags + commonFlags + " -dir-mode 1 --no-cx --no-dcy"}, - {flags + commonFlags + " -dir-mode 0 --no-hy"}, - {flags + commonFlags + " -dir-mode 0 --no-dhx"}, - {flags + commonFlags + " -dir-mode 0 --no-hy --no-dhx"}, - {flags + commonFlags + " -dir-mode 0 --no-cy"}, - {flags + commonFlags + " -dir-mode 0 --no-hy --no-cy"}, - {flags + commonFlags + " -dir-mode 0 --no-dcx"}, - {flags + commonFlags + " -dir-mode 0 --no-cy --no-dcx"}, - {flags + commonFlags + " -dir-mode 1 --no-hy"}, - {flags + commonFlags + " -dir-mode 1 --no-dhx"}, - {flags + commonFlags + " -dir-mode 1 --no-hy --no-dhx"}, - {flags + commonFlags + " -dir-mode 1 --no-cy"}, - {flags + commonFlags + " -dir-mode 1 --no-hy --no-cy"}, - {flags + commonFlags + " -dir-mode 1 --no-dcx"}, - {flags + commonFlags + " -dir-mode 1 --no-cy --no-dcx"}, - {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"}, - {flags + commonFlags + " -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"} - // clang-format on - }; + {flags + commonFlags + " -dir-mode 0 --no-hx"}, + {flags + commonFlags + " -dir-mode 0 --no-dhy"}, + {flags + commonFlags + " -dir-mode 0 --no-hx --no-dhy"}, + {flags + commonFlags + " -dir-mode 0 --no-cx"}, + {flags + commonFlags + " -dir-mode 0 --no-hx --no-cx"}, + {flags + commonFlags + " -dir-mode 0 --no-dcy"}, + {flags + commonFlags + " -dir-mode 0 --no-cx --no-dcy"}, + {flags + commonFlags + " -dir-mode 1 --no-hx"}, + {flags + commonFlags + " -dir-mode 1 --no-dhy"}, + {flags + commonFlags + " -dir-mode 1 --no-hx --no-dhy"}, + {flags + commonFlags + " -dir-mode 1 --no-cx"}, + {flags + commonFlags + " -dir-mode 1 --no-hx --no-cx"}, + {flags + commonFlags + " -dir-mode 1 --no-dcy"}, + {flags + commonFlags + " -dir-mode 1 --no-cx --no-dcy"}, + {flags + commonFlags + " -dir-mode 0 --no-hy"}, + {flags + commonFlags + " -dir-mode 0 --no-dhx"}, + {flags + commonFlags + " -dir-mode 0 --no-hy --no-dhx"}, + {flags + commonFlags + 
" -dir-mode 0 --no-cy"}, + {flags + commonFlags + " -dir-mode 0 --no-hy --no-cy"}, + {flags + commonFlags + " -dir-mode 0 --no-dcx"}, + {flags + commonFlags + " -dir-mode 0 --no-cy --no-dcx"}, + {flags + commonFlags + " -dir-mode 1 --no-hy"}, + {flags + commonFlags + " -dir-mode 1 --no-dhx"}, + {flags + commonFlags + " -dir-mode 1 --no-hy --no-dhx"}, + {flags + commonFlags + " -dir-mode 1 --no-cy"}, + {flags + commonFlags + " -dir-mode 1 --no-hy --no-cy"}, + {flags + commonFlags + " -dir-mode 1 --no-dcx"}, + {flags + commonFlags + " -dir-mode 1 --no-cy --no-dcx"}, + {flags + commonFlags + + " -dir-mode 0 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"}, + {flags + commonFlags + + " -dir-mode 1 --no-hx --no-dhy --no-cx --no-dcy --no-hy --no-dhx --no-cy --no-dcx"}}; return test_cases; } -INSTANTIATE_TEST_SUITE_P(ConvTrans, ConfigWithFloat, testing::Values(GetTestCases())); +INSTANTIATE_TEST_SUITE_P(LstmExtra, ConfigWithFloat, testing::Values(GetTestCases("--float"))); + +INSTANTIATE_TEST_SUITE_P(LstmExtra, ConfigWithHalf, testing::Values(GetTestCases("--half"))); From 5c1552c0a69a78812f023985110c22a49cfd1984 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Mon, 11 Dec 2023 23:37:15 +0000 Subject: [PATCH 57/73] restore lstm.cpp main --- test/lstm.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 test/lstm.cpp diff --git a/test/lstm.cpp b/test/lstm.cpp new file mode 100644 index 0000000000..b0af61102e --- /dev/null +++ b/test/lstm.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include "lstm.hpp" + +int main(int argc, const char* argv[]) +{ +#if(MIO_RNN_TIME_EVERYTHING > 0) + auto t_start = std::chrono::high_resolution_clock::now(); +#endif + test_drive(argc, argv); + +#if(MIO_RNN_TIME_EVERYTHING > 0) + auto t_end = std::chrono::high_resolution_clock::now(); + + std::cout << "Wall clock: RNN test pass time: " + << std::chrono::duration(t_end - t_start).count() << " seconds." 
<< std::endl; +#endif + exit(0); // NOLINT (concurrency-mt-unsafe) +} From 86c90e1590da84073318a24289c004262310f386 Mon Sep 17 00:00:00 2001 From: Christopher Erb Date: Wed, 13 Dec 2023 16:37:11 -0600 Subject: [PATCH 58/73] cleanup, pass ENV to gtest --- test/gtest/CMakeLists.txt | 2 +- test/gtest/deepbench_lstm.cpp | 61 ++++++++++++++--------------------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/test/gtest/CMakeLists.txt b/test/gtest/CMakeLists.txt index c5d51e2652..94556940bd 100644 --- a/test/gtest/CMakeLists.txt +++ b/test/gtest/CMakeLists.txt @@ -33,7 +33,7 @@ function(add_gtest TEST_NAME) target_link_libraries(test_${TEST_NAME} $) endif() # Enable CMake to discover the test binary - gtest_discover_tests(test_${TEST_NAME} PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL}") + gtest_discover_tests(test_${TEST_NAME} PROPERTIES ENVIRONMENT "MIOPEN_USER_DB_PATH=${CMAKE_CURRENT_BINARY_DIR};MIOPEN_TEST_FLOAT_ARG=${MIOPEN_TEST_FLOAT_ARG};MIOPEN_TEST_ALL=${MIOPEN_TEST_ALL};MIOPEN_TEST_MLIR=${MIOPEN_TEST_MLIR};MIOPEN_TEST_COMPOSABLEKERNEL=${MIOPEN_TEST_COMPOSABLEKERNEL};MIOPEN_TEST_DEEPBENCH=${MIOPEN_TEST_DEEPBENCH}") endif() endfunction() diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index 83d749f520..f873e5a4e7 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -126,43 +126,30 @@ std::vector GetTestCases(std::string precision) " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; const std::vector test_cases = { - {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 512 
--hidden-size 512" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + - commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + - commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + - commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + - commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + - commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + - commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + - commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + - commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + - commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + - commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + - commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + - commonFlags}, - {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + - commonFlags}, - {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + - commonFlags}, - {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + - commonFlags}}; + // clang-format off + {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + 
commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + {flags + " --batch-size 64 --seq-len 150 --vector-len 256 
--hidden-size 256" + commonFlags} + // clang-format on return test_cases; } From 6147d1e89e1bc153a57e3dc1092b423a97b91316 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 14 Dec 2023 09:13:21 +0000 Subject: [PATCH 59/73] cderb/gtest: fix clang format --- test/gtest/deepbench_lstm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index f873e5a4e7..5e23da3625 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -151,7 +151,7 @@ std::vector GetTestCases(std::string precision) {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} // clang-format on - return test_cases; + return test_cases; } INSTANTIATE_TEST_SUITE_P(DeepbenchLstm, ConfigWithFloat, testing::Values(GetTestCases("--float"))); From 2582815ea0562f78432f427c9cfb0139fcbdbdeb Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Thu, 14 Dec 2023 10:28:54 +0000 Subject: [PATCH 60/73] cderb/gtest : fix compilation error --- test/gtest/deepbench_lstm.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index 5e23da3625..df3acd1c74 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -148,10 +148,10 @@ std::vector GetTestCases(std::string precision) {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} - // clang-format on + {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}}; + // clang-format on - return test_cases; + return test_cases; } INSTANTIATE_TEST_SUITE_P(DeepbenchLstm, ConfigWithFloat, 
testing::Values(GetTestCases("--float"))); From b7e949cf03f66d4044c2b52bdaed513a5fd9cd48 Mon Sep 17 00:00:00 2001 From: Bibek Ghimire Date: Mon, 18 Dec 2023 10:48:57 +0000 Subject: [PATCH 61/73] cderb/gtest : set environment variable --- src/include/miopen/env.hpp | 8 ++ test/gtest/deepbench_lstm.cpp | 165 ++++++++++++++++++++-------------- test/gtest/gtest_common.hpp | 164 +++++++++++++++++++++++++++++++++ 3 files changed, 269 insertions(+), 68 deletions(-) create mode 100644 test/gtest/gtest_common.hpp diff --git a/src/include/miopen/env.hpp b/src/include/miopen/env.hpp index 188189f5c0..768496c985 100644 --- a/src/include/miopen/env.hpp +++ b/src/include/miopen/env.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -207,6 +208,13 @@ void UpdateEnvVar(EnvVar, const ValueType& val) EnvVar::Ref().UpdateValue(val); } +template +void UpdateEnvVar(EnvVar, const std::string_view& val) +{ + EnvVar::Ref().UpdateValue( + miopen::internal::ParseEnvVal::go(val.data())); +} + } // namespace miopen #endif diff --git a/test/gtest/deepbench_lstm.cpp b/test/gtest/deepbench_lstm.cpp index df3acd1c74..9f8ac4a326 100644 --- a/test/gtest/deepbench_lstm.cpp +++ b/test/gtest/deepbench_lstm.cpp @@ -23,17 +23,39 @@ * SOFTWARE. 
* *******************************************************************************/ +#include #include "lstm.hpp" #include "get_handle.hpp" #include +#include #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_DEEPBENCH) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_FLOAT) MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_HALF) -static bool SkipTest(void) { return !miopen::IsEnabled(ENV(MIOPEN_TEST_DEEPBENCH)); } +using EnvType = std::tuple, + std::pair, + std::pair>; + +static bool Skip(miopenDataType_t prec) +{ + bool flag = !miopen::IsEnabled(ENV(MIOPEN_TEST_DEEPBENCH)); + switch(prec) + { + case miopenFloat: return flag && !miopen::IsEnabled(ENV(MIOPEN_TEST_FLOAT)); + case miopenHalf: return flag && !miopen::IsEnabled(ENV(MIOPEN_TEST_HALF)); + case miopenFloat8: + case miopenBFloat8: + case miopenInt8: + case miopenBFloat16: + case miopenInt32: + case miopenDouble: + default: MIOPEN_THROW("Unsupported datatype"); + } + return true; +} void GetArgs(const std::string& param, std::vector& tokens) { @@ -44,18 +66,80 @@ void GetArgs(const std::string& param, std::vector& tokens) tokens.push_back(*begin++); } -class ConfigWithFloat : public testing::TestWithParam> +void SetEnv(EnvType env_vars) +{ + std::apply( + [](const auto&... pairs) { + (..., (miopen::UpdateEnvVar(pairs.first, std::string_view(pairs.second)))); + }, + env_vars); +} + +auto GetTestCases(std::string precision) +{ + const auto env = std::tuple{ + std::pair{ENV(MIOPEN_TEST_DEEPBENCH), std::string_view("ON")}, + std::pair{ENV(MIOPEN_TEST_FLOAT), + precision == "--float" ? std::string_view("ON") : std::string_view("OFF")}, + std::pair{ENV(MIOPEN_TEST_HALF), + precision == "--half" ? 
std::string_view("ON") : std::string_view("OFF")}}; + + std::string flags = "test_lstm --verbose " + precision; + std::string commonFlags = + " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; + + return std::vector{ + // clang-format off + std::pair{env, flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + std::pair{env, flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + std::pair{env, flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, + std::pair{env, flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + std::pair{env, flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + std::pair{env, flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, + std::pair{env, flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + std::pair{env, flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + std::pair{env, flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, + std::pair{env, flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + std::pair{env, flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, + std::pair{env, flags + " --batch-size 128 --seq-len 25 --vector-len 4096 
--hidden-size 4096" + commonFlags}, + std::pair{env, flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + std::pair{env, flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, + std::pair{env, flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + std::pair{env, flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, + std::pair{env, flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags} + }; + // clang-format on +} + +using TestCase = decltype(GetTestCases({}))::value_type; + +class ConfigWithFloat : public testing::TestWithParam> { }; -class ConfigWithHalf : public testing::TestWithParam> +class ConfigWithHalf : public testing::TestWithParam> { }; -void Run2dDriver(miopenDataType_t prec) +static bool IsTestSupportedForDevice() { + using e_mask = enabled; + using d_mask = disabled; + return IsTestSupportedForDevice(); +} - std::vector params; +void Run2dDriver(miopenDataType_t prec) +{ + if(!IsTestSupportedForDevice()) + { + GTEST_SKIP(); + } + std::vector> params; switch(prec) { case miopenFloat: params = ConfigWithFloat::GetParam(); break; @@ -77,13 +161,17 @@ void Run2dDriver(miopenDataType_t prec) for(const auto& test_value : params) { std::vector tokens; - GetArgs(test_value, tokens); + GetArgs(test_value.second, tokens); std::vector ptrs; std::transform(tokens.begin(), tokens.end(), std::back_inserter(ptrs), [](const auto& str) { return str.data(); }); - + SetEnv(test_value.first); + if(Skip(prec)) + { + GTEST_SKIP(); + } testing::internal::CaptureStderr(); test_drive(ptrs.size(), ptrs.data()); auto capture = testing::internal::GetCapturedStderr(); @@ -91,68 +179,9 @@ void Run2dDriver(miopenDataType_t prec) } }; -bool IsTestSupportedForDevice(const miopen::Handle& 
handle) { return true; } +TEST_P(ConfigWithFloat, FloatTest) { Run2dDriver(miopenFloat); }; -TEST_P(ConfigWithFloat, FloatTest) -{ - const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_FLOAT))) - { - Run2dDriver(miopenFloat); - } - else - { - GTEST_SKIP(); - } -}; - -TEST_P(ConfigWithHalf, HalfTest) -{ - const auto& handle = get_handle(); - if(IsTestSupportedForDevice(handle) && !SkipTest() && miopen::IsEnabled(ENV(MIOPEN_TEST_HALF))) - { - Run2dDriver(miopenHalf); - } - else - { - GTEST_SKIP(); - } -}; - -std::vector GetTestCases(std::string precision) -{ - std::string flags = "test_lstm --verbose " + precision; - std::string commonFlags = - " --num-layers 1 --in-mode 1 --bias-mode 0 -dir-mode 0 --rnn-mode 0 --flat-batch-fill"; - - const std::vector test_cases = { - // clang-format off - {flags + " --batch-size 16 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 512 --hidden-size 512" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 1024 --hidden-size 1024" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 2048 --hidden-size 2048" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 2048 --hidden-size 
2048" + commonFlags}, - {flags + " --batch-size 16 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 32 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 64 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 128 --seq-len 25 --vector-len 4096 --hidden-size 4096" + commonFlags}, - {flags + " --batch-size 8 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 16 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 32 --seq-len 50 --vector-len 1536 --hidden-size 1536" + commonFlags}, - {flags + " --batch-size 16 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 32 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}, - {flags + " --batch-size 64 --seq-len 150 --vector-len 256 --hidden-size 256" + commonFlags}}; - // clang-format on - - return test_cases; -} +TEST_P(ConfigWithHalf, HalfTest) { Run2dDriver(miopenHalf); }; INSTANTIATE_TEST_SUITE_P(DeepbenchLstm, ConfigWithFloat, testing::Values(GetTestCases("--float"))); diff --git a/test/gtest/gtest_common.hpp b/test/gtest/gtest_common.hpp new file mode 100644 index 0000000000..2100a2dd1c --- /dev/null +++ b/test/gtest/gtest_common.hpp @@ -0,0 +1,164 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../driver.hpp" + +void default_check(const std::string& err) { std::cout << err; } + +void tuning_check(const std::string& err) +{ + // TEST_TUNING - the test should fail if output contains "Error" or "failed". 
+ EXPECT_FALSE(err.find("Error") != std::string::npos || err.find("failed") != std::string::npos); + default_check(err); +} + +enum class Gpu : int +{ + Default = 0, + gfx900 = 1 << 0, + gfx906 = 1 << 1, + gfx908 = 1 << 2, + gfx90A = 1 << 3, + gfx94X = 1 << 4, + gfx103X = 1 << 5, + gfx110X = 1 << 6 +}; + +template +struct enabled +{ + static constexpr int val = (static_cast(bits) | ...); + static constexpr bool enabling = true; +}; + +template +struct disabled +{ + static constexpr int val = ~((static_cast(bits) | ...)); + static constexpr bool enabling = false; +}; + +template +bool IsTestSupportedForDevice() +{ + static_assert((~disabled_mask::val & enabled_mask::val) == 0, + "Enabled and Disabled GPUs are overlapped"); + static_assert(disabled_mask::enabling == false, + "Wrong disabled mask, probably it has to be switched with enabled_mask"); + static_assert(enabled_mask::enabling == true, + "Wrong enabled mask, probably it has to be switched with disabled_mask"); + + static const auto dev = get_handle().GetDeviceName(); + + constexpr int def_val = enabled::val; + constexpr int mask = (def_val & disabled_mask::val) | enabled_mask::val; + constexpr auto test = [](Gpu bit) { return (mask & static_cast(bit)) != 0; }; + + bool res = false; + if constexpr(test(Gpu::gfx900)) + res = res || (dev == "gfx900"); + if constexpr(test(Gpu::gfx906)) + res = res || (dev == "gfx906"); + if constexpr(test(Gpu::gfx908)) + res = res || (dev == "gfx908"); + if constexpr(test(Gpu::gfx90A)) + res = res || (dev == "gfx90A"); + if constexpr(test(Gpu::gfx94X)) + res = res || (miopen::StartsWith(dev, "gfx94")); + if constexpr(test(Gpu::gfx103X)) + res = res || (miopen::StartsWith(dev, "gfx103")); + if constexpr(test(Gpu::gfx110X)) + res = res || (miopen::StartsWith(dev, "gfx110")); + + return res; +} + +template +std::vector get_args(const Case& param) +{ + const auto& [env_tuple, cmd] = param; + std::apply( + [](const auto&... 
env) { (miopen::UpdateEnvVar(std::get<0>(env), std::get<1>(env)), ...); }, + env_tuple); + + std::stringstream ss(cmd); + std::istream_iterator begin(ss); + std::istream_iterator end; + + return {begin, end}; +} + +template