From a6164dc68cb9a2911eb2971b4a7c9b750f14385a Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 26 Oct 2018 15:14:10 -0700 Subject: [PATCH 1/7] add env variable to choose deterministic cudnn alg --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index d63d46821edc..72c9a39199e8 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -889,6 +889,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -896,6 +897,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index c0c56507bbf3..7a9cefcd5e56 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -829,6 +829,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; @@ -836,6 +837,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bc3ee366007c..bb3e9c7e3342 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,8 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: - mode_ = CUDNN_POOLING_MAX; + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0) ? + CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) { From 3da0f7de60a1b82123bc50184e95a7440954ffd8 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 13:34:09 -0700 Subject: [PATCH 2/7] set default value to false --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_pooling-inl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 72c9a39199e8..642bb1c049be 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -889,7 +889,7 @@ class CuDNNConvolutionOp { size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo that matches the algo_preference (-1 = any), // regardless of mathType. - bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 7a9cefcd5e56..2f7d1ac65dab 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -829,7 +829,7 @@ class CuDNNDeconvolutionOp { void AlgoFinalSelect(const std::vector &perf_results, std::string kernel_name, size_t workspace_byte, CuDNNAlgo *algo) { // Determine the fastest acceptable algo regardless of mathType. - bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0); + bool enforce_determinism = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false); for (decltype(perf_results.size()) i = 0; i != perf_results.size(); ++i) { const auto &result = perf_results[i]; bool algo_is_tensor_core = false; diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index bb3e9c7e3342..29192b197ab3 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,7 +48,7 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: - mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", 0) ? + mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: From 65d64d20ef7d51c8aa5899ae716d7ba10b40ce9b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:10:49 -0700 Subject: [PATCH 3/7] fix build failure in Windows GPU --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 642bb1c049be..ab4fefa10d53 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -897,7 +897,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 2f7d1ac65dab..9db30ef4971e 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -837,7 +837,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From 7bc1d4d60e6cbd796031dd754d60659da12054ac Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:16:55 -0700 Subject: [PATCH 4/7] revert the previous change --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index ab4fefa10d53..642bb1c049be 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -897,7 +897,7 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 9db30ef4971e..2f7d1ac65dab 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -837,7 +837,7 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && - (!enforce_determinism || result.determinism == PerfType::CUDNN_DETERMINISTIC) && + (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From ca98f5f52f6c8a9d127d4593ff6266eccc26f9e6 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 15:41:46 -0700 Subject: [PATCH 5/7] only check determinism in CUDNN 7.x release --- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 ++ src/operator/nn/cudnn/cudnn_deconvolution-inl.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 642bb1c049be..3bd6c5a3826b 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -897,7 +897,9 @@ class CuDNNConvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 2f7d1ac65dab..72ba2c95fc6a 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -837,7 +837,9 @@ class CuDNNDeconvolutionOp { algo_is_tensor_core = result.mathType == CUDNN_TENSOR_OP_MATH; #endif if (result.status == CUDNN_STATUS_SUCCESS && + #if CUDNN_MAJOR >= 7 (!enforce_determinism || result.determinism == cudnnDeterminism_t::CUDNN_DETERMINISTIC) && + #endif (param_.cudnn_tune.value() != conv::kLimited || result.memory <= workspace_byte)) { algo->Set(result.algo, algo_is_tensor_core); return; From ad3a484b72da930be31c181676127923b48af6eb Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 16:17:14 -0700 Subject: [PATCH 6/7] Add cudnn version check --- src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index 29192b197ab3..5c88bd929ae4 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -48,8 +48,11 @@ class CuDNNPoolingOp { param_ = p; switch (param_.pool_type) { case pool_enum::kMaxPooling: + #if CUDNN_MAJOR >= 7 mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; + #else + mode_ = CUDNN_POOLING_MAX; break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) { From 2a108efc7e829bb7ef6501ed901836f16feb6326 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 29 Oct 2018 16:36:35 -0700 Subject: [PATCH 7/7] fix lint error --- src/operator/nn/cudnn/cudnn_pooling-inl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/operator/nn/cudnn/cudnn_pooling-inl.h b/src/operator/nn/cudnn/cudnn_pooling-inl.h index 5c88bd929ae4..89fa73ef5471 100644 --- a/src/operator/nn/cudnn/cudnn_pooling-inl.h +++ b/src/operator/nn/cudnn/cudnn_pooling-inl.h @@ -52,7 +52,8 @@ class CuDNNPoolingOp { mode_ = dmlc::GetEnv("MXNET_ENFORCE_DETERMINISM", false) ? CUDNN_POOLING_MAX_DETERMINISTIC : CUDNN_POOLING_MAX; #else - mode_ = CUDNN_POOLING_MAX; + mode_ = CUDNN_POOLING_MAX; + #endif break; case pool_enum::kAvgPooling: if (param_.count_include_pad.has_value() && !param_.count_include_pad.value()) {