Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Add support for CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION #12722

Merged
merged 1 commit into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion docs/faq/env_var.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,17 @@ When USE_PROFILER is enabled in Makefile or CMake, the following environments ca
- Performance tests are run to pick the convolution algo when value is 1 or 2
- Value of 1 chooses the best algo in a limited workspace
- Value of 2 chooses the fastest algo whose memory requirements may be larger than the default workspace threshold


* MXNET_CUDA_ALLOW_TENSOR_CORE
- 0(false) or 1(true) ```(default=1)```
- If set to '0', disallows Tensor Core use in CUDA ops.
- If set to '1', allows Tensor Core use in CUDA ops.
- This variable can only be set once in a session.

* MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION
- 0(false) or 1(true) ```(default=0)```
- If set to '0', disallows implicit type conversions to Float16 to use Tensor Cores.
- If set to '1', allows CUDA ops like RNN and Convolution to use Tensor Cores even with Float32 input data by using implicit type casting to Float16. Only has an effect if `MXNET_CUDA_ALLOW_TENSOR_CORE` is `1`.

* MXNET_GLUON_REPO
- Values: String ```(default='https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/')```
Expand Down
16 changes: 16 additions & 0 deletions src/common/cuda_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,22 @@ inline bool GetEnvAllowTensorCore() {
return allow_tensor_core;
}

// The policy if the user hasn't set the environment variable
// MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION (used as the fallback value by
// GetEnvAllowTensorCoreConversion()).
#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false

/*!
* \brief Returns global policy for TensorCore implicit type casting
*/
inline bool GetEnvAllowTensorCoreConversion() {
// Use of optional<bool> here permits: "0", "1", "true" and "false" to all be
// legal.
bool default_value = MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT;
return dmlc::GetEnv("MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION",
dmlc::optional<bool>(default_value))
.value();
}

#if CUDA_VERSION >= 9000
// Sets the cuBLAS math mode that determines the 'allow TensorCore' policy. Returns previous.
inline cublasMath_t SetCublasMathMode(cublasHandle_t blas_handle, cublasMath_t new_math_type) {
Expand Down
5 changes: 5 additions & 0 deletions src/operator/cudnn_rnn-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ class CuDNNRNNOp : public Operator{
if (cudnn_tensor_core_ && rnn_algo == CUDNN_RNN_ALGO_STANDARD) {
math_type = CUDNN_TENSOR_OP_MATH;
}
#if CUDNN_VERSION >= 7200
if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() &&
(DataType<DType>::kFlag != kFloat16))
math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION;
#endif
CUDNN_CALL(cudnnSetRNNMatrixMathType(rnn_desc_, math_type));
#endif
// Get temp space sizes
Expand Down
5 changes: 5 additions & 0 deletions src/operator/nn/cudnn/cudnn_convolution-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,11 @@ class CuDNNConvolutionOp {
#if CUDNN_MAJOR >= 7
cudnnMathType_t math_type = cudnn_tensor_core_ ? CUDNN_TENSOR_OP_MATH
: CUDNN_DEFAULT_MATH;
#if CUDNN_VERSION >= 7200
if (GetEnvAllowTensorCore() && GetEnvAllowTensorCoreConversion() &&
(DataType<DType>::kFlag != kFloat16))
math_type = CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION;
#endif
CUDNN_CALL(cudnnSetConvolutionMathType(forward_conv_desc_, math_type));
CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_, math_type));
CUDNN_CALL(cudnnSetConvolutionMathType(back_conv_desc_w_, math_type));
Expand Down