Skip to content

Commit

Permalink
NVIDIA: relax 'threads' config
Browse files Browse the repository at this point in the history
- with fireice-uk#2443 the possible maximum number of threads per block was
reduced because phase3 used 16 threads per share (is automatically
avoided if the thread limit is exceeded)
- warn the user if `threads` is too large and adjust it to a valid value
  • Loading branch information
psychocrypt authored and pull[bot] committed Jul 21, 2019
1 parent 6062d8a commit 2c6b13a
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 7 deletions.
1 change: 1 addition & 0 deletions xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typedef struct
int device_threads;
int device_bfactor;
int device_bsleep;
int device_maxThreadsPerBlock;
int syncMode;
bool memMode;

Expand Down
27 changes: 21 additions & 6 deletions xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -895,12 +895,19 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo
roundsPhase3 *= 2;
}

int blockSizePhase3 = block8.x;
int gridSizePhase3 = grid.x;
if(blockSizePhase3 * 2 <= ctx->device_maxThreadsPerBlock)
{
blockSizePhase3 *= 2;
gridSizePhase3 = (blockSizePhase3 + 1) / 2;
}
for(int i = 0; i < roundsPhase3; i++)
{
CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<<
(grid.x + 1) / 2,
block8.x * 2,
2 * block8.x * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
gridSizePhase3,
blockSizePhase3,
blockSizePhase3 * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
ITERATIONS,
MEM,
ctx->device_blocks * ctx->device_threads,
Expand Down Expand Up @@ -966,12 +973,20 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce, const xmrstak_
roundsPhase3 *= 2;
}

int blockSizePhase3 = block8.x;
int gridSizePhase3 = grid.x;
if(blockSizePhase3 * 2 <= ctx->device_maxThreadsPerBlock)
{
blockSizePhase3 *= 2;
gridSizePhase3 = (blockSizePhase3 + 1) / 2;
}

for(int i = 0; i < roundsPhase3; i++)
{
CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<<
(grid.x + 1) / 2,
block8.x * 2 ,
2 * block8.x * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
gridSizePhase3,
blockSizePhase3,
blockSizePhase3 * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
ITERATIONS,
MEM / 4,
ctx->device_blocks * ctx->device_threads,
Expand Down
19 changes: 18 additions & 1 deletion xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -560,12 +560,13 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
ctx->device_mpcount = props.multiProcessorCount;
ctx->device_arch[0] = props.major;
ctx->device_arch[1] = props.minor;
ctx->device_maxThreadsPerBlock = props.maxThreadsPerBlock;

const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];

ctx->name = std::string(props.name);

printf("CUDA [%d.%d/%d.%d] GPU#%d, device architecture %d: \"%s\"... ",
printf("CUDA [%d.%d/%d.%d] GPU#%d, device architecture %d: \"%s\"...\n",
version / 1000, (version % 1000 / 10),
CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10,
ctx->device_id, gpuArch, ctx->device_name);
Expand Down Expand Up @@ -803,6 +804,22 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
}
}
}

if(useCryptonight_gpu)
{
// cryptonight_gpu uses 16 threads per share
if(ctx->device_threads * 16 > ctx->device_maxThreadsPerBlock)
{
ctx->device_threads = ctx->device_maxThreadsPerBlock / 16;
printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
}
}
else if(ctx->device_threads * 8 > ctx->device_maxThreadsPerBlock)
{
// by default the cryptonight CUDA implementation uses 8 threads per hash for some kernels
ctx->device_threads = ctx->device_maxThreadsPerBlock / 8;
printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
}
printf("device init succeeded\n");

return 0;
Expand Down

0 comments on commit 2c6b13a

Please sign in to comment.