Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove exception throwing from RNG code #815

Merged
merged 5 commits into from
Mar 1, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions src/cunumeric/random/bitgenerator_curand.inl
Original file line number Diff line number Diff line change
Expand Up @@ -1739,18 +1739,12 @@ struct BitGeneratorImplBody {
generate_distribution<uint32_t, negative_binomial_generator<uint32_t>>::generate(
res, cugen, intparams, floatparams, doubleparams);
break;
default: {
randutil_log().fatal() << "unknown Distribution";
assert(false);
}
default: LEGATE_ABORT;
}
}
break;
}
default: {
randutil_log().fatal() << "unknown BitGenerator operation";
assert(false);
}
default: LEGATE_ABORT;
}
}
};
Expand Down
1 change: 1 addition & 0 deletions src/cunumeric/random/bitgenerator_template.inl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ static void bitgenerator_template(TaskContext& context)
doubleparams.insert(doubleparams.end(), _doubleparams.begin(), _doubleparams.end());
break;
}
default: LEGATE_ABORT;
}

std::vector<Store> extra_args;
Expand Down
6 changes: 2 additions & 4 deletions src/cunumeric/random/curand_help.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,9 @@ static inline curandRngType get_curandRngType(cunumeric::BitGeneratorType kind)
case cunumeric::BitGeneratorType::MT19937: return curandRngType::CURAND_RNG_PSEUDO_MT19937;
case cunumeric::BitGeneratorType::PHILOX4_32_10:
return curandRngType::CURAND_RNG_PSEUDO_PHILOX4_32_10;
default: {
randutil_log().fatal() << "unknown parameter";
return curandRngType::CURAND_RNG_TEST;
}
default: LEGATE_ABORT;
}
return curandRngType::CURAND_RNG_TEST;
}

} // namespace cunumeric
2 changes: 1 addition & 1 deletion src/cunumeric/random/rand_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ constexpr decltype(auto) op_dispatch(RandGenCode gen_code, Functor f, Fnargs&&..
return f.template operator()<RandGenCode::NORMAL>(std::forward<Fnargs>(args)...);
case RandGenCode::INTEGER:
return f.template operator()<RandGenCode::INTEGER>(std::forward<Fnargs>(args)...);
default: LEGATE_ABORT;
}
assert(false);
return f.template operator()<RandGenCode::UNIFORM>(std::forward<Fnargs>(args)...);
}

Expand Down
32 changes: 17 additions & 15 deletions src/cunumeric/random/randutil/generator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "generator.h"

#include <cuda.h>
#include <core/cuda/cuda_help.h>

namespace randutilimpl {
static constexpr int blocksPerMultiProcessor = 2; // TODO: refine => number of blocks per mp
Expand Down Expand Up @@ -71,44 +71,46 @@ struct inner_generator<gen_t, randutilimpl::execlocation::DEVICE> : basegenerato
: seed(seed), generatorID(generatorID), stream(stream)
{
int deviceId;
CUDA_CHECK(::cudaGetDevice(&deviceId));
CU_CHECK(::cuDeviceGetAttribute(
&multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, deviceId));
CHECK_CUDA(::cudaGetDevice(&deviceId));
CHECK_CUDA(
::cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, deviceId));
// get number of generators
ngenerators = blockDimX * multiProcessorCount * blocksPerMultiProcessor;
if (ngenerators == 0) throw(int) CURAND_STATUS_INTERNAL_ERROR;
#ifdef DEBUG_CUNUMERIC
assert(ngenerators > 0);
#endif

// allocate buffer for generators state
int driverVersion, runtimeVersion;
CUDA_CHECK(::cudaDriverGetVersion(&driverVersion));
CUDA_CHECK(::cudaRuntimeGetVersion(&runtimeVersion));
CHECK_CUDA(::cudaDriverGetVersion(&driverVersion));
CHECK_CUDA(::cudaRuntimeGetVersion(&runtimeVersion));
asyncsupported = ((driverVersion >= 10020) && (runtimeVersion >= 10020));
if (asyncsupported) {
#if (__CUDACC_VER_MAJOR__ > 11 || ((__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 2)))
CUDA_CHECK(::cudaMallocAsync(&generators, ngenerators * sizeof(gen_t), stream));
CHECK_CUDA(::cudaMallocAsync(&generators, ngenerators * sizeof(gen_t), stream));
#else
CUDA_CHECK(::cudaMalloc(&generators, ngenerators * sizeof(gen_t)));
CHECK_CUDA(::cudaMalloc(&generators, ngenerators * sizeof(gen_t)));
#endif
} else
CUDA_CHECK(::cudaMalloc(&generators, ngenerators * sizeof(gen_t)));
CHECK_CUDA(::cudaMalloc(&generators, ngenerators * sizeof(gen_t)));

// initialize generators
initgenerators<<<blocksPerMultiProcessor * multiProcessorCount, blockDimX, 0, stream>>>(
generators, seed, generatorID);
CUDA_CHECK(::cudaPeekAtLastError());
CHECK_CUDA(::cudaPeekAtLastError());
}

virtual void destroy() override
{
CUDA_CHECK(::cudaStreamSynchronize(stream));
CHECK_CUDA(::cudaStreamSynchronize(stream));
if (asyncsupported) {
#if (__CUDACC_VER_MAJOR__ > 11 || ((__CUDACC_VER_MAJOR__ >= 11) && (__CUDACC_VER_MINOR__ >= 2)))
CUDA_CHECK(::cudaFreeAsync(generators, stream));
CHECK_CUDA(::cudaFreeAsync(generators, stream));
#else
CUDA_CHECK(::cudaFree(generators));
CHECK_CUDA(::cudaFree(generators));
#endif
} else
CUDA_CHECK(::cudaFree(generators));
CHECK_CUDA(::cudaFree(generators));

generators = nullptr;
}
Expand Down
7 changes: 5 additions & 2 deletions src/cunumeric/random/randutil/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cstdint>
#include <cassert>

#include "legate.h"
#include "randutil_curand.h"
#include "randutil_impl.h"

Expand Down Expand Up @@ -104,8 +105,9 @@ curandStatus_t inner_dispatch_sample(basegenerator* gen, func_t func, size_t N,
case CURAND_RNG_PSEUDO_MRG32K3A:
return static_cast<inner_generator<curandStateMRG32k3a_t, location>*>(gen)
->template draw<func_t, out_t>(func, N, out);
default: return CURAND_STATUS_INTERNAL_ERROR;
default: LEGATE_ABORT;
}
return CURAND_STATUS_INTERNAL_ERROR;
}

// template funtion with HOST and DEVICE implementations
Expand Down Expand Up @@ -137,8 +139,9 @@ curandStatus_t dispatch(randutilimpl::basegenerator* gen, func_t func, size_t N,
case randutilimpl::execlocation::DEVICE:
return dispatcher<randutilimpl::execlocation::DEVICE, func_t, out_t>::run(gen, func, N, out);
#endif
default: return CURAND_STATUS_INTERNAL_ERROR;
default: LEGATE_ABORT;
}
return CURAND_STATUS_INTERNAL_ERROR;
}

} // namespace randutilimpl
17 changes: 7 additions & 10 deletions src/cunumeric/random/randutil/generator_create.inl
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,10 @@ curandStatus_t randutilGenerator(randutilGenerator_t* generator,
uint64_t generatorID,
cudaStream_t stream = nullptr)
{
try {
randutilimpl::inner_generator<gen_t, location>* result =
new randutilimpl::inner_generator<gen_t, location>(seed, generatorID, stream);
*generator = (randutilGenerator_t)result;
return CURAND_STATUS_SUCCESS;
} catch (int errorCode) {
return (curandStatus_t)errorCode;
}
randutilimpl::inner_generator<gen_t, location>* result =
new randutilimpl::inner_generator<gen_t, location>(seed, generatorID, stream);
*generator = (randutilGenerator_t)result;
return CURAND_STATUS_SUCCESS;
}

template <randutilimpl::execlocation location>
Expand All @@ -48,6 +44,7 @@ static curandStatus_t inner_randutilCreateGenerator(randutilGenerator_t* generat
case CURAND_RNG_PSEUDO_MRG32K3A:
return randutilGenerator<curandStateMRG32k3a_t, location>(
generator, seed, generatorID, stream);
default: return CURAND_STATUS_TYPE_ERROR;
default: LEGATE_ABORT;
}
}
return CURAND_STATUS_TYPE_ERROR;
}
12 changes: 3 additions & 9 deletions src/cunumeric/random/randutil/generator_host.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,9 @@ extern "C" curandStatus_t randutilCreateGeneratorHost(randutilGenerator_t* gener
extern "C" curandStatus_t randutilDestroyGenerator(randutilGenerator_t generator)
{
randutilimpl::basegenerator* gen = (randutilimpl::basegenerator*)generator;
try {
gen->destroy();
delete gen;

return CURAND_STATUS_SUCCESS;
} catch (int errorCode) {
delete gen;
return (curandStatus_t)errorCode;
}
gen->destroy();
delete gen;
return CURAND_STATUS_SUCCESS;
}

#pragma region integers
Expand Down
2 changes: 1 addition & 1 deletion src/cunumeric/random/randutil/randutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,4 @@ extern "C" curandStatus_t randutilGenerateWaldEx(
extern "C" curandStatus_t randutilGenerateBinomialEx(
randutilGenerator_t generator, uint32_t* outputPtr, size_t n, uint32_t ntrials, double p);
extern "C" curandStatus_t randutilGenerateNegativeBinomialEx(
randutilGenerator_t generator, uint32_t* outputPtr, size_t n, uint32_t ntrials, double p);
randutilGenerator_t generator, uint32_t* outputPtr, size_t n, uint32_t ntrials, double p);
33 changes: 1 addition & 32 deletions src/cunumeric/random/randutil/randutil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,11 @@
#pragma once
#include "randutil.h"

#define CURAND_CHECK_LINE(a, file, line) \
{ \
curandStatus_t __curer = a; \
if (CURAND_STATUS_SUCCESS != __curer) { throw(int) __curer; } \
}
#define CURAND_CHECK(a) CURAND_CHECK_LINE(a, __FILE__, __LINE__)

#define CU_CHECK_LINE(a, file, line) \
{ \
CUresult __cuer = a; \
if (CUDA_SUCCESS != __cuer) { \
if (__cuer == CUDA_ERROR_OUT_OF_MEMORY) \
throw(int) CURAND_STATUS_ALLOCATION_FAILED; \
else \
throw(int) CURAND_STATUS_INTERNAL_ERROR; \
} \
}
#define CU_CHECK(a) CU_CHECK_LINE(a, __FILE__, __LINE__)

#define CUDA_CHECK_LINE(a, file, line) \
{ \
cudaError_t __cuer = a; \
if (cudaSuccess != __cuer) { \
if (__cuer == cudaErrorMemoryAllocation) \
throw(int) CURAND_STATUS_ALLOCATION_FAILED; \
else \
throw(int) CURAND_STATUS_INTERNAL_ERROR; \
} \
}
#define CUDA_CHECK(a) CUDA_CHECK_LINE(a, __FILE__, __LINE__)

namespace randutilimpl {

enum class execlocation : int { DEVICE = 0, HOST = 1 };

template <typename gen_t, execlocation loc>
struct inner_generator;

} // namespace randutilimpl
} // namespace randutilimpl