diff --git a/include/alpaka/acc/AccGpuUniformCudaHipRt.hpp b/include/alpaka/acc/AccGpuUniformCudaHipRt.hpp
index 22f83f53710d..69eb9ae3c80b 100644
--- a/include/alpaka/acc/AccGpuUniformCudaHipRt.hpp
+++ b/include/alpaka/acc/AccGpuUniformCudaHipRt.hpp
@@ -153,41 +153,41 @@ namespace alpaka
                // Reading only the necessary attributes with cudaDeviceGetAttribute is faster than reading all with cudaGetDeviceProperties
                // https://devblogs.nvidia.com/cuda-pro-tip-the-fast-way-to-query-device-properties/
                int multiProcessorCount = {};
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &multiProcessorCount,
                    cudaDevAttrMultiProcessorCount,
                    dev.m_iDevice));

                int maxGridSize[3] = {};
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxGridSize[0],
                    cudaDevAttrMaxGridDimX,
                    dev.m_iDevice));
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxGridSize[1],
                    cudaDevAttrMaxGridDimY,
                    dev.m_iDevice));
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxGridSize[2],
                    cudaDevAttrMaxGridDimZ,
                    dev.m_iDevice));

                int maxBlockDim[3] = {};
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxBlockDim[0],
                    cudaDevAttrMaxBlockDimX,
                    dev.m_iDevice));
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxBlockDim[1],
                    cudaDevAttrMaxBlockDimY,
                    dev.m_iDevice));
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxBlockDim[2],
                    cudaDevAttrMaxBlockDimZ,
                    dev.m_iDevice));

                int maxThreadsPerBlock = {};
-                ALPAKA_CUDA_RT_CHECK(cudaDeviceGetAttribute(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaDeviceGetAttribute(
                    &maxThreadsPerBlock,
                    cudaDevAttrMaxThreadsPerBlock,
                    dev.m_iDevice));
@@ -219,7 +219,7 @@ namespace alpaka
#else
                hipDeviceProp_t hipDevProp;
-                ALPAKA_HIP_RT_CHECK(hipGetDeviceProperties(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipGetDeviceProperties(
                    &hipDevProp,
                    dev.m_iDevice));
diff --git a/include/alpaka/core/Cuda.hpp b/include/alpaka/core/Cuda.hpp
index 2a559b1cfb5a..098debafd175 100644
--- a/include/alpaka/core/Cuda.hpp
+++ b/include/alpaka/core/Cuda.hpp
@@ -34,10 +34,8 @@
#include
#include
-#include
#include
#include
-#include
#include
#include
#include
@@ -50,97 +48,10 @@
    #error "CUDA version 9.0 or greater required!"
#endif

-namespace alpaka
-{
-    namespace cuda
-    {
-        namespace detail
-        {
-            //-----------------------------------------------------------------------------
-            //! CUDA runtime API error checking with log and exception, ignoring specific error values
-            ALPAKA_FN_HOST inline auto cudaRtCheck(
-                cudaError_t const & error,
-                char const * desc,
-                char const * file,
-                int const & line)
-            -> void
-            {
-                if(error != cudaSuccess)
-                {
-                    std::string const sError(std::string(file) + "(" + std::to_string(line) + ") " + std::string(desc) + " : '" + cudaGetErrorName(error) + "': '" + std::string(cudaGetErrorString(error)) + "'!");
-#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
-                    std::cerr << sError << std::endl;
-#endif
-                    ALPAKA_DEBUG_BREAK;
-                    // reset the last error to allow user side error handling
-                    cudaGetLastError();
-                    throw std::runtime_error(sError);
-                }
-            }
-            //-----------------------------------------------------------------------------
-            //! CUDA runtime API error checking with log and exception, ignoring specific error values
-            // NOTE: All ignored errors have to be convertible to cudaError_t.
-            template<
-                typename... TErrors>
-            ALPAKA_FN_HOST auto cudaRtCheckIgnore(
-                cudaError_t const & error,
-                char const * cmd,
-                char const * file,
-                int const & line,
-                TErrors && ... ignoredErrorCodes)
-            -> void
-            {
-                if(error != cudaSuccess)
-                {
-                    std::array<cudaError_t, sizeof...(ignoredErrorCodes)> const aIgnoredErrorCodes{ignoredErrorCodes...};
-
-                    // If the error code is not one of the ignored ones.
-                    if(std::find(aIgnoredErrorCodes.cbegin(), aIgnoredErrorCodes.cend(), error) == aIgnoredErrorCodes.cend())
-                    {
-                        cudaRtCheck(error, ("'" + std::string(cmd) + "' returned error ").c_str(), file, line);
-                    }
-                }
-            }
-            //-----------------------------------------------------------------------------
-            //! CUDA runtime API last error checking with log and exception.
-            ALPAKA_FN_HOST inline auto cudaRtCheckLastError(
-                char const * desc,
-                char const * file,
-                int const & line)
-            -> void
-            {
-                cudaError_t const error(cudaGetLastError());
-                cudaRtCheck(error, desc, file, line);
-            }
-        }
-    }
-}
-
-#if BOOST_COMP_MSVC
-    //-----------------------------------------------------------------------------
-    //! CUDA runtime error checking with log and exception, ignoring specific error values
-    #define ALPAKA_CUDA_RT_CHECK_IGNORE(cmd, ...)\
-        ::alpaka::cuda::detail::cudaRtCheckLastError("'" #cmd "' A previous CUDA call (not this one) set the error ", __FILE__, __LINE__);\
-        ::alpaka::cuda::detail::cudaRtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, __VA_ARGS__)
-#else
-    #if BOOST_COMP_CLANG
-        #pragma clang diagnostic push
-        #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
-    #endif
-    //-----------------------------------------------------------------------------
-    //! CUDA runtime error checking with log and exception, ignoring specific error values
-    #define ALPAKA_CUDA_RT_CHECK_IGNORE(cmd, ...)\
-        ::alpaka::cuda::detail::cudaRtCheckLastError("'" #cmd "' A previous CUDA call (not this one) set the error ", __FILE__, __LINE__);\
-        ::alpaka::cuda::detail::cudaRtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, ##__VA_ARGS__)
-    #if BOOST_COMP_CLANG
-        #pragma clang diagnostic pop
-    #endif
-#endif
-
-//-----------------------------------------------------------------------------
-//! CUDA runtime error checking with log and exception.
-#define ALPAKA_CUDA_RT_CHECK(cmd)\
-    ALPAKA_CUDA_RT_CHECK_IGNORE(cmd)
+#define ALPAKA_PP_CONCAT_DO(X,Y) X##Y
+#define ALPAKA_PP_CONCAT(X,Y) ALPAKA_PP_CONCAT_DO(X,Y)
+//! prefix a name with `cuda`
+#define ALPAKA_API_PREFIX(name) ALPAKA_PP_CONCAT_DO(cuda,name)

namespace alpaka
{
@@ -747,4 +658,6 @@ namespace alpaka
    }
}

+#include <alpaka/core/UniformCudaHip.hpp>
+
#endif
diff --git a/include/alpaka/core/Hip.hpp b/include/alpaka/core/Hip.hpp
index df6ada481efe..79d264430995 100644
--- a/include/alpaka/core/Hip.hpp
+++ b/include/alpaka/core/Hip.hpp
@@ -26,107 +26,18 @@
#include
-#include
#include
#include
-#include
-#include
-#include
#include
#ifdef __HIP_PLATFORM_HCC__
    #define HIPRT_CB
#endif
-
-namespace alpaka
-{
-    namespace hip
-    {
-        namespace detail
-        {
-            //-----------------------------------------------------------------------------
-            //!
HIP runtime API error checking with log and exception, ignoring specific error values - ALPAKA_FN_HOST inline auto hipRtCheck( - hipError_t const & error, - char const * desc, - char const * file, - int const & line) - -> void - { - if(error != hipSuccess) - { - std::string const sError(std::string(file) + "(" + std::to_string(line) + ") " + std::string(desc) + " : '" + hipGetErrorName(error) + "': '" + std::string(hipGetErrorString(error)) + "'!"); -#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL - std::cerr << sError << std::endl; -#endif - ALPAKA_DEBUG_BREAK; - throw std::runtime_error(sError); - } - } - //----------------------------------------------------------------------------- - //! HIP runtime API error checking with log and exception, ignoring specific error values - // NOTE: All ignored errors have to be convertible to hipError_t. - template< - typename... TErrors> - ALPAKA_FN_HOST auto hipRtCheckIgnore( - hipError_t const & error, - char const * cmd, - char const * file, - int const & line, - TErrors && ... ignoredErrorCodes) - -> void - { - if(error != hipSuccess) - { - std::array const aIgnoredErrorCodes{ignoredErrorCodes...}; - // If the error code is not one of the ignored ones. - if(std::find(aIgnoredErrorCodes.cbegin(), aIgnoredErrorCodes.cend(), error) == aIgnoredErrorCodes.cend()) - { - hipRtCheck(error, ("'" + std::string(cmd) + "' returned error ").c_str(), file, line); - } - } - } - //----------------------------------------------------------------------------- - //! HIP runtime API last error checking with log and exception. - ALPAKA_FN_HOST inline auto hipRtCheckLastError( - char const * desc, - char const * file, - int const & line) - -> void - { - hipError_t const error(hipGetLastError()); - hipRtCheck(error, desc, file, line); - } - } - } -} - -#if BOOST_COMP_MSVC - //----------------------------------------------------------------------------- - //! HIP runtime error checking with log and exception, ignoring specific error values - #define ALPAKA_HIP_RT_CHECK_IGNORE(cmd, ...)\ - ::alpaka::hip::detail::hipRtCheckLastError("'" #cmd "' A previous HIP call (not this one) set the error ", __FILE__, __LINE__);\ - ::alpaka::hip::detail::hipRtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, __VA_ARGS__) -#else - #if BOOST_COMP_CLANG - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" - #endif - //----------------------------------------------------------------------------- - //! HIP runtime error checking with log and exception, ignoring specific error values - #define ALPAKA_HIP_RT_CHECK_IGNORE(cmd, ...)\ - ::alpaka::hip::detail::hipRtCheckLastError("'" #cmd "' A previous HIP call (not this one) set the error ", __FILE__, __LINE__);\ - ::alpaka::hip::detail::hipRtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, ##__VA_ARGS__) - #if BOOST_COMP_CLANG - #pragma clang diagnostic pop - #endif -#endif - -//----------------------------------------------------------------------------- -//! HIP runtime error checking with log and exception. -#define ALPAKA_HIP_RT_CHECK(cmd)\ - ALPAKA_HIP_RT_CHECK_IGNORE(cmd) +#define ALPAKA_PP_CONCAT_DO(X,Y) X##Y +#define ALPAKA_PP_CONCAT(X,Y) ALPAKA_PP_CONCAT_DO(X,Y) +//! prefix a name with `hip` +#define ALPAKA_API_PREFIX(name) ALPAKA_PP_CONCAT_DO(hip,name) //----------------------------------------------------------------------------- // HIP vector_types.h trait specializations. 
@@ -704,4 +615,6 @@ namespace alpaka
    }
}

+#include <alpaka/core/UniformCudaHip.hpp>
+
#endif
diff --git a/include/alpaka/core/UniformCudaHip.hpp b/include/alpaka/core/UniformCudaHip.hpp
new file mode 100644
index 000000000000..44ad40de4d96
--- /dev/null
+++ b/include/alpaka/core/UniformCudaHip.hpp
@@ -0,0 +1,136 @@
+/* Copyright 2019 Axel Huebl, Benjamin Worpitz, Matthias Werner, René Widera
+ *
+ * This file is part of Alpaka.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED)
+
+#include <alpaka/core/BoostPredef.hpp>
+
+
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA
+    #error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA!
+#endif
+
+#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP
+    #error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP!
+#endif
+
+// Backend specific includes.
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
+    #include <alpaka/core/Cuda.hpp>
+#else
+    #include <alpaka/core/Hip.hpp>
+#endif
+
+#include <algorithm>
+#include <array>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+namespace alpaka
+{
+    namespace uniform_cuda_hip
+    {
+        namespace detail
+        {
+
+#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
+            using Error_t = cudaError;
+#else
+            using Error_t = hipError_t;
+#endif
+            //-----------------------------------------------------------------------------
+            //! CUDA/HIP runtime API error checking with log and exception.
+            ALPAKA_FN_HOST inline auto rtCheck(
+                Error_t const & error,
+                char const * desc,
+                char const * file,
+                int const & line)
+            -> void
+            {
+                if(error != ALPAKA_API_PREFIX(Success))
+                {
+                    std::string const sError(std::string(file) + "(" + std::to_string(line) + ") " + std::string(desc) + " : '" + ALPAKA_API_PREFIX(GetErrorName)(error) + "': '" + std::string(ALPAKA_API_PREFIX(GetErrorString)(error)) + "'!");
+#if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL
+                    std::cerr << sError << std::endl;
+#endif
+                    ALPAKA_DEBUG_BREAK;
+                    // reset the last error to allow user side error handling
+                    ALPAKA_API_PREFIX(GetLastError)();
+                    throw std::runtime_error(sError);
+                }
+            }
+            //-----------------------------------------------------------------------------
+            //! CUDA/HIP runtime API error checking with log and exception, ignoring specific error values
+            // NOTE: All ignored errors have to be convertible to Error_t.
+            template<
+                typename... TErrors>
+            ALPAKA_FN_HOST auto rtCheckIgnore(
+                Error_t const & error,
+                char const * cmd,
+                char const * file,
+                int const & line,
+                TErrors && ... ignoredErrorCodes)
+            -> void
+            {
+                if(error != ALPAKA_API_PREFIX(Success))
+                {
+                    std::array<Error_t, sizeof...(ignoredErrorCodes)> const aIgnoredErrorCodes{ignoredErrorCodes...};
+
+                    // If the error code is not one of the ignored ones.
+                    if(std::find(aIgnoredErrorCodes.cbegin(), aIgnoredErrorCodes.cend(), error) == aIgnoredErrorCodes.cend())
+                    {
+                        rtCheck(error, ("'" + std::string(cmd) + "' returned error ").c_str(), file, line);
+                    }
+                }
+            }
+            //-----------------------------------------------------------------------------
+            //! CUDA/HIP runtime API last error checking with log and exception.
+            ALPAKA_FN_HOST inline auto rtCheckLastError(
+                char const * desc,
+                char const * file,
+                int const & line)
+            -> void
+            {
+                Error_t const error(ALPAKA_API_PREFIX(GetLastError)());
+                rtCheck(error, desc, file, line);
+            }
+        }
+    }
+}
+
+#if BOOST_COMP_MSVC
+    //-----------------------------------------------------------------------------
+    //!
+    //! CUDA/HIP runtime error checking with log and exception, ignoring specific error values
+    #define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(cmd, ...)\
+        ::alpaka::uniform_cuda_hip::detail::rtCheckLastError("'" #cmd "' A previous API call (not this one) set the error ", __FILE__, __LINE__);\
+        ::alpaka::uniform_cuda_hip::detail::rtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, __VA_ARGS__)
+#else
+    #if BOOST_COMP_CLANG
+        #pragma clang diagnostic push
+        #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
+    #endif
+    //-----------------------------------------------------------------------------
+    //! CUDA/HIP runtime error checking with log and exception, ignoring specific error values
+    #define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(cmd, ...)\
+        ::alpaka::uniform_cuda_hip::detail::rtCheckLastError("'" #cmd "' A previous API call (not this one) set the error ", __FILE__, __LINE__);\
+        ::alpaka::uniform_cuda_hip::detail::rtCheckIgnore(cmd, #cmd, __FILE__, __LINE__, ##__VA_ARGS__)
+    #if BOOST_COMP_CLANG
+        #pragma clang diagnostic pop
+    #endif
+#endif
+
+//-----------------------------------------------------------------------------
+//! CUDA/HIP runtime error checking with log and exception.
+#define ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cmd)\
+    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(cmd)
+
+#endif
diff --git a/include/alpaka/dev/DevUniformCudaHipRt.hpp b/include/alpaka/dev/DevUniformCudaHipRt.hpp
index 226f32c3070b..e6b69ac2f569 100644
--- a/include/alpaka/dev/DevUniformCudaHipRt.hpp
+++ b/include/alpaka/dev/DevUniformCudaHipRt.hpp
@@ -116,24 +116,18 @@ namespace alpaka
                dev::DevUniformCudaHipRt const & dev)
            -> std::string
            {
+                // There is cuda/hip-DeviceGetAttribute as a faster alternative to cuda/hip-GetDeviceProperties for querying a single device property, but it has no option to get the name.
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                // There is cudaDeviceGetAttribute as faster alternative to cudaGetDeviceProperties to get a single device property but it has no option to get the name
-                cudaDeviceProp cudaDevProp;
-                ALPAKA_CUDA_RT_CHECK(
-                    cudaGetDeviceProperties(
-                        &cudaDevProp,
-                        dev.m_iDevice));
-
-                return std::string(cudaDevProp.name);
+                cudaDeviceProp devProp;
#else
-                hipDeviceProp_t hipDevProp;
-                ALPAKA_HIP_RT_CHECK(
-                    hipGetDeviceProperties(
-                        &hipDevProp,
+                hipDeviceProp_t devProp;
+#endif
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                    ALPAKA_API_PREFIX(GetDeviceProperties)(
+                        &devProp,
                        dev.m_iDevice));

-                return std::string(hipDevProp.name);
-#endif
+                return std::string(devProp.name);
            }
        };
@@ -148,37 +142,20 @@ namespace alpaka
                dev::DevUniformCudaHipRt const & dev)
            -> std::size_t
            {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
                // Set the current device to wait for.
-                ALPAKA_CUDA_RT_CHECK(
-                    cudaSetDevice(
-                        dev.m_iDevice));
-
-                std::size_t freeInternal(0u);
-                std::size_t totalInternal(0u);
-
-                ALPAKA_CUDA_RT_CHECK(
-                    cudaMemGetInfo(
-                        &freeInternal,
-                        &totalInternal));
-
-                return totalInternal;
-#else
-                // Set the current device to wait for.
-                ALPAKA_HIP_RT_CHECK(
-                    hipSetDevice(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                    ALPAKA_API_PREFIX(SetDevice)(
                        dev.m_iDevice));

                std::size_t freeInternal(0u);
                std::size_t totalInternal(0u);

-                ALPAKA_HIP_RT_CHECK(
-                    hipMemGetInfo(
+                ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                    ALPAKA_API_PREFIX(MemGetInfo)(
                        &freeInternal,
                        &totalInternal));

                return totalInternal;
-#endif
            }
        };
@@ -193,37 +170,20 @@ namespace alpaka
                dev::DevUniformCudaHipRt const & dev)
            -> std::size_t
            {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                // Set the current device to wait for.
- ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - dev.m_iDevice)); - - std::size_t freeInternal(0u); - std::size_t totalInternal(0u); - - ALPAKA_CUDA_RT_CHECK( - cudaMemGetInfo( - &freeInternal, - &totalInternal)); - - return freeInternal; -#else // Set the current device to wait for. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( dev.m_iDevice)); std::size_t freeInternal(0u); std::size_t totalInternal(0u); - ALPAKA_HIP_RT_CHECK( - hipMemGetInfo( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemGetInfo)( &freeInternal, &totalInternal)); return freeInternal; -#endif } }; @@ -240,21 +200,12 @@ namespace alpaka { ALPAKA_DEBUG_FULL_LOG_SCOPE; -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED // Set the current device to wait for. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( dev.m_iDevice)); - ALPAKA_CUDA_RT_CHECK( - cudaDeviceReset()); -#else - // Set the current device to wait for. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - dev.m_iDevice)); - ALPAKA_HIP_RT_CHECK( - hipDeviceReset()); -#endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(DeviceReset)()); } }; } @@ -321,17 +272,11 @@ namespace alpaka -> void { ALPAKA_DEBUG_FULL_LOG_SCOPE; -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - // Set the current device to wait for. - ALPAKA_CUDA_RT_CHECK(cudaSetDevice( - dev.m_iDevice)); - ALPAKA_CUDA_RT_CHECK(cudaDeviceSynchronize()); -#else + // Set the current device to wait for. - ALPAKA_HIP_RT_CHECK(hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(SetDevice)( dev.m_iDevice)); - ALPAKA_HIP_RT_CHECK(hipDeviceSynchronize()); -#endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(DeviceSynchronize)()); } }; } diff --git a/include/alpaka/event/EventUniformCudaHipRt.hpp b/include/alpaka/event/EventUniformCudaHipRt.hpp index 5c9107fe1d32..336b77a9ed61 100644 --- a/include/alpaka/event/EventUniformCudaHipRt.hpp +++ b/include/alpaka/event/EventUniformCudaHipRt.hpp @@ -62,31 +62,21 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - m_dev.m_iDevice)); - // Create the event on the current device with the specified flags. Valid flags include: - // - cudaEventDefault: Default event creation flag. - // - cudaEventBlockingSync : Specifies that event should use blocking synchronization. - // A host thread that uses cudaEventSynchronize() to wait on an event created with this flag will block until the event actually completes. - // - cudaEventDisableTiming : Specifies that the created event does not need to record timing data. - // Events created with this flag specified and the cudaEventBlockingSync flag not specified will provide the best performance when used with cudaStreamWaitEvent() and cudaEventQuery(). - ALPAKA_CUDA_RT_CHECK( - cudaEventCreateWithFlags( - &m_UniformCudaHipEvent, - (bBusyWait ? cudaEventDefault : cudaEventBlockingSync) | cudaEventDisableTiming)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( m_dev.m_iDevice)); - ALPAKA_HIP_RT_CHECK( - hipEventCreateWithFlags( + // Create the event on the current device with the specified flags. Valid flags include: + // - cuda/hip-EventDefault: Default event creation flag. + // - cuda/hip-EventBlockingSync : Specifies that event should use blocking synchronization. 
+                    //   A host thread that uses cuda/hip-EventSynchronize() to wait on an event created with this flag will block until the event actually completes.
+                    // - cuda/hip-EventDisableTiming : Specifies that the created event does not need to record timing data.
+                    //   Events created with this flag specified and the cuda/hip-EventBlockingSync flag not specified will provide the best performance when used with cuda/hip-StreamWaitEvent() and cuda/hip-EventQuery().
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                        ALPAKA_API_PREFIX(EventCreateWithFlags)(
                            &m_UniformCudaHipEvent,
-                            (bBusyWait ? hipEventDefault : hipEventBlockingSync) | hipEventDisableTiming));
-#endif
+                            (bBusyWait ? ALPAKA_API_PREFIX(EventDefault) : ALPAKA_API_PREFIX(EventBlockingSync)) | ALPAKA_API_PREFIX(EventDisableTiming)));
                }
                //-----------------------------------------------------------------------------
                EventUniformCudaHipImpl(EventUniformCudaHipImpl const &) = delete;
@@ -101,30 +91,21 @@ namespace alpaka
                {
                    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

-                    // Set the current device. \TODO: Is setting the current device before cudaEventDestroy required?
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
-                    ALPAKA_CUDA_RT_CHECK(cudaSetDevice(
+                    // Set the current device. \TODO: Is setting the current device before cuda/hip-EventDestroy required?
+
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(SetDevice)(
                        m_dev.m_iDevice));
-                    // In case event has been recorded but has not yet been completed when cudaEventDestroy() is called, the function will return immediately
+                    // In case the event has been recorded but has not yet been completed when cuda/hip-EventDestroy() is called, the function will return immediately
                    // and the resources associated with event will be released automatically once the device has completed event.
                    // -> No need to synchronize here.
-                    ALPAKA_CUDA_RT_CHECK(cudaEventDestroy(
-                        m_UniformCudaHipEvent));
-#else
-                    ALPAKA_HIP_RT_CHECK(hipSetDevice(
-                        m_dev.m_iDevice));
-                    ALPAKA_HIP_RT_CHECK(hipEventDestroy(
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(EventDestroy)(
                        m_UniformCudaHipEvent));
-#endif
                }

            public:
                dev::DevUniformCudaHipRt const m_dev; //!< The device this event is bound to.
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED)
-                cudaEvent_t m_UniformCudaHipEvent;
-#else
-                hipEvent_t m_UniformCudaHipEvent;
-#endif
+
+                ALPAKA_API_PREFIX(Event_t) m_UniformCudaHipEvent;
            };
        }
    }
@@ -208,21 +189,12 @@ namespace alpaka
                    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

                    // Query is allowed even for events on non current device.
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaError_t ret = cudaSuccess; - ALPAKA_CUDA_RT_CHECK_IGNORE( - ret = cudaEventQuery( + ALPAKA_API_PREFIX(Error_t) ret = ALPAKA_API_PREFIX(Success); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE( + ret = ALPAKA_API_PREFIX(EventQuery)( event.m_spEventImpl->m_UniformCudaHipEvent), - cudaErrorNotReady); - return (ret == cudaSuccess); -#else - hipError_t ret = hipSuccess; - ALPAKA_HIP_RT_CHECK_IGNORE( - ret = hipEventQuery( - event.m_spEventImpl->m_UniformCudaHipEvent), - hipErrorNotReady); - return (ret == hipSuccess); -#endif + ALPAKA_API_PREFIX(ErrorNotReady)); + return (ret == ALPAKA_API_PREFIX(Success)); } }; } @@ -245,15 +217,10 @@ namespace alpaka -> void { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaEventRecord( + + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(EventRecord)( event.m_spEventImpl->m_UniformCudaHipEvent, queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - ALPAKA_HIP_RT_CHECK(hipEventRecord( - event.m_spEventImpl->m_UniformCudaHipEvent, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -270,15 +237,10 @@ namespace alpaka -> void { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaEventRecord( - event.m_spEventImpl->m_UniformCudaHipEvent, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - ALPAKA_HIP_RT_CHECK(hipEventRecord( + + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(EventRecord)( event.m_spEventImpl->m_UniformCudaHipEvent, queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; } @@ -304,13 +266,8 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Sync is allowed even for events on non current device. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaEventSynchronize( - event.m_spEventImpl->m_UniformCudaHipEvent)); -#else - ALPAKA_HIP_RT_CHECK(hipEventSynchronize( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(EventSynchronize)( event.m_spEventImpl->m_UniformCudaHipEvent)); -#endif } }; //############################################################################# @@ -328,17 +285,10 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaStreamWaitEvent( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(StreamWaitEvent)( queue.m_spQueueImpl->m_UniformCudaHipQueue, event.m_spEventImpl->m_UniformCudaHipEvent, 0)); -#else - ALPAKA_HIP_RT_CHECK(hipStreamWaitEvent( - queue.m_spQueueImpl->m_UniformCudaHipQueue, - event.m_spEventImpl->m_UniformCudaHipEvent, - 0)); -#endif } }; //############################################################################# @@ -356,17 +306,10 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaStreamWaitEvent( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(StreamWaitEvent)( queue.m_spQueueImpl->m_UniformCudaHipQueue, event.m_spEventImpl->m_UniformCudaHipEvent, 0)); -#else - ALPAKA_HIP_RT_CHECK(hipStreamWaitEvent( - queue.m_spQueueImpl->m_UniformCudaHipQueue, - event.m_spEventImpl->m_UniformCudaHipEvent, - 0)); -#endif } }; //############################################################################# @@ -387,28 +330,14 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Set the current device. 
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( dev.m_iDevice)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - dev.m_iDevice)); -#endif - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaStreamWaitEvent( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(StreamWaitEvent)( nullptr, event.m_spEventImpl->m_UniformCudaHipEvent, 0)); -#else - ALPAKA_HIP_RT_CHECK(hipStreamWaitEvent( - nullptr, - event.m_spEventImpl->m_UniformCudaHipEvent, - 0)); -#endif } }; } diff --git a/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp b/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp index bbaadf830c91..b347c51239f5 100644 --- a/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp +++ b/include/alpaka/kernel/TaskKernelGpuUniformCudaHipRt.hpp @@ -379,16 +379,10 @@ namespace alpaka #endif #endif -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - queue.m_spQueueImpl->m_dev.m_iDevice)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( queue.m_spQueueImpl->m_dev.m_iDevice)); -#endif // Enqueue the kernel execution. // \NOTE: No const reference (const &) is allowed as the parameter type because the kernel launch language extension expects the arguments by value. // This forces the type of a float argument given with std::forward to this function to be of type float instead of e.g. "float const & __ptr64" (MSVC). @@ -421,19 +415,12 @@ namespace alpaka task.m_args); #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL - #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Wait for the kernel execution to finish but do not check error return of this call. - // Do not use the alpaka::wait method because it checks the error itself but we want to give a custom error message. - cudaStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue); - std::string const msg("'execution of kernel: '" + std::string(typeid(TKernelFnObj).name()) + "' failed with"); - ::alpaka::cuda::detail::cudaRtCheckLastError(msg.c_str(), __FILE__, __LINE__); - #else - hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue); - std::string const msg("'execution of kernel: '" + std::string(typeid(TKernelFnObj).name()) + "' failed with"); - ::alpaka::hip::detail::hipRtCheckLastError(msg.c_str(), __FILE__, __LINE__); - #endif + // Wait for the kernel execution to finish but do not check error return of this call. + // Do not use the alpaka::wait method because it checks the error itself but we want to give a custom error message. + ALPAKA_API_PREFIX(StreamSynchronize)( + queue.m_spQueueImpl->m_UniformCudaHipQueue); + std::string const msg("'execution of kernel: '" + std::string(typeid(TKernelFnObj).name()) + "' failed with"); + ::alpaka::uniform_cuda_hip::detail::rtCheckLastError(msg.c_str(), __FILE__, __LINE__); #endif } }; @@ -520,25 +507,13 @@ namespace alpaka auto kernelName = kernel::uniform_cuda_hip::detail::uniformCudaHipKernel...>; #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL - // Log the function attributes. 
- #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaFuncAttributes funcAttrs; - cudaFuncGetAttributes(&funcAttrs, kernelName); - std::cout << __func__ - << " binaryVersion: " << funcAttrs.binaryVersion - << " constSizeBytes: " << funcAttrs.constSizeBytes << " B" - << " localSizeBytes: " << funcAttrs.localSizeBytes << " B" - << " maxThreadsPerBlock: " << funcAttrs.maxThreadsPerBlock - << " numRegs: " << funcAttrs.numRegs - << " ptxVersion: " << funcAttrs.ptxVersion - << " sharedSizeBytes: " << funcAttrs.sharedSizeBytes << " B" - << std::endl; - #else // hipFuncAttributes not ported from HIP to HIP. + // TODO why this is currently not possible + // // Log the function attributes. - /* TODO why this is currently not possible - hipFuncAttributes funcAttrs; - hipFuncGetAttributes(&funcAttrs, kernelName); + #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) + ALPAKA_API_PREFIX(FuncAttributes) funcAttrs; + ALPAKA_API_PREFIX(FuncGetAttributes)(&funcAttrs, kernelName); std::cout << __func__ << " binaryVersion: " << funcAttrs.binaryVersion << " constSizeBytes: " << funcAttrs.constSizeBytes << " B" @@ -548,21 +523,14 @@ namespace alpaka << " ptxVersion: " << funcAttrs.ptxVersion << " sharedSizeBytes: " << funcAttrs.sharedSizeBytes << " B" << std::endl; - */ #endif - #endif // Set the current device. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( queue.m_spQueueImpl->m_dev.m_iDevice)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - queue.m_spQueueImpl->m_dev.m_iDevice)); -#endif + // Enqueue the kernel execution. meta::apply( [&](std::decay_t const & ... args) @@ -593,20 +561,11 @@ namespace alpaka // Wait for the kernel execution to finish but do not check error return of this call. // Do not use the alpaka::wait method because it checks the error itself but we want to give a custom error message. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaStreamSynchronize( + ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue); -#else - hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue); -#endif #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL std::string const msg("'execution of kernel: '" + std::string(typeid(TKernelFnObj).name()) + "' failed with"); - #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ::alpaka::cuda::detail::cudaRtCheckLastError(msg.c_str(), __FILE__, __LINE__); - #else - ::alpaka::hip::detail::hipRtCheckLastError(msg.c_str(), __FILE__, __LINE__); - #endif + ::alpaka::uniform_cuda_hip::detail::rtCheckLastError(msg.c_str(), __FILE__, __LINE__); #endif } }; diff --git a/include/alpaka/mem/buf/BufCpu.hpp b/include/alpaka/mem/buf/BufCpu.hpp index 46587c2542a1..a3560b906057 100644 --- a/include/alpaka/mem/buf/BufCpu.hpp +++ b/include/alpaka/mem/buf/BufCpu.hpp @@ -456,31 +456,19 @@ namespace alpaka if(!mem::buf::isPinned(buf)) { -#if (defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) +#if (defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP) if(buf.m_spBufCpuImpl->m_extentElements.prod() != 0) { // - cudaHostRegisterDefault: // See http://cgi.cs.indiana.edu/~nhusted/dokuwiki/doku.php?id=programming:cudaperformance1 // - cudaHostRegisterPortable: // The memory returned by this call will be considered as pinned memory by all CUDA contexts, not just the one that performed the allocation. 
-                    ALPAKA_CUDA_RT_CHECK_IGNORE(
-                        cudaHostRegister(
-                            const_cast<void *>(reinterpret_cast<void const *>(mem::view::getPtrNative(buf))),
-                            extent::getExtentProduct(buf) * sizeof(elem::Elem<BufCpu<TElem, TDim, TIdx>>),
-                            cudaHostRegisterDefault),
-                        cudaErrorHostMemoryAlreadyRegistered);
-
-                    buf.m_spBufCpuImpl->m_bPinned = true;
-                }
-#elif (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP)
-                if(buf.m_spBufCpuImpl->m_extentElements.prod() != 0)
-                {
-                    ALPAKA_HIP_RT_CHECK_IGNORE(
-                        hipHostRegister(
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(
+                        ALPAKA_API_PREFIX(HostRegister)(
                            const_cast<void *>(reinterpret_cast<void const *>(mem::view::getPtrNative(buf))),
                            extent::getExtentProduct(buf) * sizeof(elem::Elem<BufCpu<TElem, TDim, TIdx>>),
-                            hipHostRegisterDefault),
-                        hipErrorHostMemoryAlreadyRegistered);
+                            ALPAKA_API_PREFIX(HostRegisterDefault)),
+                        ALPAKA_API_PREFIX(ErrorHostMemoryAlreadyRegistered));

                    buf.m_spBufCpuImpl->m_bPinned = true;
                }
@@ -527,18 +515,11 @@ namespace alpaka
                    if(mem::buf::isPinned(bufImpl))
                    {
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA
-                        ALPAKA_CUDA_RT_CHECK_IGNORE(
-                            cudaHostUnregister(
-                                const_cast<void *>(reinterpret_cast<void const *>(bufImpl.m_pMem))),
-                            cudaErrorHostMemoryNotRegistered);
-
-                        bufImpl.m_bPinned = false;
-#elif defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP
-                        ALPAKA_HIP_RT_CHECK_IGNORE(
-                            hipHostUnregister(
+#if (defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP)
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE(
+                            ALPAKA_API_PREFIX(HostUnregister)(
                                const_cast<void *>(reinterpret_cast<void const *>(bufImpl.m_pMem))),
-                            hipErrorHostMemoryNotRegistered);
+                            ALPAKA_API_PREFIX(ErrorHostMemoryNotRegistered));

                        bufImpl.m_bPinned = false;
#else
diff --git a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
index 7c3fc9df94a7..565f2210ad61 100644
--- a/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
+++ b/include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
@@ -117,23 +117,14 @@ namespace alpaka
                -> void
                {
                    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                    // Set the current device.
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaSetDevice(
-                            dev.m_iDevice));
-                    // Free the buffer.
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaFree(reinterpret_cast<void *>(memPtr)));
-#else
+
                    // Set the current device.
-                    ALPAKA_HIP_RT_CHECK(
-                        hipSetDevice(
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                        ALPAKA_API_PREFIX(SetDevice)(
                            dev.m_iDevice));
                    // Free the buffer.
-                    ALPAKA_HIP_RT_CHECK(
-                        hipFree(reinterpret_cast<void *>(memPtr)));
-#endif
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                        ALPAKA_API_PREFIX(Free)(reinterpret_cast<void *>(memPtr)));
                }

            public:
@@ -356,29 +347,17 @@ namespace alpaka
                    auto const width(extent::getWidth(extent));
                    auto const widthBytes(width * static_cast<TIdx>(sizeof(TElem)));
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                    // Set the current device.
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaSetDevice(
-                            dev.m_iDevice));
-                    // Allocate the buffer on this device.
-                    void * memPtr;
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaMalloc(
-                            &memPtr,
-                            static_cast<std::size_t>(widthBytes)));
-#else
+
                    // Set the current device.
-                    ALPAKA_HIP_RT_CHECK(
-                        hipSetDevice(
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                        ALPAKA_API_PREFIX(SetDevice)(
                            dev.m_iDevice));
                    // Allocate the buffer on this device.
                    void * memPtr;
-                    ALPAKA_HIP_RT_CHECK(
-                        hipMalloc(
+                    ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                        ALPAKA_API_PREFIX(Malloc)(
                            &memPtr,
                            static_cast<std::size_t>(widthBytes)));
-#endif

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
                    std::cout << __func__
@@ -420,44 +399,30 @@ namespace alpaka
                    auto const widthBytes(width * static_cast<TIdx>(sizeof(TElem)));
                    auto const height(extent::getHeight(extent));

-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                    // Set the current device.
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaSetDevice(
-                            dev.m_iDevice));
-                    // Allocate the buffer on this device.
-                    void * memPtr;
-                    std::size_t pitchBytes;
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaMallocPitch(
-                            &memPtr,
-                            &pitchBytes,
-                            static_cast<std::size_t>(widthBytes),
-                            static_cast<std::size_t>(height)));
-                    ALPAKA_ASSERT(pitchBytes >= static_cast<std::size_t>(widthBytes) || (width * height) == 0);
-#else
+
                    void * memPtr = nullptr;
                    std::size_t pitchBytes = widthBytes;
-
+#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
                    //FIXME: hcc cannot handle zero-size input (throws Unknown Error)
-                    if(width!=0 && height!=0) {
+                    if(width!=0 && height!=0)
+#endif
+                    {
                        // Set the current device.
-                        ALPAKA_HIP_RT_CHECK(
-                            hipSetDevice(
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                            ALPAKA_API_PREFIX(SetDevice)(
                                dev.m_iDevice));
                        // Allocate the buffer on this device.
-                        ALPAKA_HIP_RT_CHECK(
-                            hipMallocPitch(
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                            ALPAKA_API_PREFIX(MallocPitch)(
                                &memPtr,
                                &pitchBytes,
                                static_cast<std::size_t>(widthBytes),
                                static_cast<std::size_t>(height)));
                        ALPAKA_ASSERT(pitchBytes >= static_cast<std::size_t>(widthBytes) || (width * height) == 0);
                    }
-#endif

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
                    std::cout << __func__
@@ -497,91 +462,52 @@ namespace alpaka
                {
                    ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                    cudaExtent const cudaExtentVal(
-                        make_cudaExtent(
+                    ALPAKA_API_PREFIX(Extent) const extentVal(
+                        ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Extent))(
                            static_cast<std::size_t>(extent::getWidth(extent) * static_cast<TIdx>(sizeof(TElem))),
                            static_cast<std::size_t>(extent::getHeight(extent)),
                            static_cast<std::size_t>(extent::getDepth(extent))));
-                    // Set the current device.
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaSetDevice(
-                            dev.m_iDevice));
-                    // Allocate the buffer on this device.
-                    cudaPitchedPtr cudaPitchedPtrVal;
-                    ALPAKA_CUDA_RT_CHECK(
-                        cudaMalloc3D(
-                            &cudaPitchedPtrVal,
-                            cudaExtentVal));
-#else
-                    hipExtent const hipExtentVal(
-                        make_hipExtent(
-                            static_cast<std::size_t>(extent::getWidth(extent) * static_cast<TIdx>(sizeof(TElem))),
-                            static_cast<std::size_t>(extent::getHeight(extent)),
-                            static_cast<std::size_t>(extent::getDepth(extent))));
-
-                    hipPitchedPtr hipPitchedPtrVal = {0};
-
+                    ALPAKA_API_PREFIX(PitchedPtr) pitchedPtrVal;
+                    pitchedPtrVal.ptr = nullptr;
+#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
                    //FIXME: hcc cannot handle zero-size input
-                    if(hipExtentVal.width!=0
-                        && hipExtentVal.height!=0
-                        && hipExtentVal.depth!=0) {
+                    if(extentVal.width!=0
+                        && extentVal.height!=0
+                        && extentVal.depth!=0)
+#endif
+                    {
                        // Set the current device.
-                        ALPAKA_HIP_RT_CHECK(
-                            hipSetDevice(
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                            ALPAKA_API_PREFIX(SetDevice)(
                                dev.m_iDevice));
                        // Allocate the buffer on this device.
-                        ALPAKA_HIP_RT_CHECK(
-                            hipMalloc3D(
-                                &hipPitchedPtrVal,
-                                hipExtentVal));
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                            ALPAKA_API_PREFIX(Malloc3D)(
+                                &pitchedPtrVal,
+                                extentVal));
                    }
-#endif
-
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
-    #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
                    std::cout << __func__
                        << " ew: " << extent::getWidth(extent)
-                        << " eh: " << cudaExtentVal.height
-                        << " ed: " << cudaExtentVal.depth
-                        << " ewb: " << cudaExtentVal.width
-                        << " ptr: " << cudaPitchedPtrVal.ptr
-                        << " pitch: " << cudaPitchedPtrVal.pitch
-                        << " wb: " << cudaPitchedPtrVal.xsize
-                        << " h: " << cudaPitchedPtrVal.ysize
+                        << " eh: " << extentVal.height
+                        << " ed: " << extentVal.depth
+                        << " ewb: " << extentVal.width
+                        << " ptr: " << pitchedPtrVal.ptr
+                        << " pitch: " << pitchedPtrVal.pitch
+                        << " wb: " << pitchedPtrVal.xsize
+                        << " h: " << pitchedPtrVal.ysize
                        << std::endl;
-    #else
-                    std::cout << __func__
-                        << " ew: " << extent::getWidth(extent)
-                        << " eh: " << hipExtentVal.height
-                        << " ed: " << hipExtentVal.depth
-                        << " ewb: " << hipExtentVal.width
-                        << " ptr: " << hipPitchedPtrVal.ptr
-                        << " pitch: " << hipPitchedPtrVal.pitch
-                        << " wb: " << hipPitchedPtrVal.xsize
-                        << " h: " << hipPitchedPtrVal.ysize
-                        << std::endl;
-    #endif
#endif

-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
                    return
                        mem::buf::BufUniformCudaHipRt<TElem, dim::Dim<TExtent>, TIdx>(
                            dev,
-                            reinterpret_cast<TElem *>(cudaPitchedPtrVal.ptr),
-                            static_cast<TIdx>(cudaPitchedPtrVal.pitch),
+                            reinterpret_cast<TElem *>(pitchedPtrVal.ptr),
+                            static_cast<TIdx>(pitchedPtrVal.pitch),
                            extent);
-#else
-                    return
-                        mem::buf::BufUniformCudaHipRt<TElem, dim::Dim<TExtent>, TIdx>(
-                            dev,
-                            reinterpret_cast<TElem *>(hipPitchedPtrVal.ptr),
-                            static_cast<TIdx>(hipPitchedPtrVal.pitch),
-                            extent);
-#endif
                }
            };
            //#############################################################################
@@ -786,25 +712,14 @@ namespace alpaka
                    // If it is already the same device, nothing has to be mapped.
                    if(dev::getDev(buf) != dev)
                    {
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
-                        // cudaHostRegisterMapped:
+                        // cuda/hip-HostRegisterMapped:
                        // Maps the allocation into the CUDA/HIP address space. The device pointer to the memory may be obtained by calling cudaHostGetDevicePointer().
                        // This feature is available only on GPUs with compute capability greater than or equal to 1.1.
-                        ALPAKA_CUDA_RT_CHECK(
-                            cudaHostRegister(
-                                const_cast<void *>(reinterpret_cast<void const *>(mem::view::getPtrNative(buf))),
-                                extent::getExtentProduct(buf) * sizeof(elem::Elem<BufCpu<TElem, TDim, TIdx>>),
-                                cudaHostRegisterMapped));
-#else
-                        // hipHostRegisterMapped:
-                        // Maps the allocation into the HIP address space. The device pointer to the memory may be obtained by calling hipHostGetDevicePointer().
-                        // This feature is available only on GPUs with compute capability greater than or equal to 1.1.
-                        ALPAKA_HIP_RT_CHECK(
-                            hipHostRegister(
+                        ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
+                            ALPAKA_API_PREFIX(HostRegister)(
                                const_cast<void *>(reinterpret_cast<void const *>(mem::view::getPtrNative(buf))),
                                extent::getExtentProduct(buf) * sizeof(elem::Elem<BufCpu<TElem, TDim, TIdx>>),
-                                hipHostRegisterMapped));
-#endif
+                                ALPAKA_API_PREFIX(HostRegisterMapped)));
                    }
                }
            };
@@ -830,15 +745,9 @@ namespace alpaka
                    {
                        // Unmaps the memory range whose base address is specified by ptr, and makes it pageable again.
                        // \FIXME: If the memory has separately been pinned before we destroy the pinning state.
-#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - ALPAKA_CUDA_RT_CHECK( - cudaHostUnregister( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(HostUnregister)( const_cast(reinterpret_cast(mem::view::getPtrNative(buf))))); -#else - ALPAKA_HIP_RT_CHECK( - hipHostUnregister( - const_cast(reinterpret_cast(mem::view::getPtrNative(buf))))); -#endif } // If it is already the same device, nothing has to be unmapped. } @@ -867,19 +776,13 @@ namespace alpaka { // TODO: Check if the memory is mapped at all! TElem * pDev(nullptr); -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - ALPAKA_CUDA_RT_CHECK( - cudaHostGetDevicePointer( - &pDev, - const_cast(reinterpret_cast(mem::view::getPtrNative(buf))), - 0)); -#else - ALPAKA_HIP_RT_CHECK( - hipHostGetDevicePointer( + + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(HostGetDevicePointer)( &pDev, const_cast(reinterpret_cast(mem::view::getPtrNative(buf))), 0)); -#endif + return pDev; } //----------------------------------------------------------------------------- @@ -890,19 +793,13 @@ namespace alpaka { // TODO: Check if the memory is mapped at all! TElem * pDev(nullptr); -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - ALPAKA_CUDA_RT_CHECK( - cudaHostGetDevicePointer( - &pDev, - mem::view::getPtrNative(buf), - 0)); -#else - ALPAKA_HIP_RT_CHECK( - hipHostGetDevicePointer( + + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(HostGetDevicePointer)( &pDev, mem::view::getPtrNative(buf), 0)); -#endif + return pDev; } }; diff --git a/include/alpaka/mem/buf/uniformCudaHip/Copy.hpp b/include/alpaka/mem/buf/uniformCudaHip/Copy.hpp index 7f7c9817611f..3a40b9a4dd8a 100644 --- a/include/alpaka/mem/buf/uniformCudaHip/Copy.hpp +++ b/include/alpaka/mem/buf/uniformCudaHip/Copy.hpp @@ -74,11 +74,8 @@ namespace alpaka TViewSrc, TExtent> { -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - using MemcpyKind = cudaMemcpyKind; -#else - using MemcpyKind = hipMemcpyKind; -#endif + using MemcpyKind = ALPAKA_API_PREFIX(MemcpyKind); + static_assert( !std::is_const::value, "The destination view can not be const!"); @@ -164,11 +161,8 @@ namespace alpaka TViewSrc, TExtent> { -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - using MemcpyKind = cudaMemcpyKind; -#else - using MemcpyKind = hipMemcpyKind; -#endif + using MemcpyKind = ALPAKA_API_PREFIX(MemcpyKind); + static_assert( !std::is_const::value, "The destination view can not be const!"); @@ -285,11 +279,8 @@ namespace alpaka TViewSrc, TExtent> { -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - using MemcpyKind = cudaMemcpyKind; -#else - using MemcpyKind = hipMemcpyKind; -#endif + using MemcpyKind = ALPAKA_API_PREFIX(MemcpyKind); + static_assert( !std::is_const::value, "The destination view can not be const!"); @@ -431,11 +422,8 @@ namespace alpaka alreadyCheckedPeerAccessDevices.insert(devicePair); int canAccessPeer = 0; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaDeviceCanAccessPeer(&canAccessPeer, devSrc, devDst)); -#else - ALPAKA_HIP_RT_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, devSrc, devDst)); -#endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(DeviceCanAccessPeer)(&canAccessPeer, devSrc, devDst)); + if(!canAccessPeer) { #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL std::cout << __func__ @@ -446,19 +434,12 @@ namespace alpaka #endif return; } -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaSetDevice(devSrc)); -#else - ALPAKA_HIP_RT_CHECK(hipSetDevice(devSrc)); -#endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(SetDevice)(devSrc)); + // NOTE: "until access is explicitly disabled using 
cudaDeviceDisablePeerAccess() or either device is reset using cudaDeviceReset()." // We do not remove a device from the enabled device pairs on cudaDeviceReset. // Note that access granted by this call is unidirectional and that in order to access memory on the current device from peerDevice, a separate symmetric call to cudaDeviceEnablePeerAccess() is required. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaDeviceEnablePeerAccess(devDst, 0)); -#else - ALPAKA_HIP_RT_CHECK(hipDeviceEnablePeerAccess(devDst, 0)); -#endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(DeviceEnablePeerAccess)(devDst, 0)); } } } @@ -506,11 +487,7 @@ namespace alpaka viewDst, viewSrc, extent, -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - cudaMemcpyDeviceToHost, -#else - hipMemcpyDeviceToHost, -#endif + ALPAKA_API_PREFIX(MemcpyDeviceToHost), iDevice, iDevice); } @@ -553,11 +530,7 @@ namespace alpaka viewDst, viewSrc, extent, -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - cudaMemcpyHostToDevice, -#else - hipMemcpyHostToDevice, -#endif + ALPAKA_API_PREFIX(MemcpyHostToDevice), iDevice, iDevice); } @@ -597,11 +570,7 @@ namespace alpaka viewDst, viewSrc, extent, -#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED - cudaMemcpyDeviceToDevice, -#else - hipMemcpyDeviceToDevice, -#endif + ALPAKA_API_PREFIX(MemcpyDeviceToDevice), dev::getDev(viewDst).m_iDevice, dev::getDev(viewSrc).m_iDevice); } @@ -618,11 +587,7 @@ namespace alpaka typename TViewDst> ALPAKA_FN_HOST auto buildUniformCudaHipMemcpy3DParms( mem::view::uniform_cuda_hip::detail::TaskCopyUniformCudaHip, TViewDst, TViewSrc, TExtent> const & task) -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - -> cudaMemcpy3DParms -#else - -> hipMemcpy3DParms -#endif + -> ALPAKA_API_PREFIX(Memcpy3DParms) { ALPAKA_DEBUG_FULL_LOG_SCOPE; @@ -645,63 +610,34 @@ namespace alpaka auto const & srcNativePtr(task.m_srcMemNative); // Fill CUDA/HIP parameter structure. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaMemcpy3DParms cudaMemCpy3DParms; - cudaMemCpy3DParms.srcArray = nullptr; // Either srcArray or srcPtr. - cudaMemCpy3DParms.srcPos = make_cudaPos(0, 0, 0); // Optional. Offset in bytes. - cudaMemCpy3DParms.srcPtr = - make_cudaPitchedPtr( + ALPAKA_API_PREFIX(Memcpy3DParms) memCpy3DParms; + memCpy3DParms.srcArray = nullptr; // Either srcArray or srcPtr. + memCpy3DParms.srcPos = ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Pos))(0, 0, 0); // Optional. Offset in bytes. + memCpy3DParms.srcPtr = + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(PitchedPtr))( const_cast(srcNativePtr), static_cast(srcPitchBytesX), static_cast(srcWidth), static_cast(srcPitchBytesY/srcPitchBytesX)); - cudaMemCpy3DParms.dstArray = nullptr; // Either dstArray or dstPtr. - cudaMemCpy3DParms.dstPos = make_cudaPos(0, 0, 0); // Optional. Offset in bytes. - cudaMemCpy3DParms.dstPtr = - make_cudaPitchedPtr( + memCpy3DParms.dstArray = nullptr; // Either dstArray or dstPtr. + memCpy3DParms.dstPos = ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Pos))(0, 0, 0); // Optional. Offset in bytes. 
+ memCpy3DParms.dstPtr = + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(PitchedPtr))( dstNativePtr, static_cast(dstPitchBytesX), static_cast(dstWidth), static_cast(dstPitchBytesY / dstPitchBytesX)); - cudaMemCpy3DParms.extent = - make_cudaExtent( + memCpy3DParms.extent = + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Extent))( static_cast(extentWidthBytes), static_cast(extentHeight), static_cast(extentDepth)); - cudaMemCpy3DParms.kind = task.m_uniformMemCpyKind; - return cudaMemCpy3DParms; - +#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && defined(__HIP_PLATFORM_NVCC__) + memCpy3DParms.kind = hipMemcpyKindToCudaMemcpyKind(task.m_uniformMemCpyKind); #else - hipMemcpy3DParms hipMemCpy3DParms; - hipMemCpy3DParms.srcArray = nullptr; // Either srcArray or srcPtr. - hipMemCpy3DParms.srcPos = make_hipPos(0, 0, 0); // Optional. Offset in bytes. - hipMemCpy3DParms.srcPtr = - make_hipPitchedPtr( - const_cast(srcNativePtr), - static_cast(srcPitchBytesX), - static_cast(srcWidth), - static_cast(srcPitchBytesY/srcPitchBytesX)); - hipMemCpy3DParms.dstArray = nullptr; // Either dstArray or dstPtr. - hipMemCpy3DParms.dstPos = make_hipPos(0, 0, 0); // Optional. Offset in bytes. - hipMemCpy3DParms.dstPtr = - make_hipPitchedPtr( - dstNativePtr, - static_cast(dstPitchBytesX), - static_cast(dstWidth), - static_cast(dstPitchBytesY/dstPitchBytesX)); - hipMemCpy3DParms.extent = - make_hipExtent( - static_cast(extentWidthBytes), - static_cast(extentHeight), - static_cast(extentDepth)); - #ifdef __HIP_PLATFORM_NVCC__ - hipMemCpy3DParms.kind = hipMemcpyKindToCudaMemcpyKind(task.m_uniformMemCpyKind); - #else - hipMemCpy3DParms.kind = task.m_uniformMemCpyKind; - #endif - return hipMemCpy3DParms; - + memCpy3DParms.kind = task.m_uniformMemCpyKind; #endif + return memCpy3DParms; } #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) //----------------------------------------------------------------------------- @@ -869,16 +805,15 @@ namespace alpaka auto const & uniformCudaHipMemCpyKind(task.m_uniformMemCpyKind); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) if(iDstDev == iSrcDev) { // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpyAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemcpyAsync)( dstNativePtr, srcNativePtr, static_cast(extentWidthBytes), @@ -890,8 +825,8 @@ namespace alpaka alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpyPeerAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemcpyPeerAsync)( dstNativePtr, iDstDev, srcNativePtr, @@ -899,35 +834,6 @@ namespace alpaka static_cast(extentWidthBytes), queue.m_spQueueImpl->m_UniformCudaHipQueue)); } -#else - if(iDstDev == iSrcDev) - { - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - // Initiate the memory copy. - ALPAKA_HIP_RT_CHECK( - hipMemcpyAsync( - dstNativePtr, - srcNativePtr, - static_cast(extentWidthBytes), - uniformCudaHipMemCpyKind, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } - else - { - // Initiate the memory copy. 
- ALPAKA_HIP_RT_CHECK( - hipMemcpyPeerAsync( - dstNativePtr, - iDstDev, - srcNativePtr, - iSrcDev, - static_cast(extentWidthBytes), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } -#endif } }; //############################################################################# @@ -964,19 +870,17 @@ namespace alpaka auto const & dstNativePtr(task.m_dstMemNative); auto const & srcNativePtr(task.m_srcMemNative); - -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) if(iDstDev == iSrcDev) { auto const & uniformCudaHipMemCpyKind(task.m_uniformMemCpyKind); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpyAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemcpyAsync)( dstNativePtr, srcNativePtr, static_cast(extentWidthBytes), @@ -988,8 +892,8 @@ namespace alpaka alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpyPeerAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemcpyPeerAsync)( dstNativePtr, iDstDev, srcNativePtr, @@ -997,43 +901,9 @@ namespace alpaka static_cast(extentWidthBytes), queue.m_spQueueImpl->m_UniformCudaHipQueue)); } - ALPAKA_CUDA_RT_CHECK( - cudaStreamSynchronize( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - if(iDstDev == iSrcDev) - { - auto const & uniformCudaHipMemCpyKind(task.m_uniformMemCpyKind); - - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - // Initiate the memory copy. - ALPAKA_HIP_RT_CHECK( - hipMemcpyAsync( - dstNativePtr, - srcNativePtr, - static_cast(extentWidthBytes), - uniformCudaHipMemCpyKind, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } - else - { - // Initiate the memory copy. - ALPAKA_HIP_RT_CHECK( - hipMemcpyPeerAsync( - dstNativePtr, - iDstDev, - srcNativePtr, - iSrcDev, - static_cast(extentWidthBytes), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } - - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -1076,71 +946,41 @@ namespace alpaka auto const & dstNativePtr(task.m_dstMemNative); auto const & srcNativePtr(task.m_srcMemNative); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - auto const & cudaMemcpyKind(task.m_uniformMemCpyKind); + + auto const & memcpyKind(task.m_uniformMemCpyKind); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpy2DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memcpy2DAsync)( dstNativePtr, static_cast(dstPitchBytesX), srcNativePtr, static_cast(srcPitchBytesX), static_cast(extentWidthBytes), static_cast(extentHeight), - cudaMemcpyKind, + memcpyKind, queue.m_spQueueImpl->m_UniformCudaHipQueue)); } else { alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - +#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // There is no cudaMemcpy2DPeerAsync, therefore we use cudaMemcpy3DPeerAsync. // Create the struct describing the copy. 
- cudaMemcpy3DPeerParms const cudaMemCpy3DPeerParms( + ALPAKA_API_PREFIX(Memcpy3DPeerParms) const memCpy3DPeerParms( mem::view::uniform_cuda_hip::detail::buildCudaMemcpy3DPeerParms( task)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemcpy3DPeerAsync( - &cudaMemCpy3DPeerParms, + &memCpy3DPeerParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } -#else - auto const & hipMemCpyKind(task.m_uniformMemCpyKind); - - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - - if(iDstDev != iSrcDev) - { - // HIP relies on unified memory, so memcpy commands automatically do device-to-device transfers. - // P2P access has to be enabled to avoid host transfer. - // Checks if devices are connected via PCIe switch and enable P2P access then. - alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - } - - ALPAKA_HIP_RT_CHECK( - hipMemcpy2DAsync( - dstNativePtr, - static_cast(dstPitchBytesX), - srcNativePtr, - static_cast(srcPitchBytesX), - static_cast(extentWidthBytes), - static_cast(extentHeight), - hipMemCpyKind, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } #endif + } } }; //############################################################################# @@ -1172,7 +1012,7 @@ namespace alpaka auto const & iDstDev(task.m_iDstDevice); auto const & iSrcDev(task.m_iSrcDevice); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) + if(iDstDev == iSrcDev) { auto const & extentWidthBytes(task.m_extentWidthBytes); @@ -1183,80 +1023,43 @@ namespace alpaka auto const & dstNativePtr(task.m_dstMemNative); auto const & srcNativePtr(task.m_srcMemNative); - auto const & cudaMemcpyKind(task.m_uniformMemCpyKind); + auto const & memcpyKind(task.m_uniformMemCpyKind); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpy2DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memcpy2DAsync)( dstNativePtr, static_cast(dstPitchBytesX), srcNativePtr, static_cast(srcPitchBytesX), static_cast(extentWidthBytes), static_cast(extentHeight), - cudaMemcpyKind, + memcpyKind, queue.m_spQueueImpl->m_UniformCudaHipQueue)); } else { alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - +#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // There is no cudaMemcpy2DPeerAsync, therefore we use cudaMemcpy3DPeerAsync. // Create the struct describing the copy. - cudaMemcpy3DPeerParms const cudaMemCpy3DPeerParms( + cudaMemcpy3DPeerParms const memCpy3DPeerParms( mem::view::uniform_cuda_hip::detail::buildCudaMemcpy3DPeerParms( task)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemcpy3DPeerAsync( - &cudaMemCpy3DPeerParms, + &memCpy3DPeerParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); +#endif } - ALPAKA_CUDA_RT_CHECK( - cudaStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - auto const & extentWidthBytes(task.m_extentWidthBytes); - auto const & extentHeight(task.m_extentHeight); - - auto const & dstPitchBytesX(task.m_dstpitchBytesX); - auto const & srcPitchBytesX(task.m_srcpitchBytesX); - - auto const & dstNativePtr(task.m_dstMemNative); - auto const & srcNativePtr(task.m_srcMemNative); - auto const & hipMemCpyKind(task.m_uniformMemCpyKind); - - // Set the current device. 
- ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - - if(iDstDev != iSrcDev) - { - // HIP relies on unified memory, so memcpy commands automatically do device-to-device transfers. - // P2P access has to be enabled to avoid host transfer. - // Checks if devices are connected via PCIe switch and enable P2P access then. - alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - } - - ALPAKA_HIP_RT_CHECK( - hipMemcpy2DAsync( - dstNativePtr, - static_cast(dstPitchBytesX), - srcNativePtr, - static_cast(srcPitchBytesX), - static_cast(extentWidthBytes), - static_cast(extentHeight), - hipMemCpyKind, + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue)); - - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -1288,61 +1091,37 @@ namespace alpaka auto const & iDstDev(task.m_iDstDevice); auto const & iSrcDev(task.m_iSrcDevice); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) if(iDstDev == iSrcDev) { // Create the struct describing the copy. - cudaMemcpy3DParms const uniformCudaHipMemCpy3DParms( + ALPAKA_API_PREFIX(Memcpy3DParms) const uniformCudaHipMemCpy3DParms( mem::view::uniform_cuda_hip::detail::buildUniformCudaHipMemcpy3DParms( task)); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpy3DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memcpy3DAsync)( &uniformCudaHipMemCpy3DParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); } else { alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - +#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Create the struct describing the copy. cudaMemcpy3DPeerParms const cudaMemCpy3DPeerParms( mem::view::uniform_cuda_hip::detail::buildCudaMemcpy3DPeerParms( task)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemcpy3DPeerAsync( &cudaMemCpy3DPeerParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); - } -#else - // Create the struct describing the copy. - hipMemcpy3DParms const hipMemCpy3DParms( - mem::view::uniform_cuda_hip::detail::buildUniformCudaHipMemcpy3DParms( - task)); - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - - if(iDstDev != iSrcDev) - { - // HIP relies on unified memory, so memcpy commands automatically do device-to-device transfers. - // P2P access has to be enabled to avoid host transfer. - // Checks if devices are connected via PCIe switch and enable P2P access then. - alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - } - - // Initiate the memory copy. - ALPAKA_HIP_RT_CHECK( - hipMemcpy3DAsync( - &hipMemCpy3DParms, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); #endif + } } }; //############################################################################# @@ -1374,66 +1153,40 @@ namespace alpaka auto const & iDstDev(task.m_iDstDevice); auto const & iSrcDev(task.m_iSrcDevice); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) if(iDstDev == iSrcDev) { // Create the struct describing the copy. 
- cudaMemcpy3DParms const uniformCudaHipMemCpy3DParms( + ALPAKA_API_PREFIX(Memcpy3DParms) const uniformCudaHipMemCpy3DParms( mem::view::uniform_cuda_hip::detail::buildUniformCudaHipMemcpy3DParms( task)); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDstDev)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( - cudaMemcpy3DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memcpy3DAsync)( &uniformCudaHipMemCpy3DParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); } else { alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - +#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Create the struct describing the copy. cudaMemcpy3DPeerParms const cudaMemCpy3DPeerParms( mem::view::uniform_cuda_hip::detail::buildCudaMemcpy3DPeerParms( task)); // Initiate the memory copy. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemcpy3DPeerAsync( &cudaMemCpy3DPeerParms, queue.m_spQueueImpl->m_UniformCudaHipQueue)); +#endif } - ALPAKA_CUDA_RT_CHECK( - cudaStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - hipMemcpy3DParms const hipMemCpy3DParms( - mem::view::uniform_cuda_hip::detail::buildUniformCudaHipMemcpy3DParms( - task)); - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDstDev)); - - if(iDstDev != iSrcDev) - { - // HIP relies on unified memory, so memcpy commands automatically do device-to-device transfers. - // P2P access has to be enabled to avoid host transfer. - // Checks if devices are connected via PCIe switch and enable P2P access then. - alpaka::mem::view::uniform_cuda_hip::detail::enablePeerAccessIfPossible(iSrcDev, iDstDev); - } - - // Initiate the memory copy. - ALPAKA_HIP_RT_CHECK( - hipMemcpy3DAsync( - &hipMemCpy3DParms, + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue)); - - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; } diff --git a/include/alpaka/mem/buf/uniformCudaHip/Set.hpp b/include/alpaka/mem/buf/uniformCudaHip/Set.hpp index aeea742ec38a..72c55064119a 100644 --- a/include/alpaka/mem/buf/uniformCudaHip/Set.hpp +++ b/include/alpaka/mem/buf/uniformCudaHip/Set.hpp @@ -174,31 +174,17 @@ namespace alpaka auto const dstNativePtr(reinterpret_cast(mem::view::getPtrNative(view))); ALPAKA_ASSERT(extentWidth <= dstWidth); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - iDevice)); - // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( - cudaMemsetAsync( - dstNativePtr, - static_cast(byte), - static_cast(extentWidthBytes), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( - hipMemsetAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemsetAsync)( dstNativePtr, static_cast(byte), static_cast(extentWidthBytes), queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -246,31 +232,17 @@ namespace alpaka auto const dstNativePtr(reinterpret_cast(mem::view::getPtrNative(view))); ALPAKA_ASSERT(extentWidth <= dstWidth); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Set the current device. 
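//-----------------------------------------------------------------------------
// A minimal sketch of the same-device 3D branch above in plain CUDA terms; the
// helper name and parameters are assumptions, not the alpaka parameter builder.
#include <cstddef>
#include <cuda_runtime.h>

inline cudaError_t copy3DAsyncSketch(
    void* dst, std::size_t dstPitchBytes,
    void const* src, std::size_t srcPitchBytes,
    std::size_t extentWidthBytes, std::size_t extentHeight, std::size_t extentDepth,
    cudaStream_t queue)
{
    cudaMemcpy3DParms p = {};
    p.dstPtr = make_cudaPitchedPtr(dst, dstPitchBytes, extentWidthBytes, extentHeight);
    p.srcPtr = make_cudaPitchedPtr(const_cast<void*>(src), srcPitchBytes, extentWidthBytes, extentHeight);
    p.extent = make_cudaExtent(extentWidthBytes, extentHeight, extentDepth);
    p.kind = cudaMemcpyDeviceToDevice; // both buffers live on the current device
    return cudaMemcpy3DAsync(&p, queue);
}
//-----------------------------------------------------------------------------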
- ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - iDevice)); - // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( - cudaMemsetAsync( - dstNativePtr, - static_cast(byte), - static_cast(extentWidthBytes), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( - hipMemsetAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(MemsetAsync)( dstNativePtr, static_cast(byte), static_cast(extentWidthBytes), queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif wait::wait(queue); } }; @@ -324,35 +296,19 @@ namespace alpaka ALPAKA_ASSERT(extentWidth <= dstWidth); ALPAKA_ASSERT(extentHeight <= dstHeight); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - iDevice)); - // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( - cudaMemset2DAsync( - dstNativePtr, - static_cast(dstPitchBytesX), - static_cast(byte), - static_cast(extentWidthBytes), - static_cast(extentHeight), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( - hipMemset2DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memset2DAsync)( dstNativePtr, static_cast(dstPitchBytesX), static_cast(byte), static_cast(extentWidthBytes), static_cast(extentHeight), queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -405,15 +361,14 @@ namespace alpaka ALPAKA_ASSERT(extentWidth <= dstWidth); ALPAKA_ASSERT(extentHeight <= dstHeight); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( - cudaMemset2DAsync( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memset2DAsync)( dstNativePtr, static_cast(dstPitchBytesX), static_cast(byte), @@ -421,22 +376,6 @@ namespace alpaka static_cast(extentHeight), queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDevice)); - - // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( - hipMemset2DAsync( - dstNativePtr, - static_cast(dstPitchBytesX), - static_cast(byte), - static_cast(extentWidthBytes), - static_cast(extentHeight), - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif wait::wait(queue); } }; @@ -494,59 +433,31 @@ namespace alpaka ALPAKA_ASSERT(extentHeight <= dstHeight); ALPAKA_ASSERT(extentDepth <= dstDepth); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Fill CUDA parameter structures. - cudaPitchedPtr const cudaPitchedPtrVal( - make_cudaPitchedPtr( - dstNativePtr, - static_cast(dstPitchBytesX), - static_cast(dstWidth * static_cast(sizeof(Elem))), - static_cast(dstPitchBytesY / dstPitchBytesX))); - - cudaExtent const cudaExtentVal( - make_cudaExtent( - static_cast(extentWidth * static_cast(sizeof(Elem))), - static_cast(extentHeight), - static_cast(extentDepth))); - - // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( - iDevice)); - // Initiate the memory set. 
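//-----------------------------------------------------------------------------
// The blocking Set variants above enqueue the memset and then wait on the
// queue; a hedged stand-alone sketch of that pattern in plain CUDA terms
// (function and parameter names are assumptions).
#include <cstddef>
#include <cuda_runtime.h>

inline cudaError_t memsetBlockingSketch(
    int iDevice, void* dst, int byte, std::size_t extentWidthBytes, cudaStream_t queue)
{
    // Select the device owning the buffer, enqueue the asynchronous set, then
    // block until the queue has drained, mirroring the trailing wait::wait(queue).
    cudaError_t rc = cudaSetDevice(iDevice);
    if(rc == cudaSuccess)
        rc = cudaMemsetAsync(dst, byte, extentWidthBytes, queue);
    if(rc == cudaSuccess)
        rc = cudaStreamSynchronize(queue);
    return rc;
}
//-----------------------------------------------------------------------------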
- ALPAKA_CUDA_RT_CHECK( - cudaMemset3DAsync( - cudaPitchedPtrVal, - static_cast(byte), - cudaExtentVal, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else // Fill CUDA parameter structures. - hipPitchedPtr const hipPitchedPtrVal( - make_hipPitchedPtr( + ALPAKA_API_PREFIX(PitchedPtr) const pitchedPtrVal( + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(PitchedPtr))( dstNativePtr, static_cast(dstPitchBytesX), static_cast(dstWidth * static_cast(sizeof(Elem))), static_cast(dstPitchBytesY / dstPitchBytesX))); - hipExtent const hipExtentVal( - make_hipExtent( + ALPAKA_API_PREFIX(Extent) const extentVal( + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Extent))( static_cast(extentWidth * static_cast(sizeof(Elem))), static_cast(extentHeight), static_cast(extentDepth))); // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( - hipMemset3DAsync( - hipPitchedPtrVal, + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memset3DAsync)( + pitchedPtrVal, static_cast(byte), - hipExtentVal, + extentVal, queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif } }; //############################################################################# @@ -603,59 +514,32 @@ namespace alpaka ALPAKA_ASSERT(extentHeight <= dstHeight); ALPAKA_ASSERT(extentDepth <= dstDepth); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Fill CUDA parameter structures. - cudaPitchedPtr const cudaPitchedPtrVal( - make_cudaPitchedPtr( + ALPAKA_API_PREFIX(PitchedPtr) const pitchedPtrVal( + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(PitchedPtr))( dstNativePtr, static_cast(dstPitchBytesX), static_cast(dstWidth * static_cast(sizeof(Elem))), static_cast(dstPitchBytesY / dstPitchBytesX))); - cudaExtent const cudaExtentVal( - make_cudaExtent( + ALPAKA_API_PREFIX(Extent) const extentVal( + ALPAKA_PP_CONCAT(make_,ALPAKA_API_PREFIX(Extent))( static_cast(extentWidth * static_cast(sizeof(Elem))), static_cast(extentHeight), static_cast(extentDepth))); // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( iDevice)); // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( - cudaMemset3DAsync( - cudaPitchedPtrVal, + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(Memset3DAsync)( + pitchedPtrVal, static_cast(byte), - cudaExtentVal, + extentVal, queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#else - // Fill CUDA parameter structures. - hipPitchedPtr const hipPitchedPtrVal( - make_hipPitchedPtr( - dstNativePtr, - static_cast(dstPitchBytesX), - static_cast(dstWidth * static_cast(sizeof(Elem))), - static_cast(dstPitchBytesY / dstPitchBytesX))); - hipExtent const hipExtentVal( - make_hipExtent( - static_cast(extentWidth * static_cast(sizeof(Elem))), - static_cast(extentHeight), - static_cast(extentDepth))); - - // Set the current device. - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - iDevice)); - // Initiate the memory set. 
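//-----------------------------------------------------------------------------
// A minimal sketch of the pitched 3D memset issued above, assuming a CUDA
// build; note that the pitched pointer and the extent both take the width in
// bytes. Names are illustrative assumptions.
#include <cstddef>
#include <cuda_runtime.h>

inline cudaError_t memset3DSketch(
    void* dst, std::size_t dstPitchBytes,
    std::size_t extentWidthBytes, std::size_t extentHeight, std::size_t extentDepth,
    int byte, cudaStream_t queue)
{
    cudaPitchedPtr const pitchedPtrVal = make_cudaPitchedPtr(dst, dstPitchBytes, extentWidthBytes, extentHeight);
    cudaExtent const extentVal = make_cudaExtent(extentWidthBytes, extentHeight, extentDepth);
    return cudaMemset3DAsync(pitchedPtrVal, byte, extentVal, queue);
}
//-----------------------------------------------------------------------------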
- ALPAKA_HIP_RT_CHECK( - hipMemset3DAsync( - hipPitchedPtrVal, - static_cast(byte), - hipExtentVal, - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#endif wait::wait(queue); } }; diff --git a/include/alpaka/mem/view/ViewPlainPtr.hpp b/include/alpaka/mem/view/ViewPlainPtr.hpp index 89e911db310a..342771739538 100644 --- a/include/alpaka/mem/view/ViewPlainPtr.hpp +++ b/include/alpaka/mem/view/ViewPlainPtr.hpp @@ -314,21 +314,22 @@ namespace alpaka TExtent const & extent) { TElem* pMemAcc(nullptr); + #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaGetSymbolAddress( reinterpret_cast(&pMemAcc), *pMem)); #else #ifdef __HIP_PLATFORM_NVCC__ - ALPAKA_HIP_RT_CHECK(hipCUDAErrorTohipError( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipCUDAErrorTohipError( cudaGetSymbolAddress( reinterpret_cast(&pMemAcc), *pMem))); #else // FIXME: still does not work in HIP(HCC) (results in hipErrorNotFound) // HIP_SYMBOL(X) not useful because it only does #X on HIP(HCC), while &X on HIP(NVCC) - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipGetSymbolAddress( reinterpret_cast(&pMemAcc), pMem)); diff --git a/include/alpaka/pltf/PltfUniformCudaHipRt.hpp b/include/alpaka/pltf/PltfUniformCudaHipRt.hpp index 9bf24987b7d4..c2a021826754 100644 --- a/include/alpaka/pltf/PltfUniformCudaHipRt.hpp +++ b/include/alpaka/pltf/PltfUniformCudaHipRt.hpp @@ -80,15 +80,10 @@ namespace alpaka ALPAKA_DEBUG_FULL_LOG_SCOPE; int iNumDevices(0); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaError_t error = cudaGetDeviceCount(&iNumDevices); - if(error != cudaSuccess) - iNumDevices = 0; -#else - hipError_t error = hipGetDeviceCount(&iNumDevices); - if(error != hipSuccess) + ALPAKA_API_PREFIX(Error_t) error = ALPAKA_API_PREFIX(GetDeviceCount)(&iNumDevices); + if(error != ALPAKA_API_PREFIX(Success)) iNumDevices = 0; -#endif + return static_cast(iNumDevices); } }; @@ -112,7 +107,7 @@ namespace alpaka if(devIdx >= devCount) { std::stringstream ssErr; - ssErr << "Unable to return device handle for device " << devIdx << ". There are only " << devCount << " CUDA devices!"; + ssErr << "Unable to return device handle for device " << devIdx << ". There are only " << devCount << " devices!"; throw std::runtime_error(ssErr.str()); } @@ -124,11 +119,10 @@ namespace alpaka #if ALPAKA_DEBUG >= ALPAKA_DEBUG_MINIMAL #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) cudaDeviceProp devProp; - ALPAKA_CUDA_RT_CHECK(cudaGetDeviceProperties(&devProp, dev.m_iDevice)); #else - hipDeviceProp_t devProp; - ALPAKA_HIP_RT_CHECK(hipGetDeviceProperties(&devProp, dev.m_iDevice)); + hipDeviceProp_t devProp; #endif + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, dev.m_iDevice)); #endif #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL printDeviceProperties(devProp); @@ -155,77 +149,48 @@ namespace alpaka { #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) cudaError rc(cudaSetDevice(static_cast(iDevice))); - - cudaStream_t queue = {}; - // Create a dummy queue to check if the device is already used by an other process. - // cudaSetDevice never returns an error if another process already uses the selected device and gpu compute mode is set "process exclusive". - // \TODO: Check if this workaround is needed! - if(rc == cudaSuccess) - { - rc = cudaStreamCreate(&queue); - } - - if(rc == cudaSuccess) - { - // Destroy the dummy queue. - ALPAKA_CUDA_RT_CHECK( - cudaStreamDestroy( - queue)); - return true; - } - else - { - // Return the previous error from cudaStreamCreate. 
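//-----------------------------------------------------------------------------
// The unified device-count query above maps any runtime error to zero devices;
// a hedged sketch of the same fallback in plain CUDA terms.
#include <cstddef>
#include <cuda_runtime.h>

inline std::size_t getDevCountSketch()
{
    int iNumDevices(0);
    // cudaGetDeviceCount can fail (e.g. no driver installed); report zero
    // devices instead of propagating the error.
    if(cudaGetDeviceCount(&iNumDevices) != cudaSuccess)
        iNumDevices = 0;
    return static_cast<std::size_t>(iNumDevices);
}
//-----------------------------------------------------------------------------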
- ALPAKA_CUDA_RT_CHECK( - rc); - // Reset the Error state. - cudaGetLastError(); - return false; - } - } #else hipError_t rc(hipSetDevice(static_cast<int>(iDevice))); +#endif - hipStream_t queue = {}; + ALPAKA_API_PREFIX(Stream_t) queue = {}; // Create a dummy queue to check if the device is already used by another process. - // hipSetDevice never returns an error if another process already uses the selected device and gpu compute mode is set "process exclusive". + // cuda/hip-SetDevice never returns an error if another process already uses the selected device and the GPU compute mode is set to "process exclusive". // \TODO: Check if this workaround is needed! - if(rc == hipSuccess) + if(rc == ALPAKA_API_PREFIX(Success)) { - rc = hipStreamCreate(&queue); + rc = ALPAKA_API_PREFIX(StreamCreate)(&queue); } - if(rc == hipSuccess) + if(rc == ALPAKA_API_PREFIX(Success)) { // Destroy the dummy queue. - ALPAKA_HIP_RT_CHECK( - hipStreamDestroy( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamDestroy)( queue)); return true; } else { - // Return the previous error from hipStreamCreate. - ALPAKA_HIP_RT_CHECK( + // Return the previous error from cuda/hip-StreamCreate. + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( rc); // Reset the Error state. - hipGetLastError(); - - return false; + ALPAKA_API_PREFIX(GetLastError)(); + return false; } } -#endif - #if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL //----------------------------------------------------------------------------- //! Prints all the device properties to std::cout. ALPAKA_FN_HOST static auto printDeviceProperties( #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaDeviceProp const & devProp) + cudaDeviceProp const & devProp #else - hipDeviceProp_t const & devProp) + hipDeviceProp_t const & devProp #endif + ) -> void { ALPAKA_DEBUG_FULL_LOG_SCOPE; diff --git a/include/alpaka/queue/QueueUniformCudaHipRtBlocking.hpp b/include/alpaka/queue/QueueUniformCudaHipRtBlocking.hpp index 9af657746769..560a8922d9b2 100644 --- a/include/alpaka/queue/QueueUniformCudaHipRtBlocking.hpp +++ b/include/alpaka/queue/QueueUniformCudaHipRtBlocking.hpp @@ -72,33 +72,21 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Set the current device. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( - cudaStreamCreateWithFlags( - &m_UniformCudaHipQueue, - cudaStreamNonBlocking)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( m_dev.m_iDevice)); -#endif + // - [cuda/hip]StreamDefault: Default queue creation flag. // - [cuda/hip]StreamNonBlocking: Specifies that work running in the created queue may run concurrently with work in queue 0 (the NULL queue), // and that the created queue should perform no implicit synchronization with queue 0. // Create the queue on the current device. // NOTE: [cuda/hip]StreamNonBlocking is required to match the semantic implemented in the alpaka CPU queue. // It would be too much work to implement implicit default queue synchronization on CPU.
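//-----------------------------------------------------------------------------
// A self-contained sketch of the dummy-queue probe above, assuming a CUDA
// build: a device in "process exclusive" compute mode only reveals that it is
// taken once the first real resource (here a stream) is created on it. The
// function name is an illustrative assumption.
#include <cuda_runtime.h>

inline bool isDevUsableSketch(int iDevice)
{
    cudaError_t rc = cudaSetDevice(iDevice);
    cudaStream_t queue = {};
    if(rc == cudaSuccess)
        rc = cudaStreamCreate(&queue);
    if(rc == cudaSuccess)
    {
        cudaStreamDestroy(queue); // destroy the dummy queue again
        return true;
    }
    // Reset the sticky error state so later runtime calls do not see this failure.
    cudaGetLastError();
    return false;
}
//-----------------------------------------------------------------------------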
-#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( - cudaStreamCreateWithFlags( - &m_UniformCudaHipQueue, - cudaStreamNonBlocking)); -#else - ALPAKA_HIP_RT_CHECK( - hipStreamCreateWithFlags( + + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamCreateWithFlags)( &m_UniformCudaHipQueue, - hipStreamNonBlocking)); -#endif + ALPAKA_API_PREFIX(StreamNonBlocking))); } //----------------------------------------------------------------------------- @@ -119,34 +107,23 @@ namespace alpaka // In case the device is still doing work in the queue when [cuda/hip]StreamDestroy() is called, the function will return immediately // and the resources associated with queue will be released automatically once the device has completed all work in queue. // -> No need to synchronize here. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( m_dev.m_iDevice)); - // In case the device is still doing work in the queue when cudaStreamDestroy() is called, the function will return immediately + // In case the device is still doing work in the queue when cuda/hip-StreamDestroy() is called, the function will return immediately // and the resources associated with queue will be released automatically once the device has completed all work in queue. // -> No need to synchronize here. - ALPAKA_CUDA_RT_CHECK( - cudaStreamDestroy( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamDestroy)( m_UniformCudaHipQueue)); -#else - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - m_dev.m_iDevice)); - ALPAKA_HIP_RT_CHECK( - hipStreamDestroy( - m_UniformCudaHipQueue)); -#endif + } public: dev::DevUniformCudaHipRt const m_dev; //!< The device this queue is bound to. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaStream_t m_UniformCudaHipQueue; -#else - hipStream_t m_UniformCudaHipQueue; -#endif + ALPAKA_API_PREFIX(Stream_t) m_UniformCudaHipQueue; + #if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) int m_callees = 0; std::mutex m_mutex; @@ -271,10 +248,11 @@ namespace alpaka //----------------------------------------------------------------------------- #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - static void CUDART_CB uniformCudaHipRtCallback(cudaStream_t /*queue*/, cudaError_t /*status*/, void *arg) + static void CUDART_CB #else - static void HIPRT_CB uniformCudaHipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) + static void HIPRT_CB #endif + uniformCudaHipRtCallback(ALPAKA_API_PREFIX(Stream_t) /*queue*/, ALPAKA_API_PREFIX(Error_t) /*status*/, void *arg) { // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); @@ -314,19 +292,12 @@ namespace alpaka #endif auto pCallbackSynchronizationData = std::make_shared(); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaStreamAddCallback( - queue.m_spQueueImpl->m_UniformCudaHipQueue, - uniformCudaHipRtCallback, - pCallbackSynchronizationData.get(), - 0u)); -#else - ALPAKA_HIP_RT_CHECK(hipStreamAddCallback( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(StreamAddCallback)( queue.m_spQueueImpl->m_UniformCudaHipQueue, uniformCudaHipRtCallback, pCallbackSynchronizationData.get(), 0u)); -#endif + // We start a new std::thread which stores the task to be executed. 
// This circumvents the limitation that it is not possible to call CUDA/HIP methods within the CUDA/HIP callback thread. // The CUDA/HIP thread signals the std::thread when it is ready to execute the task. @@ -386,22 +357,16 @@ namespace alpaka -> bool { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Query is allowed even for queues on non current device. - cudaError_t ret = cudaSuccess; - ALPAKA_CUDA_RT_CHECK_IGNORE( - ret = cudaStreamQuery(queue.m_spQueueImpl->m_UniformCudaHipQueue), - cudaErrorNotReady); - return (ret == cudaSuccess); -#elif BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + +#if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) return (queue.m_spQueueImpl->m_callees==0); #else - hipError_t ret = hipSuccess; - ALPAKA_HIP_RT_CHECK_IGNORE( - ret = hipStreamQuery( - queue.m_spQueueImpl->m_UniformCudaHipQueue), - hipErrorNotReady); - return (ret == hipSuccess); + // Query is allowed even for queues on non current device. + ALPAKA_API_PREFIX(Error_t) ret = ALPAKA_API_PREFIX(Success); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE( + ret = ALPAKA_API_PREFIX(StreamQuery)(queue.m_spQueueImpl->m_UniformCudaHipQueue), + ALPAKA_API_PREFIX(ErrorNotReady)); + return (ret == ALPAKA_API_PREFIX(Success)); #endif } }; @@ -426,18 +391,13 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Sync is allowed even for queues on non current device. - ALPAKA_CUDA_RT_CHECK(cudaStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); - -#elif BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) +#if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) while(queue.m_spQueueImpl->m_callees>0) { std::this_thread::sleep_for(std::chrono::milliseconds(10u)); } #else // Sync is allowed even for queues on non current device. - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue)); #endif } diff --git a/include/alpaka/queue/QueueUniformCudaHipRtNonBlocking.hpp b/include/alpaka/queue/QueueUniformCudaHipRtNonBlocking.hpp index fd91ced2e6b0..8316bdfd4477 100644 --- a/include/alpaka/queue/QueueUniformCudaHipRtNonBlocking.hpp +++ b/include/alpaka/queue/QueueUniformCudaHipRtNonBlocking.hpp @@ -79,29 +79,15 @@ namespace alpaka // Create the queue on the current device. // NOTE: [cuda/hip]StreamNonBlocking is required to match the semantic implemented in the alpaka CPU queue. // It would be too much work to implement implicit default queue synchronization on CPU. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) // Set the current device. - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( m_dev.m_iDevice)); - ALPAKA_CUDA_RT_CHECK( - cudaStreamCreateWithFlags( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamCreateWithFlags)( &m_UniformCudaHipQueue, - cudaStreamNonBlocking)); -#else - // Set the current device. 
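//-----------------------------------------------------------------------------
// The Empty trait above treats ErrorNotReady as "work still pending" rather
// than a failure; a minimal sketch of that idiom, assuming a CUDA build.
#include <cuda_runtime.h>

inline bool queueEmptySketch(cudaStream_t queue)
{
    cudaError_t const ret = cudaStreamQuery(queue);
    if(ret == cudaErrorNotReady)
        return false; // the queue still holds work; this is not an error
    // Any other non-success code is a genuine error (the real code routes it
    // through its error-check macro); the queue counts as empty only on success.
    return (ret == cudaSuccess);
}
//-----------------------------------------------------------------------------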
- ALPAKA_HIP_RT_CHECK( - hipSetDevice( - m_dev.m_iDevice)); - - ALPAKA_HIP_RT_CHECK( - hipStreamCreateWithFlags( - &m_UniformCudaHipQueue, - hipStreamNonBlocking)); -#endif - - + ALPAKA_API_PREFIX(StreamNonBlocking))); } //----------------------------------------------------------------------------- QueueUniformCudaHipRtNonBlockingImpl(QueueUniformCudaHipRtNonBlockingImpl const &) = delete; @@ -116,44 +102,29 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - // Set the current device. \TODO: Is setting the current device before cudaStreamDestroy required? + // Set the current device. \TODO: Is setting the current device before cuda/hip-StreamDestroy required? // In case the device is still doing work in the queue when [cuda/hip]StreamDestroy() is called, the function will return immediately // and the resources associated with queue will be released automatically once the device has completed all work in queue. // -> No need to synchronize here. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - - ALPAKA_CUDA_RT_CHECK( - cudaSetDevice( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(SetDevice)( m_dev.m_iDevice)); - ALPAKA_CUDA_RT_CHECK( - cudaStreamDestroy( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( + ALPAKA_API_PREFIX(StreamDestroy)( m_UniformCudaHipQueue)); -#else - - ALPAKA_HIP_RT_CHECK( - hipSetDevice( - m_dev.m_iDevice)); - - ALPAKA_HIP_RT_CHECK( - hipStreamDestroy( - m_UniformCudaHipQueue)); -#endif } public: dev::DevUniformCudaHipRt const m_dev; //!< The device this queue is bound to. -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - cudaStream_t m_UniformCudaHipQueue; -#else - hipStream_t m_UniformCudaHipQueue; - #if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + ALPAKA_API_PREFIX(Stream_t) m_UniformCudaHipQueue; +#if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) int m_callees = 0; std::mutex m_mutex; - #endif #endif + }; } } @@ -282,11 +253,11 @@ namespace alpaka //----------------------------------------------------------------------------- #if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - static void CUDART_CB uniformCudaHipRtCallback(cudaStream_t /*queue*/, cudaError_t /*status*/, void *arg) + static void CUDART_CB #else - static void HIPRT_CB uniformCudaHipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) + static void HIPRT_CB #endif - + uniformCudaHipRtCallback(ALPAKA_API_PREFIX(Stream_t) /*queue*/, ALPAKA_API_PREFIX(Error_t) /*status*/, void *arg) { // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); @@ -334,19 +305,12 @@ namespace alpaka } #endif auto pCallbackSynchronizationData = std::make_shared(); -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - ALPAKA_CUDA_RT_CHECK(cudaStreamAddCallback( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(ALPAKA_API_PREFIX(StreamAddCallback)( queue.m_spQueueImpl->m_UniformCudaHipQueue, uniformCudaHipRtCallback, pCallbackSynchronizationData.get(), 0u)); -#else - ALPAKA_HIP_RT_CHECK(hipStreamAddCallback( - queue.m_spQueueImpl->m_UniformCudaHipQueue, - uniformCudaHipRtCallback, - pCallbackSynchronizationData.get(), - 0u)); -#endif + // We start a new std::thread which stores the task to be executed. // This circumvents the limitation that it is not possible to call CUDA methods within the CUDA/HIP callback thread. 
// The CUDA/HIP thread signals the std::thread when it is ready to execute the task. @@ -406,25 +370,17 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Query is allowed even for queues on non current device. - cudaError_t ret = cudaSuccess; - ALPAKA_CUDA_RT_CHECK_IGNORE( - ret = cudaStreamQuery( - queue.m_spQueueImpl->m_UniformCudaHipQueue), - cudaErrorNotReady); - return (ret == cudaSuccess); -#elif BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) +#if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) return (queue.m_spQueueImpl->m_callees==0); #else // Query is allowed even for queues on non current device. - hipError_t ret = hipSuccess; - ALPAKA_HIP_RT_CHECK_IGNORE( - ret = hipStreamQuery( + ALPAKA_API_PREFIX(Error_t) ret = ALPAKA_API_PREFIX(Success); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_IGNORE( + ret = ALPAKA_API_PREFIX(StreamQuery)( queue.m_spQueueImpl->m_UniformCudaHipQueue), - hipErrorNotReady); - return (ret == hipSuccess); + ALPAKA_API_PREFIX(ErrorNotReady)); + return (ret == ALPAKA_API_PREFIX(Success)); #endif } @@ -450,18 +406,13 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; -#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) - // Sync is allowed even for queues on non current device. - ALPAKA_CUDA_RT_CHECK( - cudaStreamSynchronize( - queue.m_spQueueImpl->m_UniformCudaHipQueue)); -#elif BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) +#if BOOST_COMP_HCC // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) while(queue.m_spQueueImpl->m_callees>0) { std::this_thread::sleep_for(std::chrono::milliseconds(10u)); } #else // Sync is allowed even for queues on non current device. - ALPAKA_HIP_RT_CHECK( hipStreamSynchronize( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( ALPAKA_API_PREFIX(StreamSynchronize)( queue.m_spQueueImpl->m_UniformCudaHipQueue)); #endif } diff --git a/test/common/include/alpaka/test/event/EventHostManualTrigger.hpp b/test/common/include/alpaka/test/event/EventHostManualTrigger.hpp index da57d1efc2f6..604438ec12e6 100644 --- a/test/common/include/alpaka/test/event/EventHostManualTrigger.hpp +++ b/test/common/include/alpaka/test/event/EventHostManualTrigger.hpp @@ -402,16 +402,16 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Set the current device. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaSetDevice( m_dev.m_iDevice)); // Allocate the buffer on this device. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMalloc( &m_devMem, static_cast(sizeof(int32_t)))); // Initiate the memory set. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemset( m_devMem, static_cast(0u), @@ -431,11 +431,11 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Set the current device. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaSetDevice( m_dev.m_iDevice)); // Free the buffer. - ALPAKA_CUDA_RT_CHECK(cudaFree(m_devMem)); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaFree(m_devMem)); } //----------------------------------------------------------------------------- @@ -445,11 +445,11 @@ namespace alpaka m_bIsReady = true; // Set the current device. - ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaSetDevice( m_dev.m_iDevice)); // Initiate the memory set. 
- ALPAKA_CUDA_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( cudaMemset( m_devMem, static_cast(1u), @@ -709,16 +709,16 @@ namespace alpaka ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; // Set the current device. - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipSetDevice( m_dev.m_iDevice)); // Allocate the buffer on this device. - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipMalloc( &m_devMem, static_cast(sizeof(int32_t)))); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipMemset( m_devMem, static_cast(0u), @@ -737,11 +737,11 @@ namespace alpaka { ALPAKA_DEBUG_MINIMAL_LOG_SCOPE; - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipSetDevice( m_dev.m_iDevice)); // Free the buffer. - ALPAKA_HIP_RT_CHECK(hipFree(m_devMem)); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipFree(m_devMem)); } //----------------------------------------------------------------------------- @@ -751,11 +751,11 @@ namespace alpaka m_bIsReady = true; // Set the current device. - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipSetDevice( m_dev.m_iDevice)); // Initiate the memory set. - ALPAKA_HIP_RT_CHECK( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK( hipMemset( m_devMem, static_cast(1u), @@ -930,11 +930,11 @@ namespace alpaka std::cerr << "[Workaround] polling of device-located value in stream, as hipStreamWaitValue32 is not available.\n"; #endif while(hostMem<0x01010101u) { - ALPAKA_HIP_RT_CHECK(hipMemcpyDtoHAsync(&hostMem, + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpyDtoHAsync(&hostMem, reinterpret_cast(event.m_spEventImpl->m_devMem), sizeof(int32_t), queue.m_spQueueImpl->m_UniformCudaHipQueue)); - ALPAKA_HIP_RT_CHECK(hipStreamSynchronize(queue.m_spQueueImpl->m_UniformCudaHipQueue)); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamSynchronize(queue.m_spQueueImpl->m_UniformCudaHipQueue)); } } }; @@ -971,7 +971,7 @@ namespace alpaka // the device build upon value-based HIP queue synchronization APIs such as // cuStreamWaitValue32() and cuStreamWriteValue32(). #if BOOST_COMP_NVCC - ALPAKA_HIP_RT_CHECK(hipCUResultTohipError( + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipCUResultTohipError( cuStreamWaitValue32( static_cast(queue.m_spQueueImpl->m_UniformCudaHipQueue), reinterpret_cast(event.m_spEventImpl->m_devMem), @@ -982,7 +982,7 @@ namespace alpaka std::uint32_t hmem = 0; do { std::this_thread::sleep_for(std::chrono::milliseconds(10u)); - ALPAKA_HIP_RT_CHECK(hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault)); + ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipMemcpy(&hmem, event.m_spEventImpl->m_devMem, sizeof(std::uint32_t), hipMemcpyDefault)); } while(hmem < 0x01010101u); #endif
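//-----------------------------------------------------------------------------
// Where hipStreamWaitValue32 is unavailable, the code above falls back to
// polling a device-side 32-bit flag from the host; a hedged sketch of that
// loop in plain CUDA terms (the 0x01010101 sentinel follows the code above,
// the function name is an assumption).
#include <cstdint>
#include <cuda_runtime.h>

inline void waitForDeviceFlagSketch(std::uint32_t const* devMem, cudaStream_t queue)
{
    std::uint32_t hostMem = 0u;
    while(hostMem < 0x01010101u)
    {
        // Copy the current flag value back and drain the queue before re-checking.
        cudaMemcpyAsync(&hostMem, devMem, sizeof(std::uint32_t), cudaMemcpyDeviceToHost, queue);
        cudaStreamSynchronize(queue);
    }
}
//-----------------------------------------------------------------------------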