From a5cb324bd057c05f4556a4becbe0d83731edcc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Widera?= Date: Fri, 5 Aug 2022 13:42:01 +0200 Subject: [PATCH] workaround clang compiler errors Clang for HIP has problems if we have all atomic tests within one kernel and throws the compiler error: `error: stack size limit exceeded (155632) in _ZN6alpaka16uniform_cuda_hip6de`, therefore we split the tests into multiple kernels to work around the issue. --- test/unit/atomic/src/AtomicTest.cpp | 112 +++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/test/unit/atomic/src/AtomicTest.cpp b/test/unit/atomic/src/AtomicTest.cpp index 4002cc13d1cc..9786e70037d6 100644 --- a/test/unit/atomic/src/AtomicTest.cpp +++ b/test/unit/atomic/src/AtomicTest.cpp @@ -184,20 +184,16 @@ class AtomicTestKernel { testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); testAtomicCasHierarchies(acc, success, operandOrig); } }; + template class AtomicTestKernel>> { @@ -207,16 +203,51 @@ class AtomicTestKernel>> { testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); - testAtomicHierarchies(acc, success, operandOrig); testAtomicHierarchies(acc, success, operandOrig); - // Inc, Dec, Or, And, Xor are not supported on float/double types + // Inc, Dec are not supported on float/double types testAtomicCasHierarchies(acc, success, operandOrig); } }; +template +class AtomicCompareOperationsTestKernel +{ +public: + 
ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void + { + testAtomicHierarchies(acc, success, operandOrig); + testAtomicHierarchies(acc, success, operandOrig); + } +}; + +template +class AtomicBitOperationsTestKernel +{ +public: + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void + { + testAtomicHierarchies(acc, success, operandOrig); + testAtomicHierarchies(acc, success, operandOrig); + testAtomicHierarchies(acc, success, operandOrig); + } +}; + +template +class AtomicBitOperationsTestKernel>> +{ +public: + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success, T /* operandOrig */) const -> void + { + // Do not perform bitwise atomic operations for floating point types + ALPAKA_CHECK(*success, true); + } +}; + #if(defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP) template @@ -237,6 +268,42 @@ class AtomicTestKernel< } }; +template +class AtomicBitOperationsTestKernel< + alpaka::AccGpuUniformCudaHipRt, + T, + std::enable_if_t && (sizeof(T) != 4u && sizeof(T) != 8u)>> +{ +public: + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()( + alpaka::AccGpuUniformCudaHipRt const& /* acc */, + bool* success, + T /* operandOrig */) const -> void + { + // Do not perform bitwise atomic operations for floating point on CUDA/HIP. + ALPAKA_CHECK(*success, true); + } +}; + +template +class AtomicCompareOperationsTestKernel< + alpaka::AccGpuUniformCudaHipRt, + T, + std::enable_if_t> +{ +public: + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()( + alpaka::AccGpuUniformCudaHipRt const& /* acc */, + bool* success, + T /* operandOrig */) const -> void + { + // All other types are not supported by CUDA/HIP atomic operations. 
+ ALPAKA_CHECK(*success, true); + } +}; + #endif #if defined(ALPAKA_ACC_ANY_BT_OACC_ENABLED) @@ -254,6 +321,22 @@ class AtomicTestKernel, T, std::enable_if_t +class AtomicCompareOperationsTestKernel< + alpaka::AccOacc, + T, + std::enable_if_t> +{ +public: + ALPAKA_NO_HOST_ACC_WARNING + ALPAKA_FN_ACC auto operator()(alpaka::AccOacc const& /* acc */, bool* success, T /* operandOrig */) + const -> void + { + // All other types are not supported by OpenACC atomic operations. + ALPAKA_CHECK(*success, true); + } +}; + #endif template @@ -266,10 +349,19 @@ struct TestAtomicOperations alpaka::test::KernelExecutionFixture fixture(alpaka::Vec::ones()); - AtomicTestKernel kernel; - T value = static_cast(32); + + // The tests are split into multiple kernel to avoid breaking the maximum kernel size. + // clang (HIP) e.g. shows compile error: 'error: stack size limit exceeded (155632) in + // _ZN6alpaka16uniform_cuda_hip6de' + AtomicTestKernel kernel; REQUIRE(fixture(kernel, value)); + + AtomicBitOperationsTestKernel kernelBitOps; + REQUIRE(fixture(kernelBitOps, value)); + + AtomicCompareOperationsTestKernel kernelCompareOps; + REQUIRE(fixture(kernelCompareOps, value)); } };