workaround clang compiler errors
Clang for HIP has problems if all atomic tests are within one kernel
and throws the compiler error `error: stack size limit exceeded (155632) in _ZN6alpaka16uniform_cuda_hip6de`; therefore we split the tests into multiple kernels to work around the issue.
psychocoderHPC committed Aug 5, 2022
1 parent 48bca6a commit a5cb324
Showing 1 changed file with 102 additions and 10 deletions.
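
Below is a minimal sketch (not part of this commit) of the split-kernel pattern the change applies: instead of exercising every atomic operation in one device function, each group of tests gets its own kernel object that is launched separately, so no single device function accumulates the stack frames of all tests. The names `Acc`, `ArithmeticOpsKernel`, `BitOpsKernel`, and the `run` helper are hypothetical stand-ins; in alpaka the kernels are launched through `alpaka::test::KernelExecutionFixture`, as the diff shows.

```cpp
#include <cstdio>

// Hypothetical stand-in for the accelerator handle passed to alpaka kernels.
struct Acc
{
};

// One kernel per group of atomic tests keeps each device function small.
struct ArithmeticOpsKernel
{
    template<typename T>
    void operator()(Acc const&, bool* success, T operand) const
    {
        // Placeholder for Add/Sub/Exch/... tests.
        *success = *success && (operand + T{1} > operand);
    }
};

struct BitOpsKernel
{
    template<typename T>
    void operator()(Acc const&, bool* success, T operand) const
    {
        // Placeholder for And/Or/Xor tests.
        *success = *success && ((operand & operand) == operand);
    }
};

// Hypothetical launcher; alpaka would run each kernel through its execution fixture.
template<typename TKernel, typename T>
bool run(TKernel const& kernel, T value)
{
    bool success = true;
    kernel(Acc{}, &success, value);
    return success;
}

int main()
{
    int const value = 32;
    // Each kernel is compiled and launched on its own, so no single device
    // function has to hold the stack frames of all atomic tests at once.
    bool const ok = run(ArithmeticOpsKernel{}, value) && run(BitOpsKernel{}, value);
    std::printf("atomic test kernels %s\n", ok ? "passed" : "failed");
    return ok ? 0 : 1;
}
```

The design choice is simply to trade one large kernel for several small ones: the per-kernel stack usage drops below the compiler's limit while the set of operations covered stays the same.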
112 changes: 102 additions & 10 deletions test/unit/atomic/src/AtomicTest.cpp
@@ -184,20 +184,16 @@ class AtomicTestKernel
{
testAtomicHierarchies<Add>(acc, success, operandOrig);
testAtomicHierarchies<Sub>(acc, success, operandOrig);
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
testAtomicHierarchies<Exch>(acc, success, operandOrig);

testAtomicHierarchies<Inc>(acc, success, operandOrig);
testAtomicHierarchies<Dec>(acc, success, operandOrig);
testAtomicHierarchies<And>(acc, success, operandOrig);
testAtomicHierarchies<Or>(acc, success, operandOrig);
testAtomicHierarchies<Xor>(acc, success, operandOrig);

testAtomicCasHierarchies<alpaka::hierarchy::Threads>(acc, success, operandOrig);
}
};


template<typename TAcc, typename T>
class AtomicTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
@@ -207,16 +203,51 @@ class AtomicTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
testAtomicHierarchies<Add>(acc, success, operandOrig);
testAtomicHierarchies<Sub>(acc, success, operandOrig);
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
testAtomicHierarchies<Exch>(acc, success, operandOrig);

// Inc, Dec, Or, And, Xor are not supported on float/double types
// Inc, Dec are not supported on float/double types

testAtomicCasHierarchies<alpaka::hierarchy::Threads>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T, typename Sfinae = void>
class AtomicCompareOperationsTestKernel
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void
{
testAtomicHierarchies<Min>(acc, success, operandOrig);
testAtomicHierarchies<Max>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T, typename Sfinae = void>
class AtomicBitOperationsTestKernel
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T operandOrig) const -> void
{
testAtomicHierarchies<And>(acc, success, operandOrig);
testAtomicHierarchies<Or>(acc, success, operandOrig);
testAtomicHierarchies<Xor>(acc, success, operandOrig);
}
};

template<typename TAcc, typename T>
class AtomicBitOperationsTestKernel<TAcc, T, std::enable_if_t<std::is_floating_point_v<T>>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(TAcc const& /* acc */, bool* success, T /* operandOrig */) const -> void
{
// Do not perform bitwise atomic operations for floating point types
ALPAKA_CHECK(*success, true);
}
};

#if(defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && BOOST_LANG_CUDA) || (defined(ALPAKA_ACC_GPU_HIP_ENABLED) && BOOST_LANG_HIP)

template<typename TApi, typename TDim, typename TIdx, typename T>
@@ -237,6 +268,42 @@ class AtomicTestKernel<
}
};

template<typename TApi, typename TDim, typename TIdx, typename T>
class AtomicBitOperationsTestKernel<
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
T,
std::enable_if_t<!std::is_floating_point_v<T> && (sizeof(T) != 4u && sizeof(T) != 8u)>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx> const& /* acc */,
bool* success,
T /* operandOrig */) const -> void
{
// Bitwise atomic operations are only tested for integral types that are 4 or 8 bytes wide on CUDA/HIP.
ALPAKA_CHECK(*success, true);
}
};

template<typename TApi, typename TDim, typename TIdx, typename T>
class AtomicCompareOperationsTestKernel<
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx>,
T,
std::enable_if_t<sizeof(T) != 4u && sizeof(T) != 8u>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(
alpaka::AccGpuUniformCudaHipRt<TApi, TDim, TIdx> const& /* acc */,
bool* success,
T /* operandOrig */) const -> void
{
// All other types are not supported by CUDA/HIP atomic operations.
ALPAKA_CHECK(*success, true);
}
};

#endif

#if defined(ALPAKA_ACC_ANY_BT_OACC_ENABLED)
@@ -254,6 +321,22 @@ class AtomicTestKernel<alpaka::AccOacc<TDim, TIdx>, T, std::enable_if_t<sizeof(T
}
};

template<typename TDim, typename TIdx, typename T>
class AtomicCompareOperationsTestKernel<
alpaka::AccOacc<TDim, TIdx>,
T,
std::enable_if_t<sizeof(T) != 4u && sizeof(T) != 8u>>
{
public:
ALPAKA_NO_HOST_ACC_WARNING
ALPAKA_FN_ACC auto operator()(alpaka::AccOacc<TDim, TIdx> const& /* acc */, bool* success, T /* operandOrig */)
const -> void
{
// All other types are not supported by OpenACC atomic operations.
ALPAKA_CHECK(*success, true);
}
};

#endif

template<typename TAcc, typename T>
@@ -266,10 +349,19 @@ struct TestAtomicOperations

alpaka::test::KernelExecutionFixture<TAcc> fixture(alpaka::Vec<Dim, Idx>::ones());

AtomicTestKernel<TAcc, T> kernel;

T value = static_cast<T>(32);

// The tests are split into multiple kernels to avoid exceeding the maximum kernel size.
// clang (HIP), for example, fails with the compile error: 'error: stack size limit exceeded (155632) in
// _ZN6alpaka16uniform_cuda_hip6de'
AtomicTestKernel<TAcc, T> kernel;
REQUIRE(fixture(kernel, value));

AtomicBitOperationsTestKernel<TAcc, T> kernelBitOps;
REQUIRE(fixture(kernelBitOps, value));

AtomicCompareOperationsTestKernel<TAcc, T> kernelCompareOps;
REQUIRE(fixture(kernelCompareOps, value));
}
};

