From 9b7645575f180ea9ea201e495960f322b93cf51f Mon Sep 17 00:00:00 2001 From: Julia Jiang <56359287+jujiang-del@users.noreply.github.com> Date: Thu, 25 Nov 2021 04:40:31 -0500 Subject: [PATCH] SWDEV-306204 - remove external reference in hip_bugs.md (#2394) Change-Id: I7425eba1cd20a2f842479d0eb050f97e21e457c1 --- docs/markdown/hip_bugs.md | 213 ++++++++++++++++++++++++++------------ 1 file changed, 149 insertions(+), 64 deletions(-) diff --git a/docs/markdown/hip_bugs.md b/docs/markdown/hip_bugs.md index 46dfa6d0d2..6f872ff89e 100644 --- a/docs/markdown/hip_bugs.md +++ b/docs/markdown/hip_bugs.md @@ -13,75 +13,160 @@ The language specification for HIP and CUDA forbid calling a differences in the strictness of this restriction, with HIP exhibiting a tighter adherence to the specification and thus less tolerant of infringing code. The solution is to ensure that all functions which are called in a -`__device__` context are correctly annotated to reflect it. An interesting case -where these differences emerge is shown below. This relies on a the common -[C++ Member Detector idiom][1], as it would be implemented pre C++11): +`__device__` context are correctly annotated to reflect it. -```c++ -#include +The following is an example of codes using the specification, +``` +#include #include +#include +#include "test_common.h" -struct aye { bool a[1]; }; -struct nay { bool a[2]; }; +static std::random_device dev; +static std::mt19937 rng(dev()); -// Dual restriction is necessary in HIP if the detector is to work for -// __device__ contexts as well as __host__ ones. NVCC is less strict. -template -__host__ __device__ -const T& cref_t(); - -template -struct Has_call_operator { - // Dual restriction is necessary in HIP if the detector is to work for - // __device__ contexts as well as __host__ ones. NVCC is less strict. - template - __host__ __device__ - static - aye test( - C const *, - typename std::enable_if< - (sizeof(cref_t().operator()()) > 0)>::type* = nullptr); - static - nay test(...); - - enum { value = sizeof(test(static_cast(0))) == sizeof(aye) }; -}; - -template::value> -struct Wrapper { - template - V f() const { return T{1}; } -}; - - -template -struct Wrapper { - template - V f() const { return T{10}; } -}; - -// This specialisation will yield a compile-time error, if selected. -template -struct Wrapper {}; - -template -struct Functor; - -template<> struct Functor { - __device__ - float operator()() const { return 42.0f; } -}; - -__device__ -void this_will_not_compile_if_detector_is_not_marked_device() -{ - float f = Wrapper>().f(); +template +__host__ __device__ inline constexpr int count() { + return sizeof(T) / sizeof(M); } -__host__ -void this_will_not_compile_if_detector_is_marked_device_only() -{ - float f = Wrapper>().f(); +inline float getRandomFloat(float min = 10, float max = 100) { + std::uniform_real_distribution gen(min, max); + return gen(rng); +} + +template +void fillMatrix(T* a, int size) { + for (int i = 0; i < size; i++) { + T t; + t.x = getRandomFloat(); + if constexpr (count() >= 2) t.y = getRandomFloat(); + if constexpr (count() >= 3) t.z = getRandomFloat(); + if constexpr (count() >= 4) t.w = getRandomFloat(); + + a[i] = t; + } +} + +// Test operations +template +__host__ __device__ void testOperations(T& a, T& b) { + a.x += b.x; + a.x++; + b.x++; + if constexpr (count() >= 2) { + a.y = b.x; + a.x = b.y; + } + if constexpr (count() >= 3) { + if (a.x > 0) b.x /= a.x; + a.x *= b.z; + a.y--; + } + if constexpr (count() >= 4) { + b.w = a.x; + a.w += (-b.y); + } +} + +template +__global__ void testOperationsGPU(T* d_a, T* d_b, int size) { + int id = threadIdx.x; + if (id > size) return; + T &a = d_a[id]; + T &b = d_b[id]; + + testOperations(a, b); +} + + +template +void dcopy(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + a[i] = b[i]; + } +} + +template +bool isEqual(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + if (a[i] != b[i]) { + return false; + } + } + return true; +} + +// Main function that tests type +// T = what you want to test +// D = pack of 1 i.e. float1 int1 +template +void testType(int msize) { + T *fa, *fb, *fc, *h_fa, *h_fb; + fa = new T[msize]; + fb = new T[msize]; + fc = new T[msize]; + h_fa = new T[msize]; + h_fb = new T[msize]; + + T *d_fa, *d_fb; + + constexpr int c = count(); + + if (c <= 0 || c >= 5) { + failed("Invalid Size\n"); + } + + fillMatrix(fa, msize); + dcopy(fb, fa, msize); + dcopy(h_fa, fa, msize); + dcopy(h_fb, fa, msize); + for (int i = 0; i < msize; i++) testOperations(h_fa[i], h_fb[i]); + + hipMalloc(&d_fa, sizeof(T) * msize); + hipMalloc(&d_fb, sizeof(T) * msize); + + hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); + hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice); + + auto kernel = testOperationsGPU; + hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); + + hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost); + + bool pass = true; + if (!isEqual(h_fa, fc, msize)) { + pass = false; + } + + delete[] fa; + delete[] fb; + delete[] fc; + delete[] h_fa; + delete[] h_fb; + hipFree(d_fa); + hipFree(d_fb); + + if (!pass) { + failed("Failed"); + } +} + +int main() { + const int msize = 100; + // double + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // floats + testType(msize); + testType(msize); + testType(msize); + testType(msize); + ... + passed(); } ``` -[1]: https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Member_Detector +For more details for the complete program, please refer to HIP test application at the link, https://github.com/ROCm-Developer-Tools/HIP/blob/main/tests/src/deviceLib/hip_floatnTM.cpp