diff --git a/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt b/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt index 3ba4bddd39..89d38a136e 100644 --- a/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/deviceLib/CMakeLists.txt @@ -14,6 +14,18 @@ set(TEST_SRC syncthreadscount.cc syncthreadsor.cc Atomic_func.cc + DoublePrecisionIntrinsics.cc + DoublePrecisionMathDevice.cc + DoublePrecisionMathHost.cc + FloatMathPrecise.cc + IntegerIntrinsics.cc + SinglePrecisionIntrinsics.cc + SinglePrecisionMathDevice.cc + SinglePrecisionMathHost.cc + SimpleAtomicsTest.cc + hipTestAtomicAdd.cc + hipStdComplex.cc + hipTestClock.cc ) if(UNIX) diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc new file mode 100644 index 0000000000..6801decb9e --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc @@ -0,0 +1,81 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void double_precision_intrinsics() { +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_rd(0.0, 1.0); +#endif + __dadd_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_ru(0.0, 1.0); + __dadd_rz(0.0, 1.0); + __ddiv_rd(0.0, 1.0); +#endif + __ddiv_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __ddiv_ru(0.0, 1.0); + __ddiv_rz(0.0, 1.0); + __dmul_rd(1.0, 2.0); +#endif + __dmul_rn(1.0, 2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dmul_ru(1.0, 2.0); + __dmul_rz(1.0, 2.0); + __drcp_rd(2.0); +#endif + __drcp_rn(2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __drcp_ru(2.0); + __drcp_rz(2.0); + __dsqrt_rd(4.0); +#endif + __dsqrt_rn(4.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsqrt_ru(4.0); + __dsqrt_rz(4.0); + __dsub_rd(2.0, 1.0); +#endif + __dsub_rn(2.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsub_ru(2.0, 1.0); + __dsub_rz(2.0, 1.0); + __fma_rd(1.0, 2.0, 3.0); +#endif + __fma_rn(1.0, 2.0, 3.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fma_ru(1.0, 2.0, 3.0); + __fma_rz(1.0, 2.0, 3.0); +#endif +} + +__global__ void compileDoublePrecisionIntrinsics(int) { + double_precision_intrinsics(); +} + +TEST_CASE("Unit_DoublePrecisionIntrinsics") { + hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc new file mode 100644 index 0000000000..9c695a7b41 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc @@ -0,0 +1,133 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void double_precision_math_functions() { + int iX; + double fX, fY; + + acos(1.0); + acosh(1.0); + asin(0.0); + asinh(0.0); + atan(0.0); + atan2(0.0, 1.0); + atanh(0.0); + cbrt(0.0); + ceil(0.0); + copysign(1.0, -2.0); + cos(0.0); + cosh(0.0); + cospi(0.0); + cyl_bessel_i0(0.0); + cyl_bessel_i1(0.0); + erf(0.0); + erfc(0.0); + erfcinv(2.0); + erfcx(0.0); + erfinv(1.0); + exp(0.0); + exp10(0.0); + exp2(0.0); + expm1(0.0); + fabs(1.0); + fdim(1.0, 0.0); + floor(0.0); + fma(1.0, 2.0, 3.0); + fmax(0.0, 0.0); + fmin(0.0, 0.0); + fmod(0.0, 1.0); + frexp(0.0, &iX); + hypot(1.0, 0.0); + ilogb(1.0); + isfinite(0.0); + isinf(0.0); + isnan(0.0); + j0(0.0); + j1(0.0); + jn(-1.0, 1.0); + ldexp(0.0, 0); + lgamma(1.0); + llrint(0.0); + llround(0.0); + log(1.0); + log10(1.0); + log1p(-1.0); + log2(1.0); + logb(1.0); + lrint(0.0); + lround(0.0); + modf(0.0, &fX); + nan("1"); + nearbyint(0.0); + nextafter(0.0, 0.0); + fX = 1.0; + norm(1, &fX); + norm3d(1.0, 0.0, 0.0); + norm4d(1.0, 0.0, 0.0, 0.0); + normcdf(0.0); + normcdfinv(1.0); + pow(1.0, 0.0); + rcbrt(1.0); + remainder(2.0, 1.0); + remquo(1.0, 2.0, &iX); + rhypot(0.0, 1.0); + rint(1.0); + fX = 1.0; + rnorm(1, &fX); + rnorm3d(0.0, 0.0, 1.0); + rnorm4d(0.0, 0.0, 0.0, 1.0); + round(0.0); + rsqrt(1.0); + scalbln(0.0, 1); + scalbn(0.0, 1); + signbit(1.0); + sin(0.0); +#if HT_AMD + // NV A100 has a bug in sincos(), so temporarily disbale it + sincos(0.0, &fX, &fY); +#endif + sincospi(0.0, &fX, &fY); + sinh(0.0); + sinpi(0.0); + sqrt(0.0); + tan(0.0); + tanh(0.0); + tgamma(2.0); + trunc(0.0); + y0(1.0); + y1(1.0); + yn(1, 1.0); +} + +__global__ void compileDoublePrecisionMathOnDevice(int) { + double_precision_math_functions(); +} + +TEST_CASE("Unit_DoublePrecisionMathDevice") { + hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc new file mode 100644 index 0000000000..fd4e4bf238 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc @@ -0,0 +1,117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__host__ static void double_precision_math_functions() { + int iX; + double fX, fY; + + acos(1.0); + acosh(1.0); + asin(0.0); + asinh(0.0); + atan(0.0); + atan2(0.0, 1.0); + atanh(0.0); + cbrt(0.0); + ceil(0.0); + copysign(1.0, -2.0); + cos(0.0); + cosh(0.0); + erf(0.0); + erfc(0.0); + exp(0.0); + #ifdef __unix__ + exp10(0.0); + #endif + exp2(0.0); + expm1(0.0); + fabs(1.0); + fdim(1.0, 0.0); + floor(0.0); + fma(1.0, 2.0, 3.0); + fmax(0.0, 0.0); + fmin(0.0, 0.0); + fmod(0.0, 1.0); + frexp(0.0, &iX); + hypot(1.0, 0.0); + ilogb(1.0); + std::isfinite(0.0); + std::isinf(0.0); + std::isnan(0.0); + #ifdef __unix__ + j0(0.0); + j1(0.0); + jn(-1.0, 1.0); + #elif _WIN64 + _j0(0.0); + _j1(0.0); + _jn(-1.0, 1.0); + #endif + ldexp(0.0, 0); + llrint(0.0); + llround(0.0); + log(1.0); + log10(1.0); + log1p(-1.0); + log2(1.0); + logb(1.0); + lrint(0.0); + lround(0.0); + modf(0.0, &fX); + nan("1"); + nearbyint(0.0); + fX = 1.0; + pow(1.0, 0.0); + remainder(2.0, 1.0); + remquo(1.0, 2.0, &iX); + rint(1.0); + round(0.0); + scalbln(0.0, 1); + scalbn(0.0, 1); + std::signbit(1.0); + sin(0.0); + #ifdef _unix__ + sincos(0.0, &fX, &fY); + #endif + sinh(0.0); + sqrt(0.0); + tan(0.0); + tanh(0.0); + tgamma(2.0); + trunc(0.0); + #ifdef __unix__ + y0(1.0); + y1(1.0); + yn(1, 1.0); + #elif _WIN64 + _y0(1.0); + _y1(1.0); + _yn(1, 1.0); + #endif +} + +TEST_CASE("Unit_DoublePrecisionMathHost") { + double_precision_math_functions(); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc new file mode 100644 index 0000000000..357f2ed918 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc @@ -0,0 +1,128 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +__device__ void FloatMathPrecise() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + fX = ceilf(0.0f); + fX = copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + cospif(0.0f); + cyl_bessel_i0f(0.0f); + cyl_bessel_i1f(0.0f); + erfcf(0.0f); + erfcinvf(2.0f); + erfcxf(0.0f); + erff(0.0f); + erfinvf(1.0f); + exp10f(0.0f); + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fX = fabsf(1.0f); + fdimf(1.0f, 0.0f); + fdividef(0.0f, 1.0f); + fX = floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fX = fmaxf(0.0f, 0.0f); + fX = fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + isfinite(0.0f); + fX = isinf(0.0f); + fX = isnan(0.0f); + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + ldexpf(0.0f, 0); + lgammaf(1.0f); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + modff(0.0f, &fX); + fX = nanf("1"); + fX = nearbyintf(0.0f); + nextafterf(0.0f, 0.0f); + norm3df(1.0f, 0.0f, 0.0f); + norm4df(1.0f, 0.0f, 0.0f, 0.0f); + normcdff(0.0f); + normcdfinvf(1.0f); + fX = 1.0f; + normf(1, &fX); + powf(1.0f, 0.0f); + rcbrtf(1.0f); + remainderf(2.0f, 1.0f); + remquof(1.0f, 2.0f, &iX); + rhypotf(0.0f, 1.0f); + fY = rintf(1.0f); + rnorm3df(0.0f, 0.0f, 1.0f); + rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); + fX = 1.0f; + rnormf(1, &fX); + fY = roundf(0.0f); + rsqrtf(1.0f); + scalblnf(0.0f, 1); + scalbnf(0.0f, 1); + signbit(1.0f); + sincosf(0.0f, &fX, &fY); + sincospif(0.0f, &fX, &fY); + sinf(0.0f); + sinhf(0.0f); + sinpif(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + fY = truncf(0.0f); + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); +} + +__global__ void CompileFloatMathPrecise(int) { + FloatMathPrecise(); +} + +TEST_CASE("Unit_FloatMathPrecise") { + hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc new file mode 100644 index 0000000000..68009651bd --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc @@ -0,0 +1,68 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void integer_intrinsics() { + __brev((unsigned int)10); + __brevll((uint64_t)10); + __byte_perm((unsigned int)0, (unsigned int)0, 0); + __clz(static_cast(10)); + __clzll((int64_t)10); + __ffs(static_cast(10)); + __ffsll((long long)(10)); // NOLINT + __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __hadd(static_cast(1), static_cast(3)); + __mul24(static_cast(1), static_cast(2)); + __mul64hi((int64_t)1, (int64_t)2); + __mulhi(static_cast(1), static_cast(2)); + __popc((unsigned int)4); + __popcll((uint64_t)4); + int a = min(static_cast(4), static_cast(5)); + int b = max(static_cast(4), static_cast(5)); + __rhadd(static_cast(1), static_cast(2)); + __sad(static_cast(1), static_cast(2), 0); + __uhadd((unsigned int)1, (unsigned int)3); + __umul24((unsigned int)1, (unsigned int)2); + __umul64hi((uint64_t)1, (uint64_t)2); + __umulhi((unsigned int)1, (unsigned int)2); + __urhadd((unsigned int)1, (unsigned int)2); + __usad((unsigned int)1, (unsigned int)2, 0); + + assert(1); +} + +__global__ void compileIntegerIntrinsics(int) { + integer_intrinsics(); +} + +TEST_CASE("Unit_IntegerIntrinsics") { + hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc new file mode 100644 index 0000000000..db481e1e22 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc @@ -0,0 +1,298 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#include +#include + +using namespace std; +//////////////////////////////////////////////////////////////////////////////// +// Auto-Verification Code +//////////////////////////////////////////////////////////////////////////////// + +bool verifyBitwise(...) { + return true; +} + +template{}>::type* = nullptr> +bool verifyBitwise(T* gpuData, int len) { + // Atomic and + T val = 0xff; + for (int i = 0; i < len; ++i) { + // 9th element should be 1 + val &= (2 * i + 7); + } + REQUIRE(val == gpuData[8]); + + // atomic Or + val = 0; + for (int i = 0; i < len; ++i) { + // 10th element should be 0xff + val |= (1 << i); + } + REQUIRE(val == gpuData[9]); + + // atomic Xor + val = 0xff; + + for (int i = 0; i < len; ++i) { + // 11th element should be 0xff + val ^= i; + } + + REQUIRE(val == gpuData[10]); + return true; +} + +bool verifySub(...) { + return true; +} + +template< + typename T, + typename enable_if< + is_same{} || is_same{}>::type* = nullptr> +bool verifySub(T* gpuData, int len) { + T val = 0; + + for (int i = 0; i < len; ++i) { + val -= 10; + } + + REQUIRE(val == gpuData[1]); + return true; +} + +bool verifyExch(...) { + return true; +} + +template {}>::type* = nullptr> // NOLINT +bool computeExchExch(T* gpuData, int len) { + T val = 0; + + for (T i = 0; i < len; ++i) { + if (i == gpuData[2]) { + return true; + break; + } + } +} + +bool VerifyIntegral(...) { + return true; +} + +template{}>::type* = nullptr> +bool VerifyIntegral(T* gpuData, int len) { + // atomic Max + T val = 0; + for (T i = 0; i < len; ++i) { + // fourth element should be len-1 + val = max(val, i); + } + + REQUIRE(val == gpuData[3]); + + // atomic Min + val = 1 << 8; + + for (T i = 0; i < len; ++i) { + val = min(val, i); + } + + REQUIRE(val == gpuData[4]); + + // atomic Inc + int limit = 17; + val = 0; + + for (int i = 0; i < len; ++i) { + val = (val >= limit) ? 0 : val + 1; + } + + REQUIRE(val == gpuData[5]); + + // atomic Dec + limit = 137; + val = 0; + + for (int i = 0; i < len; ++i) { + val = ((val == 0) || (val > limit)) ? limit : val - 1; + } + + REQUIRE(val == gpuData[6]); + + // atomic CAS + for (T i = 0; i < len; ++i) { + // eighth element should be a member of [0, len) + if (i == gpuData[7]) { + return true; + break; + } + } + return verifyBitwise(gpuData, len) && verifySub(gpuData, len); +} + +template +bool verifyData(T* gpuData, int len) { + T val = 0; + for (int i = 0; i < len; ++i) { + val += 10; + } + + REQUIRE(val == gpuData[0]); + return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len); +} + +__device__ +void testKernelExch(...) {} + +template{}>::type* = nullptr> +__device__ +void testKernelExch(T* g_odata) { + // access thread id + const T tid = blockDim.x * blockIdx.x + threadIdx.x; + + // Atomic exchange + atomicExch(&g_odata[2], tid); +} + +__device__ +void testKernelSub(...) {} + +template< + typename T, + typename enable_if< + is_same{} || is_same{}>::type* = nullptr> +__device__ +void testKernelSub(T* g_odata) { + // Atomic subtraction (final should be 0) + atomicSub(&g_odata[1], 10); +} + +__device__ +void testKernelIntegral(...) {} + +template{}>::type* = nullptr> +__device__ +void testKernelIntegral(T* g_odata) { + // access thread id + const T tid = blockDim.x * blockIdx.x + threadIdx.x; + + // Atomic maximum + atomicMax(&g_odata[3], tid); + + // Atomic minimum + atomicMin(&g_odata[4], tid); + + // Atomic increment (modulo 17+1) + atomicInc((unsigned int*)&g_odata[5], 17); + + // Atomic decrement + atomicDec((unsigned int*)&g_odata[6], 137); + + // Atomic compare-and-swap + atomicCAS(&g_odata[7], tid - 1, tid); + + // Bitwise atomic instructions + + // Atomic AND + atomicAnd(&g_odata[8], 2 * tid + 7); + + // Atomic OR + atomicOr(&g_odata[9], 1 << tid); + + // Atomic XOR + atomicXor(&g_odata[10], tid); + + testKernelSub(g_odata); +} + +template +__global__ void testKernel(T* g_odata) { + // Atomic addition + atomicAdd(&g_odata[0], 10); + testKernelIntegral(g_odata); + testKernelExch(g_odata); +} + +template +static void runTest() { + bool testResult = true; + unsigned int numThreads = 256; + unsigned int numBlocks = 64; + unsigned int numData = 11; + unsigned int memSize = sizeof(T) * numData; + + // allocate mem for the result on host side + T* hOData = reinterpret_cast(malloc(memSize)); + + // initialize the memory + for (unsigned int i = 0; i < numData; i++) { + hOData[i] = 0; + } + // To make the AND and XOR tests generate something other than 0... + hOData[8] = hOData[10] = 0xff; + + // allocate device memory for result + T* dOData; + HIP_CHECK(hipMalloc(reinterpret_cast(&dOData), memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL( + testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + + // Compute reference solution + REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks)); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); +} + +TEST_CASE("Unit_SimpleAtomicsTest") { + SECTION("test for int") { + runTest(); + } + SECTION("test for unsigned int") { + runTest(); + } + SECTION("test for float") { + runTest(); + } + #if HT_AMD + SECTION("test for unsigned long long") { + runTest(); + } + SECTION("test for double") { + runTest(); + } + #endif +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc new file mode 100644 index 0000000000..fb8bebdaa5 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc @@ -0,0 +1,101 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void single_precision_intrinsics() { + float fX, fY; + + __cosf(0.0f); + __exp10f(0.0f); + __expf(0.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_rd(0.0f, 1.0f); +#endif + __fadd_rn(0.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_ru(0.0f, 1.0f); + __fadd_rz(0.0f, 1.0f); + __fdiv_rd(4.0f, 2.0f); +#endif + __fdiv_rn(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fdiv_ru(4.0f, 2.0f); + __fdiv_rz(4.0f, 2.0f); +#endif + __fdividef(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_rd(1.0f, 2.0f, 3.0f); +#endif + __fmaf_rn(1.0f, 2.0f, 3.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_ru(1.0f, 2.0f, 3.0f); + __fmaf_rz(1.0f, 2.0f, 3.0f); + __fmul_rd(1.0f, 2.0f); +#endif + __fmul_rn(1.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmul_ru(1.0f, 2.0f); + __fmul_rz(1.0f, 2.0f); + __frcp_rd(2.0f); +#endif + __frcp_rn(2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __frcp_ru(2.0f); + __frcp_rz(2.0f); +#endif + __frsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_rd(4.0f); +#endif + __fsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_ru(4.0f); + __fsqrt_rz(4.0f); + __fsub_rd(2.0f, 1.0f); +#endif + __fsub_rn(2.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsub_ru(2.0f, 1.0f); + __fsub_rz(2.0f, 1.0f); +#endif + __log10f(1.0f); + __log2f(1.0f); + __logf(1.0f); + __powf(1.0f, 0.0f); + __saturatef(0.1f); + __sincosf(0.0f, &fX, &fY); + __sinf(0.0f); + __tanf(0.0f); +} + +__global__ void compileSinglePrecisionIntrinsics(int) { + single_precision_intrinsics(); +} + +TEST_CASE("Unit_SinglePrecisionIntrinsics") { + hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc new file mode 100644 index 0000000000..e7bbdc180e --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc @@ -0,0 +1,123 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + + +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void single_precision_math_functions() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + ceilf(0.0f); + copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + cospif(0.0f); + erfcf(0.0f); + erfcinvf(2.0f); + erfcxf(0.0f); + erff(0.0f); + erfinvf(1.0f); + exp10f(0.0f); + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fabsf(1.0f); + fdimf(1.0f, 0.0f); + fdividef(0.0f, 1.0f); + floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fmaxf(0.0f, 0.0f); + fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + isfinite(0.0f); + isinf(0.0f); + isnan(0.0f); + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + ldexpf(0.0f, 0); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + nanf("1"); + nearbyintf(0.0f); + norm3df(1.0f, 0.0f, 0.0f); + norm4df(1.0f, 0.0f, 0.0f, 0.0f); + normcdff(0.0f); + normcdfinvf(1.0f); + fX = 1.0f; + normf(1, &fX); + powf(1.0f, 0.0f); + remainderf(2.0f, 1.0f); + rhypotf(0.0f, 1.0f); + rintf(1.0f); + rnorm3df(0.0f, 0.0f, 1.0f); + rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); + fX = 1.0f; + rnormf(1, &fX); + roundf(0.0f); + rsqrtf(1.0f); + signbit(1.0f); + sincosf(0.0f, &fX, &fY); + sincospif(0.0f, &fX, &fY); + sinf(0.0f); + sinhf(0.0f); + sinpif(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + truncf(0.0f); + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); +} + +__global__ void compileSinglePrecisionMathOnDevice(int) { + single_precision_math_functions(); +} + +TEST_CASE("Unit_SinglePrecisionMathDevice") { + hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc new file mode 100644 index 0000000000..85407560cb --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc @@ -0,0 +1,113 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__host__ static void single_precision_math_functions() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + ceilf(0.0f); + copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + erfcf(0.0f); + erff(0.0f); + #ifdef __unix__ + exp10f(0.0f); + #endif + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fabsf(1.0f); + fdimf(1.0f, 0.0f); + floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fmaxf(0.0f, 0.0f); + fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + std::isfinite(0.0f); + std::isinf(0.0f); + std::isnan(0.0f); + #ifdef __unix__ + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + #endif + ldexpf(0.0f, 0); + lgammaf(1.0f); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + modff(0.0f, &fX); + nanf("1"); + nearbyintf(0.0f); + powf(1.0f, 0.0f); + remainderf(2.0f, 1.0f); + remquof(1.0f, 2.0f, &iX); + rintf(1.0f); +#if HT_AMD + fX = 1.0f; +#endif + roundf(0.0f); + /// rsqrtf(1.0f); + scalblnf(0.0f, 1); + scalbnf(0.0f, 1); + std::signbit(1.0f); + #ifdef __unix__ + sincosf(0.0f, &fX, &fY); + #endif + sinf(0.0f); + sinhf(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + truncf(0.0f); + #ifdef __unix__ + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); + #endif +} + +TEST_CASE("Unit_SinglePrecisionMathHost") { + single_precision_math_functions(); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc new file mode 100644 index 0000000000..8cde8de8ed --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc @@ -0,0 +1,150 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +// Tolerance for error +const double tolerance = 1e-6; +const bool verbose = false; + +#define LEN 64 + +#define ALL_FUN \ + OP(add) \ + OP(sub) \ + OP(mul) \ + OP(div) \ + OP(abs) \ + OP(arg) \ + OP(sin) \ + OP(cos) + +#define OP(x) CK_##x, +enum CalcKind { + ALL_FUN +}; +#undef OP + +#define OP(x) case CK_##x: return #x; +std::string getName(enum CalcKind CK) { + switch (CK) { + ALL_FUN + } + return ""; // To prevent compile warning +} +#undef OP + +// Calculates function. +// If the function has one argument, B is ignored. +// If the function returns real number, converts it to a complex number. +#define ONE_ARG(func) \ + case CK_##func: \ + return std::complex(func(A)); + +template +__device__ __host__ std::complex calc(std::complex A, + std::complex B, + enum CalcKind CK) { + switch (CK) { + case CK_add: + return A + B; + case CK_sub: + return A - B; + case CK_mul: + return A * B; + case CK_div: + return A / B; + + ONE_ARG(abs) + ONE_ARG(arg) + ONE_ARG(sin) + ONE_ARG(cos) + } + return A; // To prevent compile warning +} + +template +__global__ void kernel(std::complex* A, + std::complex* B, std::complex* C, + enum CalcKind CK) { + int tx = threadIdx.x + blockIdx.x * blockDim.x; + C[tx] = calc(A[tx], B[tx], CK); +} + +template +void test() { + typedef std::complex ComplexT; + + ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D; + A = new ComplexT[LEN]; + B = new ComplexT[LEN]; + C = new ComplexT[LEN]; + D = new ComplexT[LEN]; + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), sizeof(ComplexT)*LEN)); + HIP_CHECK(hipMalloc(reinterpret_cast(&Bd), sizeof(ComplexT)*LEN)); + HIP_CHECK(hipMalloc(reinterpret_cast(&Cd), sizeof(ComplexT)*LEN)); + + for (uint32_t i = 0; i < LEN; i++) { + A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f); + B[i] = A[i]; + C[i] = A[i]; + } + HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice)); + + // Run kernel for a calculation kind and verify by comparing with host + // calculation result. Returns false if fails. + auto test_fun = [&](enum CalcKind CK) { + hipLaunchKernelGGL(kernel, dim3(1), dim3(LEN), 0, 0, + Ad, Bd, Cd, CK); + HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost)); + for (int i = 0; i < LEN; i++) { + ComplexT Expected = calc(A[i], B[i], CK); + FloatT error = abs(C[i] - Expected); + if (abs(Expected) > tolerance) + error /= abs(Expected); + bool pass = error < tolerance; + } + return true; + }; + +#define OP(x) assert(test_fun(CK_##x)); + ALL_FUN +#undef OP + + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipFree(Cd)); + delete[] A; + delete[] B; + delete[] C; + delete[] D; +} + +TEST_CASE("Unit_StdComplex") { + SECTION("Test run with float") { + test(); + } + SECTION("Test run with double") { + test(); + } +} diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc new file mode 100644 index 0000000000..4834965942 --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + (TestCase 1):: + 1) Execute atomicAdd in multi threaded scenario by diverging the data across + multiple threads and validate the output at the end of all operations. + 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data + across multiple threads and validate the output at the end of all operations. + (TestCase 2):: + 3) Execute atomicAdd API and validate the result. + 4) Execute atomicAddNoRet API and validate the result. + (TestCase 3):: + 5) atomicadd/NoRet negative scenarios (TBD). +*/ + +#include +#include +#include +/* + * Defines initial and increment values + */ +#define INCREMENT_VALUE 10 +#define INT_INITIAL_VALUE 10 +#define FLOAT_INITIAL_VALUE 10.50 +#define DOUBLE_INITIAL_VALUE 200.12 +#define LONG_INITIAL_VALUE 10000 +#define UNSIGNED_INITIAL_VALUE 20 + +#if HT_NVIDIA +// atomicAddNoRet is unavailable in cuda +template +__device__ void atomicAddNoRet(T* x, int y) { + atomicAdd(x, static_cast(y)); +} +#endif + +bool p_atomicNoRet = false; + +template +__global__ void atomicnoret_manywaves(T* C_d) { + size_t tid = (blockIdx.x * blockDim.x + threadIdx.x); + atomicAddNoRet(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomic_manywaves(T* C_d) { + size_t tid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); + atomicAdd(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomicnoret_simple(T* C_d) { + atomicAddNoRet(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomic_simple(T* C_d) { + atomicAdd(C_d, INCREMENT_VALUE); +} + +template +bool atomictest_manywaves(const T& initial_val) { + unsigned int ThreadsperBlock = 10; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+ + (INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictestnoret_manywaves(const T& initial_val) { + unsigned int ThreadsperBlock = 10; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+ + (INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictest_simple(const T& initial_val) { + unsigned int ThreadsperBlock = 1; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomic_simple, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictestnoret_simple(const T& initial_val) { + unsigned int ThreadsperBlock = 1; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +TEST_CASE("Unit_hipTestAtomicAdd") { + bool TestPassed = true; + + SECTION("atomic tests with many waves") { + REQUIRE(TestPassed == atomictest_manywaves(INT_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_manywaves(UNSIGNED_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_manywaves(FLOAT_INITIAL_VALUE)); + #if HT_AMD + REQUIRE(TestPassed == + atomictest_manywaves(LONG_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_manywaves(DOUBLE_INITIAL_VALUE)); + #endif + } + SECTION("atomic tests with many waves and no return") { + REQUIRE(TestPassed == + atomictestnoret_manywaves(FLOAT_INITIAL_VALUE)); + } + SECTION("simple atomic tests") { + REQUIRE(TestPassed == atomictest_simple(INT_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_simple(UNSIGNED_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_simple(FLOAT_INITIAL_VALUE)); + #if HT_AMD + REQUIRE(TestPassed == + atomictest_simple(LONG_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_simple(DOUBLE_INITIAL_VALUE)); + #endif + } + SECTION("Simple atomic test with no return") { + REQUIRE(TestPassed == atomictestnoret_simple(FLOAT_INITIAL_VALUE)); + } +} diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc new file mode 100644 index 0000000000..26dd29c76c --- /dev/null +++ b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc @@ -0,0 +1,51 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define LEN 512 +#define SIZE (LEN * sizeof(int64_t)) + +static __global__ void kernel1(int64_t* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = clock() + clock64() + __clock() + __clock64(); +} + +static __global__ void kernel2(int64_t* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid]; +} + +TEST_CASE("Unit_hipTestClock") { + int64_t *A, *Ad; + A = new int64_t[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), SIZE)); + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + for (unsigned i = 0; i < LEN; i++) { + assert(0 != A[i]); + } +}