From 13c5e7a3e474beeb4de1eba1e5587f4aa31ba92e Mon Sep 17 00:00:00 2001 From: Julia Jiang Date: Wed, 10 Jul 2024 16:06:00 -0400 Subject: [PATCH] SWDEV-472723 - Correct file format and remove trailing spaces Change-Id: Ie40c763e9391fa36d6c890cd0a171659a1502a83 [ROCm/hip-tests commit: 5d042c80fa3d79b62cc25ad0bedc2813e99571ff] --- projects/hip-tests/.gitattributes | 20 + projects/hip-tests/catch/README.md | 2 +- .../catch/unit/deviceLib/Atomic_func.cc | 238 +- .../deviceLib/DoublePrecisionIntrinsics.cc | 162 +- .../deviceLib/DoublePrecisionMathDevice.cc | 266 +- .../unit/deviceLib/DoublePrecisionMathHost.cc | 234 +- .../catch/unit/deviceLib/FloatMathPrecise.cc | 256 +- .../catch/unit/deviceLib/IntegerIntrinsics.cc | 136 +- .../catch/unit/deviceLib/SimpleAtomicsTest.cc | 596 +- .../deviceLib/SinglePrecisionIntrinsics.cc | 202 +- .../deviceLib/SinglePrecisionMathDevice.cc | 246 +- .../unit/deviceLib/SinglePrecisionMathHost.cc | 226 +- .../catch/unit/deviceLib/hipStdComplex.cc | 306 +- .../catch/unit/deviceLib/hipTestAtomicAdd.cc | 440 +- .../catch/unit/deviceLib/hipTestClock.cc | 102 +- .../unit/errorHandling/hipDrvGetErrorName.cc | 176 +- .../errorHandling/hipDrvGetErrorString.cc | 176 +- .../hip-tests/catch/unit/g++/CMakeLists.txt | 38 +- .../hip-tests/catch/unit/g++/hipMalloc.cc | 108 +- projects/hip-tests/catch/unit/g++/hipMalloc.h | 42 +- .../hip-tests/catch/unit/gcc/CMakeLists.txt | 56 +- projects/hip-tests/catch/unit/gcc/gccTest.cc | 128 +- .../catch/unit/kernel/hipDynamicShared.cc | 352 +- .../catch/unit/kernel/hipDynamicShared2.cc | 188 +- .../catch/unit/kernel/hipEmptyKernel.cc | 118 +- .../unit/kernel/hipExtLaunchKernelGGL.cc | 276 +- .../catch/unit/kernel/hipGridLaunch.cc | 244 +- .../unit/kernel/hipLanguageExtensions.cc | 222 +- .../catch/unit/kernel/hipLaunchParm.cc | 2038 ++--- .../catch/unit/kernel/hipLaunchParmFunctor.cc | 928 +-- .../hipSVMTestSharedAddressSpaceFineGrain.cpp | 2 +- .../hip-tests/catch/unit/p2p/CMakeLists.txt | 48 +- 
.../unit/p2p/hipP2pLinkTypeAndHopFunc.cc | 712 +- .../catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h | 220 +- .../hip-tests/catch/unit/rtc/RtcFunctions.cpp | 6600 ++++++++--------- .../hip-tests/catch/unit/rtc/RtcUtility.cpp | 1016 +-- .../catch/unit/rtc/headers/RtcFunctions.h | 356 +- .../catch/unit/rtc/headers/RtcKernels.h | 326 +- .../catch/unit/rtc/headers/RtcUtility.h | 106 +- .../catch/unit/synchronization/CMakeLists.txt | 50 +- .../cache_coherency_cpu_gpu.cc | 564 +- .../cache_coherency_gpu_gpu.cc | 588 +- .../unit/synchronization/copy_coherency.cc | 680 +- .../hip-tests/catch/unit/warp/hipShflTests.cc | 364 +- .../catch/unit/warp/hipShflUpDownTest.cc | 482 +- .../perftests/memory/hipPerfMemset.cpp | 874 +-- projects/hip-tests/samples/README.md | 2 +- 47 files changed, 10766 insertions(+), 10746 deletions(-) create mode 100644 projects/hip-tests/.gitattributes diff --git a/projects/hip-tests/.gitattributes b/projects/hip-tests/.gitattributes new file mode 100644 index 0000000000..d5175f2f9c --- /dev/null +++ b/projects/hip-tests/.gitattributes @@ -0,0 +1,20 @@ +# Set the default behavior, in case people don't have core.autocrlf set. +* text=auto + +# Explicitly declare text files you want to always be normalized and converted +# to have LF line endings on checkout. 
+*.c text eol=lf +*.cpp text eol=lf +*.cc text eol=lf +*.h text eol=lf +*.hpp text eol=lf +*.txt text eol=lf + +# Define files to support auto-remove trailing white space +# Need to run the command below before adding modified file(s) to the staging area +# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"' +*.cpp filter=trimspace +*.c filter=trimspace +*.h filter=trimspace +*.hpp filter=trimspace +*.md filter=trimspace \ No newline at end of file diff --git a/projects/hip-tests/catch/README.md b/projects/hip-tests/catch/README.md index 22a4218766..3a7dec6e66 100644 --- a/projects/hip-tests/catch/README.md +++ b/projects/hip-tests/catch/README.md @@ -180,7 +180,7 @@ hipcc -I/tests/catch/include /tests ## Debugging support Catch2 allows multiple ways in which you can debug the test case. - `-b` options breaks into a debugger as soon as there is a failure encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger) -- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure +- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure - User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case. - User can also mention filename.cc:__LineNumber__ to break into a test case via gdb. diff --git a/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc b/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc index 4a28839c6f..9a16e82d49 100644 --- a/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc +++ b/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc @@ -1,119 +1,119 @@ -/* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include - -// Test case to validate atomicInc and atomicDec functions. -// if TestToRun=1, then atomicInc function will be tested and validated -// if TestToRun=2, then atomicDec function will be tested and validated. - - -// kernel function for atomicInc -static __global__ void AtomicCheckInc(int* g_ptr) { - atomicInc(reinterpret_cast(&g_ptr[0]), 17); -} - -// kernel function for atomicDec -static __global__ void AtomicCheckDec(int* g_ptr) { - atomicDec(reinterpret_cast(&g_ptr[0]), 25); -} - -// verify results for atomicInc -static int verifyResultInc(int value) { - int limit = 17; - value = (value >= limit) ? 0 : value + 1; - return value; -} - -// verify results for atomicDec -static int verifyResultDec(int value) { - int limit = 25; - value = ((value == 0) || (value > limit)) ? limit : value - 1; - return value; -} - -// common fuction to launch atomic functions kernel. 
-static void launchAtomicFunction(int *Hptr, int val, int TestToRun) { - unsigned int memSize = sizeof(int) * 1; - int *dptr{nullptr}; - // allocate device memory - HIP_CHECK(hipMalloc(reinterpret_cast(&dptr), memSize)); - // copy host memory to device - HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice)); - // launch kernel function - if (TestToRun == 1) { - AtomicCheckInc<<<1, 1>>>(dptr); - } else if (TestToRun == 2) { - AtomicCheckDec<<<1, 1>>>(dptr); - } - // copy back from device to host - HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost)); - // verify the results. - if (TestToRun == 1) { - int result = verifyResultInc(val); - REQUIRE(result == Hptr[0]); - } else if (TestToRun == 2) { - int result = verifyResultDec(val); - REQUIRE(result == Hptr[0]); - } - // Cleanup memory - HIP_CHECK(hipFree(dptr)); -} - -TEST_CASE("Unit_AtomicFunctions_Inc") { - int *Hptr{nullptr}; - int val; - // Allocate Host memory - Hptr = reinterpret_cast(malloc(sizeof(int))); - SECTION("Test case when value is lesser than limit") { - val = Hptr[0] = 10; - launchAtomicFunction(Hptr, val, 1); - } - SECTION("Test case when value is greater than limit") { - val = Hptr[0] = 20; - launchAtomicFunction(Hptr, val, 1); - } - SECTION("Test case when value is equal to the limit") { - val = Hptr[0] = 17; - launchAtomicFunction(Hptr, val, 1); - } - free(Hptr); -} - -TEST_CASE("Unit_AtomicFunctions_Dec") { - int *Hptr{nullptr}; - int val; - // Allocate Host memory - Hptr = reinterpret_cast(malloc(sizeof(int))); - SECTION("Test case when value is less than limit") { - val = Hptr[0] = 4; - launchAtomicFunction(Hptr, val, 2); - } - SECTION("Test case when value is greater than limit") { - val = Hptr[0] = 31; - launchAtomicFunction(Hptr, val, 2); - } - SECTION("Test case when value is equal to the limit") { - val = Hptr[0] = 25; - launchAtomicFunction(Hptr, val, 2); - } - free(Hptr); -} +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +// Test case to validate atomicInc and atomicDec functions. +// if TestToRun=1, then atomicInc function will be tested and validated +// if TestToRun=2, then atomicDec function will be tested and validated. + + +// kernel function for atomicInc +static __global__ void AtomicCheckInc(int* g_ptr) { + atomicInc(reinterpret_cast(&g_ptr[0]), 17); +} + +// kernel function for atomicDec +static __global__ void AtomicCheckDec(int* g_ptr) { + atomicDec(reinterpret_cast(&g_ptr[0]), 25); +} + +// verify results for atomicInc +static int verifyResultInc(int value) { + int limit = 17; + value = (value >= limit) ? 0 : value + 1; + return value; +} + +// verify results for atomicDec +static int verifyResultDec(int value) { + int limit = 25; + value = ((value == 0) || (value > limit)) ? limit : value - 1; + return value; +} + +// common fuction to launch atomic functions kernel. 
+static void launchAtomicFunction(int *Hptr, int val, int TestToRun) { + unsigned int memSize = sizeof(int) * 1; + int *dptr{nullptr}; + // allocate device memory + HIP_CHECK(hipMalloc(reinterpret_cast(&dptr), memSize)); + // copy host memory to device + HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice)); + // launch kernel function + if (TestToRun == 1) { + AtomicCheckInc<<<1, 1>>>(dptr); + } else if (TestToRun == 2) { + AtomicCheckDec<<<1, 1>>>(dptr); + } + // copy back from device to host + HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost)); + // verify the results. + if (TestToRun == 1) { + int result = verifyResultInc(val); + REQUIRE(result == Hptr[0]); + } else if (TestToRun == 2) { + int result = verifyResultDec(val); + REQUIRE(result == Hptr[0]); + } + // Cleanup memory + HIP_CHECK(hipFree(dptr)); +} + +TEST_CASE("Unit_AtomicFunctions_Inc") { + int *Hptr{nullptr}; + int val; + // Allocate Host memory + Hptr = reinterpret_cast(malloc(sizeof(int))); + SECTION("Test case when value is lesser than limit") { + val = Hptr[0] = 10; + launchAtomicFunction(Hptr, val, 1); + } + SECTION("Test case when value is greater than limit") { + val = Hptr[0] = 20; + launchAtomicFunction(Hptr, val, 1); + } + SECTION("Test case when value is equal to the limit") { + val = Hptr[0] = 17; + launchAtomicFunction(Hptr, val, 1); + } + free(Hptr); +} + +TEST_CASE("Unit_AtomicFunctions_Dec") { + int *Hptr{nullptr}; + int val; + // Allocate Host memory + Hptr = reinterpret_cast(malloc(sizeof(int))); + SECTION("Test case when value is less than limit") { + val = Hptr[0] = 4; + launchAtomicFunction(Hptr, val, 2); + } + SECTION("Test case when value is greater than limit") { + val = Hptr[0] = 31; + launchAtomicFunction(Hptr, val, 2); + } + SECTION("Test case when value is equal to the limit") { + val = Hptr[0] = 25; + launchAtomicFunction(Hptr, val, 2); + } + free(Hptr); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc 
b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc index 6801decb9e..207e105d2c 100644 --- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc @@ -1,81 +1,81 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void double_precision_intrinsics() { -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dadd_rd(0.0, 1.0); -#endif - __dadd_rn(0.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dadd_ru(0.0, 1.0); - __dadd_rz(0.0, 1.0); - __ddiv_rd(0.0, 1.0); -#endif - __ddiv_rn(0.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __ddiv_ru(0.0, 1.0); - __ddiv_rz(0.0, 1.0); - __dmul_rd(1.0, 2.0); -#endif - __dmul_rn(1.0, 2.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dmul_ru(1.0, 2.0); - __dmul_rz(1.0, 2.0); - __drcp_rd(2.0); -#endif - __drcp_rn(2.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __drcp_ru(2.0); - __drcp_rz(2.0); - __dsqrt_rd(4.0); -#endif - __dsqrt_rn(4.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dsqrt_ru(4.0); - __dsqrt_rz(4.0); - __dsub_rd(2.0, 1.0); -#endif - __dsub_rn(2.0, 1.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __dsub_ru(2.0, 1.0); - __dsub_rz(2.0, 1.0); - __fma_rd(1.0, 2.0, 3.0); -#endif - __fma_rn(1.0, 2.0, 3.0); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fma_ru(1.0, 2.0, 3.0); - __fma_rz(1.0, 2.0, 3.0); -#endif -} - -__global__ void compileDoublePrecisionIntrinsics(int) { - double_precision_intrinsics(); -} - -TEST_CASE("Unit_DoublePrecisionIntrinsics") { - hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void double_precision_intrinsics() { +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_rd(0.0, 1.0); +#endif + __dadd_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dadd_ru(0.0, 1.0); + __dadd_rz(0.0, 1.0); + __ddiv_rd(0.0, 1.0); +#endif + __ddiv_rn(0.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __ddiv_ru(0.0, 1.0); + __ddiv_rz(0.0, 1.0); + __dmul_rd(1.0, 2.0); +#endif + __dmul_rn(1.0, 2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dmul_ru(1.0, 2.0); + __dmul_rz(1.0, 2.0); + __drcp_rd(2.0); +#endif + __drcp_rn(2.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __drcp_ru(2.0); + __drcp_rz(2.0); + __dsqrt_rd(4.0); +#endif + __dsqrt_rn(4.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsqrt_ru(4.0); + __dsqrt_rz(4.0); + __dsub_rd(2.0, 1.0); +#endif + __dsub_rn(2.0, 1.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __dsub_ru(2.0, 1.0); + __dsub_rz(2.0, 1.0); + __fma_rd(1.0, 2.0, 3.0); +#endif + __fma_rn(1.0, 2.0, 3.0); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fma_ru(1.0, 2.0, 3.0); + __fma_rz(1.0, 2.0, 3.0); +#endif +} + +__global__ void compileDoublePrecisionIntrinsics(int) { + double_precision_intrinsics(); +} + +TEST_CASE("Unit_DoublePrecisionIntrinsics") { + hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc index 9c695a7b41..df3b988ae6 100644 --- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc @@ -1,133 +1,133 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void double_precision_math_functions() { - int iX; - double fX, fY; - - acos(1.0); - acosh(1.0); - asin(0.0); - asinh(0.0); - atan(0.0); - atan2(0.0, 1.0); - atanh(0.0); - cbrt(0.0); - ceil(0.0); - copysign(1.0, -2.0); - cos(0.0); - cosh(0.0); - cospi(0.0); - cyl_bessel_i0(0.0); - cyl_bessel_i1(0.0); - erf(0.0); - erfc(0.0); - erfcinv(2.0); - erfcx(0.0); - erfinv(1.0); - exp(0.0); - exp10(0.0); - exp2(0.0); - expm1(0.0); - fabs(1.0); - fdim(1.0, 0.0); - floor(0.0); - fma(1.0, 2.0, 3.0); - fmax(0.0, 0.0); - fmin(0.0, 0.0); - fmod(0.0, 1.0); - frexp(0.0, &iX); - hypot(1.0, 0.0); - ilogb(1.0); - isfinite(0.0); - isinf(0.0); - isnan(0.0); - j0(0.0); - j1(0.0); - jn(-1.0, 1.0); - ldexp(0.0, 0); - lgamma(1.0); - llrint(0.0); - llround(0.0); - log(1.0); - log10(1.0); - log1p(-1.0); - log2(1.0); - logb(1.0); - lrint(0.0); - lround(0.0); - modf(0.0, &fX); - nan("1"); - nearbyint(0.0); - nextafter(0.0, 0.0); - fX = 1.0; - norm(1, &fX); - norm3d(1.0, 0.0, 0.0); - norm4d(1.0, 0.0, 0.0, 0.0); - normcdf(0.0); - normcdfinv(1.0); - pow(1.0, 0.0); - rcbrt(1.0); - remainder(2.0, 1.0); - remquo(1.0, 2.0, &iX); - rhypot(0.0, 1.0); - rint(1.0); - fX = 1.0; - rnorm(1, &fX); - rnorm3d(0.0, 0.0, 1.0); - rnorm4d(0.0, 0.0, 0.0, 1.0); - round(0.0); - rsqrt(1.0); - scalbln(0.0, 1); - scalbn(0.0, 1); - signbit(1.0); - sin(0.0); -#if HT_AMD - // NV A100 has a bug in sincos(), so temporarily disbale it - sincos(0.0, &fX, &fY); -#endif - sincospi(0.0, &fX, &fY); - sinh(0.0); - sinpi(0.0); - sqrt(0.0); - tan(0.0); - tanh(0.0); - tgamma(2.0); - trunc(0.0); - y0(1.0); - y1(1.0); - yn(1, 1.0); -} - -__global__ void compileDoublePrecisionMathOnDevice(int) { - double_precision_math_functions(); -} - -TEST_CASE("Unit_DoublePrecisionMathDevice") { - hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* 
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void double_precision_math_functions() { + int iX; + double fX, fY; + + acos(1.0); + acosh(1.0); + asin(0.0); + asinh(0.0); + atan(0.0); + atan2(0.0, 1.0); + atanh(0.0); + cbrt(0.0); + ceil(0.0); + copysign(1.0, -2.0); + cos(0.0); + cosh(0.0); + cospi(0.0); + cyl_bessel_i0(0.0); + cyl_bessel_i1(0.0); + erf(0.0); + erfc(0.0); + erfcinv(2.0); + erfcx(0.0); + erfinv(1.0); + exp(0.0); + exp10(0.0); + exp2(0.0); + expm1(0.0); + fabs(1.0); + fdim(1.0, 0.0); + floor(0.0); + fma(1.0, 2.0, 3.0); + fmax(0.0, 0.0); + fmin(0.0, 0.0); + fmod(0.0, 1.0); + frexp(0.0, &iX); + hypot(1.0, 0.0); + ilogb(1.0); + isfinite(0.0); + isinf(0.0); + isnan(0.0); + j0(0.0); + j1(0.0); + jn(-1.0, 1.0); + ldexp(0.0, 0); + lgamma(1.0); + llrint(0.0); + llround(0.0); + log(1.0); + log10(1.0); + log1p(-1.0); + log2(1.0); + logb(1.0); + lrint(0.0); + lround(0.0); + modf(0.0, &fX); + nan("1"); + nearbyint(0.0); + nextafter(0.0, 0.0); + fX = 1.0; + norm(1, &fX); + norm3d(1.0, 0.0, 0.0); + norm4d(1.0, 0.0, 0.0, 0.0); + normcdf(0.0); + normcdfinv(1.0); + pow(1.0, 0.0); + rcbrt(1.0); + remainder(2.0, 1.0); + remquo(1.0, 2.0, &iX); + rhypot(0.0, 1.0); + rint(1.0); + fX = 1.0; + rnorm(1, &fX); + rnorm3d(0.0, 0.0, 1.0); + rnorm4d(0.0, 0.0, 0.0, 1.0); + round(0.0); + rsqrt(1.0); + scalbln(0.0, 1); + scalbn(0.0, 1); + signbit(1.0); + sin(0.0); +#if HT_AMD + // NV A100 has a bug in sincos(), so temporarily disbale it + sincos(0.0, &fX, &fY); +#endif + sincospi(0.0, &fX, &fY); + sinh(0.0); + sinpi(0.0); + sqrt(0.0); + tan(0.0); + tanh(0.0); + tgamma(2.0); + trunc(0.0); + y0(1.0); + y1(1.0); + yn(1, 1.0); +} + +__global__ void compileDoublePrecisionMathOnDevice(int) { + double_precision_math_functions(); +} + +TEST_CASE("Unit_DoublePrecisionMathDevice") { + hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git 
a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc index fd4e4bf238..773d5eab0d 100644 --- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc +++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc @@ -1,117 +1,117 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__host__ static void double_precision_math_functions() { - int iX; - double fX, fY; - - acos(1.0); - acosh(1.0); - asin(0.0); - asinh(0.0); - atan(0.0); - atan2(0.0, 1.0); - atanh(0.0); - cbrt(0.0); - ceil(0.0); - copysign(1.0, -2.0); - cos(0.0); - cosh(0.0); - erf(0.0); - erfc(0.0); - exp(0.0); - #ifdef __unix__ - exp10(0.0); - #endif - exp2(0.0); - expm1(0.0); - fabs(1.0); - fdim(1.0, 0.0); - floor(0.0); - fma(1.0, 2.0, 3.0); - fmax(0.0, 0.0); - fmin(0.0, 0.0); - fmod(0.0, 1.0); - frexp(0.0, &iX); - hypot(1.0, 0.0); - ilogb(1.0); - std::isfinite(0.0); - std::isinf(0.0); - std::isnan(0.0); - #ifdef __unix__ - j0(0.0); - j1(0.0); - jn(-1.0, 1.0); - #elif _WIN64 - _j0(0.0); - _j1(0.0); - _jn(-1.0, 1.0); - #endif - ldexp(0.0, 0); - llrint(0.0); - llround(0.0); - log(1.0); - log10(1.0); - log1p(-1.0); - log2(1.0); - logb(1.0); - lrint(0.0); - lround(0.0); - modf(0.0, &fX); - nan("1"); - nearbyint(0.0); - fX = 1.0; - pow(1.0, 0.0); - remainder(2.0, 1.0); - remquo(1.0, 2.0, &iX); - rint(1.0); - round(0.0); - scalbln(0.0, 1); - scalbn(0.0, 1); - std::signbit(1.0); - sin(0.0); - #ifdef _unix__ - sincos(0.0, &fX, &fY); - #endif - sinh(0.0); - sqrt(0.0); - tan(0.0); - tanh(0.0); - tgamma(2.0); - trunc(0.0); - #ifdef __unix__ - y0(1.0); - y1(1.0); - yn(1, 1.0); - #elif _WIN64 - _y0(1.0); - _y1(1.0); - _yn(1, 1.0); - #endif -} - -TEST_CASE("Unit_DoublePrecisionMathHost") { - double_precision_math_functions(); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__host__ static void double_precision_math_functions() { + int iX; + double fX, fY; + + acos(1.0); + acosh(1.0); + asin(0.0); + asinh(0.0); + atan(0.0); + atan2(0.0, 1.0); + atanh(0.0); + cbrt(0.0); + ceil(0.0); + copysign(1.0, -2.0); + cos(0.0); + cosh(0.0); + erf(0.0); + erfc(0.0); + exp(0.0); + #ifdef __unix__ + exp10(0.0); + #endif + exp2(0.0); + expm1(0.0); + fabs(1.0); + fdim(1.0, 0.0); + floor(0.0); + fma(1.0, 2.0, 3.0); + fmax(0.0, 0.0); + fmin(0.0, 0.0); + fmod(0.0, 1.0); + frexp(0.0, &iX); + hypot(1.0, 0.0); + ilogb(1.0); + std::isfinite(0.0); + std::isinf(0.0); + std::isnan(0.0); + #ifdef __unix__ + j0(0.0); + j1(0.0); + jn(-1.0, 1.0); + #elif _WIN64 + _j0(0.0); + _j1(0.0); + _jn(-1.0, 1.0); + #endif + ldexp(0.0, 0); + llrint(0.0); + llround(0.0); + log(1.0); + log10(1.0); + log1p(-1.0); + log2(1.0); + logb(1.0); + lrint(0.0); + lround(0.0); + modf(0.0, &fX); + nan("1"); + nearbyint(0.0); + fX = 1.0; + pow(1.0, 0.0); + remainder(2.0, 1.0); + remquo(1.0, 2.0, &iX); + rint(1.0); + round(0.0); + scalbln(0.0, 1); + scalbn(0.0, 1); + std::signbit(1.0); + sin(0.0); + #ifdef _unix__ + sincos(0.0, &fX, &fY); + #endif + sinh(0.0); + sqrt(0.0); + tan(0.0); + tanh(0.0); + tgamma(2.0); + trunc(0.0); + #ifdef __unix__ + y0(1.0); + y1(1.0); + yn(1, 1.0); + #elif _WIN64 + _y0(1.0); + _y1(1.0); + _yn(1, 1.0); + #endif +} + +TEST_CASE("Unit_DoublePrecisionMathHost") { + double_precision_math_functions(); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc index 357f2ed918..698f6c144a 100644 --- a/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc +++ b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc @@ -1,128 +1,128 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include -#include - -__device__ void FloatMathPrecise() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - fX = ceilf(0.0f); - fX = copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - cospif(0.0f); - cyl_bessel_i0f(0.0f); - cyl_bessel_i1f(0.0f); - erfcf(0.0f); - erfcinvf(2.0f); - erfcxf(0.0f); - erff(0.0f); - erfinvf(1.0f); - exp10f(0.0f); - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fX = fabsf(1.0f); - fdimf(1.0f, 0.0f); - fdividef(0.0f, 1.0f); - fX = floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fX = fmaxf(0.0f, 0.0f); - fX = fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - isfinite(0.0f); - fX = isinf(0.0f); - fX = isnan(0.0f); - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - ldexpf(0.0f, 0); - lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - modff(0.0f, &fX); - fX = nanf("1"); - fX = nearbyintf(0.0f); - nextafterf(0.0f, 0.0f); - norm3df(1.0f, 0.0f, 0.0f); - norm4df(1.0f, 0.0f, 0.0f, 0.0f); - normcdff(0.0f); - normcdfinvf(1.0f); - fX = 1.0f; - normf(1, &fX); - powf(1.0f, 0.0f); - rcbrtf(1.0f); - remainderf(2.0f, 1.0f); - remquof(1.0f, 2.0f, &iX); - rhypotf(0.0f, 1.0f); - fY = rintf(1.0f); - rnorm3df(0.0f, 0.0f, 1.0f); - rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; - rnormf(1, &fX); - fY = roundf(0.0f); - rsqrtf(1.0f); - scalblnf(0.0f, 1); - scalbnf(0.0f, 1); - signbit(1.0f); - sincosf(0.0f, &fX, &fY); - sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - fY = truncf(0.0f); - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); -} - -__global__ void CompileFloatMathPrecise(int) { - FloatMathPrecise(); -} - -TEST_CASE("Unit_FloatMathPrecise") { - hipLaunchKernelGGL(CompileFloatMathPrecise, 
dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include + +__device__ void FloatMathPrecise() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + fX = ceilf(0.0f); + fX = copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + cospif(0.0f); + cyl_bessel_i0f(0.0f); + cyl_bessel_i1f(0.0f); + erfcf(0.0f); + erfcinvf(2.0f); + erfcxf(0.0f); + erff(0.0f); + erfinvf(1.0f); + exp10f(0.0f); + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fX = fabsf(1.0f); + fdimf(1.0f, 0.0f); + fdividef(0.0f, 1.0f); + fX = floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fX = fmaxf(0.0f, 0.0f); + fX = fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + isfinite(0.0f); + fX = isinf(0.0f); + fX = isnan(0.0f); + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + ldexpf(0.0f, 0); + lgammaf(1.0f); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + modff(0.0f, &fX); + fX = nanf("1"); + fX = nearbyintf(0.0f); + nextafterf(0.0f, 0.0f); + norm3df(1.0f, 0.0f, 0.0f); + norm4df(1.0f, 0.0f, 0.0f, 0.0f); + normcdff(0.0f); + normcdfinvf(1.0f); + fX = 1.0f; + normf(1, &fX); + powf(1.0f, 0.0f); + rcbrtf(1.0f); + remainderf(2.0f, 1.0f); + remquof(1.0f, 2.0f, &iX); + rhypotf(0.0f, 1.0f); + fY = rintf(1.0f); + rnorm3df(0.0f, 0.0f, 1.0f); + rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); + fX = 1.0f; + rnormf(1, &fX); + fY = roundf(0.0f); + rsqrtf(1.0f); + scalblnf(0.0f, 1); + scalbnf(0.0f, 1); + signbit(1.0f); + sincosf(0.0f, &fX, &fY); + sincospif(0.0f, &fX, &fY); + sinf(0.0f); + sinhf(0.0f); + sinpif(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + fY = truncf(0.0f); + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); +} + +__global__ void CompileFloatMathPrecise(int) { + FloatMathPrecise(); +} + +TEST_CASE("Unit_FloatMathPrecise") { + hipLaunchKernelGGL(CompileFloatMathPrecise, 
dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc index 68009651bd..07b9343c1d 100644 --- a/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc +++ b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc @@ -1,68 +1,68 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void integer_intrinsics() { - __brev((unsigned int)10); - __brevll((uint64_t)10); - __byte_perm((unsigned int)0, (unsigned int)0, 0); - __clz(static_cast(10)); - __clzll((int64_t)10); - __ffs(static_cast(10)); - __ffsll((long long)(10)); // NOLINT - __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); - __hadd(static_cast(1), static_cast(3)); - __mul24(static_cast(1), static_cast(2)); - __mul64hi((int64_t)1, (int64_t)2); - __mulhi(static_cast(1), static_cast(2)); - __popc((unsigned int)4); - __popcll((uint64_t)4); - int a = min(static_cast(4), static_cast(5)); - int b = max(static_cast(4), static_cast(5)); - __rhadd(static_cast(1), static_cast(2)); - __sad(static_cast(1), static_cast(2), 0); - __uhadd((unsigned int)1, (unsigned int)3); - __umul24((unsigned int)1, (unsigned int)2); - __umul64hi((uint64_t)1, (uint64_t)2); - __umulhi((unsigned int)1, (unsigned int)2); - __urhadd((unsigned int)1, (unsigned int)2); - __usad((unsigned int)1, (unsigned int)2, 0); - - assert(1); -} - -__global__ void compileIntegerIntrinsics(int) { - integer_intrinsics(); -} - -TEST_CASE("Unit_IntegerIntrinsics") { - hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void integer_intrinsics() { + __brev((unsigned int)10); + __brevll((uint64_t)10); + __byte_perm((unsigned int)0, (unsigned int)0, 0); + __clz(static_cast(10)); + __clzll((int64_t)10); + __ffs(static_cast(10)); + __ffsll((long long)(10)); // NOLINT + __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0); + __hadd(static_cast(1), static_cast(3)); + __mul24(static_cast(1), static_cast(2)); + __mul64hi((int64_t)1, (int64_t)2); + __mulhi(static_cast(1), static_cast(2)); + __popc((unsigned int)4); + __popcll((uint64_t)4); + int a = min(static_cast(4), static_cast(5)); + int b = max(static_cast(4), static_cast(5)); + __rhadd(static_cast(1), static_cast(2)); + __sad(static_cast(1), static_cast(2), 0); + __uhadd((unsigned int)1, (unsigned int)3); + __umul24((unsigned int)1, (unsigned int)2); + __umul64hi((uint64_t)1, (uint64_t)2); + __umulhi((unsigned int)1, (unsigned int)2); + __urhadd((unsigned int)1, (unsigned int)2); + __usad((unsigned int)1, (unsigned int)2, 0); + + assert(1); +} + +__global__ void compileIntegerIntrinsics(int) { + integer_intrinsics(); +} + +TEST_CASE("Unit_IntegerIntrinsics") { + hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc index 927344af36..89521611a0 100644 --- a/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc +++ b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc @@ -1,298 +1,298 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include -#include - -#include -#include - -using namespace std; -//////////////////////////////////////////////////////////////////////////////// -// Auto-Verification Code -//////////////////////////////////////////////////////////////////////////////// - -bool verifyBitwise(...) { - return true; -} - -template{}>::type* = nullptr> -bool verifyBitwise(T* gpuData, int len) { - // Atomic and - T val = 0xff; - for (int i = 0; i < len; ++i) { - // 9th element should be 1 - val &= (2 * i + 7); - } - REQUIRE(val == gpuData[8]); - - // atomic Or - val = 0; - for (int i = 0; i < len; ++i) { - // 10th element should be 0xff - val |= (1 << i); - } - REQUIRE(val == gpuData[9]); - - // atomic Xor - val = 0xff; - - for (int i = 0; i < len; ++i) { - // 11th element should be 0xff - val ^= i; - } - - REQUIRE(val == gpuData[10]); - return true; -} - -bool verifySub(...) 
{ - return true; -} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -bool verifySub(T* gpuData, int len) { - T val = 0; - - for (int i = 0; i < len; ++i) { - val -= 10; - } - - REQUIRE(val == gpuData[1]); - return true; -} - -bool verifyExch(...) { - return true; -} - -template {}>::type* = nullptr> // NOLINT -bool computeExchExch(T* gpuData, int len) { - T val = 0; - - for (T i = 0; i < len; ++i) { - if (i == gpuData[2]) { - return true; - break; - } - } -} - -bool VerifyIntegral(...) { - return true; -} - -template{}>::type* = nullptr> -bool VerifyIntegral(T* gpuData, int len) { - // atomic Max - T val = 0; - for (int i = 0; i < len; ++i) { - // fourth element should be len-1 - val = max(val, static_cast(i)); - } - - REQUIRE(val == gpuData[3]); - - // atomic Min - val = 1 << 8; - - for (int i = 0; i < len; ++i) { - val = min(val, static_cast(i)); - } - - REQUIRE(val == gpuData[4]); - - // atomic Inc - T limit = 17; - val = 0; - - for (int i = 0; i < len; ++i) { - val = (val >= limit) ? 0 : val + 1; - } - - REQUIRE(val == gpuData[5]); - - // atomic Dec - limit = 137; - val = 0; - - for (int i = 0; i < len; ++i) { - val = ((val == 0) || (val > limit)) ? limit : val - 1; - } - - REQUIRE(val == gpuData[6]); - - // atomic CAS - for (int i = 0; i < len; ++i) { - // eighth element should be a member of [0, len) - if (static_cast(i) == gpuData[7]) { - return true; - break; - } - } - return verifyBitwise(gpuData, len) && verifySub(gpuData, len); -} - -template -bool verifyData(T* gpuData, int len) { - T val = 0; - for (int i = 0; i < len; ++i) { - val += 10; - } - - REQUIRE(val == gpuData[0]); - return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len); -} - -__device__ -void testKernelExch(...) 
{} - -template{}>::type* = nullptr> -__device__ -void testKernelExch(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - - // Atomic exchange - atomicExch(&g_odata[2], tid); -} - -__device__ -void testKernelSub(...) {} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -__device__ -void testKernelSub(T* g_odata) { - // Atomic subtraction (final should be 0) - atomicSub(&g_odata[1], 10); -} - -__device__ -void testKernelIntegral(...) {} - -template{}>::type* = nullptr> -__device__ -void testKernelIntegral(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - - // Atomic maximum - atomicMax(&g_odata[3], tid); - - // Atomic minimum - atomicMin(&g_odata[4], tid); - - // Atomic increment (modulo 17+1) - atomicInc((unsigned int*)&g_odata[5], 17); - - // Atomic decrement - atomicDec((unsigned int*)&g_odata[6], 137); - - // Atomic compare-and-swap - atomicCAS(&g_odata[7], tid - 1, tid); - - // Bitwise atomic instructions - - // Atomic AND - atomicAnd(&g_odata[8], 2 * tid + 7); - - // Atomic OR - atomicOr(&g_odata[9], 1 << tid); - - // Atomic XOR - atomicXor(&g_odata[10], tid); - - testKernelSub(g_odata); -} - -template -__global__ void testKernel(T* g_odata) { - // Atomic addition - atomicAdd(&g_odata[0], 10); - testKernelIntegral(g_odata); - testKernelExch(g_odata); -} - -template -static void runTest() { - bool testResult = true; - unsigned int numThreads = 256; - unsigned int numBlocks = 64; - unsigned int numData = 11; - unsigned int memSize = sizeof(T) * numData; - - // allocate mem for the result on host side - T* hOData = reinterpret_cast(malloc(memSize)); - - // initialize the memory - for (unsigned int i = 0; i < numData; i++) { - hOData[i] = 0; - } - // To make the AND and XOR tests generate something other than 0... 
- hOData[8] = hOData[10] = 0xff; - - // allocate device memory for result - T* dOData; - HIP_CHECK(hipMalloc(reinterpret_cast(&dOData), memSize)); - // copy host memory to device to initialize to zero - HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL( - testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData); - - // Copy result from device to host - HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - - // Compute reference solution - REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks)); - - // Cleanup memory - free(hOData); - HIP_CHECK(hipFree(dOData)); -} - -TEST_CASE("Unit_SimpleAtomicsTest") { - SECTION("test for int") { - runTest(); - } - SECTION("test for unsigned int") { - runTest(); - } - SECTION("test for float") { - runTest(); - } - #if HT_AMD - SECTION("test for unsigned long long") { - runTest(); - } - SECTION("test for double") { - runTest(); - } - #endif -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include +#include + +#include +#include + +using namespace std; +//////////////////////////////////////////////////////////////////////////////// +// Auto-Verification Code +//////////////////////////////////////////////////////////////////////////////// + +bool verifyBitwise(...) { + return true; +} + +template{}>::type* = nullptr> +bool verifyBitwise(T* gpuData, int len) { + // Atomic and + T val = 0xff; + for (int i = 0; i < len; ++i) { + // 9th element should be 1 + val &= (2 * i + 7); + } + REQUIRE(val == gpuData[8]); + + // atomic Or + val = 0; + for (int i = 0; i < len; ++i) { + // 10th element should be 0xff + val |= (1 << i); + } + REQUIRE(val == gpuData[9]); + + // atomic Xor + val = 0xff; + + for (int i = 0; i < len; ++i) { + // 11th element should be 0xff + val ^= i; + } + + REQUIRE(val == gpuData[10]); + return true; +} + +bool verifySub(...) { + return true; +} + +template< + typename T, + typename enable_if< + is_same{} || is_same{}>::type* = nullptr> +bool verifySub(T* gpuData, int len) { + T val = 0; + + for (int i = 0; i < len; ++i) { + val -= 10; + } + + REQUIRE(val == gpuData[1]); + return true; +} + +bool verifyExch(...) { + return true; +} + +template {}>::type* = nullptr> // NOLINT +bool computeExchExch(T* gpuData, int len) { + T val = 0; + + for (T i = 0; i < len; ++i) { + if (i == gpuData[2]) { + return true; + break; + } + } +} + +bool VerifyIntegral(...) 
{ + return true; +} + +template{}>::type* = nullptr> +bool VerifyIntegral(T* gpuData, int len) { + // atomic Max + T val = 0; + for (int i = 0; i < len; ++i) { + // fourth element should be len-1 + val = max(val, static_cast(i)); + } + + REQUIRE(val == gpuData[3]); + + // atomic Min + val = 1 << 8; + + for (int i = 0; i < len; ++i) { + val = min(val, static_cast(i)); + } + + REQUIRE(val == gpuData[4]); + + // atomic Inc + T limit = 17; + val = 0; + + for (int i = 0; i < len; ++i) { + val = (val >= limit) ? 0 : val + 1; + } + + REQUIRE(val == gpuData[5]); + + // atomic Dec + limit = 137; + val = 0; + + for (int i = 0; i < len; ++i) { + val = ((val == 0) || (val > limit)) ? limit : val - 1; + } + + REQUIRE(val == gpuData[6]); + + // atomic CAS + for (int i = 0; i < len; ++i) { + // eighth element should be a member of [0, len) + if (static_cast(i) == gpuData[7]) { + return true; + break; + } + } + return verifyBitwise(gpuData, len) && verifySub(gpuData, len); +} + +template +bool verifyData(T* gpuData, int len) { + T val = 0; + for (int i = 0; i < len; ++i) { + val += 10; + } + + REQUIRE(val == gpuData[0]); + return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len); +} + +__device__ +void testKernelExch(...) {} + +template{}>::type* = nullptr> +__device__ +void testKernelExch(T* g_odata) { + // access thread id + const T tid = blockDim.x * blockIdx.x + threadIdx.x; + + // Atomic exchange + atomicExch(&g_odata[2], tid); +} + +__device__ +void testKernelSub(...) {} + +template< + typename T, + typename enable_if< + is_same{} || is_same{}>::type* = nullptr> +__device__ +void testKernelSub(T* g_odata) { + // Atomic subtraction (final should be 0) + atomicSub(&g_odata[1], 10); +} + +__device__ +void testKernelIntegral(...) 
{} + +template{}>::type* = nullptr> +__device__ +void testKernelIntegral(T* g_odata) { + // access thread id + const T tid = blockDim.x * blockIdx.x + threadIdx.x; + + // Atomic maximum + atomicMax(&g_odata[3], tid); + + // Atomic minimum + atomicMin(&g_odata[4], tid); + + // Atomic increment (modulo 17+1) + atomicInc((unsigned int*)&g_odata[5], 17); + + // Atomic decrement + atomicDec((unsigned int*)&g_odata[6], 137); + + // Atomic compare-and-swap + atomicCAS(&g_odata[7], tid - 1, tid); + + // Bitwise atomic instructions + + // Atomic AND + atomicAnd(&g_odata[8], 2 * tid + 7); + + // Atomic OR + atomicOr(&g_odata[9], 1 << tid); + + // Atomic XOR + atomicXor(&g_odata[10], tid); + + testKernelSub(g_odata); +} + +template +__global__ void testKernel(T* g_odata) { + // Atomic addition + atomicAdd(&g_odata[0], 10); + testKernelIntegral(g_odata); + testKernelExch(g_odata); +} + +template +static void runTest() { + bool testResult = true; + unsigned int numThreads = 256; + unsigned int numBlocks = 64; + unsigned int numData = 11; + unsigned int memSize = sizeof(T) * numData; + + // allocate mem for the result on host side + T* hOData = reinterpret_cast(malloc(memSize)); + + // initialize the memory + for (unsigned int i = 0; i < numData; i++) { + hOData[i] = 0; + } + // To make the AND and XOR tests generate something other than 0... 
+ hOData[8] = hOData[10] = 0xff; + + // allocate device memory for result + T* dOData; + HIP_CHECK(hipMalloc(reinterpret_cast(&dOData), memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL( + testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + + // Compute reference solution + REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks)); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); +} + +TEST_CASE("Unit_SimpleAtomicsTest") { + SECTION("test for int") { + runTest(); + } + SECTION("test for unsigned int") { + runTest(); + } + SECTION("test for float") { + runTest(); + } + #if HT_AMD + SECTION("test for unsigned long long") { + runTest(); + } + SECTION("test for double") { + runTest(); + } + #endif +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc index fb8bebdaa5..182500e833 100644 --- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc @@ -1,101 +1,101 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void single_precision_intrinsics() { - float fX, fY; - - __cosf(0.0f); - __exp10f(0.0f); - __expf(0.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fadd_rd(0.0f, 1.0f); -#endif - __fadd_rn(0.0f, 1.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fadd_ru(0.0f, 1.0f); - __fadd_rz(0.0f, 1.0f); - __fdiv_rd(4.0f, 2.0f); -#endif - __fdiv_rn(4.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fdiv_ru(4.0f, 2.0f); - __fdiv_rz(4.0f, 2.0f); -#endif - __fdividef(4.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmaf_rd(1.0f, 2.0f, 3.0f); -#endif - __fmaf_rn(1.0f, 2.0f, 3.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmaf_ru(1.0f, 2.0f, 3.0f); - __fmaf_rz(1.0f, 2.0f, 3.0f); - __fmul_rd(1.0f, 2.0f); -#endif - __fmul_rn(1.0f, 2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fmul_ru(1.0f, 2.0f); - __fmul_rz(1.0f, 2.0f); - __frcp_rd(2.0f); -#endif - __frcp_rn(2.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __frcp_ru(2.0f); - __frcp_rz(2.0f); -#endif - __frsqrt_rn(4.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsqrt_rd(4.0f); -#endif - __fsqrt_rn(4.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsqrt_ru(4.0f); - __fsqrt_rz(4.0f); - __fsub_rd(2.0f, 1.0f); -#endif - __fsub_rn(2.0f, 1.0f); -#if defined OCML_BASIC_ROUNDED_OPERATIONS - __fsub_ru(2.0f, 1.0f); - __fsub_rz(2.0f, 1.0f); -#endif - __log10f(1.0f); - 
__log2f(1.0f); - __logf(1.0f); - __powf(1.0f, 0.0f); - __saturatef(0.1f); - __sincosf(0.0f, &fX, &fY); - __sinf(0.0f); - __tanf(0.0f); -} - -__global__ void compileSinglePrecisionIntrinsics(int) { - single_precision_intrinsics(); -} - -TEST_CASE("Unit_SinglePrecisionIntrinsics") { - hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void single_precision_intrinsics() { + float fX, fY; + + __cosf(0.0f); + __exp10f(0.0f); + __expf(0.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_rd(0.0f, 1.0f); +#endif + __fadd_rn(0.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fadd_ru(0.0f, 1.0f); + __fadd_rz(0.0f, 1.0f); + __fdiv_rd(4.0f, 2.0f); +#endif + __fdiv_rn(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fdiv_ru(4.0f, 2.0f); + __fdiv_rz(4.0f, 2.0f); +#endif + __fdividef(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_rd(1.0f, 2.0f, 3.0f); +#endif + __fmaf_rn(1.0f, 2.0f, 3.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmaf_ru(1.0f, 2.0f, 3.0f); + __fmaf_rz(1.0f, 2.0f, 3.0f); + __fmul_rd(1.0f, 2.0f); +#endif + __fmul_rn(1.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fmul_ru(1.0f, 2.0f); + __fmul_rz(1.0f, 2.0f); + __frcp_rd(2.0f); +#endif + __frcp_rn(2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __frcp_ru(2.0f); + __frcp_rz(2.0f); +#endif + __frsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_rd(4.0f); +#endif + __fsqrt_rn(4.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsqrt_ru(4.0f); + __fsqrt_rz(4.0f); + __fsub_rd(2.0f, 1.0f); +#endif + __fsub_rn(2.0f, 1.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS + __fsub_ru(2.0f, 1.0f); + __fsub_rz(2.0f, 1.0f); +#endif + __log10f(1.0f); + __log2f(1.0f); + __logf(1.0f); + __powf(1.0f, 0.0f); + __saturatef(0.1f); + __sincosf(0.0f, &fX, &fY); + __sinf(0.0f); + __tanf(0.0f); +} + +__global__ void compileSinglePrecisionIntrinsics(int) { + single_precision_intrinsics(); +} + +TEST_CASE("Unit_SinglePrecisionIntrinsics") { + hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc 
b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc index e7bbdc180e..bc63b88c13 100644 --- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc @@ -1,123 +1,123 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - - -#include -#include -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__device__ void single_precision_math_functions() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - ceilf(0.0f); - copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - cospif(0.0f); - erfcf(0.0f); - erfcinvf(2.0f); - erfcxf(0.0f); - erff(0.0f); - erfinvf(1.0f); - exp10f(0.0f); - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fabsf(1.0f); - fdimf(1.0f, 0.0f); - fdividef(0.0f, 1.0f); - floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fmaxf(0.0f, 0.0f); - fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - isfinite(0.0f); - isinf(0.0f); - isnan(0.0f); - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - ldexpf(0.0f, 0); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - nanf("1"); - nearbyintf(0.0f); - norm3df(1.0f, 0.0f, 0.0f); - norm4df(1.0f, 0.0f, 0.0f, 0.0f); - normcdff(0.0f); - normcdfinvf(1.0f); - fX = 1.0f; - normf(1, &fX); - powf(1.0f, 0.0f); - remainderf(2.0f, 1.0f); - rhypotf(0.0f, 1.0f); - rintf(1.0f); - rnorm3df(0.0f, 0.0f, 1.0f); - rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); - fX = 1.0f; - rnormf(1, &fX); - roundf(0.0f); - rsqrtf(1.0f); - signbit(1.0f); - sincosf(0.0f, &fX, &fY); - sincospif(0.0f, &fX, &fY); - sinf(0.0f); - sinhf(0.0f); - sinpif(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - truncf(0.0f); - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); -} - -__global__ void compileSinglePrecisionMathOnDevice(int) { - single_precision_math_functions(); -} - -TEST_CASE("Unit_SinglePrecisionMathDevice") { - hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1), - dim3(1, 1, 1), 0, 0, 1); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + + +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__device__ void single_precision_math_functions() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + ceilf(0.0f); + copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + cospif(0.0f); + erfcf(0.0f); + erfcinvf(2.0f); + erfcxf(0.0f); + erff(0.0f); + erfinvf(1.0f); + exp10f(0.0f); + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fabsf(1.0f); + fdimf(1.0f, 0.0f); + fdividef(0.0f, 1.0f); + floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fmaxf(0.0f, 0.0f); + fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + isfinite(0.0f); + isinf(0.0f); + isnan(0.0f); + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + ldexpf(0.0f, 0); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + nanf("1"); + nearbyintf(0.0f); + norm3df(1.0f, 0.0f, 0.0f); + norm4df(1.0f, 0.0f, 0.0f, 0.0f); + normcdff(0.0f); + normcdfinvf(1.0f); + fX = 1.0f; + normf(1, &fX); + powf(1.0f, 0.0f); + remainderf(2.0f, 1.0f); + rhypotf(0.0f, 1.0f); + rintf(1.0f); + rnorm3df(0.0f, 0.0f, 1.0f); + rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); + fX = 1.0f; + rnormf(1, &fX); + roundf(0.0f); + rsqrtf(1.0f); + signbit(1.0f); + sincosf(0.0f, &fX, &fY); + sincospif(0.0f, &fX, &fY); + sinf(0.0f); + sinhf(0.0f); + sinpif(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + truncf(0.0f); + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); +} + +__global__ void compileSinglePrecisionMathOnDevice(int) { + single_precision_math_functions(); +} + +TEST_CASE("Unit_SinglePrecisionMathDevice") { + hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1), + dim3(1, 1, 1), 0, 0, 1); +} diff --git 
a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc index 85407560cb..bceaeadb27 100644 --- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc +++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc @@ -1,113 +1,113 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include - -#pragma GCC diagnostic ignored "-Wall" -#pragma clang diagnostic ignored "-Wunused-variable" - -__host__ static void single_precision_math_functions() { - int iX; - float fX, fY; - - acosf(1.0f); - acoshf(1.0f); - asinf(0.0f); - asinhf(0.0f); - atan2f(0.0f, 1.0f); - atanf(0.0f); - atanhf(0.0f); - cbrtf(0.0f); - ceilf(0.0f); - copysignf(1.0f, -2.0f); - cosf(0.0f); - coshf(0.0f); - erfcf(0.0f); - erff(0.0f); - #ifdef __unix__ - exp10f(0.0f); - #endif - exp2f(0.0f); - expf(0.0f); - expm1f(0.0f); - fabsf(1.0f); - fdimf(1.0f, 0.0f); - floorf(0.0f); - fmaf(1.0f, 2.0f, 3.0f); - fmaxf(0.0f, 0.0f); - fminf(0.0f, 0.0f); - fmodf(0.0f, 1.0f); - frexpf(0.0f, &iX); - hypotf(1.0f, 0.0f); - ilogbf(1.0f); - std::isfinite(0.0f); - std::isinf(0.0f); - std::isnan(0.0f); - #ifdef __unix__ - j0f(0.0f); - j1f(0.0f); - jnf(-1.0f, 1.0f); - #endif - ldexpf(0.0f, 0); - lgammaf(1.0f); - llrintf(0.0f); - llroundf(0.0f); - log10f(1.0f); - log1pf(-1.0f); - log2f(1.0f); - logbf(1.0f); - logf(1.0f); - lrintf(0.0f); - lroundf(0.0f); - modff(0.0f, &fX); - nanf("1"); - nearbyintf(0.0f); - powf(1.0f, 0.0f); - remainderf(2.0f, 1.0f); - remquof(1.0f, 2.0f, &iX); - rintf(1.0f); -#if HT_AMD - fX = 1.0f; -#endif - roundf(0.0f); - /// rsqrtf(1.0f); - scalblnf(0.0f, 1); - scalbnf(0.0f, 1); - std::signbit(1.0f); - #ifdef __unix__ - sincosf(0.0f, &fX, &fY); - #endif - sinf(0.0f); - sinhf(0.0f); - sqrtf(0.0f); - tanf(0.0f); - tanhf(0.0f); - tgammaf(2.0f); - truncf(0.0f); - #ifdef __unix__ - y0f(1.0f); - y1f(1.0f); - ynf(1, 1.0f); - #endif -} - -TEST_CASE("Unit_SinglePrecisionMathHost") { - single_precision_math_functions(); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +#pragma GCC diagnostic ignored "-Wall" +#pragma clang diagnostic ignored "-Wunused-variable" + +__host__ static void single_precision_math_functions() { + int iX; + float fX, fY; + + acosf(1.0f); + acoshf(1.0f); + asinf(0.0f); + asinhf(0.0f); + atan2f(0.0f, 1.0f); + atanf(0.0f); + atanhf(0.0f); + cbrtf(0.0f); + ceilf(0.0f); + copysignf(1.0f, -2.0f); + cosf(0.0f); + coshf(0.0f); + erfcf(0.0f); + erff(0.0f); + #ifdef __unix__ + exp10f(0.0f); + #endif + exp2f(0.0f); + expf(0.0f); + expm1f(0.0f); + fabsf(1.0f); + fdimf(1.0f, 0.0f); + floorf(0.0f); + fmaf(1.0f, 2.0f, 3.0f); + fmaxf(0.0f, 0.0f); + fminf(0.0f, 0.0f); + fmodf(0.0f, 1.0f); + frexpf(0.0f, &iX); + hypotf(1.0f, 0.0f); + ilogbf(1.0f); + std::isfinite(0.0f); + std::isinf(0.0f); + std::isnan(0.0f); + #ifdef __unix__ + j0f(0.0f); + j1f(0.0f); + jnf(-1.0f, 1.0f); + #endif + ldexpf(0.0f, 0); + lgammaf(1.0f); + llrintf(0.0f); + llroundf(0.0f); + log10f(1.0f); + log1pf(-1.0f); + log2f(1.0f); + logbf(1.0f); + logf(1.0f); + lrintf(0.0f); + lroundf(0.0f); + modff(0.0f, &fX); + nanf("1"); + nearbyintf(0.0f); + powf(1.0f, 0.0f); + remainderf(2.0f, 1.0f); + remquof(1.0f, 2.0f, &iX); + rintf(1.0f); +#if HT_AMD + fX = 1.0f; +#endif + roundf(0.0f); + /// rsqrtf(1.0f); + scalblnf(0.0f, 1); + scalbnf(0.0f, 1); + std::signbit(1.0f); + #ifdef __unix__ + sincosf(0.0f, &fX, &fY); + #endif + sinf(0.0f); + sinhf(0.0f); + sqrtf(0.0f); + tanf(0.0f); + tanhf(0.0f); + tgammaf(2.0f); + truncf(0.0f); + #ifdef __unix__ + y0f(1.0f); + y1f(1.0f); + ynf(1, 1.0f); + #endif +} + +TEST_CASE("Unit_SinglePrecisionMathHost") { + single_precision_math_functions(); +} diff --git a/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc index c0bafae007..1b8db8658c 100644 --- a/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc +++ b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc @@ -1,153 +1,153 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, 
Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include - -#pragma clang diagnostic ignored "-Wunused-variable" -// Tolerance for error -const double tolerance = 1e-6; - -#define LEN 64 - -#define ALL_FUN \ - OP(add) \ - OP(sub) \ - OP(mul) \ - OP(div) \ - OP(abs) \ - OP(arg) \ - OP(sin) \ - OP(cos) - -#define OP(x) CK_##x, -enum CalcKind { - ALL_FUN -}; -#undef OP - -#define OP(x) case CK_##x: return #x; -std::string getName(enum CalcKind CK) { - switch (CK) { - ALL_FUN - } - return ""; // To prevent compile warning -} -#undef OP - -// Calculates function. -// If the function has one argument, B is ignored. -// If the function returns real number, converts it to a complex number. 
-#define ONE_ARG(func) \ - case CK_##func: \ - return std::complex(func(A)); - -template -__device__ __host__ std::complex calc(std::complex A, - std::complex B, - enum CalcKind CK) { - switch (CK) { - case CK_add: - return A + B; - case CK_sub: - return A - B; - case CK_mul: - return A * B; - case CK_div: - return A / B; - - ONE_ARG(abs) - ONE_ARG(arg) - ONE_ARG(sin) - ONE_ARG(cos) - } - return A; // To prevent compile warning -} - -template -__global__ void kernel(std::complex* A, - std::complex* B, std::complex* C, - enum CalcKind CK) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - C[tx] = calc(A[tx], B[tx], CK); -} - -template -void test() { - typedef std::complex ComplexT; - - ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D; - A = new ComplexT[LEN]; - B = new ComplexT[LEN]; - C = new ComplexT[LEN]; - D = new ComplexT[LEN]; - HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), sizeof(ComplexT)*LEN)); - HIP_CHECK(hipMalloc(reinterpret_cast(&Bd), sizeof(ComplexT)*LEN)); - HIP_CHECK(hipMalloc(reinterpret_cast(&Cd), sizeof(ComplexT)*LEN)); - - for (uint32_t i = 0; i < LEN; i++) { - A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f); - B[i] = A[i]; - C[i] = A[i]; - } - HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice)); - - // Run kernel for a calculation kind and verify by comparing with host - // calculation result. Returns false if fails. 
- auto test_fun = [&](enum CalcKind CK) { - hipLaunchKernelGGL(kernel, dim3(1), dim3(LEN), 0, 0, - Ad, Bd, Cd, CK); - HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost)); - bool pass = true; - for (int i = 0; i < LEN; i++) { - ComplexT Expected = calc(A[i], B[i], CK); - FloatT error = abs(C[i] - Expected); - if (abs(Expected) > tolerance) - error /= abs(Expected); - pass &= error < tolerance; - } - return pass; - }; - -#define OP(x) assert(test_fun(CK_##x)); - ALL_FUN -#undef OP - - HIP_CHECK(hipFree(Ad)); - HIP_CHECK(hipFree(Bd)); - HIP_CHECK(hipFree(Cd)); - delete[] A; - delete[] B; - delete[] C; - delete[] D; -} - -#if HT_AMD -TEST_CASE("Unit_StdComplex") { - SECTION("Test run with float") { - test(); - } - SECTION("Test run with double") { - test(); - } -} -#endif +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +// Tolerance for error +const double tolerance = 1e-6; + +#define LEN 64 + +#define ALL_FUN \ + OP(add) \ + OP(sub) \ + OP(mul) \ + OP(div) \ + OP(abs) \ + OP(arg) \ + OP(sin) \ + OP(cos) + +#define OP(x) CK_##x, +enum CalcKind { + ALL_FUN +}; +#undef OP + +#define OP(x) case CK_##x: return #x; +std::string getName(enum CalcKind CK) { + switch (CK) { + ALL_FUN + } + return ""; // To prevent compile warning +} +#undef OP + +// Calculates function. +// If the function has one argument, B is ignored. +// If the function returns real number, converts it to a complex number. +#define ONE_ARG(func) \ + case CK_##func: \ + return std::complex(func(A)); + +template +__device__ __host__ std::complex calc(std::complex A, + std::complex B, + enum CalcKind CK) { + switch (CK) { + case CK_add: + return A + B; + case CK_sub: + return A - B; + case CK_mul: + return A * B; + case CK_div: + return A / B; + + ONE_ARG(abs) + ONE_ARG(arg) + ONE_ARG(sin) + ONE_ARG(cos) + } + return A; // To prevent compile warning +} + +template +__global__ void kernel(std::complex* A, + std::complex* B, std::complex* C, + enum CalcKind CK) { + int tx = threadIdx.x + blockIdx.x * blockDim.x; + C[tx] = calc(A[tx], B[tx], CK); +} + +template +void test() { + typedef std::complex ComplexT; + + ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D; + A = new ComplexT[LEN]; + B = new ComplexT[LEN]; + C = new ComplexT[LEN]; + D = new ComplexT[LEN]; + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), sizeof(ComplexT)*LEN)); + HIP_CHECK(hipMalloc(reinterpret_cast(&Bd), sizeof(ComplexT)*LEN)); + HIP_CHECK(hipMalloc(reinterpret_cast(&Cd), sizeof(ComplexT)*LEN)); + + for (uint32_t i = 0; i < LEN; i++) { + A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f); + B[i] = A[i]; + C[i] = A[i]; + } + HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, 
hipMemcpyHostToDevice)); + + // Run kernel for a calculation kind and verify by comparing with host + // calculation result. Returns false if fails. + auto test_fun = [&](enum CalcKind CK) { + hipLaunchKernelGGL(kernel, dim3(1), dim3(LEN), 0, 0, + Ad, Bd, Cd, CK); + HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost)); + bool pass = true; + for (int i = 0; i < LEN; i++) { + ComplexT Expected = calc(A[i], B[i], CK); + FloatT error = abs(C[i] - Expected); + if (abs(Expected) > tolerance) + error /= abs(Expected); + pass &= error < tolerance; + } + return pass; + }; + +#define OP(x) assert(test_fun(CK_##x)); + ALL_FUN +#undef OP + + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipFree(Cd)); + delete[] A; + delete[] B; + delete[] C; + delete[] D; +} + +#if HT_AMD +TEST_CASE("Unit_StdComplex") { + SECTION("Test run with float") { + test(); + } + SECTION("Test run with double") { + test(); + } +} +#endif diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc index c728d85b18..3719a3560c 100644 --- a/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc +++ b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc @@ -1,220 +1,220 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/** -Testcase Scenarios : - (TestCase 1):: - 1) Execute atomicAdd in multi threaded scenario by diverging the data across - multiple threads and validate the output at the end of all operations. - 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data - across multiple threads and validate the output at the end of all operations. - (TestCase 2):: - 3) Execute atomicAdd API and validate the result. - 4) Execute atomicAddNoRet API and validate the result. - (TestCase 3):: - 5) atomicadd/NoRet negative scenarios (TBD). 
-*/ - -#include -#include -#include -/* - * Defines initial and increment values - */ -#define INCREMENT_VALUE 10 -#define INT_INITIAL_VALUE 10 -#define FLOAT_INITIAL_VALUE 10.50 -#define DOUBLE_INITIAL_VALUE 200.12 -#define LONG_INITIAL_VALUE 10000 -#define UNSIGNED_INITIAL_VALUE 20 - -#if HT_NVIDIA -// atomicAddNoRet is unavailable in cuda -template -__device__ void atomicAddNoRet(T* x, int y) { - atomicAdd(x, static_cast(y)); -} -#endif - -bool p_atomicNoRet = false; - -template -__global__ void atomicnoret_manywaves(T* C_d) { - atomicAddNoRet(C_d, INCREMENT_VALUE); -} - -template -__global__ void atomic_manywaves(T* C_d) { - atomicAdd(C_d, INCREMENT_VALUE); -} - -template -__global__ void atomicnoret_simple(T* C_d) { - atomicAddNoRet(C_d, INCREMENT_VALUE); -} - -template -__global__ void atomic_simple(T* C_d) { - atomicAdd(C_d, INCREMENT_VALUE); -} - -template -bool atomictest_manywaves(const T& initial_val) { - unsigned int ThreadsperBlock = 10; - unsigned int numBlocks = 1; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIP_CHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - REQUIRE(hOData[0] == initial_val+ - static_cast(INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); - - // Cleanup memory - free(hOData); - HIP_CHECK(hipFree(dOData)); - - return true; -} - -template -bool atomictestnoret_manywaves(const T& initial_val) { - unsigned int ThreadsperBlock = 10; - unsigned int numBlocks = 1; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIP_CHECK(hipMalloc(&dOData, memSize)); - // copy host 
memory to device to initialize to zero - HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - REQUIRE(hOData[0] == initial_val+ - (INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); - - // Cleanup memory - free(hOData); - HIP_CHECK(hipFree(dOData)); - - return true; -} - -template -bool atomictest_simple(const T& initial_val) { - unsigned int ThreadsperBlock = 1; - unsigned int numBlocks = 1; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIP_CHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomic_simple, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); - - // Cleanup memory - free(hOData); - HIP_CHECK(hipFree(dOData)); - - return true; -} - -template -bool atomictestnoret_simple(const T& initial_val) { - unsigned int ThreadsperBlock = 1; - unsigned int numBlocks = 1; - T memSize = sizeof(T); - T* hOData = reinterpret_cast(malloc(memSize)); - *hOData = initial_val; - T* dOData; - HIP_CHECK(hipMalloc(&dOData, memSize)); - // copy host memory to device to initialize to zero - HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); - - // execute the kernel - hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks), - dim3(ThreadsperBlock), 0, 0, dOData); - - // Copy result from device to host - HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); - REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); 
- - // Cleanup memory - free(hOData); - HIP_CHECK(hipFree(dOData)); - - return true; -} - -TEST_CASE("Unit_hipTestAtomicAdd") { - bool TestPassed = true; - - SECTION("atomic tests with many waves") { - REQUIRE(TestPassed == atomictest_manywaves(INT_INITIAL_VALUE)); - REQUIRE(TestPassed == - atomictest_manywaves(UNSIGNED_INITIAL_VALUE)); - REQUIRE(TestPassed == atomictest_manywaves(FLOAT_INITIAL_VALUE)); - #if HT_AMD - REQUIRE(TestPassed == - atomictest_manywaves(LONG_INITIAL_VALUE)); - REQUIRE(TestPassed == - atomictest_manywaves(DOUBLE_INITIAL_VALUE)); - #endif - } - SECTION("atomic tests with many waves and no return") { - REQUIRE(TestPassed == - atomictestnoret_manywaves(FLOAT_INITIAL_VALUE)); - } - SECTION("simple atomic tests") { - REQUIRE(TestPassed == atomictest_simple(INT_INITIAL_VALUE)); - REQUIRE(TestPassed == - atomictest_simple(UNSIGNED_INITIAL_VALUE)); - REQUIRE(TestPassed == atomictest_simple(FLOAT_INITIAL_VALUE)); - #if HT_AMD - REQUIRE(TestPassed == - atomictest_simple(LONG_INITIAL_VALUE)); - REQUIRE(TestPassed == atomictest_simple(DOUBLE_INITIAL_VALUE)); - #endif - } - SECTION("Simple atomic test with no return") { - REQUIRE(TestPassed == atomictestnoret_simple(FLOAT_INITIAL_VALUE)); - } -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : + (TestCase 1):: + 1) Execute atomicAdd in multi threaded scenario by diverging the data across + multiple threads and validate the output at the end of all operations. + 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data + across multiple threads and validate the output at the end of all operations. + (TestCase 2):: + 3) Execute atomicAdd API and validate the result. + 4) Execute atomicAddNoRet API and validate the result. + (TestCase 3):: + 5) atomicadd/NoRet negative scenarios (TBD). 
+*/ + +#include +#include +#include +/* + * Defines initial and increment values + */ +#define INCREMENT_VALUE 10 +#define INT_INITIAL_VALUE 10 +#define FLOAT_INITIAL_VALUE 10.50 +#define DOUBLE_INITIAL_VALUE 200.12 +#define LONG_INITIAL_VALUE 10000 +#define UNSIGNED_INITIAL_VALUE 20 + +#if HT_NVIDIA +// atomicAddNoRet is unavailable in cuda +template +__device__ void atomicAddNoRet(T* x, int y) { + atomicAdd(x, static_cast(y)); +} +#endif + +bool p_atomicNoRet = false; + +template +__global__ void atomicnoret_manywaves(T* C_d) { + atomicAddNoRet(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomic_manywaves(T* C_d) { + atomicAdd(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomicnoret_simple(T* C_d) { + atomicAddNoRet(C_d, INCREMENT_VALUE); +} + +template +__global__ void atomic_simple(T* C_d) { + atomicAdd(C_d, INCREMENT_VALUE); +} + +template +bool atomictest_manywaves(const T& initial_val) { + unsigned int ThreadsperBlock = 10; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+ + static_cast(INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictestnoret_manywaves(const T& initial_val) { + unsigned int ThreadsperBlock = 10; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host 
memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+ + (INCREMENT_VALUE*(ThreadsperBlock*numBlocks))); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictest_simple(const T& initial_val) { + unsigned int ThreadsperBlock = 1; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomic_simple, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); + + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +template +bool atomictestnoret_simple(const T& initial_val) { + unsigned int ThreadsperBlock = 1; + unsigned int numBlocks = 1; + T memSize = sizeof(T); + T* hOData = reinterpret_cast(malloc(memSize)); + *hOData = initial_val; + T* dOData; + HIP_CHECK(hipMalloc(&dOData, memSize)); + // copy host memory to device to initialize to zero + HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice)); + + // execute the kernel + hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks), + dim3(ThreadsperBlock), 0, 0, dOData); + + // Copy result from device to host + HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost)); + REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE); 
+ + // Cleanup memory + free(hOData); + HIP_CHECK(hipFree(dOData)); + + return true; +} + +TEST_CASE("Unit_hipTestAtomicAdd") { + bool TestPassed = true; + + SECTION("atomic tests with many waves") { + REQUIRE(TestPassed == atomictest_manywaves(INT_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_manywaves(UNSIGNED_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_manywaves(FLOAT_INITIAL_VALUE)); + #if HT_AMD + REQUIRE(TestPassed == + atomictest_manywaves(LONG_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_manywaves(DOUBLE_INITIAL_VALUE)); + #endif + } + SECTION("atomic tests with many waves and no return") { + REQUIRE(TestPassed == + atomictestnoret_manywaves(FLOAT_INITIAL_VALUE)); + } + SECTION("simple atomic tests") { + REQUIRE(TestPassed == atomictest_simple(INT_INITIAL_VALUE)); + REQUIRE(TestPassed == + atomictest_simple(UNSIGNED_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_simple(FLOAT_INITIAL_VALUE)); + #if HT_AMD + REQUIRE(TestPassed == + atomictest_simple(LONG_INITIAL_VALUE)); + REQUIRE(TestPassed == atomictest_simple(DOUBLE_INITIAL_VALUE)); + #endif + } + SECTION("Simple atomic test with no return") { + REQUIRE(TestPassed == atomictestnoret_simple(FLOAT_INITIAL_VALUE)); + } +} diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc index 26dd29c76c..5f7ad45008 100644 --- a/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc +++ b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc @@ -1,51 +1,51 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - -#define LEN 512 -#define SIZE (LEN * sizeof(int64_t)) - -static __global__ void kernel1(int64_t* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + clock64() + __clock() + __clock64(); -} - -static __global__ void kernel2(int64_t* Ad) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid]; -} - -TEST_CASE("Unit_hipTestClock") { - int64_t *A, *Ad; - A = new int64_t[LEN]; - for (unsigned i = 0; i < LEN; i++) { - A[i] = 0; - } - HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), SIZE)); - HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); - HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); - for (unsigned i = 0; i < LEN; i++) { - assert(0 != A[i]); - } -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#define LEN 512 +#define SIZE (LEN * sizeof(int64_t)) + +static __global__ void kernel1(int64_t* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = clock() + clock64() + __clock() + __clock64(); +} + +static __global__ void kernel2(int64_t* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid]; +} + +TEST_CASE("Unit_hipTestClock") { + int64_t *A, *Ad; + A = new int64_t[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), SIZE)); + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + for (unsigned i = 0; i < LEN; i++) { + assert(0 != A[i]); + } +} diff --git a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc index bae42209ef..10a0797f87 100644 --- a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc +++ b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc @@ -1,88 +1,88 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -#include "error_handling_common.hh" - -/** - * @addtogroup hipDrvGetErrorName hipDrvGetErrorName - * @{ - * @ingroup ErrorTest - * `hipDrvGetErrorName(hipError_t hip_error)` - - * Return hip error as text string form. - */ - -/** - * Test Description - * ------------------------ - * - Validate that the correct string is returned for each supported - * device error enumeration. 
- * Test source - * ------------------------ - * - unit/errorHandling/hipDrvGetErrorName.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.4 - */ -TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") { - const char* error_string = nullptr; - const auto enumerator = - GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); - INFO("Error: " << enumerator); - - HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string)); - - REQUIRE(error_string != nullptr); - REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0); -} - -/** - * Test Description - * ------------------------ - * - Validate handling of invalid arguments: - * -# When error enumerator is invalid (-1) - * - AMD expected output: return "hipErrorUnknown" - * - NVIDIA expected output: return "cudaErrorUnknown" - * -# When nullptr is passed as store location - * - Expected output: return "hipErrorInvalidValue" - * Test source - * ------------------------ - * - unit/errorHandling/hipDrvGetErrorName.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.4 - */ -TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") { - const char* error_string = nullptr; - SECTION("pass unknown value to hipError") { - HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast(-1), &error_string)), - hipErrorInvalidValue); - } -#if HT_AMD // segfaults on NVIDIA - SECTION("pass nullptr to error string") { - HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue); - } -#endif -} - -/** -* End doxygen group ErrorTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "error_handling_common.hh" + +/** + * @addtogroup hipDrvGetErrorName hipDrvGetErrorName + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorName(hipError_t hip_error)` - + * Return hip error as text string form. + */ + +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. 
+ * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorName.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") { + const char* error_string = nullptr; + const auto enumerator = + GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); + + HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string)); + + REQUIRE(error_string != nullptr); + REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0); +} + +/** + * Test Description + * ------------------------ + * - Validate handling of invalid arguments: + * -# When error enumerator is invalid (-1) + * - AMD expected output: return "hipErrorUnknown" + * - NVIDIA expected output: return "cudaErrorUnknown" + * -# When nullptr is passed as store location + * - Expected output: return "hipErrorInvalidValue" + * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorName.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") { + const char* error_string = nullptr; + SECTION("pass unknown value to hipError") { + HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast(-1), &error_string)), + hipErrorInvalidValue); + } +#if HT_AMD // segfaults on NVIDIA + SECTION("pass nullptr to error string") { + HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue); + } +#endif +} + +/** +* End doxygen group ErrorTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc index e81ced683a..04f1a833a1 100644 --- a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc +++ b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc @@ -1,88 +1,88 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -#include "error_handling_common.hh" - -/** - * @addtogroup hipDrvGetErrorString hipDrvGetErrorString - * @{ - * @ingroup ErrorTest - * `hipDrvGetErrorString(hipError_t hipError)` - - * Return handy text string message to explain the error which occurred. - */ - -/** - * Test Description - * ------------------------ - * - Validate that the correct string is returned for each supported - * device error enumeration. 
- * Test source - * ------------------------ - * - unit/errorHandling/hipDrvGetErrorString.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.4 - */ -TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") { - const char* error_string = nullptr; - const auto enumerator = - GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); - INFO("Error: " << enumerator); - - HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string)); - - REQUIRE(error_string != nullptr); - REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0); -} - -/** - * Test Description - * ------------------------ - * - Validate handling of invalid arguments: - * -# When error enumerator is invalid (-1) - * - Expected output: return "hipErrorInvalidValue" - * -# When nullptr is passed as store location - * - Expected output: return "hipErrorInvalidValue" - * Test source - * ------------------------ - * - unit/errorHandling/hipDrvGetErrorString.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.4 - */ -TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") { - const char* error_string = nullptr; - SECTION("pass unknown value to hipError") { - HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(-1), &error_string)), - hipErrorInvalidValue); - } -#if HT_AMD // segfaults on NVIDIA - SECTION("pass nullptr to error string") { - HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(0), nullptr)), - hipErrorInvalidValue); - } -#endif -} - -/** -* End doxygen group ErrorTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "error_handling_common.hh" + +/** + * @addtogroup hipDrvGetErrorString hipDrvGetErrorString + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorString(hipError_t hipError)` - + * Return handy text string message to explain the error which occurred. + */ + +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. 
+ * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorString.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") { + const char* error_string = nullptr; + const auto enumerator = + GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); + + HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string)); + + REQUIRE(error_string != nullptr); + REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0); +} + +/** + * Test Description + * ------------------------ + * - Validate handling of invalid arguments: + * -# When error enumerator is invalid (-1) + * - Expected output: return "hipErrorInvalidValue" + * -# When nullptr is passed as store location + * - Expected output: return "hipErrorInvalidValue" + * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorString.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") { + const char* error_string = nullptr; + SECTION("pass unknown value to hipError") { + HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(-1), &error_string)), + hipErrorInvalidValue); + } +#if HT_AMD // segfaults on NVIDIA + SECTION("pass nullptr to error string") { + HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(0), nullptr)), + hipErrorInvalidValue); + } +#endif +} + +/** +* End doxygen group ErrorTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/g++/CMakeLists.txt b/projects/hip-tests/catch/unit/g++/CMakeLists.txt index 5adf876616..ac33b11b9b 100644 --- a/projects/hip-tests/catch/unit/g++/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/g++/CMakeLists.txt @@ -1,19 +1,19 @@ -# AMD specific test -if(HIP_PLATFORM MATCHES "amd") -if(UNIX) -set(TEST_SRC - hipMalloc.cc -) -# Creating Custom object file -add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o) -add_library(malloc_gpp OBJECT IMPORTED) -set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o") - -hip_add_exe_to_target(NAME gppTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS malloc_gpp) - -add_dependencies(gppTests malloc_custom) -endif() -endif() +# AMD specific test +if(HIP_PLATFORM MATCHES "amd") +if(UNIX) +set(TEST_SRC + hipMalloc.cc +) +# Creating Custom object file +add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o) +add_library(malloc_gpp OBJECT IMPORTED) +set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o") + +hip_add_exe_to_target(NAME gppTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS malloc_gpp) + +add_dependencies(gppTests malloc_custom) +endif() +endif() diff --git a/projects/hip-tests/catch/unit/g++/hipMalloc.cc b/projects/hip-tests/catch/unit/g++/hipMalloc.cc index 37a66e22c8..e0cd60306b 100644 --- a/projects/hip-tests/catch/unit/g++/hipMalloc.cc +++ b/projects/hip-tests/catch/unit/g++/hipMalloc.cc @@ -1,54 +1,54 @@ -/* - * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
- * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - -#include - -#include "hipMalloc.h" -/** - * @addtogroup hipMalloc hipMalloc - * @{ - * @ingroup MemoryTest - * `hipError_t hipMalloc(void** ptr, size_t size)` - - * Allocate memory on the default accelerator. - * @} - */ - -/** - * Test Description - * ------------------------ - * - Allocate memory by using hipMalloc API and verify hipSuccess is returned. - - * Test source - * ------------------------ - * - catch/unit/g++/hipMalloc.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - */ - -TEST_CASE("Unit_hipMalloc_gpptest") { - printf("calling cpp function from here\n"); - int result = MallocFunc(); - REQUIRE(result == 1); -} - -/** -* End doxygen group MemoryTest. -* @} -*/ +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +#include + +#include "hipMalloc.h" +/** + * @addtogroup hipMalloc hipMalloc + * @{ + * @ingroup MemoryTest + * `hipError_t hipMalloc(void** ptr, size_t size)` - + * Allocate memory on the default accelerator. + * @} + */ + +/** + * Test Description + * ------------------------ + * - Allocate memory by using hipMalloc API and verify hipSuccess is returned. + + * Test source + * ------------------------ + * - catch/unit/g++/hipMalloc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +TEST_CASE("Unit_hipMalloc_gpptest") { + printf("calling cpp function from here\n"); + int result = MallocFunc(); + REQUIRE(result == 1); +} + +/** +* End doxygen group MemoryTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/g++/hipMalloc.h b/projects/hip-tests/catch/unit/g++/hipMalloc.h index 458e489c5a..9dee5043ec 100644 --- a/projects/hip-tests/catch/unit/g++/hipMalloc.h +++ b/projects/hip-tests/catch/unit/g++/hipMalloc.h @@ -1,22 +1,22 @@ -/* - * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - -#include - +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * */ + +#include + extern int MallocFunc(); \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/gcc/CMakeLists.txt b/projects/hip-tests/catch/unit/gcc/CMakeLists.txt index 90e9c2db65..5c8385cee8 100644 --- a/projects/hip-tests/catch/unit/gcc/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/gcc/CMakeLists.txt @@ -1,28 +1,28 @@ -# Common Tests - Test independent of all platforms -if(HIP_PLATFORM MATCHES "amd") -if(UNIX) -set(TEST_SRC - gccTest.cc - gpu.cpp -) -# Creating Custom object file -add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o) -add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o) -add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o) -add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o) - -add_library(LaunchKernel_lib OBJECT IMPORTED) -add_library(hipMalloc_lib OBJECT IMPORTED) - -set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o") -set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o") - - -hip_add_exe_to_target(NAME gccTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS LaunchKernel_lib hipMalloc_lib) - -add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom) -endif() -endif() +# Common Tests - Test independent of all platforms +if(HIP_PLATFORM MATCHES "amd") +if(UNIX) +set(TEST_SRC + gccTest.cc + gpu.cpp +) +# Creating Custom object file +add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o) +add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o) +add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include 
-D__HIP_PLATFORM_AMD__ -o hipMalloc.o) +add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o) + +add_library(LaunchKernel_lib OBJECT IMPORTED) +add_library(hipMalloc_lib OBJECT IMPORTED) + +set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o") +set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o") + + +hip_add_exe_to_target(NAME gccTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS LaunchKernel_lib hipMalloc_lib) + +add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom) +endif() +endif() diff --git a/projects/hip-tests/catch/unit/gcc/gccTest.cc b/projects/hip-tests/catch/unit/gcc/gccTest.cc index 6332540682..f520419c75 100644 --- a/projects/hip-tests/catch/unit/gcc/gccTest.cc +++ b/projects/hip-tests/catch/unit/gcc/gccTest.cc @@ -1,64 +1,64 @@ -/* - * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * */ - -#include - -extern "C" { -#include "LaunchKernel.h" -} - -/** - * Test Description - * ------------------------ - * - calling launchKernel which is c function from catch2 - * and compile with gcc compiler and verify the results. - - * Test source - * ------------------------ - * - catch/unit/gcc/gccTest.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - */ - -TEST_CASE("Unit_LaunchKernelgccTests") { - printf("Calling launchKernel files from here\n"); - int result = launchKernel(); - REQUIRE(result == 1); -} - -/** - * Test Description - * ------------------------ - * - Calling hipMalloc which is c file from catch2 and compile - * with gcc compiler and verify the results. - - * Test source - * ------------------------ - * - catch/unit/gcc/gccTest.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - */ - -TEST_CASE("Unit_hipMallocgccTests") { - printf("Calling hipMalloc files from here\n"); - int result = hipMallocfunc(); - REQUIRE(result == 1); -} +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +#include + +extern "C" { +#include "LaunchKernel.h" +} + +/** + * Test Description + * ------------------------ + * - calling launchKernel which is c function from catch2 + * and compile with gcc compiler and verify the results. + + * Test source + * ------------------------ + * - catch/unit/gcc/gccTest.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +TEST_CASE("Unit_LaunchKernelgccTests") { + printf("Calling launchKernel files from here\n"); + int result = launchKernel(); + REQUIRE(result == 1); +} + +/** + * Test Description + * ------------------------ + * - Calling hipMalloc which is c file from catch2 and compile + * with gcc compiler and verify the results. + + * Test source + * ------------------------ + * - catch/unit/gcc/gccTest.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +TEST_CASE("Unit_hipMallocgccTests") { + printf("Calling hipMalloc files from here\n"); + int result = hipMallocfunc(); + REQUIRE(result == 1); +} diff --git a/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc b/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc index e25938d51c..a96990890c 100644 --- a/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc +++ b/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc @@ -1,176 +1,176 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include - - -#pragma clang diagnostic ignored "-Wunused-parameter" - -unsigned threadsPerBlock = 256; - -template -__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) { - T tmp; - if (groupElements < batch) - return; - // sdata[tid] += sdata[tid - batch/2] does not work when block size is - // greater than wave size because one wave may complete before another - // wave. 
- if (tid >= batch/2 && tid < groupElements) - tmp = sdata[tid - batch/2]; - __syncthreads(); - if (tid >= batch/2 && tid < groupElements) - sdata[tid] += tmp; - __syncthreads(); -} - -template -__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d, - size_t numElements, size_t groupElements) { - // declare dynamic shared memory - extern __shared__ double sdata0[]; - T* sdata = reinterpret_cast(sdata0); - - size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); - size_t tid = threadIdx.x; - - // initialize dynamic shared memory - if (tid < groupElements) { - sdata[tid] = static_cast(tid); - } - __syncthreads(); - - // prefix sum inside dynamic shared memory - sum<512>(sdata, groupElements, tid); - sum<256>(sdata, groupElements, tid); - sum<128>(sdata, groupElements, tid); - sum<64>(sdata, groupElements, tid); - sum<32>(sdata, groupElements, tid); - sum<16>(sdata, groupElements, tid); - sum<8>(sdata, groupElements, tid); - sum<4>(sdata, groupElements, tid); - sum<2>(sdata, groupElements, tid); - C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements]; -} - -template -void testExternShared(size_t N, unsigned groupElements) { - size_t Nbytes = N * sizeof(T); - - T *A_d, *B_d, *C_d; - T *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = N/threadsPerBlock; - assert(N == blocks * threadsPerBlock); - - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - // calculate the amount of dynamic shared memory required - size_t groupMemBytes = groupElements * sizeof(T); - - // launch kernel with dynamic shared memory - hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel), dim3(blocks), - dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d, - N, groupElements); - - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - - // verify - for (size_t i = 0; i < N; ++i) { - 
size_t tid = (i % min(threadsPerBlock, groupElements)); - T sumFromSharedMemory = static_cast(tid * (tid + 1) / 2); - T expected = A_h[i] + B_h[i] + sumFromSharedMemory; - REQUIRE(C_h[i] == expected); - } - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); -} - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - launch kernel with dynamic shared memory for float and double - * datatypes and verify the results. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipDynamicShared.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipDynamicShared") { - SECTION("test case with float for least size") { - testExternShared(1024, 4); - testExternShared(1024, 8); - testExternShared(1024, 16); - testExternShared(1024, 32); - testExternShared(1024, 64); - } - - SECTION("test case with float for max size") { - testExternShared(65536, 4); - testExternShared(65536, 8); - testExternShared(65536, 16); - testExternShared(65536, 32); - testExternShared(65536, 64); - } - - SECTION("test case with double for least size") { - testExternShared(1024, 4); - testExternShared(1024, 8); - testExternShared(1024, 16); - testExternShared(1024, 32); - testExternShared(1024, 64); - } - - SECTION("test case with double for max size") { - testExternShared(65536, 4); - testExternShared(65536, 8); - testExternShared(65536, 16); - testExternShared(65536, 32); - testExternShared(65536, 64); - } - - SECTION("test case with float for max LDS size") { - int maxLDS = 0; - HIP_CHECK(hipDeviceGetAttribute(&maxLDS, - hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); - testExternShared(1024, maxLDS/sizeof(float)); - } -} - -/** -* End doxygen 
group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + + +#pragma clang diagnostic ignored "-Wunused-parameter" + +unsigned threadsPerBlock = 256; + +template +__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) { + T tmp; + if (groupElements < batch) + return; + // sdata[tid] += sdata[tid - batch/2] does not work when block size is + // greater than wave size because one wave may complete before another + // wave. 
+ if (tid >= batch/2 && tid < groupElements) + tmp = sdata[tid - batch/2]; + __syncthreads(); + if (tid >= batch/2 && tid < groupElements) + sdata[tid] += tmp; + __syncthreads(); +} + +template +__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d, + size_t numElements, size_t groupElements) { + // declare dynamic shared memory + extern __shared__ double sdata0[]; + T* sdata = reinterpret_cast(sdata0); + + size_t gid = (blockIdx.x * blockDim.x + threadIdx.x); + size_t tid = threadIdx.x; + + // initialize dynamic shared memory + if (tid < groupElements) { + sdata[tid] = static_cast(tid); + } + __syncthreads(); + + // prefix sum inside dynamic shared memory + sum<512>(sdata, groupElements, tid); + sum<256>(sdata, groupElements, tid); + sum<128>(sdata, groupElements, tid); + sum<64>(sdata, groupElements, tid); + sum<32>(sdata, groupElements, tid); + sum<16>(sdata, groupElements, tid); + sum<8>(sdata, groupElements, tid); + sum<4>(sdata, groupElements, tid); + sum<2>(sdata, groupElements, tid); + C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements]; +} + +template +void testExternShared(size_t N, unsigned groupElements) { + size_t Nbytes = N * sizeof(T); + + T *A_d, *B_d, *C_d; + T *A_h, *B_h, *C_h; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = N/threadsPerBlock; + assert(N == blocks * threadsPerBlock); + + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + // calculate the amount of dynamic shared memory required + size_t groupMemBytes = groupElements * sizeof(T); + + // launch kernel with dynamic shared memory + hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel), dim3(blocks), + dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d, + N, groupElements); + + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + // verify + for (size_t i = 0; i < N; ++i) { + 
size_t tid = (i % min(threadsPerBlock, groupElements)); + T sumFromSharedMemory = static_cast(tid * (tid + 1) / 2); + T expected = A_h[i] + B_h[i] + sumFromSharedMemory; + REQUIRE(C_h[i] == expected); + } + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); +} + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - launch kernel with dynamic shared memory for float and double + * datatypes and verify the results. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipDynamicShared.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipDynamicShared") { + SECTION("test case with float for least size") { + testExternShared(1024, 4); + testExternShared(1024, 8); + testExternShared(1024, 16); + testExternShared(1024, 32); + testExternShared(1024, 64); + } + + SECTION("test case with float for max size") { + testExternShared(65536, 4); + testExternShared(65536, 8); + testExternShared(65536, 16); + testExternShared(65536, 32); + testExternShared(65536, 64); + } + + SECTION("test case with double for least size") { + testExternShared(1024, 4); + testExternShared(1024, 8); + testExternShared(1024, 16); + testExternShared(1024, 32); + testExternShared(1024, 64); + } + + SECTION("test case with double for max size") { + testExternShared(65536, 4); + testExternShared(65536, 8); + testExternShared(65536, 16); + testExternShared(65536, 32); + testExternShared(65536, 64); + } + + SECTION("test case with float for max LDS size") { + int maxLDS = 0; + HIP_CHECK(hipDeviceGetAttribute(&maxLDS, + hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + testExternShared(1024, maxLDS/sizeof(float)); + } +} + +/** +* End doxygen 
group KernelTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc b/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc index 248b7a0dbc..3fa8dad04a 100644 --- a/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc +++ b/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc @@ -1,94 +1,94 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - - -#define LEN (16 * 1024) -#define SIZE (LEN * sizeof(float)) - -__global__ void vectorAdd(float* Ad, float* Bd) { - extern __shared__ float sBd[]; - int tx = threadIdx.x; - for (int i = 0; i < LEN / 64; i++) { - sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f; - Bd[tx + i * 64] = sBd[tx + i * 64]; - } -} - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - Assign max dynamic shared memory to kernel function and - * verify the results. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipDynamicShared2.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipDynamicShared2") { - float *A, *B, *Ad, *Bd; - A = new float[LEN]; - B = new float[LEN]; - for (int i = 0; i < LEN; i++) { - A[i] = 1.0f; - B[i] = 1.0f; - } - HIP_CHECK(hipMalloc(&Ad, SIZE)); - HIP_CHECK(hipMalloc(&Bd, SIZE)); - HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); - - hipError_t ret = hipFuncSetAttribute( - reinterpret_cast(&vectorAdd), - hipFuncAttributeMaxDynamicSharedMemorySize, SIZE); - - REQUIRE(ret == hipSuccess); - hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd); - HIP_CHECK(hipGetLastError()); - HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); - for (int i = 0; i < LEN; i++) { - assert(B[i] > 1.0f && B[i] < 3.0f); - } - HIP_CHECK(hipFree(Ad)); - HIP_CHECK(hipFree(Bd)); - - delete[] A; - delete[] B; -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + + +#define LEN (16 * 1024) +#define SIZE (LEN * sizeof(float)) + +__global__ void vectorAdd(float* Ad, float* Bd) { + extern __shared__ float sBd[]; + int tx = threadIdx.x; + for (int i = 0; i < LEN / 64; i++) { + sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f; + Bd[tx + i * 64] = sBd[tx + i * 64]; + } +} + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - Assign max dynamic shared memory to kernel function and + * verify the results. 
+ + * Test source + * ------------------------ + * - catch/unit/kernel/hipDynamicShared2.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipDynamicShared2") { + float *A, *B, *Ad, *Bd; + A = new float[LEN]; + B = new float[LEN]; + for (int i = 0; i < LEN; i++) { + A[i] = 1.0f; + B[i] = 1.0f; + } + HIP_CHECK(hipMalloc(&Ad, SIZE)); + HIP_CHECK(hipMalloc(&Bd, SIZE)); + HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice)); + + hipError_t ret = hipFuncSetAttribute( + reinterpret_cast(&vectorAdd), + hipFuncAttributeMaxDynamicSharedMemorySize, SIZE); + + REQUIRE(ret == hipSuccess); + hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost)); + for (int i = 0; i < LEN; i++) { + assert(B[i] > 1.0f && B[i] < 3.0f); + } + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + + delete[] A; + delete[] B; +} + +/** +* End doxygen group KernelTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc b/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc index 478f39550a..d26a94b197 100644 --- a/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc +++ b/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc @@ -1,59 +1,59 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include - - -#pragma clang diagnostic ignored "-Wunused-parameter" - -__global__ void Empty(int param) {} - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - pass empty Kernel function. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipEmptyKernel.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipEmptyKernel") { - hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0); - HIP_CHECK(hipDeviceSynchronize()); -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + + +#pragma clang diagnostic ignored "-Wunused-parameter" + +__global__ void Empty(int param) {} + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - pass empty Kernel function. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipEmptyKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipEmptyKernel") { + hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0); + HIP_CHECK(hipDeviceSynchronize()); +} + +/** +* End doxygen group KernelTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc b/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc index 962b58e605..9bdf7ebc76 100644 --- a/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc +++ b/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc @@ -1,138 +1,138 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test the Grid_Launch syntax. 
- -#include -#include -#include - -#include "hip/hip_ext.h" - -static unsigned threadsPerBlock = 256; -static unsigned blocksPerCU = 6; - -struct _t { - double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j; -}; - -typedef struct _t _T; - -__global__ void sKernel(_T s, double *a) { - *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j; -} - -__global__ void mKernel(char f, int16_t a, int b, double c, - int16_t d, int e, double* res) { - *res = a + b + c + d + e + f; -} - -void testMixData() { - double m = 0; - double *d_m; - HIP_CHECK(hipMalloc(&d_m, sizeof(double))); - int a = 1, e = 10; - int16_t b = 2, d = 4; - double c = 3.0; - char ff = 10; - hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff, - b, a, c, d, e, d_m); - HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); - REQUIRE(m == 30.0); - HIP_CHECK(hipFree(d_m)); -} - -void testStruct() { - double m = 0; - double *d_m; - HIP_CHECK(hipMalloc(&d_m, sizeof(double))); - _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m); - HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); - REQUIRE(m == 55.0); - HIP_CHECK(hipFree(d_m)); -} - -void test(size_t N) { - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - - hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), - dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0, - static_cast(A_d), - static_cast(B_d), C_d, N); - - HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - HipTest::checkVectorADD(A_h, B_h, C_h, N); -} - -/** -* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL -* @{ -* 
@ingroup KernelTest -* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, - hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags, - Args... args)` - -* Launches kernel with dimention parameters and shared memory on stream with templated kernel and arguments -*/ - -/** - * Test Description - * ------------------------ - * - Test case to verify sample array with hipExtLaunchKernelGGL() - * and verify the results. - * - Test case to verify struct data with hipExtLaunchKernelGGL() - * and verify the results. - * - Test case to verify mix datatypes with hipExtLaunchKernelGGL() - * and verify the results. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipExtLaunchKernelGGL.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipExtLaunchKernelGGL") { - SECTION("test run") { - size_t N = 4 * 1024 * 1024; - test(N); - } - SECTION("testStruct run") { - testStruct(); - } - SECTION("testMixData run") { - testMixData(); - } -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test the Grid_Launch syntax. + +#include +#include +#include + +#include "hip/hip_ext.h" + +static unsigned threadsPerBlock = 256; +static unsigned blocksPerCU = 6; + +struct _t { + double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j; +}; + +typedef struct _t _T; + +__global__ void sKernel(_T s, double *a) { + *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j; +} + +__global__ void mKernel(char f, int16_t a, int b, double c, + int16_t d, int e, double* res) { + *res = a + b + c + d + e + f; +} + +void testMixData() { + double m = 0; + double *d_m; + HIP_CHECK(hipMalloc(&d_m, sizeof(double))); + int a = 1, e = 10; + int16_t b = 2, d = 4; + double c = 3.0; + char ff = 10; + hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff, + b, a, c, d, e, d_m); + HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); + REQUIRE(m == 30.0); + HIP_CHECK(hipFree(d_m)); +} + +void testStruct() { + double m = 0; + double *d_m; + HIP_CHECK(hipMalloc(&d_m, sizeof(double))); + _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m); + HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost)); + REQUIRE(m == 55.0); + HIP_CHECK(hipFree(d_m)); +} + +void test(size_t N) { + size_t Nbytes = N * sizeof(int); + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), + dim3(threadsPerBlock), 0, 0, 
nullptr, nullptr, 0, + static_cast(A_d), + static_cast(B_d), C_d, N); + + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, N); +} + +/** +* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, + hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags, + Args... args)` - +* Launches kernel with dimention parameters and shared memory on stream with templated kernel and arguments +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify sample array with hipExtLaunchKernelGGL() + * and verify the results. + * - Test case to verify struct data with hipExtLaunchKernelGGL() + * and verify the results. + * - Test case to verify mix datatypes with hipExtLaunchKernelGGL() + * and verify the results. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipExtLaunchKernelGGL.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipExtLaunchKernelGGL") { + SECTION("test run") { + size_t N = 4 * 1024 * 1024; + test(N); + } + SECTION("testStruct run") { + testStruct(); + } + SECTION("testMixData run") { + testMixData(); + } +} + +/** +* End doxygen group KernelTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc b/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc index 29667aa60b..91ba18a4a1 100644 --- a/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc +++ b/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc @@ -1,122 +1,122 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Test the Grid_Launch syntax. 
- -#include -#include -#include - - -static unsigned threadsPerBlock = 256; -static unsigned blocksPerCU = 6; - -// __device__ maps to __attribute__((hc)) -__device__ int foo(int i) { return i + 1; } - - -template -__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) { - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { - double foo = __hiloint2double(A_d[i], B_d[i]); - C_d[i] = __double2loint(foo) + __double2hiint(foo); - } -} - -int test_gl2(size_t N) { - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - // Full vadd in one large chunk, to get things started: - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock), - 0, 0, A_d, B_d, C_d, N); - HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - // verify - HipTest::checkVectorADD(A_h, B_h, C_h, N); - return 0; -} - -#if __HIP__ -int test_triple_chevron(size_t N) { - size_t Nbytes = N * sizeof(int); - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); - - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - // Full vadd in one large chunk, to get things started: - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - vectorADD2<<>>(A_d, B_d, C_d, N); - HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - // verify - HipTest::checkVectorADD(A_h, B_h, C_h, N); - return 0; -} -#endif - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ 
-* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - Test case to verify the Grid_Launch syntax. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipGridLaunch.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipGridLaunch") { - size_t N = 4 * 1024 * 1024; - SECTION("Test test_gl2") { - test_gl2(N); - } - -#if __HIP__ - SECTION("Test triple_chevron") { - test_triple_chevron(N); - } -#endif -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test the Grid_Launch syntax. 
+ +#include +#include +#include + + +static unsigned threadsPerBlock = 256; +static unsigned blocksPerCU = 6; + +// __device__ maps to __attribute__((hc)) +__device__ int foo(int i) { return i + 1; } + + +template +__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + double foo = __hiloint2double(A_d[i], B_d[i]); + C_d[i] = __double2loint(foo) + __double2hiint(foo); + } +} + +int test_gl2(size_t N) { + size_t Nbytes = N * sizeof(int); + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + // Full vadd in one large chunk, to get things started: + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock), + 0, 0, A_d, B_d, C_d, N); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + // verify + HipTest::checkVectorADD(A_h, B_h, C_h, N); + return 0; +} + +#if __HIP__ +int test_triple_chevron(size_t N) { + size_t Nbytes = N * sizeof(int); + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + // Full vadd in one large chunk, to get things started: + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + vectorADD2<<>>(A_d, B_d, C_d, N); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + // verify + HipTest::checkVectorADD(A_h, B_h, C_h, N); + return 0; +} +#endif + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ 
+* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - Test case to verify the Grid_Launch syntax. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipGridLaunch.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipGridLaunch") { + size_t N = 4 * 1024 * 1024; + SECTION("Test test_gl2") { + test_gl2(N); + } + +#if __HIP__ + SECTION("Test triple_chevron") { + test_triple_chevron(N); + } +#endif +} + +/** +* End doxygen group KernelTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc b/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc index 0308c087d1..2ef83a8713 100644 --- a/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc +++ b/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc @@ -1,111 +1,111 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include - -#include - -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wuninitialized" - -// Simple tests for variable type qualifiers: -__device__ int deviceVar; - -// TODO-HCC __constant__ not working yet. -__constant__ int constantVar1; - -__constant__ __device__ int constantVar2; - -// Test HOST space: -__host__ void foo() { printf("foo!\n"); } - -__device__ __noinline__ int sum1_noinline(int a) { return a + 1; } -__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; } - - -__device__ __host__ float PlusOne(float x) { return x + 1.0; } - -__global__ void MyKernel(const float* a, const float* b, float* c, - unsigned N) { - unsigned gid = threadIdx.x; - if (gid < N) { - c[gid] = a[gid] + PlusOne(b[gid]); - } -} - -void callMyKernel() { - float *a, *b, *c; - const unsigned blockSize = 256; - unsigned N = blockSize; - hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize), - 0, 0, a, b, c, N); -} - -template -__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) { -#ifdef NOT_YET - int a = __shfl_up(x, 1); -#endif - float x = 1.0; -#ifdef NOT_YET - float fastZ = __sin(x); -#endif - __syncthreads(); - - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i = offset; i < N; i += stride) { - C_d[i] = A_d[i] + B_d[i]; - } -} - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... 
args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - Collection of code to make sure that various features - * in the hip kernel language compile. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipLanguageExtensions.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipLanguageExtensions") { - REQUIRE(true); -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wuninitialized" + +// Simple tests for variable type qualifiers: +__device__ int deviceVar; + +// TODO-HCC __constant__ not working yet. 
+__constant__ int constantVar1; + +__constant__ __device__ int constantVar2; + +// Test HOST space: +__host__ void foo() { printf("foo!\n"); } + +__device__ __noinline__ int sum1_noinline(int a) { return a + 1; } +__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; } + + +__device__ __host__ float PlusOne(float x) { return x + 1.0; } + +__global__ void MyKernel(const float* a, const float* b, float* c, + unsigned N) { + unsigned gid = threadIdx.x; + if (gid < N) { + c[gid] = a[gid] + PlusOne(b[gid]); + } +} + +void callMyKernel() { + float *a, *b, *c; + const unsigned blockSize = 256; + unsigned N = blockSize; + hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize), + 0, 0, a, b, c, N); +} + +template +__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) { +#ifdef NOT_YET + int a = __shfl_up(x, 1); +#endif + float x = 1.0; +#ifdef NOT_YET + float fastZ = __sin(x); +#endif + __syncthreads(); + + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N; i += stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - Collection of code to make sure that various features + * in the hip kernel language compile. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipLanguageExtensions.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipLanguageExtensions") { + REQUIRE(true); +} + +/** +* End doxygen group KernelTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc b/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc index 86d574778c..016454eeea 100644 --- a/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc +++ b/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc @@ -1,1019 +1,1019 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - -#include - -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wunused-result" -#pragma clang diagnostic ignored "-Wuninitialized" - -// Memory alignment is broken -// Update: with latest changes the aligment is working fine, hence enabled -#define ENABLE_ALIGNMENT_TEST_SMALL_BAR 1 - -// Packed member atribute broken -#define ENABLE_PACKED_TEST 0 - -// Update: with latest changes struct class object -// from device is working fine, hence enabled -#define ENABLE_CLASS_OBJ_ACCESS 1 - -// accessing dynamic/heap memory from device is broken -#define ENABLE_HEAP_MEMORY_ACCESS 0 - -// Update: with latest changes it's working hence enabled -#define ENABLE_USER_STL 1 - -// Update: with latest changes it's working hence enabled -#define ENABLE_OUT_OF_ORDER_INITIALIZATION 1 - -// Direct initialization of struct broken, -// ip_d9 is a pointer, uint_t*, hipLaunchKernelStruct_h9 = {'c', ip_d9}; -#define ENABLE_DECLARE_INITIALIZATION_POINTER 0 - -// Bit fields are broken -#define ENABLE_BIT_FIELDS 0 - -static const int BLOCK_DIM_SIZE = 512; - -// allocate memory on device and host for result validation -static bool *result_d, *result_h; - -static hipError_t hipMallocError = hipErrorUnknown; -static hipError_t hipHostMallocError = hipErrorUnknown; -static hipError_t hipMemsetError = hipErrorUnknown; - -static void ResultValidation() { - HIP_CHECK(hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - REQUIRE(result_h[k] == true); - } - return; -} - -// Segregating the reset part as it was causing a problem when i put inside -// ResultValidation() function, the memory was not reset correctly for the -// tests which were disabled. -static void ResetValidationMem() { - // reset the memory to false to reuse it. 
- HIP_CHECK(hipMemset(result_d, false, BLOCK_DIM_SIZE)); - HIP_CHECK(hipMemset(result_h, false, BLOCK_DIM_SIZE)); - return; -} - -// This test is to verify Struct with variables -// support, read from device. -typedef struct hipLaunchKernelStruct1 { - int li; // local int - float lf; // local float - bool result; // local bool -} hipLaunchKernelStruct_t1; - -// This test is to verify struct with padding, read from device -typedef struct hipLaunchKernelStruct2 { - char c1; - int64_t l1; - char c2; - int64_t l2; - bool result; -} hipLaunchKernelStruct_t2; - -// This test is to verify struct with padding, read from device -typedef struct hipLaunchKernelStruct3 { - char bf1; - char bf2; - int64_t l1; - char bf3; - bool result; -} hipLaunchKernelStruct_t3; - -// This test is to verify empty struct -typedef struct hipLaunchKernelStruct4 { - // empty struct, size will be verified from device side,size 1Byte -} hipLaunchKernelStruct_t4; - -// This test is to verify struct with pointer member variable. 
-typedef struct hipLaunchKernelStruct5 { - char c1; - char* cp; // char pointer -} hipLaunchKernelStruct_t5; - - -// This test is to verify struct with aligned(8), -// right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct6 { - char c1; - int16_t si; -} __attribute__((aligned(8))) hipLaunchKernelStruct_t6; - -// This test is to verify struct with aligned(16), -// right now it's brokenon hcc & hip-clang -typedef struct hipLaunchKernelStruct7 { - char c1; - int16_t si; -} __attribute__((aligned(16))) hipLaunchKernelStruct_t7; - -// This test is to verify struct with packed & aligned, -// size should be 4Bytes right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8 { - char c1; - int16_t si; - bool b; -}__attribute__((packed, aligned(4))) hipLaunchKernelStruct_t8; - -// This test is to verify struct with packed, no alignment as Sam suggested -// size should be 4Bytes, right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8A { - char c1; - int16_t si; - bool b; -}__attribute__((packed)) hipLaunchKernelStruct_t8A; - -// This test is to verify struct with alignment, no packing as Sam suggested -// size should be 8Bytes as no packing, right now it's broken on hcc & hip-clang -typedef struct hipLaunchKernelStruct8B { - char c1; - int16_t si; - bool b; -}__attribute__((aligned(8))) hipLaunchKernelStruct_t8B; - -// This test is to verify const struct object -typedef struct hipLaunchKernelStruct9 { - char c1; - uint32_t* ip; // uint pointer -} hipLaunchKernelStruct_t9; - -// This test is to verify struct with stdint types, uintN_t -typedef struct hipLaunchKernelStruct10 { - uint64_t u64; - uint32_t u32; - uint8_t u8; -} hipLaunchKernelStruct_t10; - -// This test is to verify struct with volatile member -typedef struct hipLaunchKernelStruct11 { - int i1; - volatile unsigned int vint; -} hipLaunchKernelStruct_t11; - -// This test is to verify struct with simple class object -class base { - public: - int i = 
0; - base() {} -}; -typedef struct hipLaunchKernelStruct12 { - base b; - char c1; -} hipLaunchKernelStruct_t12; - -// This test is to verify struct with __device__ func() attribute -typedef struct hipLaunchKernelStruct13 { - int i1; - __device__ int getvalue() { return i1; } -} hipLaunchKernelStruct_t13; - -// This test is to verify struct with array variable, -// write to from device -typedef struct hipLaunchKernelStruct14 { - int readint; - int writeint[BLOCK_DIM_SIZE]; // will write to this from device -} hipLaunchKernelStruct_t14; - -// This test is to verify struct with dynamic memory, new int -// the heap memory will be accessed from device -typedef struct hipLaunchKernelStruct15 { - char c1; - int* heapmem; // allocated using hipMalloc() -} hipLaunchKernelStruct_t15; - -// This test is to verify simple template struct -template -struct hipLaunchKernelStruct_t16 { - T t1; -}; - -// This test is to verify simple explicity template struct -template struct hipLaunchKernelStruct_t17 {}; -template<> // explicit template -struct hipLaunchKernelStruct_t17 { - int t1; -}; - -// This test is to verity write to struct memory using __device__ func() -typedef struct hipLaunchKernelStruct18 { - char c1; - __device__ void setChar(char c) { c1 = c; } - __device__ int getChar() { return c1; } -} hipLaunchKernelStruct_t18; - -// This test is to verity user defined STL, simple stack implementation -typedef struct stackNode { - int data; - stackNode* nextNode = NULL; -} stackNode_t; -typedef struct hipLaunchKernelStruct19 { - stackNode_t* stack = NULL; - unsigned int size_ = 0; - void pushMe(int value) { // not a device function, setting from host - stackNode_t* newNode; - HIP_CHECK(hipMalloc(reinterpret_cast(&newNode), - sizeof(stackNode_t))); - HIP_CHECK(hipMemset(&newNode->data, value, sizeof(stackNode_t))); - // newNode->data = value; - ++size_; - if (stack == NULL) { - stack = newNode; - return; - } - stackNode_t* currentHead = stack; - stack = newNode; - stack->nextNode = 
currentHead; - return; - } - __device__ void popMe() { - stackNode_t* currentHead = stack; - stack = stack->nextNode; - --size_; - // delete currentHead; // no idea why delete not working - return; - } - int stackSize() { - return size_; - } -} hipLaunchKernelStruct_t19; - -// This test is to verify out of order initalizer of struct elements -// and access in-order, from device. -typedef struct hipLaunchKernelStruct20 { - char name; - int age; - int rank; -} hipLaunchKernelStruct_t20; - -// This test is to verify bit fields operations -// the size should be 1Bytes -typedef struct hipLaunchKernelStruct21 { - int i : 3; // limiting bits to 3 - int j : 2; // limiting bits to 2 -} hipLaunchKernelStruct_t21; - -// Passing struct to a hipLaunchKernelGGL(), -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc1( - hipLaunchKernelStruct_t1 hipLaunchKernelStruct_, - bool* result_d1) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d1[x] = ((hipLaunchKernelStruct_.li == 1) - && (hipLaunchKernelStruct_.lf == 1.0) - && (hipLaunchKernelStruct_.result == false)); -} - -// Passing struct to a hipLaunchKernelGGL(), checks padding, -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc2( - hipLaunchKernelStruct_t2 hipLaunchKernelStruct_, - bool* result_d2) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d2[x] = ((hipLaunchKernelStruct_.c1 == 'a') - && (hipLaunchKernelStruct_.l1 == 1.0) - && (hipLaunchKernelStruct_.c2 == 'b') - && (hipLaunchKernelStruct_.l2 == 2.0) ); -} - -// Passing struct to a hipLaunchKernelGGL(), checks padding, -// read and write into the same struct -__global__ void hipLaunchKernelStructFunc3( - hipLaunchKernelStruct_t3 hipLaunchKernelStruct_, - bool* result_d3) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d3[x] = 
((hipLaunchKernelStruct_.bf1 == 1) - && (hipLaunchKernelStruct_.bf2 == 1) - && (hipLaunchKernelStruct_.l1 == 1.0) - && (hipLaunchKernelStruct_.bf3 == 1) ); -} - -// Passing empty struct to a hipLaunchKernelGGL(), -// check the size of 1Byte, set result_d4 to true if condition met -__global__ void hipLaunchKernelStructFunc4( - hipLaunchKernelStruct_t4 hipLaunchKernelStruct_, - bool* result_d4) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d4[x] = (sizeof(hipLaunchKernelStruct_) == 1); -} - -// Passing struct with pointer object to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc5( - hipLaunchKernelStruct_t5 hipLaunchKernelStruct_, - bool* result_d5) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d5[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (*hipLaunchKernelStruct_.cp == 'p')); -} - -// Passing struct which is aligned to 8Byte to a hipLaunchKernelGGL(), -// set the result_d6 to true if condition met -__global__ void hipLaunchKernelStructFunc6( - hipLaunchKernelStruct_t6 hipLaunchKernelStruct_, - bool* result_d6) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // get the address of the struct - // size_t(p)%8 will be 0 if aligned to 8Byte address space - int *p = reinterpret_cast(&hipLaunchKernelStruct_); - result_d6[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%8 ==0)); -} - -// Passing struct which is aligned to 16Byte, -// set the result_d7 to true if condition met -__global__ void hipLaunchKernelStructFunc7( - hipLaunchKernelStruct_t7 hipLaunchKernelStruct_, - bool* result_d7) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - // get the address of the struct - // size_t(p)%16 will be 0 if aligned to 16Byte address space - int *p = 
reinterpret_cast(&hipLaunchKernelStruct_); - result_d7[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%16 ==0) ); -} - -// Passing struct which is packed & aligned to 4Byte, -// set the result_d8 to true if condition met -__global__ void hipLaunchKernelStructFunc8( - hipLaunchKernelStruct_t8 hipLaunchKernelStruct_, - bool* result_d8) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - // get the address of the xth element, struct[x], - // size_t(p)%4 will be 0 if aligned to 4Byte address space - int *p = reinterpret_cast(&hipLaunchKernelStruct_); - result_d8[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%4 ==0) - && (sizeof(hipLaunchKernelStruct_) == 4)); -} - -// Passing struct which is packed only, as Sam suggested, should be 4Bytes -// set the result_d8A to true if condition met -__global__ void hipLaunchKernelStructFunc8A( - hipLaunchKernelStruct_t8A hipLaunchKernelStruct_, - bool* result_d8A) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - // this is packed struct - // the address will not be aglined in this case hence condition removed - // only sizeof(hipLaunchKernelStruct_) will be valided - result_d8A[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && (sizeof(hipLaunchKernelStruct_) == 4)); -} - -// Passing struct which is aligned(4) only, as Sam suggested -// , size should be 8Bytes, set the result_d8B to true if condition met -__global__ void hipLaunchKernelStructFunc8B( - hipLaunchKernelStruct_t8B hipLaunchKernelStruct_, - bool* result_d8B) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - // get the address of the xth element, struct[x], - // size_t(p)%4 will be 0 if aligned to 4Byte address space - int *p = reinterpret_cast(&hipLaunchKernelStruct_); - result_d8B[x] = 
((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.si == 1) - && ((size_t(p))%8 == 0) - && (sizeof(hipLaunchKernelStruct_) == 8)); -} - -// Passing struct with uint pointer object to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc9( - const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_, - bool* result_d9) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - - // set the result to true if the condition met - result_d9[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (*hipLaunchKernelStruct_.ip == 1)); -} - -// Passing struct with stdint types object, uintN_t, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc10( - hipLaunchKernelStruct_t10 hipLaunchKernelStruct_, - bool* result_d10) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d10[x] = ((hipLaunchKernelStruct_.u64 == UINT64_MAX) - && (hipLaunchKernelStruct_.u32 == 1) - && (hipLaunchKernelStruct_.u8 == UINT8_MAX)); -} - -// Passing struct with volatile member, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc11( - hipLaunchKernelStruct_t11 hipLaunchKernelStruct_, - bool* result_d11) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d11[x] = ((hipLaunchKernelStruct_.i1 == 1) - && (hipLaunchKernelStruct_.vint == 0)); -} - -// Passing struct with simple class obj, to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc12( - hipLaunchKernelStruct_t12 hipLaunchKernelStruct_, - bool* result_d12) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d12[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.b.i == 0)); -} - -// Passing struct with simple __device__ func(), to a hipLaunchKernelGGL() -__global__ void hipLaunchKernelStructFunc13( - hipLaunchKernelStruct_t13 hipLaunchKernelStruct_, - bool* result_d13) { - int x = blockIdx.x * blockDim.x + 
threadIdx.x; - // set the result to true if the condition met - result_d13[x] = ((hipLaunchKernelStruct_.i1 == 1) - && (hipLaunchKernelStruct_.getvalue() == 1)); -} - -// Passing struct with array variable, write to from device -__global__ void hipLaunchKernelStructFunc14( - hipLaunchKernelStruct_t14 hipLaunchKernelStruct_, - bool* result_d14) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - hipLaunchKernelStruct_.writeint[x] = 1; - // set the result to true if the condition met - result_d14[x] = ((hipLaunchKernelStruct_.readint == 1) - && (hipLaunchKernelStruct_.writeint[x] == 1)); -} - -// Passing struct with struct with dynamic memory, new int -// the heap memory will be accessed from device -__global__ void hipLaunchKernelStructFunc15( - hipLaunchKernelStruct_t15 hipLaunchKernelStruct_, - bool* result_d15) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d15[x] = ((hipLaunchKernelStruct_.c1 == 'c') - && (hipLaunchKernelStruct_.heapmem[x] == 1)); -} - -// Passing simple template struct -__global__ void hipLaunchKernelStructFunc16( - hipLaunchKernelStruct_t16 hipLaunchKernelStruct_, - bool* result_d16) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d16[x] = (hipLaunchKernelStruct_.t1 == 'c'); -} - -// Passing simple explicit template struct -__global__ void hipLaunchKernelStructFunc17( - hipLaunchKernelStruct_t17 hipLaunchKernelStruct_, - bool* result_d17) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // set the result to true if the condition met - result_d17[x] = (hipLaunchKernelStruct_.t1 == 1); -} - -// Passing struct and write to struct memory using __device__ func() -__global__ void hipLaunchKernelStructFunc18( - hipLaunchKernelStruct_t18 hipLaunchKernelStruct_, - bool* result_d18) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - hipLaunchKernelStruct_.setChar('c'); - // set the result to true if the condition met - 
result_d18[x] = (hipLaunchKernelStruct_.getChar() == 'c'); -} - -// Passing simple user defined stack implemenration, using __device__ func() -__global__ void hipLaunchKernelStructFunc19( - hipLaunchKernelStruct_t19 hipLaunchKernelStruct_) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // stack should be empty after the kernel execustion, verify on host side - hipLaunchKernelStruct_.popMe(); -} - -// Passing out of order initalized struct, access in-order -__global__ void hipLaunchKernelStructFunc20( - hipLaunchKernelStruct_t20 hipLaunchKernelStruct_, - bool* result_d20) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // accessing struct members in order - result_d20[x] = (hipLaunchKernelStruct_.name == 'A' - // strcmp(hipLaunchKernelStruct_.name, "AMD") -> strcmp is not broken - && hipLaunchKernelStruct_.age == 42 - && hipLaunchKernelStruct_.rank == 2); -} - -// Passing struct with bit fields -__global__ void hipLaunchKernelStructFunc21( - hipLaunchKernelStruct_t21 hipLaunchKernelStruct_, - bool* result_d21) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - // accessing struct members in order - result_d21[x] = (hipLaunchKernelStruct_.i == 2 - && hipLaunchKernelStruct_.j == 0 - && (sizeof(hipLaunchKernelStruct_) == 1)); -} - -__global__ void vAdd(float* a) {} - -template -__global__ void myKernel(T1 a, T2 b) {} - - -//--- -// Some wrapper macro for testing: -#define WRAP(...) 
__VA_ARGS__ - -#define MY_LAUNCH_MACRO(cmd, elapsed, quiet) \ - do { \ - HIP_CHECK(hipDeviceSynchronize()); \ - cmd; \ - HIP_CHECK(hipDeviceSynchronize()); \ - } while (0); - - -#define MY_LAUNCH(command, doTrace, msg) \ - { \ - if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ - command; \ - } - - -#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg) \ - { \ - if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ - (command); \ - } - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - Passing struct to a hipLaunchKernelGGL(), - * read and write into the same struct - * - Test to verify by Passing Struct type, checks padding - * - Test to verify by Passing Struct type, checks padding, assigning integer to a char - * - Test to verify by Passing empty struct - * - Test to verify by Passing struct with pointer object to a hipLaunchKernelGGL() - * - Test to verify by Passing struct with aligned(8) - * - Test to verify by Passing struct with aligned(16) - * - Test to verify by Passing struct with packed aligned to 4Bytes - * - Test to verify by Passing struct with packed to 4Bytes - * - Test to verify by Passing struct with aligned(4) to 4Bytes, size is 8Bytes - * - Test to verify by Passing const struct object to a hipLaunchKernelGGL() - * - Test to verify by Passing struct with uintN_t as member variables - * - Test to verify by Passing struct with uintN_t as member variables - * - Test to verify by Passing struct with simple class object - * - Test to verify by Passing struct with simple __device__ func() - * - Test to verify by Passing struct with array variable, write to from device - * - Test to verify by Passing simple template struct - * - Test to verify by Passing 
simple explicit template struct - * - Test to verify by Passing struct with simple __device__ func() to struct memory - * - Test to verify by Passing struct which is initiazed out of order - * accessing same elements in order from device - * - Test to verify by Passing struct with bit fields operation - * accessing same elements in order from device - * - Test to verify by Passing the different hipLaunchParm options - - * Test source - * ------------------------ - * - catch/unit/kernel/hipLaunchParm.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipLaunchParm") { - hipMallocError = hipMalloc(reinterpret_cast(&result_d), - BLOCK_DIM_SIZE*sizeof(bool)); - hipHostMallocError = hipHostMalloc(reinterpret_cast(&result_h), - BLOCK_DIM_SIZE*sizeof(bool)); - hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE); - - // Validating memory & initial value, for result_d, result_h - REQUIRE(hipMallocError == hipSuccess); - REQUIRE(hipHostMallocError == hipSuccess); - REQUIRE(hipMemsetError == hipSuccess); - - SECTION("check access from device") { - ResetValidationMem(); - hipLaunchKernelStruct_t1 hipLaunchKernelStruct_h1; - hipLaunchKernelStruct_h1.li = 1; - hipLaunchKernelStruct_h1.lf = 1.0; - hipLaunchKernelStruct_h1.result = false; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc1), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h1, - result_d); - ResultValidation(); - } - - SECTION("check padding") { - ResetValidationMem(); - hipLaunchKernelStruct_t2 hipLaunchKernelStruct_h2; - hipLaunchKernelStruct_h2.c1 = 'a'; - hipLaunchKernelStruct_h2.l1 = 1.0; - hipLaunchKernelStruct_h2.c2 = 'b'; - hipLaunchKernelStruct_h2.l2 = 2.0; - hipLaunchKernelStruct_h2.result = false; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc2), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h2, - result_d); - ResultValidation(); - } - - SECTION("check padding assigning int to char") { 
- ResetValidationMem(); - hipLaunchKernelStruct_t3 hipLaunchKernelStruct_h3; - hipLaunchKernelStruct_h3.bf1 = 1; - hipLaunchKernelStruct_h3.bf2 = 1; - hipLaunchKernelStruct_h3.l1 = 1.0; - hipLaunchKernelStruct_h3.bf3 = 1; - hipLaunchKernelStruct_h3.result = false; - // initialize to false, will be set to - // true if the struct size is 1Byte, from device size - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc3), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h3, - result_d); - ResultValidation(); - } - - SECTION("Empty struct") { - ResetValidationMem(); - hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc4), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h4, - result_d); - ResultValidation(); - } - - SECTION("Passing struct with pointer object") { - ResetValidationMem(); - hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5; - char* cp_d5; // This is passed as pointer to struct member - // allocating memory for char pointer on device - HIP_CHECK(hipMalloc(reinterpret_cast(&cp_d5), sizeof(char))); - HIP_CHECK(hipMemset(cp_d5, 'p', sizeof(char))); - hipLaunchKernelStruct_h5.c1 = 'c'; - hipLaunchKernelStruct_h5.cp = cp_d5; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc5), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h5, - result_d); - ResultValidation(); - } - - SECTION("Passing struct with aligned(8)") { - ResetValidationMem(); - hipLaunchKernelStruct_t6 hipLaunchKernelStruct_h6; - hipLaunchKernelStruct_h6.c1 = 'c'; - hipLaunchKernelStruct_h6.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc6), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h6, - result_d); - // alignment is broken hence disabled the validation part - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR - ResultValidation(); - #endif - } - - SECTION("Passing struct with aligned(16)") { - ResetValidationMem(); - hipLaunchKernelStruct_t7 
hipLaunchKernelStruct_h7; - hipLaunchKernelStruct_h7.c1 = 'c'; - hipLaunchKernelStruct_h7.si = 1; - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR // This is broken on small bar - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc7), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h7, - result_d); - ResultValidation(); - #endif - } - - SECTION("Passing struct with packed aligned to 4bytes") { - ResetValidationMem(); - hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8; - hipLaunchKernelStruct_h8.c1 = 'c'; - hipLaunchKernelStruct_h8.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8, - result_d); - // packed member broken on large and small bar setup. - #if ENABLE_PACKED_TEST - ResultValidation(); - #endif - } - - SECTION("Passing struct with packed to 4Bytes") { - ResetValidationMem(); - hipLaunchKernelStruct_t8A hipLaunchKernelStruct_h8A; - hipLaunchKernelStruct_h8A.c1 = 'c'; - hipLaunchKernelStruct_h8A.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8A), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8A, - result_d); - // packed member broken on large and small bar setup. 
- #if ENABLE_PACKED_TEST - ResultValidation(); - #endif - } - - SECTION("Passing struct with aligned(4) to 4Bytes") { - ResetValidationMem(); - hipLaunchKernelStruct_t8B hipLaunchKernelStruct_h8B; - hipLaunchKernelStruct_h8B.c1 = 'c'; - hipLaunchKernelStruct_h8B.si = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8B), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h8B, - result_d); - // alignment is broken hence disabled the validation part - #if ENABLE_ALIGNMENT_TEST_SMALL_BAR - ResultValidation(); - #endif - } - - SECTION("Passing const struct object") { - ResetValidationMem(); - uint32_t* ip_d9; - // allocating memory for char pointer on device - HIP_CHECK(hipMalloc(reinterpret_cast(&ip_d9), sizeof(uint32_t))); - HIP_CHECK(hipMemset(ip_d9, 1, sizeof(uint32_t))); - // ip_d9 passed as pointer to struct member, struct.ip = &ip_d9 - const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_h9 = {'c', ip_d9}; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc9), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h9, - result_d); - #if ENABLE_DECLARE_INITIALIZATION_POINTER - ResultValidation(); - #endif - } - - SECTION("Passing struct with uintN_t") { - ResetValidationMem(); - hipLaunchKernelStruct_t10 hipLaunchKernelStruct_h10; - hipLaunchKernelStruct_h10.u64 = UINT64_MAX; - hipLaunchKernelStruct_h10.u32 = 1; - hipLaunchKernelStruct_h10.u8 = UINT8_MAX; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc10), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h10, - result_d); - ResultValidation(); - } - - SECTION("hipLaunchKernelStructFunc11") { - ResetValidationMem(); - hipLaunchKernelStruct_t11 hipLaunchKernelStruct_h11; - hipLaunchKernelStruct_h11.i1 = 1; - hipLaunchKernelStruct_h11.vint = 0; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc11), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h11, - result_d); - ResultValidation(); - } - - SECTION("Passing struct 
with simple class object") { - ResetValidationMem(); - hipLaunchKernelStruct_t12 hipLaunchKernelStruct_h12; - hipLaunchKernelStruct_h12.c1 = 'c'; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc12), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h12, - result_d); - #if ENABLE_CLASS_OBJ_ACCESS // access class obj from device broken - // Validation part of the struct, hipLaunchKernelStructFunc12 - ResultValidation(); - #endif - } - - SECTION("Passing struct with simple __device__ func()") { - ResetValidationMem(); - hipLaunchKernelStruct_t13 hipLaunchKernelStruct_h13; - hipLaunchKernelStruct_h13.i1 = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc13), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h13, - result_d); - ResultValidation(); - } - - SECTION("Passing struct with array variable") { - ResetValidationMem(); - hipLaunchKernelStruct_t14 hipLaunchKernelStruct_h14; - hipLaunchKernelStruct_h14.readint = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc14), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h14, - result_d); - ResultValidation(); - } - - SECTION("Passing struct with heap memory") { - ResetValidationMem(); - hipLaunchKernelStruct_t15 hipLaunchKernelStruct_h15; - hipLaunchKernelStruct_h15.c1 = 'c'; - - #if ENABLE_HEAP_MEMORY_ACCESS // causing page fault here, - // on small bar set - HIP_CHECK(hipMalloc(&hipLaunchKernelStruct_h15.heapmem, - BLOCK_DIM_SIZE*sizeof(int))); - HIP_CHECK(hipMemset(&hipLaunchKernelStruct_h15.heapmem, - 0, BLOCK_DIM_SIZE)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc15), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h15, - result_d); - ResultValidation(); - #endif - } - - SECTION("Passing simple template struct") { - ResetValidationMem(); - hipLaunchKernelStruct_t16 hipLaunchKernelStruct_h16; - hipLaunchKernelStruct_h16.t1 = 'c'; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc16), - 
dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h16, - result_d); - ResultValidation(); - } - - SECTION("Passing simple explicit template struct") { - ResetValidationMem(); - hipLaunchKernelStruct_t17 hipLaunchKernelStruct_h17; - hipLaunchKernelStruct_h17.t1 = 1; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc17), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h17, - result_d); - ResultValidation(); - } - - SECTION("Passing struct with simple __device__ func()") { - ResetValidationMem(); - hipLaunchKernelStruct_t18 hipLaunchKernelStruct_h18; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc18), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h18, - result_d); - ResultValidation(); - } - - SECTION("Passing user defined stack") { - ResetValidationMem(); - hipLaunchKernelStruct_t19 hipLaunchKernelStruct_h19; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc19), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h19); - #if ENABLE_USER_STL - // Validation part of the struct, hipLaunchKernelStructFunc19 - HIPASSERT(hipLaunchKernelStruct_h19.stackSize() == 0); - #endif - } - - // Test: Passing struct which is initiazed out of order - // accessing same elements in order from device - SECTION("Passing struct which is initiazed out of order") { - ResetValidationMem(); - hipLaunchKernelStruct_t20 hipLaunchKernelStruct_h20; - hipLaunchKernelStruct_h20.name = 'A'; - hipLaunchKernelStruct_h20.rank = 2; - hipLaunchKernelStruct_h20.age = 42; - bool *result_d20, *result_h20; - #if ENABLE_OUT_OF_ORDER_INITIALIZATION - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc20), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h20, result_d); - ResultValidation(); - #endif - } - - SECTION("Passing struct with bit fields operation") { - ResetValidationMem(); - hipLaunchKernelStruct_t21 hipLaunchKernelStruct_h21 = - // out of order initalization - {2, 0}; - bool 
*result_d21, *result_h21; - hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc21), - dim3(BLOCK_DIM_SIZE), - dim3(1), 0, 0, hipLaunchKernelStruct_h21, result_d); - #if ENABLE_BIT_FIELDS - ResultValidation(); - #endif - } - - SECTION("Passing the different hipLaunchParm options") { - float* Ad; - HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), 1024)); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), size_t(1024), 1, 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), 1024, dim3(1), 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), 1, 0, 0, Ad); - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), dim3(1), 0, 0, Ad); - - // Test: Passing macro to hipLaunchKernelGGL -#define KERNEL_CONFIG dim3(1024), dim3(1), 0, 0 - hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), KERNEL_CONFIG, Ad); - - // Test: Same thing with templates: - int a; - float b; - hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), - KERNEL_CONFIG, a, b); - -#define TYPE_PARAM_CONFIG int, float - hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), - KERNEL_CONFIG, a, b); - - // Test: Passing hipLaunchKernelGGL inside another macro: - float e0; - MY_LAUNCH_MACRO(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad), e0, j); - MY_LAUNCH_MACRO(WRAP(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad)), e0, j); - -#ifdef EXTRA_PARENS_1 - // Don't wrap hipLaunchKernelGGL in extra set of parens: - MY_LAUNCH_MACRO((hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad)), e0, j); -#endif - - MY_LAUNCH(hipLaunchKernelGGL(vAdd, dim3(1024), dim3(1), - 0, 0, Ad), true, "firstCall"); - float* A; - float e1; - MY_LAUNCH_WITH_PAREN(hipMalloc(&A, 100), true, "launch2"); - -#ifdef EXTRA_PARENS_2 - // MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues. - MY_LAUNCH_WITH_PAREN(hipLaunchKernelGGL(vAdd, dim3(1024), - dim3(1), 0, 0, Ad), true, "firstCall"); -#endif - } - HIP_CHECK(hipHostFree(result_h)); - HIP_CHECK(hipFree(result_d)); -} - -/** -* End doxygen group KernelTest. 
-* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wunused-result" +#pragma clang diagnostic ignored "-Wuninitialized" + +// Memory alignment is broken +// Update: with latest changes the aligment is working fine, hence enabled +#define ENABLE_ALIGNMENT_TEST_SMALL_BAR 1 + +// Packed member atribute broken +#define ENABLE_PACKED_TEST 0 + +// Update: with latest changes struct class object +// from device is working fine, hence enabled +#define ENABLE_CLASS_OBJ_ACCESS 1 + +// accessing dynamic/heap memory from device is broken +#define ENABLE_HEAP_MEMORY_ACCESS 0 + +// Update: with latest changes it's working hence enabled +#define ENABLE_USER_STL 1 + +// Update: with latest changes it's working hence enabled +#define ENABLE_OUT_OF_ORDER_INITIALIZATION 1 + +// Direct initialization of struct broken, +// ip_d9 is a pointer, uint_t*, hipLaunchKernelStruct_h9 = {'c', ip_d9}; +#define ENABLE_DECLARE_INITIALIZATION_POINTER 0 + +// Bit fields are broken +#define ENABLE_BIT_FIELDS 0 + +static const int BLOCK_DIM_SIZE = 512; + +// allocate memory on device and host for result validation +static bool *result_d, *result_h; + +static hipError_t hipMallocError = hipErrorUnknown; +static hipError_t hipHostMallocError = hipErrorUnknown; +static hipError_t hipMemsetError = hipErrorUnknown; + +static void ResultValidation() { + HIP_CHECK(hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + REQUIRE(result_h[k] == true); + } + return; +} + +// Segregating the reset part as it was causing a problem when i put inside +// ResultValidation() function, the memory was not reset correctly for the +// tests which were disabled. +static void ResetValidationMem() { + // reset the memory to false to reuse it. 
+ HIP_CHECK(hipMemset(result_d, false, BLOCK_DIM_SIZE)); + HIP_CHECK(hipMemset(result_h, false, BLOCK_DIM_SIZE)); + return; +} + +// This test is to verify Struct with variables +// support, read from device. +typedef struct hipLaunchKernelStruct1 { + int li; // local int + float lf; // local float + bool result; // local bool +} hipLaunchKernelStruct_t1; + +// This test is to verify struct with padding, read from device +typedef struct hipLaunchKernelStruct2 { + char c1; + int64_t l1; + char c2; + int64_t l2; + bool result; +} hipLaunchKernelStruct_t2; + +// This test is to verify struct with padding, read from device +typedef struct hipLaunchKernelStruct3 { + char bf1; + char bf2; + int64_t l1; + char bf3; + bool result; +} hipLaunchKernelStruct_t3; + +// This test is to verify empty struct +typedef struct hipLaunchKernelStruct4 { + // empty struct, size will be verified from device side,size 1Byte +} hipLaunchKernelStruct_t4; + +// This test is to verify struct with pointer member variable. 
+typedef struct hipLaunchKernelStruct5 { + char c1; + char* cp; // char pointer +} hipLaunchKernelStruct_t5; + + +// This test is to verify struct with aligned(8), +// right now it's broken on hcc & hip-clang +typedef struct hipLaunchKernelStruct6 { + char c1; + int16_t si; +} __attribute__((aligned(8))) hipLaunchKernelStruct_t6; + +// This test is to verify struct with aligned(16), +// right now it's brokenon hcc & hip-clang +typedef struct hipLaunchKernelStruct7 { + char c1; + int16_t si; +} __attribute__((aligned(16))) hipLaunchKernelStruct_t7; + +// This test is to verify struct with packed & aligned, +// size should be 4Bytes right now it's broken on hcc & hip-clang +typedef struct hipLaunchKernelStruct8 { + char c1; + int16_t si; + bool b; +}__attribute__((packed, aligned(4))) hipLaunchKernelStruct_t8; + +// This test is to verify struct with packed, no alignment as Sam suggested +// size should be 4Bytes, right now it's broken on hcc & hip-clang +typedef struct hipLaunchKernelStruct8A { + char c1; + int16_t si; + bool b; +}__attribute__((packed)) hipLaunchKernelStruct_t8A; + +// This test is to verify struct with alignment, no packing as Sam suggested +// size should be 8Bytes as no packing, right now it's broken on hcc & hip-clang +typedef struct hipLaunchKernelStruct8B { + char c1; + int16_t si; + bool b; +}__attribute__((aligned(8))) hipLaunchKernelStruct_t8B; + +// This test is to verify const struct object +typedef struct hipLaunchKernelStruct9 { + char c1; + uint32_t* ip; // uint pointer +} hipLaunchKernelStruct_t9; + +// This test is to verify struct with stdint types, uintN_t +typedef struct hipLaunchKernelStruct10 { + uint64_t u64; + uint32_t u32; + uint8_t u8; +} hipLaunchKernelStruct_t10; + +// This test is to verify struct with volatile member +typedef struct hipLaunchKernelStruct11 { + int i1; + volatile unsigned int vint; +} hipLaunchKernelStruct_t11; + +// This test is to verify struct with simple class object +class base { + public: + int i = 
0; + base() {} +}; +typedef struct hipLaunchKernelStruct12 { + base b; + char c1; +} hipLaunchKernelStruct_t12; + +// This test is to verify struct with __device__ func() attribute +typedef struct hipLaunchKernelStruct13 { + int i1; + __device__ int getvalue() { return i1; } +} hipLaunchKernelStruct_t13; + +// This test is to verify struct with array variable, +// write to from device +typedef struct hipLaunchKernelStruct14 { + int readint; + int writeint[BLOCK_DIM_SIZE]; // will write to this from device +} hipLaunchKernelStruct_t14; + +// This test is to verify struct with dynamic memory, new int +// the heap memory will be accessed from device +typedef struct hipLaunchKernelStruct15 { + char c1; + int* heapmem; // allocated using hipMalloc() +} hipLaunchKernelStruct_t15; + +// This test is to verify simple template struct +template +struct hipLaunchKernelStruct_t16 { + T t1; +}; + +// This test is to verify simple explicity template struct +template struct hipLaunchKernelStruct_t17 {}; +template<> // explicit template +struct hipLaunchKernelStruct_t17 { + int t1; +}; + +// This test is to verity write to struct memory using __device__ func() +typedef struct hipLaunchKernelStruct18 { + char c1; + __device__ void setChar(char c) { c1 = c; } + __device__ int getChar() { return c1; } +} hipLaunchKernelStruct_t18; + +// This test is to verity user defined STL, simple stack implementation +typedef struct stackNode { + int data; + stackNode* nextNode = NULL; +} stackNode_t; +typedef struct hipLaunchKernelStruct19 { + stackNode_t* stack = NULL; + unsigned int size_ = 0; + void pushMe(int value) { // not a device function, setting from host + stackNode_t* newNode; + HIP_CHECK(hipMalloc(reinterpret_cast(&newNode), + sizeof(stackNode_t))); + HIP_CHECK(hipMemset(&newNode->data, value, sizeof(stackNode_t))); + // newNode->data = value; + ++size_; + if (stack == NULL) { + stack = newNode; + return; + } + stackNode_t* currentHead = stack; + stack = newNode; + stack->nextNode = 
currentHead; + return; + } + __device__ void popMe() { + stackNode_t* currentHead = stack; + stack = stack->nextNode; + --size_; + // delete currentHead; // no idea why delete not working + return; + } + int stackSize() { + return size_; + } +} hipLaunchKernelStruct_t19; + +// This test is to verify out of order initalizer of struct elements +// and access in-order, from device. +typedef struct hipLaunchKernelStruct20 { + char name; + int age; + int rank; +} hipLaunchKernelStruct_t20; + +// This test is to verify bit fields operations +// the size should be 1Bytes +typedef struct hipLaunchKernelStruct21 { + int i : 3; // limiting bits to 3 + int j : 2; // limiting bits to 2 +} hipLaunchKernelStruct_t21; + +// Passing struct to a hipLaunchKernelGGL(), +// read and write into the same struct +__global__ void hipLaunchKernelStructFunc1( + hipLaunchKernelStruct_t1 hipLaunchKernelStruct_, + bool* result_d1) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d1[x] = ((hipLaunchKernelStruct_.li == 1) + && (hipLaunchKernelStruct_.lf == 1.0) + && (hipLaunchKernelStruct_.result == false)); +} + +// Passing struct to a hipLaunchKernelGGL(), checks padding, +// read and write into the same struct +__global__ void hipLaunchKernelStructFunc2( + hipLaunchKernelStruct_t2 hipLaunchKernelStruct_, + bool* result_d2) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d2[x] = ((hipLaunchKernelStruct_.c1 == 'a') + && (hipLaunchKernelStruct_.l1 == 1.0) + && (hipLaunchKernelStruct_.c2 == 'b') + && (hipLaunchKernelStruct_.l2 == 2.0) ); +} + +// Passing struct to a hipLaunchKernelGGL(), checks padding, +// read and write into the same struct +__global__ void hipLaunchKernelStructFunc3( + hipLaunchKernelStruct_t3 hipLaunchKernelStruct_, + bool* result_d3) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d3[x] = 
((hipLaunchKernelStruct_.bf1 == 1) + && (hipLaunchKernelStruct_.bf2 == 1) + && (hipLaunchKernelStruct_.l1 == 1.0) + && (hipLaunchKernelStruct_.bf3 == 1) ); +} + +// Passing empty struct to a hipLaunchKernelGGL(), +// check the size of 1Byte, set result_d4 to true if condition met +__global__ void hipLaunchKernelStructFunc4( + hipLaunchKernelStruct_t4 hipLaunchKernelStruct_, + bool* result_d4) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d4[x] = (sizeof(hipLaunchKernelStruct_) == 1); +} + +// Passing struct with pointer object to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc5( + hipLaunchKernelStruct_t5 hipLaunchKernelStruct_, + bool* result_d5) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d5[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (*hipLaunchKernelStruct_.cp == 'p')); +} + +// Passing struct which is aligned to 8Byte to a hipLaunchKernelGGL(), +// set the result_d6 to true if condition met +__global__ void hipLaunchKernelStructFunc6( + hipLaunchKernelStruct_t6 hipLaunchKernelStruct_, + bool* result_d6) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + // get the address of the struct + // size_t(p)%8 will be 0 if aligned to 8Byte address space + int *p = reinterpret_cast(&hipLaunchKernelStruct_); + result_d6[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.si == 1) + && ((size_t(p))%8 ==0)); +} + +// Passing struct which is aligned to 16Byte, +// set the result_d7 to true if condition met +__global__ void hipLaunchKernelStructFunc7( + hipLaunchKernelStruct_t7 hipLaunchKernelStruct_, + bool* result_d7) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + // get the address of the struct + // size_t(p)%16 will be 0 if aligned to 16Byte address space + int *p = 
reinterpret_cast(&hipLaunchKernelStruct_); + result_d7[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.si == 1) + && ((size_t(p))%16 ==0) ); +} + +// Passing struct which is packed & aligned to 4Byte, +// set the result_d8 to true if condition met +__global__ void hipLaunchKernelStructFunc8( + hipLaunchKernelStruct_t8 hipLaunchKernelStruct_, + bool* result_d8) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + // get the address of the xth element, struct[x], + // size_t(p)%4 will be 0 if aligned to 4Byte address space + int *p = reinterpret_cast(&hipLaunchKernelStruct_); + result_d8[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.si == 1) + && ((size_t(p))%4 ==0) + && (sizeof(hipLaunchKernelStruct_) == 4)); +} + +// Passing struct which is packed only, as Sam suggested, should be 4Bytes +// set the result_d8A to true if condition met +__global__ void hipLaunchKernelStructFunc8A( + hipLaunchKernelStruct_t8A hipLaunchKernelStruct_, + bool* result_d8A) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + // this is packed struct + // the address will not be aglined in this case hence condition removed + // only sizeof(hipLaunchKernelStruct_) will be valided + result_d8A[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.si == 1) + && (sizeof(hipLaunchKernelStruct_) == 4)); +} + +// Passing struct which is aligned(4) only, as Sam suggested +// , size should be 8Bytes, set the result_d8B to true if condition met +__global__ void hipLaunchKernelStructFunc8B( + hipLaunchKernelStruct_t8B hipLaunchKernelStruct_, + bool* result_d8B) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + // get the address of the xth element, struct[x], + // size_t(p)%4 will be 0 if aligned to 4Byte address space + int *p = reinterpret_cast(&hipLaunchKernelStruct_); + result_d8B[x] = 
((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.si == 1) + && ((size_t(p))%8 == 0) + && (sizeof(hipLaunchKernelStruct_) == 8)); +} + +// Passing struct with uint pointer object to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc9( + const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_, + bool* result_d9) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + + // set the result to true if the condition met + result_d9[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (*hipLaunchKernelStruct_.ip == 1)); +} + +// Passing struct with stdint types object, uintN_t, to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc10( + hipLaunchKernelStruct_t10 hipLaunchKernelStruct_, + bool* result_d10) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d10[x] = ((hipLaunchKernelStruct_.u64 == UINT64_MAX) + && (hipLaunchKernelStruct_.u32 == 1) + && (hipLaunchKernelStruct_.u8 == UINT8_MAX)); +} + +// Passing struct with volatile member, to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc11( + hipLaunchKernelStruct_t11 hipLaunchKernelStruct_, + bool* result_d11) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d11[x] = ((hipLaunchKernelStruct_.i1 == 1) + && (hipLaunchKernelStruct_.vint == 0)); +} + +// Passing struct with simple class obj, to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc12( + hipLaunchKernelStruct_t12 hipLaunchKernelStruct_, + bool* result_d12) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d12[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.b.i == 0)); +} + +// Passing struct with simple __device__ func(), to a hipLaunchKernelGGL() +__global__ void hipLaunchKernelStructFunc13( + hipLaunchKernelStruct_t13 hipLaunchKernelStruct_, + bool* result_d13) { + int x = blockIdx.x * blockDim.x + 
threadIdx.x; + // set the result to true if the condition met + result_d13[x] = ((hipLaunchKernelStruct_.i1 == 1) + && (hipLaunchKernelStruct_.getvalue() == 1)); +} + +// Passing struct with array variable, write to from device +__global__ void hipLaunchKernelStructFunc14( + hipLaunchKernelStruct_t14 hipLaunchKernelStruct_, + bool* result_d14) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + hipLaunchKernelStruct_.writeint[x] = 1; + // set the result to true if the condition met + result_d14[x] = ((hipLaunchKernelStruct_.readint == 1) + && (hipLaunchKernelStruct_.writeint[x] == 1)); +} + +// Passing struct with struct with dynamic memory, new int +// the heap memory will be accessed from device +__global__ void hipLaunchKernelStructFunc15( + hipLaunchKernelStruct_t15 hipLaunchKernelStruct_, + bool* result_d15) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d15[x] = ((hipLaunchKernelStruct_.c1 == 'c') + && (hipLaunchKernelStruct_.heapmem[x] == 1)); +} + +// Passing simple template struct +__global__ void hipLaunchKernelStructFunc16( + hipLaunchKernelStruct_t16 hipLaunchKernelStruct_, + bool* result_d16) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d16[x] = (hipLaunchKernelStruct_.t1 == 'c'); +} + +// Passing simple explicit template struct +__global__ void hipLaunchKernelStructFunc17( + hipLaunchKernelStruct_t17 hipLaunchKernelStruct_, + bool* result_d17) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // set the result to true if the condition met + result_d17[x] = (hipLaunchKernelStruct_.t1 == 1); +} + +// Passing struct and write to struct memory using __device__ func() +__global__ void hipLaunchKernelStructFunc18( + hipLaunchKernelStruct_t18 hipLaunchKernelStruct_, + bool* result_d18) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + hipLaunchKernelStruct_.setChar('c'); + // set the result to true if the condition met + 
result_d18[x] = (hipLaunchKernelStruct_.getChar() == 'c'); +} + +// Passing simple user defined stack implemenration, using __device__ func() +__global__ void hipLaunchKernelStructFunc19( + hipLaunchKernelStruct_t19 hipLaunchKernelStruct_) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // stack should be empty after the kernel execustion, verify on host side + hipLaunchKernelStruct_.popMe(); +} + +// Passing out of order initalized struct, access in-order +__global__ void hipLaunchKernelStructFunc20( + hipLaunchKernelStruct_t20 hipLaunchKernelStruct_, + bool* result_d20) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // accessing struct members in order + result_d20[x] = (hipLaunchKernelStruct_.name == 'A' + // strcmp(hipLaunchKernelStruct_.name, "AMD") -> strcmp is not broken + && hipLaunchKernelStruct_.age == 42 + && hipLaunchKernelStruct_.rank == 2); +} + +// Passing struct with bit fields +__global__ void hipLaunchKernelStructFunc21( + hipLaunchKernelStruct_t21 hipLaunchKernelStruct_, + bool* result_d21) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + // accessing struct members in order + result_d21[x] = (hipLaunchKernelStruct_.i == 2 + && hipLaunchKernelStruct_.j == 0 + && (sizeof(hipLaunchKernelStruct_) == 1)); +} + +__global__ void vAdd(float* a) {} + +template +__global__ void myKernel(T1 a, T2 b) {} + + +//--- +// Some wrapper macro for testing: +#define WRAP(...) 
__VA_ARGS__ + +#define MY_LAUNCH_MACRO(cmd, elapsed, quiet) \ + do { \ + HIP_CHECK(hipDeviceSynchronize()); \ + cmd; \ + HIP_CHECK(hipDeviceSynchronize()); \ + } while (0); + + +#define MY_LAUNCH(command, doTrace, msg) \ + { \ + if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ + command; \ + } + + +#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg) \ + { \ + if (doTrace) printf("TRACE: %s %s\n", msg, #command); \ + (command); \ + } + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - Passing struct to a hipLaunchKernelGGL(), + * read and write into the same struct + * - Test to verify by Passing Struct type, checks padding + * - Test to verify by Passing Struct type, checks padding, assigning integer to a char + * - Test to verify by Passing empty struct + * - Test to verify by Passing struct with pointer object to a hipLaunchKernelGGL() + * - Test to verify by Passing struct with aligned(8) + * - Test to verify by Passing struct with aligned(16) + * - Test to verify by Passing struct with packed aligned to 4Bytes + * - Test to verify by Passing struct with packed to 4Bytes + * - Test to verify by Passing struct with aligned(4) to 4Bytes, size is 8Bytes + * - Test to verify by Passing const struct object to a hipLaunchKernelGGL() + * - Test to verify by Passing struct with uintN_t as member variables + * - Test to verify by Passing struct with uintN_t as member variables + * - Test to verify by Passing struct with simple class object + * - Test to verify by Passing struct with simple __device__ func() + * - Test to verify by Passing struct with array variable, write to from device + * - Test to verify by Passing simple template struct + * - Test to verify by Passing 
simple explicit template struct + * - Test to verify by Passing struct with simple __device__ func() to struct memory + * - Test to verify by Passing struct which is initiazed out of order + * accessing same elements in order from device + * - Test to verify by Passing struct with bit fields operation + * accessing same elements in order from device + * - Test to verify by Passing the different hipLaunchParm options + + * Test source + * ------------------------ + * - catch/unit/kernel/hipLaunchParm.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipLaunchParm") { + hipMallocError = hipMalloc(reinterpret_cast(&result_d), + BLOCK_DIM_SIZE*sizeof(bool)); + hipHostMallocError = hipHostMalloc(reinterpret_cast(&result_h), + BLOCK_DIM_SIZE*sizeof(bool)); + hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE); + + // Validating memory & initial value, for result_d, result_h + REQUIRE(hipMallocError == hipSuccess); + REQUIRE(hipHostMallocError == hipSuccess); + REQUIRE(hipMemsetError == hipSuccess); + + SECTION("check access from device") { + ResetValidationMem(); + hipLaunchKernelStruct_t1 hipLaunchKernelStruct_h1; + hipLaunchKernelStruct_h1.li = 1; + hipLaunchKernelStruct_h1.lf = 1.0; + hipLaunchKernelStruct_h1.result = false; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc1), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h1, + result_d); + ResultValidation(); + } + + SECTION("check padding") { + ResetValidationMem(); + hipLaunchKernelStruct_t2 hipLaunchKernelStruct_h2; + hipLaunchKernelStruct_h2.c1 = 'a'; + hipLaunchKernelStruct_h2.l1 = 1.0; + hipLaunchKernelStruct_h2.c2 = 'b'; + hipLaunchKernelStruct_h2.l2 = 2.0; + hipLaunchKernelStruct_h2.result = false; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc2), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h2, + result_d); + ResultValidation(); + } + + SECTION("check padding assigning int to char") { 
+ ResetValidationMem(); + hipLaunchKernelStruct_t3 hipLaunchKernelStruct_h3; + hipLaunchKernelStruct_h3.bf1 = 1; + hipLaunchKernelStruct_h3.bf2 = 1; + hipLaunchKernelStruct_h3.l1 = 1.0; + hipLaunchKernelStruct_h3.bf3 = 1; + hipLaunchKernelStruct_h3.result = false; + // initialize to false, will be set to + // true if the struct size is 1Byte, from device size + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc3), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h3, + result_d); + ResultValidation(); + } + + SECTION("Empty struct") { + ResetValidationMem(); + hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc4), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h4, + result_d); + ResultValidation(); + } + + SECTION("Passing struct with pointer object") { + ResetValidationMem(); + hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5; + char* cp_d5; // This is passed as pointer to struct member + // allocating memory for char pointer on device + HIP_CHECK(hipMalloc(reinterpret_cast(&cp_d5), sizeof(char))); + HIP_CHECK(hipMemset(cp_d5, 'p', sizeof(char))); + hipLaunchKernelStruct_h5.c1 = 'c'; + hipLaunchKernelStruct_h5.cp = cp_d5; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc5), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h5, + result_d); + ResultValidation(); + } + + SECTION("Passing struct with aligned(8)") { + ResetValidationMem(); + hipLaunchKernelStruct_t6 hipLaunchKernelStruct_h6; + hipLaunchKernelStruct_h6.c1 = 'c'; + hipLaunchKernelStruct_h6.si = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc6), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h6, + result_d); + // alignment is broken hence disabled the validation part + #if ENABLE_ALIGNMENT_TEST_SMALL_BAR + ResultValidation(); + #endif + } + + SECTION("Passing struct with aligned(16)") { + ResetValidationMem(); + hipLaunchKernelStruct_t7 
hipLaunchKernelStruct_h7; + hipLaunchKernelStruct_h7.c1 = 'c'; + hipLaunchKernelStruct_h7.si = 1; + #if ENABLE_ALIGNMENT_TEST_SMALL_BAR // This is broken on small bar + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc7), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h7, + result_d); + ResultValidation(); + #endif + } + + SECTION("Passing struct with packed aligned to 4bytes") { + ResetValidationMem(); + hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8; + hipLaunchKernelStruct_h8.c1 = 'c'; + hipLaunchKernelStruct_h8.si = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h8, + result_d); + // packed member broken on large and small bar setup. + #if ENABLE_PACKED_TEST + ResultValidation(); + #endif + } + + SECTION("Passing struct with packed to 4Bytes") { + ResetValidationMem(); + hipLaunchKernelStruct_t8A hipLaunchKernelStruct_h8A; + hipLaunchKernelStruct_h8A.c1 = 'c'; + hipLaunchKernelStruct_h8A.si = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8A), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h8A, + result_d); + // packed member broken on large and small bar setup. 
+ #if ENABLE_PACKED_TEST + ResultValidation(); + #endif + } + + SECTION("Passing struct with aligned(4) to 4Bytes") { + ResetValidationMem(); + hipLaunchKernelStruct_t8B hipLaunchKernelStruct_h8B; + hipLaunchKernelStruct_h8B.c1 = 'c'; + hipLaunchKernelStruct_h8B.si = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8B), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h8B, + result_d); + // alignment is broken hence disabled the validation part + #if ENABLE_ALIGNMENT_TEST_SMALL_BAR + ResultValidation(); + #endif + } + + SECTION("Passing const struct object") { + ResetValidationMem(); + uint32_t* ip_d9; + // allocating memory for char pointer on device + HIP_CHECK(hipMalloc(reinterpret_cast(&ip_d9), sizeof(uint32_t))); + HIP_CHECK(hipMemset(ip_d9, 1, sizeof(uint32_t))); + // ip_d9 passed as pointer to struct member, struct.ip = &ip_d9 + const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_h9 = {'c', ip_d9}; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc9), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h9, + result_d); + #if ENABLE_DECLARE_INITIALIZATION_POINTER + ResultValidation(); + #endif + } + + SECTION("Passing struct with uintN_t") { + ResetValidationMem(); + hipLaunchKernelStruct_t10 hipLaunchKernelStruct_h10; + hipLaunchKernelStruct_h10.u64 = UINT64_MAX; + hipLaunchKernelStruct_h10.u32 = 1; + hipLaunchKernelStruct_h10.u8 = UINT8_MAX; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc10), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h10, + result_d); + ResultValidation(); + } + + SECTION("hipLaunchKernelStructFunc11") { + ResetValidationMem(); + hipLaunchKernelStruct_t11 hipLaunchKernelStruct_h11; + hipLaunchKernelStruct_h11.i1 = 1; + hipLaunchKernelStruct_h11.vint = 0; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc11), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h11, + result_d); + ResultValidation(); + } + + SECTION("Passing struct 
with simple class object") { + ResetValidationMem(); + hipLaunchKernelStruct_t12 hipLaunchKernelStruct_h12; + hipLaunchKernelStruct_h12.c1 = 'c'; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc12), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h12, + result_d); + #if ENABLE_CLASS_OBJ_ACCESS // access class obj from device broken + // Validation part of the struct, hipLaunchKernelStructFunc12 + ResultValidation(); + #endif + } + + SECTION("Passing struct with simple __device__ func()") { + ResetValidationMem(); + hipLaunchKernelStruct_t13 hipLaunchKernelStruct_h13; + hipLaunchKernelStruct_h13.i1 = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc13), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h13, + result_d); + ResultValidation(); + } + + SECTION("Passing struct with array variable") { + ResetValidationMem(); + hipLaunchKernelStruct_t14 hipLaunchKernelStruct_h14; + hipLaunchKernelStruct_h14.readint = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc14), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h14, + result_d); + ResultValidation(); + } + + SECTION("Passing struct with heap memory") { + ResetValidationMem(); + hipLaunchKernelStruct_t15 hipLaunchKernelStruct_h15; + hipLaunchKernelStruct_h15.c1 = 'c'; + + #if ENABLE_HEAP_MEMORY_ACCESS // causing page fault here, + // on small bar set + HIP_CHECK(hipMalloc(&hipLaunchKernelStruct_h15.heapmem, + BLOCK_DIM_SIZE*sizeof(int))); + HIP_CHECK(hipMemset(&hipLaunchKernelStruct_h15.heapmem, + 0, BLOCK_DIM_SIZE)); + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc15), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h15, + result_d); + ResultValidation(); + #endif + } + + SECTION("Passing simple template struct") { + ResetValidationMem(); + hipLaunchKernelStruct_t16 hipLaunchKernelStruct_h16; + hipLaunchKernelStruct_h16.t1 = 'c'; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc16), + 
dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h16, + result_d); + ResultValidation(); + } + + SECTION("Passing simple explicit template struct") { + ResetValidationMem(); + hipLaunchKernelStruct_t17 hipLaunchKernelStruct_h17; + hipLaunchKernelStruct_h17.t1 = 1; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc17), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h17, + result_d); + ResultValidation(); + } + + SECTION("Passing struct with simple __device__ func()") { + ResetValidationMem(); + hipLaunchKernelStruct_t18 hipLaunchKernelStruct_h18; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc18), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h18, + result_d); + ResultValidation(); + } + + SECTION("Passing user defined stack") { + ResetValidationMem(); + hipLaunchKernelStruct_t19 hipLaunchKernelStruct_h19; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc19), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h19); + #if ENABLE_USER_STL + // Validation part of the struct, hipLaunchKernelStructFunc19 + HIPASSERT(hipLaunchKernelStruct_h19.stackSize() == 0); + #endif + } + + // Test: Passing struct which is initiazed out of order + // accessing same elements in order from device + SECTION("Passing struct which is initiazed out of order") { + ResetValidationMem(); + hipLaunchKernelStruct_t20 hipLaunchKernelStruct_h20; + hipLaunchKernelStruct_h20.name = 'A'; + hipLaunchKernelStruct_h20.rank = 2; + hipLaunchKernelStruct_h20.age = 42; + bool *result_d20, *result_h20; + #if ENABLE_OUT_OF_ORDER_INITIALIZATION + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc20), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h20, result_d); + ResultValidation(); + #endif + } + + SECTION("Passing struct with bit fields operation") { + ResetValidationMem(); + hipLaunchKernelStruct_t21 hipLaunchKernelStruct_h21 = + // out of order initalization + {2, 0}; + bool 
*result_d21, *result_h21; + hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc21), + dim3(BLOCK_DIM_SIZE), + dim3(1), 0, 0, hipLaunchKernelStruct_h21, result_d); + #if ENABLE_BIT_FIELDS + ResultValidation(); + #endif + } + + SECTION("Passing the different hipLaunchParm options") { + float* Ad; + HIP_CHECK(hipMalloc(reinterpret_cast(&Ad), 1024)); + hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), size_t(1024), 1, 0, 0, Ad); + hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), 1024, dim3(1), 0, 0, Ad); + hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), 1, 0, 0, Ad); + hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), dim3(1), 0, 0, Ad); + + // Test: Passing macro to hipLaunchKernelGGL +#define KERNEL_CONFIG dim3(1024), dim3(1), 0, 0 + hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), KERNEL_CONFIG, Ad); + + // Test: Same thing with templates: + int a; + float b; + hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), + KERNEL_CONFIG, a, b); + +#define TYPE_PARAM_CONFIG int, float + hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel), + KERNEL_CONFIG, a, b); + + // Test: Passing hipLaunchKernelGGL inside another macro: + float e0; + MY_LAUNCH_MACRO(hipLaunchKernelGGL(vAdd, dim3(1024), + dim3(1), 0, 0, Ad), e0, j); + MY_LAUNCH_MACRO(WRAP(hipLaunchKernelGGL(vAdd, dim3(1024), + dim3(1), 0, 0, Ad)), e0, j); + +#ifdef EXTRA_PARENS_1 + // Don't wrap hipLaunchKernelGGL in extra set of parens: + MY_LAUNCH_MACRO((hipLaunchKernelGGL(vAdd, dim3(1024), + dim3(1), 0, 0, Ad)), e0, j); +#endif + + MY_LAUNCH(hipLaunchKernelGGL(vAdd, dim3(1024), dim3(1), + 0, 0, Ad), true, "firstCall"); + float* A; + float e1; + MY_LAUNCH_WITH_PAREN(hipMalloc(&A, 100), true, "launch2"); + +#ifdef EXTRA_PARENS_2 + // MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues. + MY_LAUNCH_WITH_PAREN(hipLaunchKernelGGL(vAdd, dim3(1024), + dim3(1), 0, 0, Ad), true, "firstCall"); +#endif + } + HIP_CHECK(hipHostFree(result_h)); + HIP_CHECK(hipFree(result_d)); +} + +/** +* End doxygen group KernelTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc b/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc index a453dbbde6..679bce5355 100644 --- a/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc +++ b/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc @@ -1,464 +1,464 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include - - -class HipFunctorTests { - public: - // Test that a class functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForSimpleClassFunctor(void); - // Test that a templated class functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForClassTemplateFunctor(void); - // Test that a class functor object ptr can be passed to hiplaunchparam - // and can be used in kernel - void TestForClassObjPtrFunctor(void); - // Test that a class object containing functor can be passed - // to hiplaunchparam and can be used in kernel - void TestForFunctorContainInClassObj(void); - // Test that a stuct functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForSimpleStructFunctor(void); - // Test that a stuct functor object ptr can be passed to hiplaunchparam - // and can be used in kernel - void TestForStructObjPtrFunctor(void); - // Test that a templated struct functor can be passed to hiplaunchparam - // and can be used in kernel - void TestForStructTemplateFunctor(void); - // Test that a struct object containing functor can be - // passed to hiplaunchparam and can be used in kernel - void TestForFunctorContainInStructObj(void); -}; - -static const int BLOCK_DIM_SIZE = 1024; -static const int THREADS_PER_BLOCK = 1; - -// class functor tests - -// Simple doubler Functor -class DoublerFunctor{ - public: - __device__ int operator()(int x) { return x * 2;} -}; - -// simple doubler functor passed to kernel -__global__ void DoublerFunctorKernel( - DoublerFunctor doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForSimpleClassFunctor(void) { - DoublerFunctor doubler; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for 
(int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults); - - // Validation part of TestForSimpleClassFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - -// pointer functor passed to kernel -__global__ void PtrDoublerFunctorKernel( - DoublerFunctor *doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = (*doubler_)(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForClassObjPtrFunctor(void) { - DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)]; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); - - // Validation part of TestForClassObjPtrFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); - delete[] ptrdoubler; -} 
- -class compare { - public: - template - __device__ bool operator()(const T1& v1, const T2& v2) { - return v1 > v2; - } -}; - -// template functor passed to kernel -__global__ void TemplateFunctorKernel( - compare compare_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - deviceResult[x] = compare_(2.2, 2.1); - deviceResult[x] = compare_(2, 1); - deviceResult[x] = compare_('b', 'a'); -} - -void HipFunctorTests::TestForClassTemplateFunctor(void) { - compare comparefunctor; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); - - // Validation part of TestForClassTemplateFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - - -// Doubler calculator -class DoublerCalculator { - public: - int a, result; - // fucntor contained in class object - DoublerFunctor doubler; -}; - -// doubler functor conatined in class obj passed to kernel -__global__ void DoublerCalculatorFunctorKernel( - DoublerCalculator doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_.doubler(doubler_.a); - deviceResult[x] = (doubler_.result == result); -} - -void HipFunctorTests::TestForFunctorContainInClassObj(void) { - DoublerCalculator Doubler; - bool *deviceResults, *hostResults; - 
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - Doubler.a = 5; - Doubler.result = 10; - // pass comparefunctor to hipLaunchParm - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - -// Struct functor tests - -// Simple doubler Functor -struct sDoublerFunctor { - public: - __device__ int operator()(int x) { return x * 2;} -}; - - -// simple sturct doubler functor passed to kernel -__global__ void structDoublerFunctorKernel( - sDoublerFunctor doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForSimpleStructFunctor(void) { - sDoublerFunctor doubler; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 
0, 0, doubler, deviceResults); - - // Validation part of TestForSimpleStructFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - -// ptr functor passed to kernel -__global__ void structPtrDoublerFunctorKernel( - sDoublerFunctor *doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = (*doubler_)(5); - deviceResult[x] = (result == 10); -} - -void HipFunctorTests::TestForStructObjPtrFunctor(void) { - sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)]; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); - - // Validation part of TestForStructObjPtrFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); - delete[] ptrdoubler; -} - -struct sCompare { - public: - template< typename T1, typename T2 > - __device__ bool operator()(const T1& v1, const T2& v2) { - return v1 > v2; - } -}; - -// template functor passed to kernel -__global__ void structTemplateFunctorKernel( - sCompare compare_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - 
deviceResult[x] = compare_(2.2, 2.1); - deviceResult[x] = compare_(2, 1); - deviceResult[x] = compare_('b', 'a'); -} - -void HipFunctorTests::TestForStructTemplateFunctor(void) { - sCompare comparefunctor; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true if the functor is called in device code - hostResults[k] = false; - } - - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - - // pass comparefunctor to hipLaunchKernelGGL - hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - -// Doubler calculator struct -struct sDoublerCalculator { - public: - int a, result; - // fucntor contained in class object - DoublerFunctor doubler; -}; - - - -// doubler functor contained in struct passed to kernel -__global__ void DoublerCalculatorFunctorKernel( - sDoublerCalculator doubler_, - bool* deviceResult) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int result = doubler_.doubler(doubler_.a); - deviceResult[x] = (doubler_.result == result); -} - -void HipFunctorTests::TestForFunctorContainInStructObj(void) { - sDoublerCalculator Doubler; - bool *deviceResults, *hostResults; - HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); - HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { - // initialize to false, will be set to - // true 
if the functor is called in device code - hostResults[k] = false; - } - - Doubler.a = 5; - Doubler.result = 10; - HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyHostToDevice)); - - - // pass comparefunctor to hipLaunchKernelGGL - hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), - dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); - - // Validation part of TestForStructTemplateFunctor - HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), - hipMemcpyDeviceToHost)); - for (int k = 0; k < BLOCK_DIM_SIZE; ++k) - REQUIRE(hostResults[k] == true); - HIP_CHECK(hipHostFree(hostResults)); - HIP_CHECK(hipFree(deviceResults)); -} - -/** -* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL -* @{ -* @ingroup KernelTest -* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, - std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - -* Method to invocate kernel functions -*/ - -/** - * Test Description - * ------------------------ - * - Test that a class functor can be passed to hiplaunchparam - * and can be used in kernel. - * - Test that a templated class functor can be passed to hiplaunchparam - * and can be used in kernel. - * - Test that a class functor object ptr can be passed to hiplaunchparam - * and can be used in kernel. 
- * - Test that a class object containing functor can be passed to hiplaunchparam - * and can be used in kernel - * - Test that a stuct functor can be passed to hiplaunchparam - * and can be used in kernel - * - Test that a stuct functor object ptr can be passed to hiplaunchparam - * and can be used in kernel - * - Test that a templated struct functor can be passed to hiplaunchparam - * and can be used in kernel - * - Test that a struct object containing functor can be passed to hiplaunchparam - * and can be used in kernel - - * Test source - * ------------------------ - * - catch/unit/kernel/hipLaunchParmFunctor.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipLaunchParmFunctor") { - HipFunctorTests FunctorTests; - - SECTION("test for simple class functor") { - FunctorTests.TestForSimpleClassFunctor(); - } - SECTION("test for class objptr functor") { - FunctorTests.TestForClassObjPtrFunctor(); - } - SECTION("test for class templete functor") { - FunctorTests.TestForClassTemplateFunctor(); - } - SECTION("test for simple struct functor") { - FunctorTests.TestForSimpleStructFunctor(); - } - SECTION("test for struct objptr functor") { - FunctorTests.TestForStructObjPtrFunctor(); - } - SECTION("test for struct templete functor") { - FunctorTests.TestForStructTemplateFunctor(); - } - SECTION("test for functor contain in classobj") { - FunctorTests.TestForFunctorContainInClassObj(); - } - SECTION("test for functor contain in structobj") { - FunctorTests.TestForFunctorContainInStructObj(); - } -} - -/** -* End doxygen group KernelTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + + +class HipFunctorTests { + public: + // Test that a class functor can be passed to hiplaunchparam + // and can be used in kernel + void TestForSimpleClassFunctor(void); + // Test that a templated class functor can be passed to hiplaunchparam + // and can be used in kernel + void TestForClassTemplateFunctor(void); + // Test that a class functor object ptr can be passed to hiplaunchparam + // and can be used in kernel + void TestForClassObjPtrFunctor(void); + // Test that a class object containing functor can be passed + // to hiplaunchparam and can be used in kernel + void TestForFunctorContainInClassObj(void); + // Test that a stuct functor can be passed to hiplaunchparam + // and can be used in kernel + void TestForSimpleStructFunctor(void); + // Test that a stuct functor object ptr can be passed to hiplaunchparam + // and can be used in kernel + void TestForStructObjPtrFunctor(void); + // Test that a templated struct functor can be passed to hiplaunchparam + // and can be used in kernel + void TestForStructTemplateFunctor(void); + // Test that a struct object containing functor can be + // passed to hiplaunchparam and can be used in kernel + void TestForFunctorContainInStructObj(void); +}; + +static const int BLOCK_DIM_SIZE = 1024; +static const int THREADS_PER_BLOCK = 1; + +// class functor tests + +// Simple doubler Functor +class DoublerFunctor{ + public: + __device__ int operator()(int x) { return x * 2;} +}; + +// simple doubler functor passed to kernel +__global__ void DoublerFunctorKernel( + DoublerFunctor doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = doubler_(5); + deviceResult[x] = (result == 10); +} + +void HipFunctorTests::TestForSimpleClassFunctor(void) { + DoublerFunctor doubler; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for 
(int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults); + + // Validation part of TestForSimpleClassFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + +// pointer functor passed to kernel +__global__ void PtrDoublerFunctorKernel( + DoublerFunctor *doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = (*doubler_)(5); + deviceResult[x] = (result == 10); +} + +void HipFunctorTests::TestForClassObjPtrFunctor(void) { + DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)]; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); + + // Validation part of TestForClassObjPtrFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); + delete[] ptrdoubler; +} 
+ +class compare { + public: + template + __device__ bool operator()(const T1& v1, const T2& v2) { + return v1 > v2; + } +}; + +// template functor passed to kernel +__global__ void TemplateFunctorKernel( + compare compare_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + deviceResult[x] = compare_(2.2, 2.1); + deviceResult[x] = compare_(2, 1); + deviceResult[x] = compare_('b', 'a'); +} + +void HipFunctorTests::TestForClassTemplateFunctor(void) { + compare comparefunctor; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); + + // Validation part of TestForClassTemplateFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + + +// Doubler calculator +class DoublerCalculator { + public: + int a, result; + // fucntor contained in class object + DoublerFunctor doubler; +}; + +// doubler functor conatined in class obj passed to kernel +__global__ void DoublerCalculatorFunctorKernel( + DoublerCalculator doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = doubler_.doubler(doubler_.a); + deviceResult[x] = (doubler_.result == result); +} + +void HipFunctorTests::TestForFunctorContainInClassObj(void) { + DoublerCalculator Doubler; + bool *deviceResults, *hostResults; + 
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + Doubler.a = 5; + Doubler.result = 10; + // pass comparefunctor to hipLaunchParm + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); + + // Validation part of TestForStructTemplateFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + +// Struct functor tests + +// Simple doubler Functor +struct sDoublerFunctor { + public: + __device__ int operator()(int x) { return x * 2;} +}; + + +// simple sturct doubler functor passed to kernel +__global__ void structDoublerFunctorKernel( + sDoublerFunctor doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = doubler_(5); + deviceResult[x] = (result == 10); +} + +void HipFunctorTests::TestForSimpleStructFunctor(void) { + sDoublerFunctor doubler; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 
0, 0, doubler, deviceResults); + + // Validation part of TestForSimpleStructFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + +// ptr functor passed to kernel +__global__ void structPtrDoublerFunctorKernel( + sDoublerFunctor *doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = (*doubler_)(5); + deviceResult[x] = (result == 10); +} + +void HipFunctorTests::TestForStructObjPtrFunctor(void) { + sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)]; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults); + + // Validation part of TestForStructObjPtrFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); + delete[] ptrdoubler; +} + +struct sCompare { + public: + template< typename T1, typename T2 > + __device__ bool operator()(const T1& v1, const T2& v2) { + return v1 > v2; + } +}; + +// template functor passed to kernel +__global__ void structTemplateFunctorKernel( + sCompare compare_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + 
deviceResult[x] = compare_(2.2, 2.1); + deviceResult[x] = compare_(2, 1); + deviceResult[x] = compare_('b', 'a'); +} + +void HipFunctorTests::TestForStructTemplateFunctor(void) { + sCompare comparefunctor; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true if the functor is called in device code + hostResults[k] = false; + } + + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + + // pass comparefunctor to hipLaunchKernelGGL + hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults); + + // Validation part of TestForStructTemplateFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + +// Doubler calculator struct +struct sDoublerCalculator { + public: + int a, result; + // fucntor contained in class object + DoublerFunctor doubler; +}; + + + +// doubler functor contained in struct passed to kernel +__global__ void DoublerCalculatorFunctorKernel( + sDoublerCalculator doubler_, + bool* deviceResult) { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int result = doubler_.doubler(doubler_.a); + deviceResult[x] = (doubler_.result == result); +} + +void HipFunctorTests::TestForFunctorContainInStructObj(void) { + sDoublerCalculator Doubler; + bool *deviceResults, *hostResults; + HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool))); + HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool))); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) { + // initialize to false, will be set to + // true 
if the functor is called in device code + hostResults[k] = false; + } + + Doubler.a = 5; + Doubler.result = 10; + HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyHostToDevice)); + + + // pass comparefunctor to hipLaunchKernelGGL + hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE), + dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults); + + // Validation part of TestForStructTemplateFunctor + HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool), + hipMemcpyDeviceToHost)); + for (int k = 0; k < BLOCK_DIM_SIZE; ++k) + REQUIRE(hostResults[k] == true); + HIP_CHECK(hipHostFree(hostResults)); + HIP_CHECK(hipFree(deviceResults)); +} + +/** +* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL +* @{ +* @ingroup KernelTest +* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, + std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` - +* Method to invocate kernel functions +*/ + +/** + * Test Description + * ------------------------ + * - Test that a class functor can be passed to hiplaunchparam + * and can be used in kernel. + * - Test that a templated class functor can be passed to hiplaunchparam + * and can be used in kernel. + * - Test that a class functor object ptr can be passed to hiplaunchparam + * and can be used in kernel. 
+ * - Test that a class object containing functor can be passed to hiplaunchparam + * and can be used in kernel + * - Test that a stuct functor can be passed to hiplaunchparam + * and can be used in kernel + * - Test that a stuct functor object ptr can be passed to hiplaunchparam + * and can be used in kernel + * - Test that a templated struct functor can be passed to hiplaunchparam + * and can be used in kernel + * - Test that a struct object containing functor can be passed to hiplaunchparam + * and can be used in kernel + + * Test source + * ------------------------ + * - catch/unit/kernel/hipLaunchParmFunctor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipLaunchParmFunctor") { + HipFunctorTests FunctorTests; + + SECTION("test for simple class functor") { + FunctorTests.TestForSimpleClassFunctor(); + } + SECTION("test for class objptr functor") { + FunctorTests.TestForClassObjPtrFunctor(); + } + SECTION("test for class templete functor") { + FunctorTests.TestForClassTemplateFunctor(); + } + SECTION("test for simple struct functor") { + FunctorTests.TestForSimpleStructFunctor(); + } + SECTION("test for struct objptr functor") { + FunctorTests.TestForStructObjPtrFunctor(); + } + SECTION("test for struct templete functor") { + FunctorTests.TestForStructTemplateFunctor(); + } + SECTION("test for functor contain in classobj") { + FunctorTests.TestForFunctorContainInClassObj(); + } + SECTION("test for functor contain in structobj") { + FunctorTests.TestForFunctorContainInStructObj(); + } +} + +/** +* End doxygen group KernelTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp index e3144dd596..c4d9fd3854 100644 --- a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp +++ b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp @@ -119,7 +119,7 @@ void verify_linked_lists_on_device(hipStream_t stream, Node* pNodes, unsigned int* pNumCorrect, unsigned int numLists, unsigned int ListLength) { *pNumCorrect = 0; // reset numCorrect to zero - + verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect, ListLength); diff --git a/projects/hip-tests/catch/unit/p2p/CMakeLists.txt b/projects/hip-tests/catch/unit/p2p/CMakeLists.txt index d24910daf2..7170294cb8 100644 --- a/projects/hip-tests/catch/unit/p2p/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/p2p/CMakeLists.txt @@ -1,24 +1,24 @@ -# Common Tests - Test independent of all platforms -# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to -# /catch/unit/p2p folder and its dependent files. -set(TEST_SRC - hipDeviceGetP2PAttribute.cc -) - -# only for AMD -if(HIP_PLATFORM MATCHES "amd") - set(AMD_SRC - hipP2pLinkTypeAndHopFunc.cc - ) - set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) -endif() - -set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17) - -add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc) - -hip_add_exe_to_target(NAME p2pTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests) - -add_dependencies(build_tests hipDeviceGetP2PAttribute_exe) +# Common Tests - Test independent of all platforms +# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to +# /catch/unit/p2p folder and its dependent files. 
+set(TEST_SRC + hipDeviceGetP2PAttribute.cc +) + +# only for AMD +if(HIP_PLATFORM MATCHES "amd") + set(AMD_SRC + hipP2pLinkTypeAndHopFunc.cc + ) + set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) +endif() + +set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17) + +add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc) + +hip_add_exe_to_target(NAME p2pTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) + +add_dependencies(build_tests hipDeviceGetP2PAttribute_exe) diff --git a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc index 71e0c6f0c7..7624cf0507 100644 --- a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc +++ b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc @@ -1,356 +1,356 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "hipP2pLinkTypeAndHopFunc.h" -#include -#include -#include - -#ifdef __linux__ -#include -#include -#include -#endif -#include -#define MAX_SIZE 30 -#define VISIBLE_DEVICE 0 - -/** - * Fetches Gpu device count - */ -#ifdef __linux__ -void getDeviceCount(int *pdevCnt) { - int fd[2], val = 0; - pid_t childpid; - // create pipe descriptors - pipe(fd); - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - - childpid = fork(); - if (childpid > 0) { // Parent - close(fd[1]); - // parent will wait to read the device cnt - read(fd[0], &val, sizeof(val)); - // close the read-descriptor - close(fd[0]); - // wait for child exit - wait(NULL); - *pdevCnt = val; - } else if (!childpid) { // Child - int devCnt = 1; - // writing only, no need for read-descriptor - close(fd[0]); - HIP_CHECK(hipGetDeviceCount(&devCnt)); - // send the value on the write-descriptor: - write(fd[1], &devCnt, sizeof(devCnt)); - // close the write descriptor: - close(fd[1]); - exit(0); - } else { // failure - *pdevCnt = 1; - return; - } -} - -bool testMaskedDevice(int actualNumGPUs) { - bool testResult = true; - int fd[2]; - pipe(fd); - - pid_t cPid; - cPid = fork(); - if (cPid == 0) { // child - hipError_t err; - char visibleDeviceString[MAX_SIZE] = {}; - snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE); - // disable visible_devices env from shell - unsetenv("ROCR_VISIBLE_DEVICES"); - unsetenv("HIP_VISIBLE_DEVICES"); - setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); - setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); - uint32_t linktype; - uint32_t hopcount; - for (int count = 1; - count < actualNumGPUs; count++) { - err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE, - VISIBLE_DEVICE+count, &linktype, &hopcount); - REQUIRE(err == hipSuccess); - } - close(fd[0]); - write(fd[1], &testResult, sizeof(testResult)); - close(fd[1]); - exit(0); - - } else if (cPid > 0) { // parent - close(fd[1]); - 
read(fd[0], &testResult, sizeof(testResult)); - close(fd[0]); - wait(NULL); - - } else { - printf("Info:fork() failed\n"); - testResult = false; - } - return testResult; -} -#endif - -bool testhipInvalidDevice(int numDevices) { - hipError_t ret; - uint32_t linktype; - uint32_t hopcount; - SECTION("Invalid device number case 1") { - ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - SECTION("Invalid device number case 2") { - ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - SECTION("Invalid device number case 3") { - ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - SECTION("Invalid device number case 4") { - ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - SECTION("Invalid device number case 5") { - ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - return true; -} - -#ifdef __linux__ -bool testhipInvalidLinkType() { - uint32_t hopcount; - REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr, - &hopcount)); - return true; -} - -bool testhipInvalidHopcount() { - uint32_t linktype; - REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr)); - return true; -} - -bool testhipSameDevice(int numGPUs) { - hipError_t ret; - uint32_t linktype = 0; - uint32_t hopcount = 0; - for (int gpuId = 0; gpuId < numGPUs; gpuId++) { - ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount); - REQUIRE(ret != hipSuccess); - } - return true; -} - -bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) { - bool TestPassed = true; - // Get the unique pair of devices - for (int x = 0; x < numDevices; x++) { - for (int y = x+1; y < numDevices; y++) { - uint32_t linktype1 = 0, linktype2 = 0; - uint32_t hopcount1 = 0, hopcount2 = 0; - 
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y, - &linktype1, &hopcount1)); - HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x, - &linktype2, &hopcount2)); - if (hopcount1 != hopcount2) { - TestPassed = false; - break; - } - } - } - return TestPassed; -} - -/** - * Internal Function - */ -bool validateLinkType(uint32_t linktype_Hip, - RSMI_IO_LINK_TYPE linktype_RocmSmi) { - bool TestPassed = false; - - if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) && - (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) { - TestPassed = true; - } else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) && - (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) { - TestPassed = true; - } else { - printf("linktype Hip = %u, linktype RocmSmi = %u\n", - linktype_Hip, linktype_RocmSmi); - TestPassed = false; - } - return TestPassed; -} - -bool testhipLinkTypeHopcountDevice(int numDevices) { - bool TestPassed = true; - // Opening and initializing rocm-smi library - void *lib_rocm_smi_hdl; - rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*, - RSMI_IO_LINK_TYPE*); - rsmi_status_t (*fntopo_init)(uint64_t); - rsmi_status_t (*fntopo_shut_down)(); - - lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so", - RTLD_LAZY); - REQUIRE(lib_rocm_smi_hdl); - - void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type"); - REQUIRE(fnsym); - - fntopo_get_link_type = reinterpret_cast(fnsym); - - fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init"); - REQUIRE(fnsym); - fntopo_init = reinterpret_cast(fnsym); - - fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down"); - REQUIRE(fnsym); - fntopo_shut_down = reinterpret_cast(fnsym); - - uint64_t init_flags = 0; - rsmi_status_t retsmi_init; - retsmi_init = fntopo_init(init_flags); - REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init); - - // Use rocm-smi API rsmi_topo_get_link_type() to validate - struct devicePair { - int device1; - int device2; - }; - std::vector devicePairList; - // Get the unique pair of devices - for (int x = 0; x < numDevices; x++) { - for (int y 
= x+1; y < numDevices; y++) { - devicePairList.push_back({x, y}); - } - } - for (auto pos=devicePairList.begin(); - pos != devicePairList.end(); pos++) { - uint32_t linktype1 = 0; - uint32_t hopcount1 = 0; - RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED; - uint64_t hopcount2 = 0; - rsmi_status_t retsmi; - HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1, - (*pos).device2, &linktype1, &hopcount1)); - retsmi = fntopo_get_link_type((*pos).device1, - (*pos).device2, &hopcount2, &linktype2); - REQUIRE(RSMI_STATUS_SUCCESS == retsmi); - - // Validate linktype - TestPassed = validateLinkType(linktype1, linktype2); - } - fntopo_shut_down(); - dlclose(lib_rocm_smi_hdl); - return TestPassed; -} -#endif - -/** - * @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount - * @{ - * @ingroup p2pTest - * `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` - - * Returns the link type and hop count between two devices - * @} - */ - -/** - * Test Description - * ------------------------ - * - Validates negative scenarios for hipExtGetLinkTypeAndHopCount - * 1)Test Scenario to verify when device1 is visible and device2 is masked - * 2)Test Scenario to verify Invalid Device Number(s) - * 3)Test Scenario to verify when linktype = NULL - * 4)Test Scenario to verify when hopcount = NULL - * 5)Test Scenario to verify when device1 = device2 - * 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2) - * and (src = device2, dest = device1), where device1 and device2 are valid device numbers. - * 7)Test Scenario: Verify (hopcount, linktype) values for all combination of - * GPUs with the output of rocm_smi tool. 
- - * Test source - * ------------------------ - * - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") { - int numDevices = 0; - bool TestPassed = true; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices < 2) { - HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); - return; - } - SECTION("Test running for testhipInvalidDevice") { - TestPassed = testhipInvalidDevice(numDevices); - REQUIRE(TestPassed == true); - } -#ifdef __linux__ - getDeviceCount(&numDevices); - if (numDevices < 2) { - HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); - return; - } - SECTION("Test running for testMaskedDevice") { - TestPassed = testMaskedDevice(numDevices); - REQUIRE(TestPassed == true); - } - SECTION("Test running for testhipInvalidLinkType") { - TestPassed = testhipInvalidLinkType(); - REQUIRE(TestPassed == true); - } - SECTION("Test running for testhipInvalidHopcount") { - TestPassed = testhipInvalidHopcount(); - REQUIRE(TestPassed == true); - } - SECTION("Test running for testhipSameDevice") { - TestPassed = testhipSameDevice(numDevices); - REQUIRE(TestPassed == true); - } - SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") { - TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices); - REQUIRE(TestPassed == true); - } - SECTION("Test running for testhipLinkTypeHopcountDevice") { - TestPassed = testhipLinkTypeHopcountDevice(numDevices); - REQUIRE(TestPassed == true); - } -#else - printf("This test is skipped due to non linux environment.\n"); -#endif -} - -/** -* End doxygen group p2pTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hipP2pLinkTypeAndHopFunc.h" +#include +#include +#include + +#ifdef __linux__ +#include +#include +#include +#endif +#include +#define MAX_SIZE 30 +#define VISIBLE_DEVICE 0 + +/** + * Fetches Gpu device count + */ +#ifdef __linux__ +void getDeviceCount(int *pdevCnt) { + int fd[2], val = 0; + pid_t childpid; + // create pipe descriptors + pipe(fd); + // disable visible_devices env from shell + unsetenv("ROCR_VISIBLE_DEVICES"); + unsetenv("HIP_VISIBLE_DEVICES"); + + childpid = fork(); + if (childpid > 0) { // Parent + close(fd[1]); + // parent will wait to read the device cnt + read(fd[0], &val, sizeof(val)); + // close the read-descriptor + close(fd[0]); + // wait for child exit + wait(NULL); + *pdevCnt = val; + } else if (!childpid) { // Child + int devCnt = 1; + // writing only, no need for read-descriptor + close(fd[0]); + HIP_CHECK(hipGetDeviceCount(&devCnt)); + // send the value on the write-descriptor: + write(fd[1], &devCnt, sizeof(devCnt)); + // close the write descriptor: + close(fd[1]); + exit(0); + } else { // failure + *pdevCnt = 1; + return; + } +} + +bool testMaskedDevice(int actualNumGPUs) { + bool testResult = true; + int fd[2]; + pipe(fd); + + pid_t cPid; + cPid = fork(); + if (cPid == 0) { // child + hipError_t err; + char visibleDeviceString[MAX_SIZE] = {}; + snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE); + // disable visible_devices env from shell + unsetenv("ROCR_VISIBLE_DEVICES"); + unsetenv("HIP_VISIBLE_DEVICES"); + setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1); + setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1); + uint32_t linktype; + uint32_t hopcount; + for (int count = 1; + count < actualNumGPUs; count++) { + err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE, + VISIBLE_DEVICE+count, &linktype, &hopcount); + REQUIRE(err == hipSuccess); + } + close(fd[0]); + write(fd[1], &testResult, sizeof(testResult)); + close(fd[1]); + exit(0); + + } else if (cPid > 0) { // parent + close(fd[1]); + 
read(fd[0], &testResult, sizeof(testResult)); + close(fd[0]); + wait(NULL); + + } else { + printf("Info:fork() failed\n"); + testResult = false; + } + return testResult; +} +#endif + +bool testhipInvalidDevice(int numDevices) { + hipError_t ret; + uint32_t linktype; + uint32_t hopcount; + SECTION("Invalid device number case 1") { + ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + SECTION("Invalid device number case 2") { + ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + SECTION("Invalid device number case 3") { + ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + SECTION("Invalid device number case 4") { + ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + SECTION("Invalid device number case 5") { + ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + return true; +} + +#ifdef __linux__ +bool testhipInvalidLinkType() { + uint32_t hopcount; + REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr, + &hopcount)); + return true; +} + +bool testhipInvalidHopcount() { + uint32_t linktype; + REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr)); + return true; +} + +bool testhipSameDevice(int numGPUs) { + hipError_t ret; + uint32_t linktype = 0; + uint32_t hopcount = 0; + for (int gpuId = 0; gpuId < numGPUs; gpuId++) { + ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount); + REQUIRE(ret != hipSuccess); + } + return true; +} + +bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) { + bool TestPassed = true; + // Get the unique pair of devices + for (int x = 0; x < numDevices; x++) { + for (int y = x+1; y < numDevices; y++) { + uint32_t linktype1 = 0, linktype2 = 0; + uint32_t hopcount1 = 0, hopcount2 = 0; + 
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y, + &linktype1, &hopcount1)); + HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x, + &linktype2, &hopcount2)); + if (hopcount1 != hopcount2) { + TestPassed = false; + break; + } + } + } + return TestPassed; +} + +/** + * Internal Function + */ +bool validateLinkType(uint32_t linktype_Hip, + RSMI_IO_LINK_TYPE linktype_RocmSmi) { + bool TestPassed = false; + + if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) && + (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) { + TestPassed = true; + } else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) && + (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) { + TestPassed = true; + } else { + printf("linktype Hip = %u, linktype RocmSmi = %u\n", + linktype_Hip, linktype_RocmSmi); + TestPassed = false; + } + return TestPassed; +} + +bool testhipLinkTypeHopcountDevice(int numDevices) { + bool TestPassed = true; + // Opening and initializing rocm-smi library + void *lib_rocm_smi_hdl; + rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*, + RSMI_IO_LINK_TYPE*); + rsmi_status_t (*fntopo_init)(uint64_t); + rsmi_status_t (*fntopo_shut_down)(); + + lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so", + RTLD_LAZY); + REQUIRE(lib_rocm_smi_hdl); + + void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type"); + REQUIRE(fnsym); + + fntopo_get_link_type = reinterpret_cast(fnsym); + + fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init"); + REQUIRE(fnsym); + fntopo_init = reinterpret_cast(fnsym); + + fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down"); + REQUIRE(fnsym); + fntopo_shut_down = reinterpret_cast(fnsym); + + uint64_t init_flags = 0; + rsmi_status_t retsmi_init; + retsmi_init = fntopo_init(init_flags); + REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init); + + // Use rocm-smi API rsmi_topo_get_link_type() to validate + struct devicePair { + int device1; + int device2; + }; + std::vector devicePairList; + // Get the unique pair of devices + for (int x = 0; x < numDevices; x++) { + for (int y 
= x+1; y < numDevices; y++) { + devicePairList.push_back({x, y}); + } + } + for (auto pos=devicePairList.begin(); + pos != devicePairList.end(); pos++) { + uint32_t linktype1 = 0; + uint32_t hopcount1 = 0; + RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED; + uint64_t hopcount2 = 0; + rsmi_status_t retsmi; + HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1, + (*pos).device2, &linktype1, &hopcount1)); + retsmi = fntopo_get_link_type((*pos).device1, + (*pos).device2, &hopcount2, &linktype2); + REQUIRE(RSMI_STATUS_SUCCESS == retsmi); + + // Validate linktype + TestPassed = validateLinkType(linktype1, linktype2); + } + fntopo_shut_down(); + dlclose(lib_rocm_smi_hdl); + return TestPassed; +} +#endif + +/** + * @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount + * @{ + * @ingroup p2pTest + * `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` - + * Returns the link type and hop count between two devices + * @} + */ + +/** + * Test Description + * ------------------------ + * - Validates negative scenarios for hipExtGetLinkTypeAndHopCount + * 1)Test Scenario to verify when device1 is visible and device2 is masked + * 2)Test Scenario to verify Invalid Device Number(s) + * 3)Test Scenario to verify when linktype = NULL + * 4)Test Scenario to verify when hopcount = NULL + * 5)Test Scenario to verify when device1 = device2 + * 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2) + * and (src = device2, dest = device1), where device1 and device2 are valid device numbers. + * 7)Test Scenario: Verify (hopcount, linktype) values for all combination of + * GPUs with the output of rocm_smi tool. 
+ + * Test source + * ------------------------ + * - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") { + int numDevices = 0; + bool TestPassed = true; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices < 2) { + HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); + return; + } + SECTION("Test running for testhipInvalidDevice") { + TestPassed = testhipInvalidDevice(numDevices); + REQUIRE(TestPassed == true); + } +#ifdef __linux__ + getDeviceCount(&numDevices); + if (numDevices < 2) { + HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); + return; + } + SECTION("Test running for testMaskedDevice") { + TestPassed = testMaskedDevice(numDevices); + REQUIRE(TestPassed == true); + } + SECTION("Test running for testhipInvalidLinkType") { + TestPassed = testhipInvalidLinkType(); + REQUIRE(TestPassed == true); + } + SECTION("Test running for testhipInvalidHopcount") { + TestPassed = testhipInvalidHopcount(); + REQUIRE(TestPassed == true); + } + SECTION("Test running for testhipSameDevice") { + TestPassed = testhipSameDevice(numDevices); + REQUIRE(TestPassed == true); + } + SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") { + TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices); + REQUIRE(TestPassed == true); + } + SECTION("Test running for testhipLinkTypeHopcountDevice") { + TestPassed = testhipLinkTypeHopcountDevice(numDevices); + REQUIRE(TestPassed == true); + } +#else + printf("This test is skipped due to non linux environment.\n"); +#endif +} + +/** +* End doxygen group p2pTest. 
+* @} +*/ diff --git a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h index 755743c328..0e791b66fd 100644 --- a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h +++ b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h @@ -1,110 +1,110 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_ -#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_ -/** - * rocm_smi.h enums - */ -typedef enum { - RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful - RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid - RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or - //!< action is not available for the - //!< given input, on the given system - RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. 
This - //!< may because the operation is not - //!< supported by the Linux kernel - //!< version running on the executing - //!< machine - RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file - //!< error. Many functions require - //!< root access to run. - RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other - //!< resource - RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught - RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of - //!< allowable or safe range - RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi - //!< initializing internal data - //!< structures - RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR, - RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not - //!< yet been implemented in the - //!< current system for the current - //!< devices - RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not - //!< found - RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were - //!< available for the operation - RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during - //!< execution of function - RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data - //!< was read - RSMI_STATUS_NO_DATA, //!< No data was found for a given - //!< input - RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to - //!< function is not what was expected - RSMI_STATUS_BUSY, //!< A resource or mutex could not be - //!< acquired because it is already - //!< being used - RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter - //!< exceeded INT32_MAX - - RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred -} rsmi_status_t; - -/** - * Types for IO Link returned from rocm_smi - */ -typedef enum _RSMI_IO_LINK_TYPE { - RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type. 
- RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express - RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI - RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types - RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types -} RSMI_IO_LINK_TYPE; - -/** - * Types for IO Link returned from rocm runtime - */ -typedef enum { - /** - * Hyper-transport bus type. - */ - HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0, - /** - * QPI bus type. - */ - HSA_AMD_LINK_INFO_TYPE_QPI = 1, - /** - * PCIe bus type. - */ - HSA_AMD_LINK_INFO_TYPE_PCIE = 2, - /** - * Infiniband bus type. - */ - HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3, - /** - * xGMI link type. - */ - HSA_AMD_LINK_INFO_TYPE_XGMI = 4 -} hsa_amd_link_info_type_t; - -#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_ +#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_ +/** + * rocm_smi.h enums + */ +typedef enum { + RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful + RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid + RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or + //!< action is not available for the + //!< given input, on the given system + RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This + //!< may because the operation is not + //!< supported by the Linux kernel + //!< version running on the executing + //!< machine + RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file + //!< error. Many functions require + //!< root access to run. + RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other + //!< resource + RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught + RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of + //!< allowable or safe range + RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi + //!< initializing internal data + //!< structures + RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR, + RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not + //!< yet been implemented in the + //!< current system for the current + //!< devices + RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not + //!< found + RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were + //!< available for the operation + RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during + //!< execution of function + RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data + //!< was read + RSMI_STATUS_NO_DATA, //!< No data was found for a given + //!< input + RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to + //!< function is not what was expected + RSMI_STATUS_BUSY, //!< A resource or mutex could not be + //!< acquired because it is already + //!< being used + RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference 
counter + //!< exceeded INT32_MAX + + RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred +} rsmi_status_t; + +/** + * Types for IO Link returned from rocm_smi + */ +typedef enum _RSMI_IO_LINK_TYPE { + RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type. + RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express + RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI + RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types + RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types +} RSMI_IO_LINK_TYPE; + +/** + * Types for IO Link returned from rocm runtime + */ +typedef enum { + /** + * Hyper-transport bus type. + */ + HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0, + /** + * QPI bus type. + */ + HSA_AMD_LINK_INFO_TYPE_QPI = 1, + /** + * PCIe bus type. + */ + HSA_AMD_LINK_INFO_TYPE_PCIE = 2, + /** + * Infiniband bus type. + */ + HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3, + /** + * xGMI link type. + */ + HSA_AMD_LINK_INFO_TYPE_XGMI = 4 +} hsa_amd_link_info_type_t; + +#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_ diff --git a/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp b/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp index 14ea47e6c2..8de295783f 100644 --- a/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp +++ b/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp @@ -1,3300 +1,3300 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* -This file contains functions for idividual HIPRTC supported compiler options -validation. For PASS senario the function returns 1 or 0 otherwise. -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include "headers/RtcUtility.h" -#include "headers/RtcFunctions.h" -#include "headers/RtcKernels.h" -#include -#include "headers/printf_common.h" - -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wunused-variable" - -bool check_architecture(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "architecture"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - hipDeviceProp_t prop; - HIP_CHECK(hipGetDeviceProperties(&prop, 0)); - std::string actual_architecture = prop.gcnArchName; - std::string complete_CO = retrieved_CO + actual_architecture; - const char* compiler_option = complete_CO.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size 
!= -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - return 1; -} - -bool check_rdc(const char** Combination_CO, int Combination_CO_size, - int max_thread_pos, int fast_math_present) { - std::string block_name = "rdc"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string CO = get_string_parameters("compiler_option", - block_name); - if (CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - const char* compiler_opt = CO.c_str(); - float *A_d, *B_d, *C_d; - float *A_h, *B_h, *C_h, *result; - float Nbytes = sizeof(float); - A_h = new float[1]; - B_h = new float[1]; - C_h = new float[1]; - result = new float[1]; - for (int i = 0; i < 1; i++) { - 
A_h[i] = 4; - B_h[i] = 4; - result[i] = 16; - } - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&B_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, rdc_string, kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_opt); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_opt)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_opt); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - void* kernelParam[] = {A_d, B_d, C_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - size_t codeSize; - HIPRTC_CHECK(hiprtcGetBitcodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetBitcode(prog, codec.data())); - float wall_time; - int reg_count = 2; - int max_thread = 1; - unsigned int log_size = 5120; - char error_log[5120]; - char info_log[5120]; - std::vector jit_options = {HIPRTC_JIT_MAX_REGISTERS, - HIPRTC_JIT_THREADS_PER_BLOCK, - HIPRTC_JIT_WALL_TIME, - HIPRTC_JIT_INFO_LOG_BUFFER, - HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES, - HIPRTC_JIT_ERROR_LOG_BUFFER, - HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, - HIPRTC_JIT_LOG_VERBOSE}; - const void* lopts[] = {reinterpret_cast(®_count), - reinterpret_cast(&max_thread), - reinterpret_cast(&wall_time), - info_log, - reinterpret_cast(log_size), - error_log, - reinterpret_cast(log_size), - reinterpret_cast(1)}; - hiprtcLinkState rtc_link_state; - void* binary; - size_t binarySize; - int pass_count = 0; - hipModule_t module; - hipFunction_t function; - for (int i = 0; i < 2; i++) { - switch (i) { - case 0 : - HIPRTC_CHECK(hiprtcLinkCreate(0, nullptr, nullptr, &rtc_link_state)); - HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state, - HIPRTC_JIT_INPUT_LLVM_BITCODE, - codec.data(), codeSize, 0, 0, 0, 0)); - HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize)); - HIP_CHECK(hipModuleLoadData(&module, binary)); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, - nullptr, kernel_parameter)); - pass_count++; - break; - case 1 : - 
HIPRTC_CHECK(hiprtcLinkCreate(8, jit_options.data(), - reinterpret_cast(&lopts), - &rtc_link_state)); - HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state, - HIPRTC_JIT_INPUT_LLVM_BITCODE, - codec.data(), codeSize, 0, 0, 0, 0)); - HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize)); - HIP_CHECK(hipModuleLoadData(&module, binary)); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, - nullptr, kernel_parameter)); - pass_count++; - break; - default: - WARN(" NOT VALID INPUT "); - break; - } - } - HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0 ; i< 1; i++) { - if (result[i] != ((A_h[i] * B_h[i]))) { - WARN("Compiler Option : " << compiler_opt); - WARN("EXPECTED RESULT DOES NOT MATCH "); - WARN("INPUT A & B : " << A_h[i] <<" , "<< B_h[i]); - WARN("EXPECTED RES : " << (A_h[i] * B_h[i])); - WARN("OBTAINED RES : " << result[i]); - return 0; - } - } - if (pass_count == 2) { - return 1; - } else { - WARN(" pass_count IS NOT MATCHING "); - return 0; - } -} - -bool check_denormals_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "denormals"; - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name); - picojson::array Expected_Results = get_array_parameters("Expected_Results", - block_name); - const char* kername = kernel_name.c_str(); - const char* compiler_option = retrieved_CO.c_str(); - std::vector 
double_vec_input; - for (auto& indx : Input_Vals) { - double_vec_input.push_back(indx.get()); - } - std::vector Input_Vals_int; - for (auto& indx : double_vec_input) { - Input_Vals_int.push_back(static_cast(indx)); - } - std::vector double_vec_expected; - for (auto& indx : Expected_Results) { - double_vec_expected.push_back(indx.get()); - } - std::vector Expected_Results_int; - for (auto& indx : double_vec_expected) { - Expected_Results_int.push_back(static_cast(indx)); - } - int test_case, res_inc; - for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() && - res_inc < Expected_Results_int.size(); test_case+=2, res_inc++) { - double *base_h, *power_h, *result_h; - double *base_d, *power_d, *result_d; - double Nbytes = sizeof(double); - base_h = new double[1]; - power_h = new double[1]; - result_h = new double[1]; - *base_h = Input_Vals_int[test_case]; - *power_h = Input_Vals_int[test_case+1]; - *result_h = 1; - HIP_CHECK(hipMalloc(&base_d, Nbytes)); - HIP_CHECK(hipMalloc(&power_d, Nbytes)); - HIP_CHECK(hipMalloc(&result_d, Nbytes)); - HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice)); - hiprtcProgram program; - HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string, - "denormals", 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(program, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(program, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(program, codec.data())); - void* kernelParam[] = {base_d, power_d, result_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, - nullptr, kernel_parameter)); - HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double), - hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&program)); - if (*result_h != Expected_Results_int[res_inc]) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc); - WARN("th ITERATION (start iteration is 0 ) "); - WARN("INPUT : 
pow(2, " << *power_h << ") "); - WARN("EXPECTED OP: " << Expected_Results_int[res_inc]); - WARN("OBTAINED OP: " << *result_h); - return 0; - } - } - return 1; -} - -bool check_denormals_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "denormals"; - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name); - picojson::array Expected_Results_for_no = get_array_parameters( - "Expected_Results_for_no", block_name); - const char* kername = kernel_name.c_str(); - const char* compiler_option = retrieved_CO.c_str(); - std::vector double_vec_input; - for (auto& indx : Input_Vals) { - double_vec_input.push_back(indx.get()); - } - std::vector Input_Vals_int; - for (auto& indx : double_vec_input) { - Input_Vals_int.push_back(static_cast(indx)); - } - std::vector double_vec_expected_for_no; - for (auto& indx : Expected_Results_for_no) { - double_vec_expected_for_no.push_back(indx.get()); - } - std::vector Expected_Results_for_no_int; - for (auto& indx : double_vec_expected_for_no) { - Expected_Results_for_no_int.push_back(static_cast(indx)); - } - int test_case, res_inc; - for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() && - res_inc < Expected_Results_for_no_int.size(); test_case+=2, res_inc++) { - double *base_h, *power_h, *result_h; - double *base_d, *power_d, *result_d; - double Nbytes = sizeof(double); - base_h = new double[1]; - power_h = new double[1]; - result_h = new double[1]; - *base_h = 
Input_Vals_int[test_case]; - *power_h = Input_Vals_int[test_case+1]; - *result_h = 0; - HIP_CHECK(hipMalloc(&base_d, Nbytes)); - HIP_CHECK(hipMalloc(&power_d, Nbytes)); - HIP_CHECK(hipMalloc(&result_d, Nbytes)); - HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice)); - hiprtcProgram program; - HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string, - "denormals", 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(program, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(program, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(program, codec.data())); - void* kernelParam[] = {base_d, power_d, result_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, - nullptr, kernel_parameter)); - HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double), - hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&program)); - if (*result_h != Expected_Results_for_no_int[res_inc]) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc); - WARN("th ITERATION (start iteration is 0 ) "); - WARN("INPUT : pow(2, " << *power_h << ") "); - WARN("EXPECTED OP: "<< Expected_Results_for_no_int[res_inc]); - WARN("OBTAINED OP: "<< *result_h); - return 0; - } - } - return 1; -} - -bool check_ffp_contract_off(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "ffp_contract"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - picojson::array 
retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 3) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - std::string hold = CO_vec[0]; - CO_IRadded[0] = hold.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO[0]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (data.find("fmul contract") != -1 && - data.find("@llvm.fmuladd.f32") != -1) { - WARN("Compiler option : " << retrieved_CO[0]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN EITHER"); - WARN("'fmul contract' or '@llvm.fmuladd.f32' or both "); - WARN("WHICH IS NOT EXPECTED"); - return 0; - } else { - return 1; - } -} - -bool check_ffp_contract_on(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "ffp_contract"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 3) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if 
(Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - std::string hold = CO_vec[1]; - CO_IRadded[0] = hold.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, - CO_IRadded_size, Combination_CO, - Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO[1]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 0 && data.find("@llvm.fmuladd.f32")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO[1]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' "); - return 0; - } - } else { - if (data.find("@llvm.fmuladd.f32") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO[1]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' "); - return 0; - } - } -} - -bool check_ffp_contract_fast(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "ffp_contract"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - 
if (retrieved_CO.size() < 3) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - std::string hold = CO_vec[2]; - CO_IRadded[0] = hold.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO[2]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 1 && data.find("contract")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO[2]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul contract' "); - return 0; - } - } else { - if (data.find("fmul contract") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO[2]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul contract' "); - return 0; - } - } -} - -bool check_fast_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "fast_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - 
std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } -} - -bool check_fast_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "fast_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - 
CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (data.find("fmul fast")!= -1) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } else { - return 1; - } -} - -bool check_slp_vectorize_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "slp_vectorize"; - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - __half2 *a_d, *x_d, *y_d; - __half2 a_h, x_h; - a_h.data.x = 1.5; - x_h.data.y = 3.0; - CaptureStream capture(stderr); - HIP_CHECK(hipMalloc(&a_d, sizeof(__half2))); - HIP_CHECK(hipMalloc(&x_d, sizeof(__half2))); - HIP_CHECK(hipMalloc(&y_d, sizeof(__half2))); - HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), 
hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice)); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - int Combination_CO_IRadded_size = Combination_CO_size+3; - int b = 0; - std::vector add_ir_forcombi(Combination_CO_size + 3, ""); - const char** Combination_CO_IRadded = - new const char*[Combination_CO_size+3]; - for (int i = 0; i < Combination_CO_size+3; ++i) { - if (i == Combination_CO_size) { - Combination_CO_IRadded[i] = "-fno-signed-zeros"; - Combination_CO_IRadded[i+1] = "-mllvm"; - Combination_CO_IRadded[i+2] = "-print-after=constmerge"; - break; - } - add_ir_forcombi[i] = Combination_CO[b]; - Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); - b++; - } - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_IRadded_size, - Combination_CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler option : " << retrieved_CO); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size+3; i++) { - WARN(Combination_CO_IRadded[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size, - CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler option : " << retrieved_CO); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - std::string data = capture.getData(); - std::stringstream dataStream; - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - void* kernelParam[] = {reinterpret_cast(a_d), - reinterpret_cast(x_d), - reinterpret_cast(y_d)}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, - kernel_parameter)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - int times = 0; - if (data.find("contract <2 x half>", 0) != -1) { - times++; - } - int start = data.find("contract <2 x half>", 0) + 1; - while (data.find("contract <2 x half>", start) != -1) { - times++; - start = data.find("contract <2 x half>", start)+1; - } - if (times == 1) { - return 1; - } else if (times == 0) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fadd contract <2 x half>' "); - return 
0; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); - WARN(" WHICH IS NOT EXPECTED (IT SHOULD BE PRESENT ONCE)"); - return 0; - } -} - -bool check_slp_vectorize_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "slp_vectorize"; - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - int CO_IRadded_size = 3; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - __half2 *a_d, *x_d, *y_d; - __half2 a_h, x_h; - a_h.data.x = 1.5; - x_h.data.y = 3.0; - CaptureStream capture(stderr); - HIP_CHECK(hipMalloc(&a_d, sizeof(__half2))); - HIP_CHECK(hipMalloc(&x_d, sizeof(__half2))); - HIP_CHECK(hipMalloc(&y_d, sizeof(__half2))); - HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice)); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - int Combination_CO_IRadded_size = Combination_CO_size+3; - int b = 0; - std::vector add_ir_forcombi(Combination_CO_size + 3, ""); - const char** Combination_CO_IRadded = - new const char*[Combination_CO_size+3]; - 
for (int i = 0; i < Combination_CO_size+3; ++i) { - if (i == Combination_CO_size) { - Combination_CO_IRadded[i] = "-fno-signed-zeros"; - Combination_CO_IRadded[i+1] = "-mllvm"; - Combination_CO_IRadded[i+2] = "-print-after=constmerge"; - break; - } - add_ir_forcombi[i] = Combination_CO[b]; - Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); - b++; - } - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_IRadded_size, - Combination_CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler option : " << retrieved_CO); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size+3; i++) { - WARN(Combination_CO_IRadded[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size, - CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler option : " << retrieved_CO); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - std::string data = capture.getData(); - std::stringstream dataStream; - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - void* kernelParam[] = {reinterpret_cast(a_d), - reinterpret_cast(x_d), - reinterpret_cast(y_d)}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, - kernel_parameter)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - int times = 0; - if (data.find("contract <2 x half>", 0) != -1) { - times++; - } - int start = data.find("contract <2 x half>", 0) + 1; - while (data.find("contract <2 x half>", start) != -1) { - times++; - start = data.find("contract <2 x half>", start)+1; - } - if (times == 2) { - return 1; - } else if (times < 2) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); - WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)"); - return 0; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - 
WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); - WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)"); - return 0; - } -} - -bool check_macro(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "macro"; - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - picojson::array Expected_Results = get_array_parameters("Expected_Results", - block_name); - const char* kername = kernel_name.c_str(); - std::vector double_vec_expected; - for (auto& indx : Expected_Results) { - double_vec_expected.push_back(indx.get()); - } - std::vector Expected_Results_int; - for (auto& indx : double_vec_expected) { - Expected_Results_int.push_back(static_cast(indx)); - } - const char* compiler_option = retrieved_CO.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, macro_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! with error code: "); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - int *macro_value_h; - int *macro_value_d; - macro_value_h = new int[1]; - HIP_CHECK(hipMalloc(¯o_value_d, sizeof(int))); - *macro_value_h = 0; - HIP_CHECK(hipMemcpy(macro_value_d, macro_value_h, sizeof(int), - hipMemcpyHostToDevice)); - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - hiprtcGetCode(prog, codec.data()); - void* kernelParam[] = {macro_value_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[]={HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, - kernel_parameter)); - HIP_CHECK(hipMemcpy(macro_value_h, macro_value_d, sizeof(int), - hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - if (*macro_value_h != Expected_Results_int[0]) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - 
WARN(Combination_CO[i]); - } - } - WARN("EXPECTED RESULT DOES NOT MATCH"); - WARN("INPUT: " << compiler_option); - WARN("EXPECTED OP : "<< Expected_Results_int[0]); - WARN("OBTAINED OP: "<< *macro_value_h); - return 0; - } else { - return 1; - } -} - -bool check_undef_macro(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "undef_macro"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - picojson::array comp_opt = get_array_parameters("compiler_option", - block_name); - if (comp_opt.size() < 2) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::vector compiler_option; - for (auto& indx : comp_opt) { - compiler_option.push_back(indx.get()); - } - std::vector variable(compiler_option.size(), ""); - const char** appended_compiler_options = - new const char*[compiler_option.size()]; - for (int i = 0; i < compiler_option.size(); ++i) { - variable[i] = compiler_option[i]; - appended_compiler_options[i] = variable[i].c_str(); - } - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, undef_macro_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (log.find("undeclared identifier")) { - return 1; - } - } else { - WARN("Compiler Option : " << appended_compiler_options[1]); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); 
- } - WARN("Expected error : 'undeclared identifier' NOT GENERATED"); - return 0; - } - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, - compiler_option.size(), - appended_compiler_options)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (log.find("undeclared identifier")) { - return 1; - } - } else { - WARN("Compiler Option : " << appended_compiler_options[0]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("Expected error : 'undeclared identifier' NOT GENERATED"); - return 0; - } - } - } - WARN("Compiler Option : " << appended_compiler_options[0]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED ERROR WAS NOT GENERATED"); - return 0; -} - -bool check_header_dir(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "header_dir"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string compiler_option = get_string_parameters("compiler_option", - block_name); - if (compiler_option == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - picojson::array Headers = get_array_parameters("Headers", block_name); - picojson::array depending_comp_optn = - get_array_parameters("depending_comp_optn", block_name); - picojson::array Src_headers = - get_array_parameters("Src_headers", 
block_name); - picojson::array Input_Thrd_Vals = - get_array_parameters("Input_Vals", block_name); - picojson::array Expected_Results = - get_array_parameters("Expected_Results", block_name); - std::string str = "pwd"; - const char *cmd = str.c_str(); - CaptureStream capture(stdout); - capture.Begin(); - system(cmd); - capture.End(); - std::string wor_dir = capture.getData(); - std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); - std::string append_str = "catch/unit/rtc/headers"; - std::string CO = compiler_option + " " + break_dir + append_str; - const char* appended_CO = CO.c_str(); - std::vector Headers_list; - for (auto& indx : Headers) { - Headers_list.push_back(indx.get()); - } - std::vector Src_headers_list; - for (auto& indx : Src_headers) { - Src_headers_list.push_back(indx.get()); - } - std::vector depending_co_list; - for (auto& indx : depending_comp_optn) { - depending_co_list.push_back(indx.get()); - } - std::vector double_vec_target; - for (auto& indx : Input_Thrd_Vals) { - double_vec_target.push_back(indx.get()); - } - std::vector Input_Thrd_Vals_int; - for (auto& indx : double_vec_target) { - Input_Thrd_Vals_int.push_back(static_cast(indx)); - } - std::vector double_vec_expected; - for (auto& indx : Expected_Results) { - double_vec_expected.push_back(indx.get()); - } - std::vector Expected_Results_int; - for (auto& indx : double_vec_expected) { - Expected_Results_int.push_back(static_cast(indx)); - } - std::vector src_var_hdr_lst(Src_headers_list.size(), ""); - const char** src_hder_lst = new const char*[Src_headers_list.size()]; - for (int i = 0; i < Src_headers_list.size(); ++i) { - src_var_hdr_lst[i] = Src_headers_list[i]; - src_hder_lst[i] = src_var_hdr_lst[i].c_str(); - } - std::vector var_hdr_lst(Headers_list.size(), ""); - const char** hder_lst = new const char*[Headers_list.size()]; - for (int i = 0; i < Headers_list.size(); ++i) { - var_hdr_lst[i] = Headers_list[i]; - hder_lst[i] = var_hdr_lst[i].c_str(); - } - for (int 
senario = 0; senario< Input_Thrd_Vals_int.size(); senario++) { - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, header_dir_string, - kername, Headers_list.size(), - src_hder_lst, hder_lst)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << appended_CO); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &appended_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << appended_CO); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - int value_h = 0; - int* ptr_value_h = &value_h; - int input_h = Input_Thrd_Vals_int[senario]; - int* ptr_input_h = &input_h; - int* value_d; - int* input_d; - HIP_CHECK(hipMalloc(&value_d, sizeof(int))); - HIP_CHECK(hipMalloc(&input_d, sizeof(int))); - HIP_CHECK(hipMemcpy(value_d, ptr_value_h, sizeof(int), - hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(input_d, ptr_input_h, sizeof(int), - hipMemcpyHostToDevice)); - void* kernelParam[] = {value_d, input_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, - kernel_parameter)); - HIP_CHECK(hipMemcpy(ptr_value_h, value_d, sizeof(int), - hipMemcpyDeviceToHost)); - if (*ptr_value_h != Expected_Results_int[senario]) { - WARN("Compiler Option : " << appended_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN(" EXPECTED RESULT DOES NOT MATCH FOR " << senario); - WARN("th ITERATION (start iteration is 0 ) "); - WARN(" INPUT: " << Input_Thrd_Vals_int[senario]); - WARN(" EXPECTED OP: "<< Expected_Results_int[senario]); - WARN(" OBTAINED OP: "<< *ptr_value_h); - return 0; - } - HIP_CHECK(hipDeviceSynchronize()); - 
HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - } - return 1; -} - -bool check_warning(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "warning"; - std::string retrieved_CO = - get_string_parameters("compiler_option", block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - const char* compiler_option = retrieved_CO.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, warning_string, kername, - 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (-1 != log.find("#warning")) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN(" WARNING MESSAGE IS PRINTING WHICH IS NOT SUPRESSED "); - return 0; - } else { - return 1; - } - } else { - return 1; - } -} - -bool check_Rpass_inline(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "Rpass_inline"; - std::string retrieved_CO = - get_string_parameters("compiler_option", block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - const char* compiler_option = retrieved_CO.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() 
api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (log.find("inlined into")) { - return 1; - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED STRING 'inlined into' IS NOT PRESENT IN LOG "); - return 0; - } - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN(" LOG WITH EXPECTED STRING 'inlined into' IS NOT PRESENT "); - return 0; - } -} - -bool check_conversionerror_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "error"; - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 4) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - 
WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - std::string variable = CO_vec[0]; - const char* compiler_option = variable.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - } - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - std::string variable = "error"; - if (-1 != log.find(variable)) { - return 1; - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("ERROR MSG : '" << variable <<"' NOT FOUND"); - return 0; - } - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("LOG IS NOT GENERATED"); - WARN("maybe due to presence of '-w' compiler option"); - return 0; - } -} - -bool check_conversionerror_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "error"; - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 4) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - 
WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - std::string variable = CO_vec[1]; - const char* compiler_option = variable.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - }size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (-1 != log.find("error")) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("LOG IS PRESENT WITH ERROR WHICH IS NOT EXPECTED : "); - WARN("maybe due to presence of '-w' compiler option"); - return 0; - } else { - return 1; - } - } else { - return 1; - } -} - -bool check_conversionwarning_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "error"; - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 4) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name 
= get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - std::string variable = CO_vec[2]; - const char* compiler_option = variable.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - }size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - std::string variable = "warning"; - if (-1 != log.find(variable)) { - return 1; - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("LOG DOESN'T CONTAIN WARNING AS EXP : " << compiler_option); - return 0; - } - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("LOG IS NOT GENERATED"); - return 0; - } -} - -bool check_conversionwarning_disabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present) { - std::string block_name = "error"; - picojson::array retrieved_CO = get_array_parameters("compiler_option", - block_name); - if (retrieved_CO.size() < 4) { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string 
kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::vector CO_vec; - for (auto& indx : retrieved_CO) { - CO_vec.push_back(indx.get()); - } - std::string variable = CO_vec[3]; - const char* compiler_option = variable.c_str(); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, - Combination_CO)}; - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option)}; - }size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - if (-1 != log.find("warning")) { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("WARNING IS GENERATED WHICH IS NOT EXPECTED"); - WARN(compiler_option); - return 0; - } else { - return 1; - } - } else { - return 1; - } -} - -bool check_max_thread(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "max_thread"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - std::string default_CO = get_string_parameters("kernel_name", block_name); - picojson::array Target_Thrd_Vals = get_array_parameters("Target_Vals", - block_name); - picojson::array Input_Thrd_Vals = get_array_parameters("Input_Vals", - block_name); - picojson::array Expected_Results = get_array_parameters("Expected_Results", - block_name); - const char* kername = kernel_name.c_str(); - std::string compiler_option = get_string_parameters("compiler_option", - block_name); - if (compiler_option == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - 
WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::vector double_vec_target; - for (auto& indx : Target_Thrd_Vals) { - double_vec_target.push_back(indx.get()); - } - std::vector Target_Thrd_Vals_int; - for (auto& indx : double_vec_target) { - Target_Thrd_Vals_int.push_back(static_cast(indx)); - } - int a = 0; - std::vector variable(Target_Thrd_Vals_int.size(), ""); - const char** appended_compiler_options = - new const char*[Target_Thrd_Vals_int.size()]; - for (int i = 0; i < Target_Thrd_Vals_int.size() ; i++) { - variable[i] = compiler_option + std::to_string(Target_Thrd_Vals_int[i]); - appended_compiler_options[i] = variable[i].c_str(); - } - std::vector double_vec_input; - for (auto& indx : Input_Thrd_Vals) { - double_vec_input.push_back(indx.get()); - } - std::vector Input_Thrd_Vals_int; - for (auto& indx : double_vec_input) { - Input_Thrd_Vals_int.push_back(static_cast(indx)); - } - std::vector double_vec_expected; - for (auto& indx : Expected_Results) { - double_vec_expected.push_back(indx.get()); - } - std::vector Expected_Results_int; - for (auto& indx : double_vec_expected) { - Expected_Results_int.push_back(static_cast(indx)); - } - int pass_count = 0; - int inc = (Input_Thrd_Vals_int.size()/Target_Thrd_Vals_int.size()); - int start = 0; - int check, test_case; - for (int senario = 0; senario < Target_Thrd_Vals_int.size(); senario++) { - if (Target_Thrd_Vals_int[senario] == 0) { - check = 0; - for (test_case = start; test_case< (start+inc); test_case++) { - if (check == Expected_Results_int[test_case]) { - pass_count++; - } - } - start+= inc; - continue; - } - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - std::string max_thread_string = variable[senario]; - Combination_CO[max_thread_pos] = 
max_thread_string.c_str(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << appended_compiler_options[senario]); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &appended_compiler_options[senario])}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << appended_compiler_options[senario]); - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - for (test_case = start; test_case< (start+inc); test_case++) { - int num_threads_h = 0; - int* ptr_num_threads_h = &num_threads_h; - int* Thread_count_d; - HIP_CHECK(hipMalloc(&Thread_count_d, sizeof(int))); - HIP_CHECK(hipMemcpy(Thread_count_d, ptr_num_threads_h, sizeof(int), - hipMemcpyHostToDevice)); - void* kernelParam[] = {Thread_count_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - 
HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - hipError_t status = hipModuleLaunchKernel(function, 1, 1, 1, - Input_Thrd_Vals_int[test_case], - 1, 1, 0, 0, nullptr, - kernel_parameter); - HIP_CHECK(hipMemcpy(ptr_num_threads_h, Thread_count_d, sizeof(int), - hipMemcpyDeviceToHost)); - if ((status == hipSuccess) && - (num_threads_h <= Target_Thrd_Vals_int[senario])) { - check = 1; - } else { - check = 0; - } - if (check != Expected_Results_int[test_case]) { - WARN("Compiler Option : " << appended_compiler_options[senario]); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - std::string max_thread_string = variable[senario]; - Combination_CO[max_thread_pos] = max_thread_string.c_str(); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED RESULT DOES NOT MATCH FOR " << test_case); - WARN("th ITERATION (start iteration is 0 ) "); - WARN("IP THREAD VAL: " << Input_Thrd_Vals_int[test_case]); - WARN("EXPECTED OP: "<< Expected_Results_int[test_case]); - WARN("OBTAINED OP: "<< check); - return 0; - } - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - } - start+=inc; - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - } - return 1; -} - -bool check_unsafe_atomic_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "unsafe_atomic"; - std::string compiler_option = get_string_parameters("compiler_option", - block_name); - if (compiler_option == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - const char *compiler_option_cstr = compiler_option.c_str(); - float *A_d; - 
const int N = 1000; - float A_h[N]; - float Nbytes = N * sizeof(float); - double sum_w = 0, sum_wo = 0, sum_tocheck = 0; - for (int i = 0; i < N; i++) { - A_h[i] = 0.1f; - sum_tocheck += A_h[i] + 0.2f; - } - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - for (int senario = 0; senario < 2; senario ++) { - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, - &compiler_option_cstr)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - void* kernelParam[] = {A_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0, - nullptr, kernel_parameter)); - HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0; i < N; i++) { - if (senario == 0) { - sum_wo += A_h[i]; - } else { - sum_w += A_h[i]; - } - } - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - } - if (sum_w != sum_tocheck) { - return 1; - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED : " << sum_w << " != " << sum_tocheck); - return 0; - } -} - -bool check_unsafe_atomic_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "unsafe_atomic"; - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < 
Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - const char* compiler_option = retrieved_CO.c_str(); - float *A_d; - const int N = 1000; - float A_h[N]; - float Nbytes = N * sizeof(float); - double sum = 0, sum_tocheck = 0; - for (int i = 0; i < N; i++) { - A_h[i] = 0.1f; - sum_tocheck += A_h[i] + 0.2f; - } - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string, - kername, 0, NULL, NULL)); - if (Combination_CO_size != -1) { - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_size, - Combination_CO)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } else { - hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_option)}; - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler Option : " << compiler_option); - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return 0; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - void* kernelParam[] = {A_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0, - nullptr, kernel_parameter)); - HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0; i < N; i++) { - sum += A_h[i]; - } - HIP_CHECK(hipDeviceSynchronize()); - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - if (sum == sum_tocheck) { - return 1; - } else { - WARN("Compiler Option : " << compiler_option); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("EXPECTED RESULT IS NOT OBTAINED "); - WARN("EXPECTED RESULT: "<< sum_tocheck); - WARN("OBTAINED RESULT: "<< sum); - return 0; - } -} - -bool check_infinite_num_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "infinite_num"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - 
WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present != -1) { - if (fast_math_present == 0 && data.find("contract") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'contract' "); - return 0; - } - } else { - if (data.find("ninf")!= -1) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'ninf' "); - return 0; - } else { - return 1; - } - } -} - -bool check_infinite_num_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "infinite_num"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != 
-1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present != -1) { - if (fast_math_present == 1 && data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } - } else { - if (data.find("ninf")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'ninf' "); - return 0; - } - } -} - -bool check_NAN_num_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "NAN_num"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); - WARN(block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < 
Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 0 && data.find("contract")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'contract' "); - return 0; - } - } else { - if (data.find("nnan")!= -1) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'nnan' "); - return 0; - } else { - return 1; - } - } -} - -bool check_NAN_num_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "NAN_num"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - 
return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 1 && data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } - } else { - if (data.find("nnan")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'nnan' "); - return 0; - } - } -} - -bool check_finite_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "finite_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** 
CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 1 && data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast'"); - return 0; - } - } else { - if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'nnan' or 'ninf' or both "); - return 0; - } - } -} - -bool check_finite_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "finite_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded 
= new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 0 && data.find("contract")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'contract'"); - return 0; - } - } else { - if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'nnan' or 'ninf' or both WHICH IS NOT EXPECTED "); - return 0; - } else { - return 1; - } - } -} - -bool check_associative_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "associative_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 4, a = 0; - const char** 
CO_IRadded = new const char*[4]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-fno-signed-zeros"; - CO_IRadded[2] = "-mllvm"; - CO_IRadded[3] = "-print-after=constmerge"; - std::string data; - if (Combination_CO_size != -1) { - int Combination_CO_IRadded_size = Combination_CO_size+1; - int b = 0; - std::vector add_ir_forcombi(Combination_CO_size + 1, ""); - const char** Combination_CO_IRadded = - new const char*[Combination_CO_size+1]; - for (int i = 0; i < Combination_CO_size+1; ++i) { - if (i == Combination_CO_size) { - Combination_CO_IRadded[i] = "-fno-signed-zeros"; - break; - } - add_ir_forcombi[i] = Combination_CO[b]; - Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); - b++; - } - data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO_IRadded, - Combination_CO_IRadded_size); - } else { - data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO, - Combination_CO_size); - } - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 1 && data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } - } else { - if (data.find("reassoc") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'reassoc' "); - WARN(data); - return 0; - } - } -} - -bool check_associative_math_disabled(const char** 
Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "associative_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 4, a = 0; - const char** CO_IRadded = new const char*[4]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-fno-signed-zeros"; - CO_IRadded[2] = "-mllvm"; - CO_IRadded[3] = "-print-after=constmerge"; - std::string data; - if (Combination_CO_size != -1) { - int Combination_CO_IRadded_size = Combination_CO_size+1; - int b = 0; - std::vector add_ir_forcombi(Combination_CO_size + 1, ""); - const char** Combination_CO_IRadded = - new const char*[Combination_CO_size+1]; - for (int i = 0; i < Combination_CO_size+1; ++i) { - if (i == Combination_CO_size) { - Combination_CO_IRadded[i] = "-fno-signed-zeros"; - break; - } - add_ir_forcombi[i] = Combination_CO[b]; - Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); - b++; - } - data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO_IRadded, - Combination_CO_IRadded_size); - } else { - data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO, - Combination_CO_size); - } - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 0 && data.find("contract")!= -1) { - return 1; - } else { 
- WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'contract' "); - return 0; - } - } else { - if (data.find("reassoc")!= -1) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'reassoc' WHICH IS NOT EXPECTED "); - return 0; - } else { - return 1; - } - } -} - -bool check_signed_zeros_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "signed_zeros"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 0 && data.find("contract")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); 
- if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'contract' "); - return 0; - } - } else { - if (data.find("nsz") != -1) { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR CONTAIN 'nsz' WHICH IS NOT EXPECTED "); - return 0; - } else { - return 1; - } - } -} - -bool check_signed_zeros_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "signed_zeros"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (fast_math_present!= -1) { - if (fast_math_present == 1 && data.find("fmul fast")!= -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - 
WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'fmul fast' "); - return 0; - } - } else { - if (data.find("nsz") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN 'nsz' "); - return 0; - } - } -} - -bool check_trapping_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "trapping_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (data.find("\"no-trapping-math\"=\"true\"") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - 
WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'"); - return 0; - } -} - -bool check_trapping_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present) { - std::string block_name = "trapping_math"; - std::string kernel_name = get_string_parameters("kernel_name", block_name); - const char* kername = kernel_name.c_str(); - std::string retrieved_CO = get_string_parameters("reverse_compiler_option", - block_name); - if (retrieved_CO == "") { - WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - return 0; - } - int CO_IRadded_size = 3, a = 0; - const char** CO_IRadded = new const char*[3]; - CO_IRadded[0] = retrieved_CO.c_str(); - CO_IRadded[1] = "-mllvm"; - CO_IRadded[2] = "-print-after=constmerge"; - std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, - Combination_CO, Combination_CO_size); - if (data == "") { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR NOT GENERATED"); - return 0; - } - if (data.find("\"no-trapping-math\"=\"true\"") != -1) { - return 1; - } else { - WARN("Compiler option : " << retrieved_CO); - if (Combination_CO_size != -1) { - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - } - WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'"); - return 0; - } -} - -std::string checking_IR(const char* kername, const char** extra_CO_IRadded, - int extra_CO_IRadded_size, const char** Combination_CO, - int Combination_CO_size) { - float *A_d, *B_d, *C_d; - float *A_h, *B_h, *C_h, *result; - float Nbytes = sizeof(float); - A_h = new float[1]; - 
B_h = new float[1]; - C_h = new float[1]; - result = new float[1]; - for (int i = 0; i < 1; i++) { - A_h[i] = 0.1f; - B_h[i] = 0.1f; - C_h[i] = 0.1f; - result[i] = 0.2f; - } - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&B_d, Nbytes)); - HIP_CHECK(hipMalloc(&C_d, Nbytes)); - HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(C_d, C_h, Nbytes, hipMemcpyHostToDevice)); - hiprtcProgram prog; - HIPRTC_CHECK(hiprtcCreateProgram(&prog, ffp_contract_string, - kername, 0, NULL, NULL)); - int Combination_CO_IRadded_size; - CaptureStream capture(stderr); - if (Combination_CO_size != -1) { - Combination_CO_IRadded_size = Combination_CO_size+2; - int b = 0; - std::vector add_ir_forcombi(Combination_CO_size + 2, ""); - const char** Combination_CO_IRadded = - new const char*[Combination_CO_size+2]; - for (int i = 0; i < Combination_CO_size+2; ++i) { - if (i == Combination_CO_size) { - Combination_CO_IRadded[i] = "-mllvm"; - Combination_CO_IRadded[i+1] = "-print-after=constmerge"; - break; - } - add_ir_forcombi[i] = Combination_CO[b]; - Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); - b++; - } - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, - Combination_CO_IRadded_size, - Combination_CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("Compiler option : " << extra_CO_IRadded[0]); - WARN("FAILED IN COMBINATION :"); - for (int i = 0; i < Combination_CO_size; i++) { - WARN(Combination_CO[i]); - } - WARN("hiprtcCompileProgram() api failed!! 
with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return ""; - } - } else { - capture.Begin(); - hiprtcResult compileResult{hiprtcCompileProgram(prog, - extra_CO_IRadded_size, - extra_CO_IRadded)}; - capture.End(); - if (!(compileResult == HIPRTC_SUCCESS)) { - WARN("hiprtcCompileProgram() api failed!! with error code: "); - WARN(compileResult); - size_t logSize; - HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); - if (logSize) { - std::string log(logSize, '\0'); - HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); - WARN(log); - } - return""; - } - } - size_t codeSize; - HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); - std::vector codec(codeSize); - HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); - void* kernelParam[] = {A_d, B_d, C_d}; - auto size = sizeof(kernelParam); - void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END}; - hipModule_t module; - hipFunction_t function; - HIP_CHECK(hipModuleLoadData(&module, codec.data())); - HIP_CHECK(hipModuleGetFunction(&function, module, kername)); - HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, - kernel_parameter)); - HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost)); - for (int i = 0; i< 1; i++) { - if (result[i] != ((A_h[i] * B_h[i]) + C_h[i])) { - return ""; - } - } - std::string data = capture.getData(); - std::stringstream dataStream; - HIP_CHECK(hipModuleUnload(module)); - HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); - return data; -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This file contains functions for idividual HIPRTC supported compiler options +validation. For PASS senario the function returns 1 or 0 otherwise. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "headers/RtcUtility.h" +#include "headers/RtcFunctions.h" +#include "headers/RtcKernels.h" +#include +#include "headers/printf_common.h" + +#pragma clang diagnostic ignored "-Wunused-parameter" +#pragma clang diagnostic ignored "-Wunused-variable" + +bool check_architecture(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "architecture"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + hipDeviceProp_t prop; + HIP_CHECK(hipGetDeviceProperties(&prop, 0)); + std::string actual_architecture = prop.gcnArchName; + std::string complete_CO = retrieved_CO + actual_architecture; + const char* compiler_option = complete_CO.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + return 1; +} + +bool check_rdc(const char** Combination_CO, int Combination_CO_size, + int max_thread_pos, int fast_math_present) { + std::string block_name = "rdc"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string CO = get_string_parameters("compiler_option", + block_name); + if (CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + const char* compiler_opt = CO.c_str(); + float *A_d, *B_d, *C_d; + float *A_h, *B_h, *C_h, *result; + float Nbytes = sizeof(float); + A_h = new float[1]; + B_h = new float[1]; + C_h = new float[1]; + result = new float[1]; + for (int i = 0; i < 1; i++) { + A_h[i] = 4; + B_h[i] = 4; + result[i] = 16; + } + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&B_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, 
rdc_string, kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_opt); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_opt)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_opt); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + void* kernelParam[] = {A_d, B_d, C_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + size_t codeSize; + HIPRTC_CHECK(hiprtcGetBitcodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetBitcode(prog, codec.data())); + float wall_time; + int reg_count = 2; + int max_thread = 1; + unsigned int log_size = 5120; + char error_log[5120]; + char info_log[5120]; + std::vector jit_options = {HIPRTC_JIT_MAX_REGISTERS, + HIPRTC_JIT_THREADS_PER_BLOCK, + HIPRTC_JIT_WALL_TIME, + HIPRTC_JIT_INFO_LOG_BUFFER, + HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES, + HIPRTC_JIT_ERROR_LOG_BUFFER, + HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, + HIPRTC_JIT_LOG_VERBOSE}; + const void* lopts[] = 
{reinterpret_cast(®_count), + reinterpret_cast(&max_thread), + reinterpret_cast(&wall_time), + info_log, + reinterpret_cast(log_size), + error_log, + reinterpret_cast(log_size), + reinterpret_cast(1)}; + hiprtcLinkState rtc_link_state; + void* binary; + size_t binarySize; + int pass_count = 0; + hipModule_t module; + hipFunction_t function; + for (int i = 0; i < 2; i++) { + switch (i) { + case 0 : + HIPRTC_CHECK(hiprtcLinkCreate(0, nullptr, nullptr, &rtc_link_state)); + HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state, + HIPRTC_JIT_INPUT_LLVM_BITCODE, + codec.data(), codeSize, 0, 0, 0, 0)); + HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize)); + HIP_CHECK(hipModuleLoadData(&module, binary)); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, + nullptr, kernel_parameter)); + pass_count++; + break; + case 1 : + HIPRTC_CHECK(hiprtcLinkCreate(8, jit_options.data(), + reinterpret_cast(&lopts), + &rtc_link_state)); + HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state, + HIPRTC_JIT_INPUT_LLVM_BITCODE, + codec.data(), codeSize, 0, 0, 0, 0)); + HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize)); + HIP_CHECK(hipModuleLoadData(&module, binary)); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, + nullptr, kernel_parameter)); + pass_count++; + break; + default: + WARN(" NOT VALID INPUT "); + break; + } + } + HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost)); + for (int i = 0 ; i< 1; i++) { + if (result[i] != ((A_h[i] * B_h[i]))) { + WARN("Compiler Option : " << compiler_opt); + WARN("EXPECTED RESULT DOES NOT MATCH "); + WARN("INPUT A & B : " << A_h[i] <<" , "<< B_h[i]); + WARN("EXPECTED RES : " << (A_h[i] * B_h[i])); + WARN("OBTAINED RES : " << result[i]); + return 0; + } + } + if (pass_count == 2) { + return 1; + } else { + WARN(" pass_count IS NOT MATCHING "); + return 
0; + } +} + +bool check_denormals_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "denormals"; + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name); + picojson::array Expected_Results = get_array_parameters("Expected_Results", + block_name); + const char* kername = kernel_name.c_str(); + const char* compiler_option = retrieved_CO.c_str(); + std::vector double_vec_input; + for (auto& indx : Input_Vals) { + double_vec_input.push_back(indx.get()); + } + std::vector Input_Vals_int; + for (auto& indx : double_vec_input) { + Input_Vals_int.push_back(static_cast(indx)); + } + std::vector double_vec_expected; + for (auto& indx : Expected_Results) { + double_vec_expected.push_back(indx.get()); + } + std::vector Expected_Results_int; + for (auto& indx : double_vec_expected) { + Expected_Results_int.push_back(static_cast(indx)); + } + int test_case, res_inc; + for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() && + res_inc < Expected_Results_int.size(); test_case+=2, res_inc++) { + double *base_h, *power_h, *result_h; + double *base_d, *power_d, *result_d; + double Nbytes = sizeof(double); + base_h = new double[1]; + power_h = new double[1]; + result_h = new double[1]; + *base_h = Input_Vals_int[test_case]; + *power_h = Input_Vals_int[test_case+1]; + *result_h = 1; + HIP_CHECK(hipMalloc(&base_d, Nbytes)); + HIP_CHECK(hipMalloc(&power_d, Nbytes)); + HIP_CHECK(hipMalloc(&result_d, Nbytes)); + HIP_CHECK(hipMemcpy(base_d, 
base_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice)); + hiprtcProgram program; + HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string, + "denormals", 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(program, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(program, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(program, codec.data())); + void* kernelParam[] = {base_d, power_d, result_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, + nullptr, kernel_parameter)); + HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double), + hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + if (*result_h != Expected_Results_int[res_inc]) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc); + WARN("th ITERATION (start iteration is 0 ) "); + WARN("INPUT : pow(2, " << *power_h << ") "); + WARN("EXPECTED OP: " << Expected_Results_int[res_inc]); + WARN("OBTAINED OP: " << *result_h); + return 0; + } + } + return 1; +} + +bool check_denormals_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "denormals"; + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT 
PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name); + picojson::array Expected_Results_for_no = get_array_parameters( + "Expected_Results_for_no", block_name); + const char* kername = kernel_name.c_str(); + const char* compiler_option = retrieved_CO.c_str(); + std::vector double_vec_input; + for (auto& indx : Input_Vals) { + double_vec_input.push_back(indx.get()); + } + std::vector Input_Vals_int; + for (auto& indx : double_vec_input) { + Input_Vals_int.push_back(static_cast(indx)); + } + std::vector double_vec_expected_for_no; + for (auto& indx : Expected_Results_for_no) { + double_vec_expected_for_no.push_back(indx.get()); + } + std::vector Expected_Results_for_no_int; + for (auto& indx : double_vec_expected_for_no) { + Expected_Results_for_no_int.push_back(static_cast(indx)); + } + int test_case, res_inc; + for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() && + res_inc < Expected_Results_for_no_int.size(); test_case+=2, res_inc++) { + double *base_h, *power_h, *result_h; + double *base_d, *power_d, *result_d; + double Nbytes = sizeof(double); + base_h = new double[1]; + power_h = new double[1]; + result_h = new double[1]; + *base_h = Input_Vals_int[test_case]; + *power_h = Input_Vals_int[test_case+1]; + *result_h = 0; + HIP_CHECK(hipMalloc(&base_d, Nbytes)); + HIP_CHECK(hipMalloc(&power_d, Nbytes)); + HIP_CHECK(hipMalloc(&result_d, Nbytes)); + HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice)); + hiprtcProgram program; + HIPRTC_CHECK(hiprtcCreateProgram(&program, 
denormals_string, + "denormals", 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(program, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(program, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(program, codec.data())); + void* kernelParam[] = {base_d, power_d, result_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, + nullptr, kernel_parameter)); + HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double), + hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + 
HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + if (*result_h != Expected_Results_for_no_int[res_inc]) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc); + WARN("th ITERATION (start iteration is 0 ) "); + WARN("INPUT : pow(2, " << *power_h << ") "); + WARN("EXPECTED OP: "<< Expected_Results_for_no_int[res_inc]); + WARN("OBTAINED OP: "<< *result_h); + return 0; + } + } + return 1; +} + +bool check_ffp_contract_off(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "ffp_contract"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 3) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + std::string hold = CO_vec[0]; + CO_IRadded[0] = hold.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO[0]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if 
(data.find("fmul contract") != -1 && + data.find("@llvm.fmuladd.f32") != -1) { + WARN("Compiler option : " << retrieved_CO[0]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN EITHER"); + WARN("'fmul contract' or '@llvm.fmuladd.f32' or both "); + WARN("WHICH IS NOT EXPECTED"); + return 0; + } else { + return 1; + } +} + +bool check_ffp_contract_on(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "ffp_contract"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 3) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + std::string hold = CO_vec[1]; + CO_IRadded[0] = hold.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, + CO_IRadded_size, Combination_CO, + Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO[1]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 0 && data.find("@llvm.fmuladd.f32")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO[1]); + if 
(Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' "); + return 0; + } + } else { + if (data.find("@llvm.fmuladd.f32") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO[1]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' "); + return 0; + } + } +} + +bool check_ffp_contract_fast(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "ffp_contract"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 3) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + std::string hold = CO_vec[2]; + CO_IRadded[0] = hold.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO[2]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 1 && 
data.find("contract")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO[2]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul contract' "); + return 0; + } + } else { + if (data.find("fmul contract") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO[2]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul contract' "); + return 0; + } + } +} + +bool check_fast_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "fast_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + 
if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } +} + +bool check_fast_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "fast_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (data.find("fmul fast")!= -1) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } else { + return 1; + } +} + +bool check_slp_vectorize_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "slp_vectorize"; + std::string retrieved_CO = 
get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + __half2 *a_d, *x_d, *y_d; + __half2 a_h, x_h; + a_h.data.x = 1.5; + x_h.data.y = 3.0; + CaptureStream capture(stderr); + HIP_CHECK(hipMalloc(&a_d, sizeof(__half2))); + HIP_CHECK(hipMalloc(&x_d, sizeof(__half2))); + HIP_CHECK(hipMalloc(&y_d, sizeof(__half2))); + HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice)); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + int Combination_CO_IRadded_size = Combination_CO_size+3; + int b = 0; + std::vector add_ir_forcombi(Combination_CO_size + 3, ""); + const char** Combination_CO_IRadded = + new const char*[Combination_CO_size+3]; + for (int i = 0; i < Combination_CO_size+3; ++i) { + if (i == Combination_CO_size) { + Combination_CO_IRadded[i] = "-fno-signed-zeros"; + Combination_CO_IRadded[i+1] = "-mllvm"; + Combination_CO_IRadded[i+2] = "-print-after=constmerge"; + break; + } + add_ir_forcombi[i] = Combination_CO[b]; + Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); + b++; + } + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_IRadded_size, + Combination_CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler option : 
" << retrieved_CO); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size+3; i++) { + WARN(Combination_CO_IRadded[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size, + CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler option : " << retrieved_CO); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + std::string data = capture.getData(); + std::stringstream dataStream; + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + void* kernelParam[] = {reinterpret_cast(a_d), + reinterpret_cast(x_d), + reinterpret_cast(y_d)}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, + kernel_parameter)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + 
for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + int times = 0; + if (data.find("contract <2 x half>", 0) != -1) { + times++; + } + int start = data.find("contract <2 x half>", 0) + 1; + while (data.find("contract <2 x half>", start) != -1) { + times++; + start = data.find("contract <2 x half>", start)+1; + } + if (times == 1) { + return 1; + } else if (times == 0) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fadd contract <2 x half>' "); + return 0; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); + WARN(" WHICH IS NOT EXPECTED (IT SHOULD BE PRESENT ONCE)"); + return 0; + } +} + +bool check_slp_vectorize_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "slp_vectorize"; + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + int CO_IRadded_size = 3; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + __half2 *a_d, *x_d, *y_d; + __half2 a_h, x_h; + 
a_h.data.x = 1.5; + x_h.data.y = 3.0; + CaptureStream capture(stderr); + HIP_CHECK(hipMalloc(&a_d, sizeof(__half2))); + HIP_CHECK(hipMalloc(&x_d, sizeof(__half2))); + HIP_CHECK(hipMalloc(&y_d, sizeof(__half2))); + HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice)); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + int Combination_CO_IRadded_size = Combination_CO_size+3; + int b = 0; + std::vector add_ir_forcombi(Combination_CO_size + 3, ""); + const char** Combination_CO_IRadded = + new const char*[Combination_CO_size+3]; + for (int i = 0; i < Combination_CO_size+3; ++i) { + if (i == Combination_CO_size) { + Combination_CO_IRadded[i] = "-fno-signed-zeros"; + Combination_CO_IRadded[i+1] = "-mllvm"; + Combination_CO_IRadded[i+2] = "-print-after=constmerge"; + break; + } + add_ir_forcombi[i] = Combination_CO[b]; + Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); + b++; + } + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_IRadded_size, + Combination_CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler option : " << retrieved_CO); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size+3; i++) { + WARN(Combination_CO_IRadded[i]); + } + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size, + CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler option : " << retrieved_CO); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + std::string data = capture.getData(); + std::stringstream dataStream; + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + void* kernelParam[] = {reinterpret_cast(a_d), + reinterpret_cast(x_d), + reinterpret_cast(y_d)}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, + kernel_parameter)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + int times = 0; + if (data.find("contract <2 x half>", 0) != -1) { + times++; + } + int start = data.find("contract <2 x half>", 0) + 1; + while (data.find("contract <2 x half>", start) != -1) { + times++; + start = data.find("contract <2 x half>", start)+1; + } + if (times == 2) { + return 1; + } else if (times < 2) { + 
WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); + WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)"); + return 0; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times"); + WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)"); + return 0; + } +} + +bool check_macro(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "macro"; + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + picojson::array Expected_Results = get_array_parameters("Expected_Results", + block_name); + const char* kername = kernel_name.c_str(); + std::vector double_vec_expected; + for (auto& indx : Expected_Results) { + double_vec_expected.push_back(indx.get()); + } + std::vector Expected_Results_int; + for (auto& indx : double_vec_expected) { + Expected_Results_int.push_back(static_cast(indx)); + } + const char* compiler_option = retrieved_CO.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, macro_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + if 
(!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + int *macro_value_h; + int *macro_value_d; + macro_value_h = new int[1]; + HIP_CHECK(hipMalloc(¯o_value_d, sizeof(int))); + *macro_value_h = 0; + HIP_CHECK(hipMemcpy(macro_value_d, macro_value_h, sizeof(int), + hipMemcpyHostToDevice)); + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + hiprtcGetCode(prog, codec.data()); + void* kernelParam[] = {macro_value_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[]={HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, + kernel_parameter)); + HIP_CHECK(hipMemcpy(macro_value_h, macro_value_d, sizeof(int), + hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + 
HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + if (*macro_value_h != Expected_Results_int[0]) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED RESULT DOES NOT MATCH"); + WARN("INPUT: " << compiler_option); + WARN("EXPECTED OP : "<< Expected_Results_int[0]); + WARN("OBTAINED OP: "<< *macro_value_h); + return 0; + } else { + return 1; + } +} + +bool check_undef_macro(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "undef_macro"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + picojson::array comp_opt = get_array_parameters("compiler_option", + block_name); + if (comp_opt.size() < 2) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::vector compiler_option; + for (auto& indx : comp_opt) { + compiler_option.push_back(indx.get()); + } + std::vector variable(compiler_option.size(), ""); + const char** appended_compiler_options = + new const char*[compiler_option.size()]; + for (int i = 0; i < compiler_option.size(); ++i) { + variable[i] = compiler_option[i]; + appended_compiler_options[i] = variable[i].c_str(); + } + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, undef_macro_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + 
HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (log.find("undeclared identifier")) { + return 1; + } + } else { + WARN("Compiler Option : " << appended_compiler_options[1]); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("Expected error : 'undeclared identifier' NOT GENERATED"); + return 0; + } + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, + compiler_option.size(), + appended_compiler_options)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (log.find("undeclared identifier")) { + return 1; + } + } else { + WARN("Compiler Option : " << appended_compiler_options[0]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("Expected error : 'undeclared identifier' NOT GENERATED"); + return 0; + } + } + } + WARN("Compiler Option : " << appended_compiler_options[0]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED ERROR WAS NOT GENERATED"); + return 0; +} + +bool check_header_dir(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "header_dir"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string compiler_option = get_string_parameters("compiler_option", + block_name); + if (compiler_option == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + 
WARN(Combination_CO[i]); + } + } + return 0; + } + picojson::array Headers = get_array_parameters("Headers", block_name); + picojson::array depending_comp_optn = + get_array_parameters("depending_comp_optn", block_name); + picojson::array Src_headers = + get_array_parameters("Src_headers", block_name); + picojson::array Input_Thrd_Vals = + get_array_parameters("Input_Vals", block_name); + picojson::array Expected_Results = + get_array_parameters("Expected_Results", block_name); + std::string str = "pwd"; + const char *cmd = str.c_str(); + CaptureStream capture(stdout); + capture.Begin(); + system(cmd); + capture.End(); + std::string wor_dir = capture.getData(); + std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); + std::string append_str = "catch/unit/rtc/headers"; + std::string CO = compiler_option + " " + break_dir + append_str; + const char* appended_CO = CO.c_str(); + std::vector Headers_list; + for (auto& indx : Headers) { + Headers_list.push_back(indx.get()); + } + std::vector Src_headers_list; + for (auto& indx : Src_headers) { + Src_headers_list.push_back(indx.get()); + } + std::vector depending_co_list; + for (auto& indx : depending_comp_optn) { + depending_co_list.push_back(indx.get()); + } + std::vector double_vec_target; + for (auto& indx : Input_Thrd_Vals) { + double_vec_target.push_back(indx.get()); + } + std::vector Input_Thrd_Vals_int; + for (auto& indx : double_vec_target) { + Input_Thrd_Vals_int.push_back(static_cast(indx)); + } + std::vector double_vec_expected; + for (auto& indx : Expected_Results) { + double_vec_expected.push_back(indx.get()); + } + std::vector Expected_Results_int; + for (auto& indx : double_vec_expected) { + Expected_Results_int.push_back(static_cast(indx)); + } + std::vector src_var_hdr_lst(Src_headers_list.size(), ""); + const char** src_hder_lst = new const char*[Src_headers_list.size()]; + for (int i = 0; i < Src_headers_list.size(); ++i) { + src_var_hdr_lst[i] = Src_headers_list[i]; + src_hder_lst[i] = 
src_var_hdr_lst[i].c_str(); + } + std::vector var_hdr_lst(Headers_list.size(), ""); + const char** hder_lst = new const char*[Headers_list.size()]; + for (int i = 0; i < Headers_list.size(); ++i) { + var_hdr_lst[i] = Headers_list[i]; + hder_lst[i] = var_hdr_lst[i].c_str(); + } + for (int senario = 0; senario< Input_Thrd_Vals_int.size(); senario++) { + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, header_dir_string, + kername, Headers_list.size(), + src_hder_lst, hder_lst)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << appended_CO); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &appended_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << appended_CO); + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + int value_h = 0; + int* ptr_value_h = &value_h; + int input_h = Input_Thrd_Vals_int[senario]; + int* ptr_input_h = &input_h; + int* value_d; + int* input_d; + HIP_CHECK(hipMalloc(&value_d, sizeof(int))); + HIP_CHECK(hipMalloc(&input_d, sizeof(int))); + HIP_CHECK(hipMemcpy(value_d, ptr_value_h, sizeof(int), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(input_d, ptr_input_h, sizeof(int), + hipMemcpyHostToDevice)); + void* kernelParam[] = {value_d, input_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, + kernel_parameter)); + HIP_CHECK(hipMemcpy(ptr_value_h, value_d, sizeof(int), + hipMemcpyDeviceToHost)); + if (*ptr_value_h != Expected_Results_int[senario]) { + WARN("Compiler Option : " << appended_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN(" EXPECTED RESULT DOES NOT MATCH FOR " << senario); + WARN("th ITERATION (start iteration is 0 ) "); + WARN(" INPUT: " << Input_Thrd_Vals_int[senario]); + WARN(" EXPECTED OP: "<< Expected_Results_int[senario]); + WARN(" OBTAINED OP: "<< *ptr_value_h); + return 0; + } + HIP_CHECK(hipDeviceSynchronize()); + 
HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + } + return 1; +} + +bool check_warning(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "warning"; + std::string retrieved_CO = + get_string_parameters("compiler_option", block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + const char* compiler_option = retrieved_CO.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, warning_string, kername, + 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (-1 != log.find("#warning")) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN(" WARNING MESSAGE IS PRINTING WHICH IS NOT SUPRESSED "); + return 0; + } else { + return 1; + } + } else { + return 1; + } +} + +bool check_Rpass_inline(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "Rpass_inline"; + std::string retrieved_CO = + get_string_parameters("compiler_option", block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + const char* compiler_option = retrieved_CO.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() 
api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (log.find("inlined into")) { + return 1; + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED STRING 'inlined into' IS NOT PRESENT IN LOG "); + return 0; + } + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN(" LOG WITH EXPECTED STRING 'inlined into' IS NOT PRESENT "); + return 0; + } +} + +bool check_conversionerror_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "error"; + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 4) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + 
WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + std::string variable = CO_vec[0]; + const char* compiler_option = variable.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + } + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + std::string variable = "error"; + if (-1 != log.find(variable)) { + return 1; + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("ERROR MSG : '" << variable <<"' NOT FOUND"); + return 0; + } + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("LOG IS NOT GENERATED"); + WARN("maybe due to presence of '-w' compiler option"); + return 0; + } +} + +bool check_conversionerror_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "error"; + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 4) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + 
WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + std::string variable = CO_vec[1]; + const char* compiler_option = variable.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + }size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (-1 != log.find("error")) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("LOG IS PRESENT WITH ERROR WHICH IS NOT EXPECTED : "); + WARN("maybe due to presence of '-w' compiler option"); + return 0; + } else { + return 1; + } + } else { + return 1; + } +} + +bool check_conversionwarning_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "error"; + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 4) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name 
= get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + std::string variable = CO_vec[2]; + const char* compiler_option = variable.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + }size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + std::string variable = "warning"; + if (-1 != log.find(variable)) { + return 1; + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("LOG DOESN'T CONTAIN WARNING AS EXP : " << compiler_option); + return 0; + } + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("LOG IS NOT GENERATED"); + return 0; + } +} + +bool check_conversionwarning_disabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present) { + std::string block_name = "error"; + picojson::array retrieved_CO = get_array_parameters("compiler_option", + block_name); + if (retrieved_CO.size() < 4) { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string 
kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::vector CO_vec; + for (auto& indx : retrieved_CO) { + CO_vec.push_back(indx.get()); + } + std::string variable = CO_vec[3]; + const char* compiler_option = variable.c_str(); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size, + Combination_CO)}; + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option)}; + }size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + if (-1 != log.find("warning")) { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("WARNING IS GENERATED WHICH IS NOT EXPECTED"); + WARN(compiler_option); + return 0; + } else { + return 1; + } + } else { + return 1; + } +} + +bool check_max_thread(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "max_thread"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + std::string default_CO = get_string_parameters("kernel_name", block_name); + picojson::array Target_Thrd_Vals = get_array_parameters("Target_Vals", + block_name); + picojson::array Input_Thrd_Vals = get_array_parameters("Input_Vals", + block_name); + picojson::array Expected_Results = get_array_parameters("Expected_Results", + block_name); + const char* kername = kernel_name.c_str(); + std::string compiler_option = get_string_parameters("compiler_option", + block_name); + if (compiler_option == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + 
WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::vector double_vec_target; + for (auto& indx : Target_Thrd_Vals) { + double_vec_target.push_back(indx.get()); + } + std::vector Target_Thrd_Vals_int; + for (auto& indx : double_vec_target) { + Target_Thrd_Vals_int.push_back(static_cast(indx)); + } + int a = 0; + std::vector variable(Target_Thrd_Vals_int.size(), ""); + const char** appended_compiler_options = + new const char*[Target_Thrd_Vals_int.size()]; + for (int i = 0; i < Target_Thrd_Vals_int.size() ; i++) { + variable[i] = compiler_option + std::to_string(Target_Thrd_Vals_int[i]); + appended_compiler_options[i] = variable[i].c_str(); + } + std::vector double_vec_input; + for (auto& indx : Input_Thrd_Vals) { + double_vec_input.push_back(indx.get()); + } + std::vector Input_Thrd_Vals_int; + for (auto& indx : double_vec_input) { + Input_Thrd_Vals_int.push_back(static_cast(indx)); + } + std::vector double_vec_expected; + for (auto& indx : Expected_Results) { + double_vec_expected.push_back(indx.get()); + } + std::vector Expected_Results_int; + for (auto& indx : double_vec_expected) { + Expected_Results_int.push_back(static_cast(indx)); + } + int pass_count = 0; + int inc = (Input_Thrd_Vals_int.size()/Target_Thrd_Vals_int.size()); + int start = 0; + int check, test_case; + for (int senario = 0; senario < Target_Thrd_Vals_int.size(); senario++) { + if (Target_Thrd_Vals_int[senario] == 0) { + check = 0; + for (test_case = start; test_case< (start+inc); test_case++) { + if (check == Expected_Results_int[test_case]) { + pass_count++; + } + } + start+= inc; + continue; + } + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + std::string max_thread_string = variable[senario]; + Combination_CO[max_thread_pos] = 
max_thread_string.c_str(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << appended_compiler_options[senario]); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &appended_compiler_options[senario])}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << appended_compiler_options[senario]); + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + for (test_case = start; test_case< (start+inc); test_case++) { + int num_threads_h = 0; + int* ptr_num_threads_h = &num_threads_h; + int* Thread_count_d; + HIP_CHECK(hipMalloc(&Thread_count_d, sizeof(int))); + HIP_CHECK(hipMemcpy(Thread_count_d, ptr_num_threads_h, sizeof(int), + hipMemcpyHostToDevice)); + void* kernelParam[] = {Thread_count_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + 
HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + hipError_t status = hipModuleLaunchKernel(function, 1, 1, 1, + Input_Thrd_Vals_int[test_case], + 1, 1, 0, 0, nullptr, + kernel_parameter); + HIP_CHECK(hipMemcpy(ptr_num_threads_h, Thread_count_d, sizeof(int), + hipMemcpyDeviceToHost)); + if ((status == hipSuccess) && + (num_threads_h <= Target_Thrd_Vals_int[senario])) { + check = 1; + } else { + check = 0; + } + if (check != Expected_Results_int[test_case]) { + WARN("Compiler Option : " << appended_compiler_options[senario]); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + std::string max_thread_string = variable[senario]; + Combination_CO[max_thread_pos] = max_thread_string.c_str(); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED RESULT DOES NOT MATCH FOR " << test_case); + WARN("th ITERATION (start iteration is 0 ) "); + WARN("IP THREAD VAL: " << Input_Thrd_Vals_int[test_case]); + WARN("EXPECTED OP: "<< Expected_Results_int[test_case]); + WARN("OBTAINED OP: "<< check); + return 0; + } + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + } + start+=inc; + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + } + return 1; +} + +bool check_unsafe_atomic_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "unsafe_atomic"; + std::string compiler_option = get_string_parameters("compiler_option", + block_name); + if (compiler_option == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + const char *compiler_option_cstr = compiler_option.c_str(); + float *A_d; + 
const int N = 1000; + float A_h[N]; + float Nbytes = N * sizeof(float); + double sum_w = 0, sum_wo = 0, sum_tocheck = 0; + for (int i = 0; i < N; i++) { + A_h[i] = 0.1f; + sum_tocheck += A_h[i] + 0.2f; + } + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + for (int senario = 0; senario < 2; senario ++) { + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, + &compiler_option_cstr)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + void* kernelParam[] = {A_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0, + nullptr, kernel_parameter)); + HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); + for (int i = 0; i < N; i++) { + if (senario == 0) { + sum_wo += A_h[i]; + } else { + sum_w += A_h[i]; + } + } + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + } + if (sum_w != sum_tocheck) { + return 1; + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED : " << sum_w << " != " << sum_tocheck); + return 0; + } +} + +bool check_unsafe_atomic_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "unsafe_atomic"; + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < 
Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + const char* compiler_option = retrieved_CO.c_str(); + float *A_d; + const int N = 1000; + float A_h[N]; + float Nbytes = N * sizeof(float); + double sum = 0, sum_tocheck = 0; + for (int i = 0; i < N; i++) { + A_h[i] = 0.1f; + sum_tocheck += A_h[i] + 0.2f; + } + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string, + kername, 0, NULL, NULL)); + if (Combination_CO_size != -1) { + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_size, + Combination_CO)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } else { + hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_option)}; + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler Option : " << compiler_option); + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return 0; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + void* kernelParam[] = {A_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0, + nullptr, kernel_parameter)); + HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); + for (int i = 0; i < N; i++) { + sum += A_h[i]; + } + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + if (sum == sum_tocheck) { + return 1; + } else { + WARN("Compiler Option : " << compiler_option); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("EXPECTED RESULT IS NOT OBTAINED "); + WARN("EXPECTED RESULT: "<< sum_tocheck); + WARN("OBTAINED RESULT: "<< sum); + return 0; + } +} + +bool check_infinite_num_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "infinite_num"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + 
WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present != -1) { + if (fast_math_present == 0 && data.find("contract") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'contract' "); + return 0; + } + } else { + if (data.find("ninf")!= -1) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'ninf' "); + return 0; + } else { + return 1; + } + } +} + +bool check_infinite_num_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "infinite_num"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != 
-1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present != -1) { + if (fast_math_present == 1 && data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } + } else { + if (data.find("ninf")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'ninf' "); + return 0; + } + } +} + +bool check_NAN_num_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "NAN_num"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME "); + WARN(block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < 
Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 0 && data.find("contract")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'contract' "); + return 0; + } + } else { + if (data.find("nnan")!= -1) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'nnan' "); + return 0; + } else { + return 1; + } + } +} + +bool check_NAN_num_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "NAN_num"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + 
return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 1 && data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } + } else { + if (data.find("nnan")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'nnan' "); + return 0; + } + } +} + +bool check_finite_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "finite_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** 
CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 1 && data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast'"); + return 0; + } + } else { + if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'nnan' or 'ninf' or both "); + return 0; + } + } +} + +bool check_finite_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "finite_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded 
= new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 0 && data.find("contract")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'contract'"); + return 0; + } + } else { + if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'nnan' or 'ninf' or both WHICH IS NOT EXPECTED "); + return 0; + } else { + return 1; + } + } +} + +bool check_associative_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "associative_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 4, a = 0; + const char** 
CO_IRadded = new const char*[4]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-fno-signed-zeros"; + CO_IRadded[2] = "-mllvm"; + CO_IRadded[3] = "-print-after=constmerge"; + std::string data; + if (Combination_CO_size != -1) { + int Combination_CO_IRadded_size = Combination_CO_size+1; + int b = 0; + std::vector add_ir_forcombi(Combination_CO_size + 1, ""); + const char** Combination_CO_IRadded = + new const char*[Combination_CO_size+1]; + for (int i = 0; i < Combination_CO_size+1; ++i) { + if (i == Combination_CO_size) { + Combination_CO_IRadded[i] = "-fno-signed-zeros"; + break; + } + add_ir_forcombi[i] = Combination_CO[b]; + Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); + b++; + } + data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO_IRadded, + Combination_CO_IRadded_size); + } else { + data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO, + Combination_CO_size); + } + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 1 && data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } + } else { + if (data.find("reassoc") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'reassoc' "); + WARN(data); + return 0; + } + } +} + +bool check_associative_math_disabled(const char** 
Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "associative_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 4, a = 0; + const char** CO_IRadded = new const char*[4]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-fno-signed-zeros"; + CO_IRadded[2] = "-mllvm"; + CO_IRadded[3] = "-print-after=constmerge"; + std::string data; + if (Combination_CO_size != -1) { + int Combination_CO_IRadded_size = Combination_CO_size+1; + int b = 0; + std::vector add_ir_forcombi(Combination_CO_size + 1, ""); + const char** Combination_CO_IRadded = + new const char*[Combination_CO_size+1]; + for (int i = 0; i < Combination_CO_size+1; ++i) { + if (i == Combination_CO_size) { + Combination_CO_IRadded[i] = "-fno-signed-zeros"; + break; + } + add_ir_forcombi[i] = Combination_CO[b]; + Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); + b++; + } + data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO_IRadded, + Combination_CO_IRadded_size); + } else { + data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO, + Combination_CO_size); + } + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 0 && data.find("contract")!= -1) { + return 1; + } else { 
+ WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'contract' "); + return 0; + } + } else { + if (data.find("reassoc")!= -1) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'reassoc' WHICH IS NOT EXPECTED "); + return 0; + } else { + return 1; + } + } +} + +bool check_signed_zeros_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "signed_zeros"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 0 && data.find("contract")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); 
+ if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'contract' "); + return 0; + } + } else { + if (data.find("nsz") != -1) { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR CONTAIN 'nsz' WHICH IS NOT EXPECTED "); + return 0; + } else { + return 1; + } + } +} + +bool check_signed_zeros_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "signed_zeros"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (fast_math_present!= -1) { + if (fast_math_present == 1 && data.find("fmul fast")!= -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + 
WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'fmul fast' "); + return 0; + } + } else { + if (data.find("nsz") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN 'nsz' "); + return 0; + } + } +} + +bool check_trapping_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "trapping_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (data.find("\"no-trapping-math\"=\"true\"") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + 
WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'"); + return 0; + } +} + +bool check_trapping_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present) { + std::string block_name = "trapping_math"; + std::string kernel_name = get_string_parameters("kernel_name", block_name); + const char* kername = kernel_name.c_str(); + std::string retrieved_CO = get_string_parameters("reverse_compiler_option", + block_name); + if (retrieved_CO == "") { + WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + return 0; + } + int CO_IRadded_size = 3, a = 0; + const char** CO_IRadded = new const char*[3]; + CO_IRadded[0] = retrieved_CO.c_str(); + CO_IRadded[1] = "-mllvm"; + CO_IRadded[2] = "-print-after=constmerge"; + std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size, + Combination_CO, Combination_CO_size); + if (data == "") { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR NOT GENERATED"); + return 0; + } + if (data.find("\"no-trapping-math\"=\"true\"") != -1) { + return 1; + } else { + WARN("Compiler option : " << retrieved_CO); + if (Combination_CO_size != -1) { + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + } + WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'"); + return 0; + } +} + +std::string checking_IR(const char* kername, const char** extra_CO_IRadded, + int extra_CO_IRadded_size, const char** Combination_CO, + int Combination_CO_size) { + float *A_d, *B_d, *C_d; + float *A_h, *B_h, *C_h, *result; + float Nbytes = sizeof(float); + A_h = new float[1]; + 
B_h = new float[1]; + C_h = new float[1]; + result = new float[1]; + for (int i = 0; i < 1; i++) { + A_h[i] = 0.1f; + B_h[i] = 0.1f; + C_h[i] = 0.1f; + result[i] = 0.2f; + } + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&B_d, Nbytes)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(C_d, C_h, Nbytes, hipMemcpyHostToDevice)); + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, ffp_contract_string, + kername, 0, NULL, NULL)); + int Combination_CO_IRadded_size; + CaptureStream capture(stderr); + if (Combination_CO_size != -1) { + Combination_CO_IRadded_size = Combination_CO_size+2; + int b = 0; + std::vector add_ir_forcombi(Combination_CO_size + 2, ""); + const char** Combination_CO_IRadded = + new const char*[Combination_CO_size+2]; + for (int i = 0; i < Combination_CO_size+2; ++i) { + if (i == Combination_CO_size) { + Combination_CO_IRadded[i] = "-mllvm"; + Combination_CO_IRadded[i+1] = "-print-after=constmerge"; + break; + } + add_ir_forcombi[i] = Combination_CO[b]; + Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str(); + b++; + } + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, + Combination_CO_IRadded_size, + Combination_CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("Compiler option : " << extra_CO_IRadded[0]); + WARN("FAILED IN COMBINATION :"); + for (int i = 0; i < Combination_CO_size; i++) { + WARN(Combination_CO[i]); + } + WARN("hiprtcCompileProgram() api failed!! 
with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return ""; + } + } else { + capture.Begin(); + hiprtcResult compileResult{hiprtcCompileProgram(prog, + extra_CO_IRadded_size, + extra_CO_IRadded)}; + capture.End(); + if (!(compileResult == HIPRTC_SUCCESS)) { + WARN("hiprtcCompileProgram() api failed!! with error code: "); + WARN(compileResult); + size_t logSize; + HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize)); + if (logSize) { + std::string log(logSize, '\0'); + HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0])); + WARN(log); + } + return""; + } + } + size_t codeSize; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize)); + std::vector codec(codeSize); + HIPRTC_CHECK(hiprtcGetCode(prog, codec.data())); + void* kernelParam[] = {A_d, B_d, C_d}; + auto size = sizeof(kernelParam); + void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END}; + hipModule_t module; + hipFunction_t function; + HIP_CHECK(hipModuleLoadData(&module, codec.data())); + HIP_CHECK(hipModuleGetFunction(&function, module, kername)); + HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr, + kernel_parameter)); + HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost)); + for (int i = 0; i< 1; i++) { + if (result[i] != ((A_h[i] * B_h[i]) + C_h[i])) { + return ""; + } + } + std::string data = capture.getData(); + std::stringstream dataStream; + HIP_CHECK(hipModuleUnload(module)); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + return data; +} diff --git a/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp b/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp index 3caccdb4a5..cc89f5b4cf 100644 --- a/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp +++ b/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp @@ 
-1,508 +1,508 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sindxl -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* -This file has definition of functions for the following functinality: - -1) get_combi_string_vec() : Retrieve the combination string which contains -contains the combination of block name which indicate the respective compiler -option seperated by ':' from RtcConfig.jason file and returns them in the -form of vectors. - -2) split_comb_string() : The combination of blockname which are seperated by -':' has to split so that their respective compiler option can be retrieved -from the json file. This functn internally calls calling_combination_function() -for each of the combination of compiler options. This function returns a -int value i.e the total failed cases in that combination which is obtained -by calling_combination_function() function. - -3) calling_combination_function() : This function takes the combination of -blockname as the input. 
The respective compiler option for that block name is -retrieved from the json file and store the compiler options in a array. -calling_resp_function() is called which mapps the compiler option function -which has to be called with a set of required parameters -(combination of compiler options is one among them). this function returns -the status of execution ie 1 or 0 (bool). - -4) getblock_fromconfig() : This function is used to open the RtcConfig.json -file and return the blocks. - -5) get_string_parameters() and get_array_parameters() : retrieved the -parameters of the respective block name. - -*/ - -#include -#include -#include -#include -#include -#include -#include -#include "headers/RtcUtility.h" -#include "headers/RtcFunctions.h" -#include "headers/RtcKernels.h" -#include -#include "headers/printf_common.h" - -#pragma clang diagnostic ignored "-Wunused-but-set-variable" - -std::vector get_combi_string_vec() { - picojson::array combi_string = get_array_parameters("Combi_CO", - "all_compier_options"); - std::vector combi_string_list; - for (auto& indx : combi_string) { - combi_string_list.push_back(indx.get()); - } - return combi_string_list; -} - -int split_comb_string(std::string option) { - int start_collon_index = option.find(':'); - int start_index = 0; - std::vector combi_block_name; - while (start_collon_index != std::string::npos) { - std::string singleoption = option.substr(start_index, - start_collon_index - start_index); - combi_block_name.push_back(singleoption); - start_index = start_collon_index + 1; - start_collon_index = option.find(':', start_index); - } - std::string last_option = option.substr(start_index, - option.length() - start_index); - combi_block_name.push_back(last_option); - return calling_combination_function(combi_block_name); -} - -int calling_combination_function(std::vector combi_vec_list) { - int combi_size = combi_vec_list.size(); - int fast_math_present = -1, undef_present = 0; - int max_thread_position; - std::vector 
hold_CO(combi_size, ""); - const char** Combination_CO = new const char*[combi_size]; - picojson::array undef_compiler_option = get_array_parameters( - "compiler_option", "undef_macro"); - std::vector undef_CO_vec; - for (auto& indx : undef_compiler_option) { - undef_CO_vec.push_back(indx.get()); - } - for (int i=0; i< combi_size; i++) { - if (combi_vec_list[i] == "max_thread") { - std::string ready_CO = get_string_parameters("ready_compiler_option", - combi_vec_list[i]); - hold_CO[i] = ready_CO; - if (combi_vec_list[i] == "max_thread") { - max_thread_position = i; - } - } else if (combi_vec_list[i] == "header_dir") { - std::string retrived_CO = get_string_parameters("compiler_option", - "header_dir"); - std::string str = "pwd"; - const char *cmd = str.c_str(); - CaptureStream capture(stdout); - capture.Begin(); - system(cmd); - capture.End(); - std::string wor_dir = capture.getData(); - std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); - std::string append_str = "catch/unit/rtc/headers"; - std::string CO = retrived_CO + " " + break_dir + append_str; - hold_CO[i] = CO; - } else if (combi_vec_list[i] == "architecture") { - std::string retrived_CO = get_string_parameters("compiler_option", - "architecture"); - hipDeviceProp_t prop; - HIP_CHECK(hipGetDeviceProperties(&prop, 0)); - std::string actual_architecture = prop.gcnArchName; - std::string complete_CO = retrived_CO + actual_architecture; - hold_CO[i] = complete_CO; - } else if (check_positive_CO_present(combi_vec_list[i]) == 1) { - std::string positive_CO = get_string_parameters("compiler_option", - combi_vec_list[i]); - hold_CO[i] = positive_CO; - if (combi_vec_list[i] == "fast_math") - fast_math_present = 1; - } else if (check_negative_CO_present(combi_vec_list[i]) == 1) { - std::string split_block_name = combi_vec_list[i].substr(3, - combi_vec_list[i].length() - 3); - std::string negative_CO = get_string_parameters( - "reverse_compiler_option", split_block_name); - hold_CO[i] = negative_CO; - 
if (split_block_name == "fast_math") - fast_math_present = 0; - } else if ( combi_vec_list[i] == "conversion_error" - || combi_vec_list[i] == "conversion_no_error" - || combi_vec_list[i] == "conversion_no_warning" - || combi_vec_list[i] == "conversion_warning") { - picojson::array compiler_option = get_array_parameters("compiler_option", - "error"); - std::vector CO_vec; - for (auto& indx : compiler_option) { - CO_vec.push_back(indx.get()); - } - if (combi_vec_list[i] == "conversion_error") { - hold_CO[i] = CO_vec[0]; - } else if (combi_vec_list[i] == "conversion_no_error") { - hold_CO[i] = CO_vec[1]; - } else if (combi_vec_list[i] == "conversion_warning") { - hold_CO[i] = CO_vec[2]; - } else if (combi_vec_list[i] == "conversion_no_warning") { - hold_CO[i] = CO_vec[3]; - } - } else if (combi_vec_list[i] == "off_ffp_contract" - || combi_vec_list[i] == "on_ffp_contract" - || combi_vec_list[i] == "fast_ffp_contract" - || combi_vec_list[i] == "pragmas_ffp_contract") { - picojson::array compiler_option = get_array_parameters("compiler_option", - "ffp_contract"); - std::vector CO_vec; - for (auto& indx : compiler_option) { - CO_vec.push_back(indx.get()); - } - if (combi_vec_list[i] == "off_ffp_contract") { - hold_CO[i] = CO_vec[0]; - } else if (combi_vec_list[i] == "on_ffp_contract") { - hold_CO[i] = CO_vec[1]; - } else if (combi_vec_list[i] == "fast_ffp_contract") { - hold_CO[i] = CO_vec[2]; - } else if (combi_vec_list[i] == "pragmas_ffp_contract") { - hold_CO[i] = CO_vec[3]; - } - } else if (combi_vec_list[i] =="undef_macro") { - hold_CO[i] = undef_CO_vec[1].c_str(); - undef_present = 1; - } else { - WARN("BLOCK NAME " << combi_vec_list[i] << " NOT PRESENT"); - } - Combination_CO[i] = hold_CO[i].c_str(); - } - int errors = 0; - for (int j = 0; j< combi_size; j++) { - std::string block_name = combi_vec_list[j].c_str(); - if (!calling_resp_function(block_name, Combination_CO, combi_size, - max_thread_position, fast_math_present)) { - errors++; - } - Combination_CO[j] = 
hold_CO[j].c_str(); - } - return errors; -} - -int check_positive_CO_present(std::string find_string) { - static std::vector positive_CO = {"macro", "warning", "rdc", - "denormals", "fp32_div_sqrt", - "Rpass_inline", "fast_math", - "slp_vectorize", - "amdgpu_ieee", - "unsafe_atomic", - "infinite_num", "NAN_num", - "slp_vectorize", "math_errno", - "associative_math", - "signed_zeros", "finite_math", - "trapping_math"}; - if (std::find(positive_CO.begin(), positive_CO.end(), - find_string) != positive_CO.end()) - return 1; - else - return 0; -} - -int check_negative_CO_present(std::string find_string) { - static std::vector negative_CO = {"no_fast_math", - "no_fp32_div_sqrt", - "no_denormals", - "no_slp_vectorize", - "no_amdgpu_ieee", - "no_unsafe_atomic", - "no_infinite_num", - "no_slp_vectorize", - "no_NAN_num", - "no_math_errno", - "no_associative_math", - "no_signed_zeros", - "no_finite_math", - "no_trapping_math"}; - if (std::find(negative_CO.begin(), negative_CO.end(), - find_string) != negative_CO.end()) - return 1; - else - return 0; -} - -bool calling_resp_function(const std::string block_name, - const char** Combination_CO, - int Combination_CO_size, int max_thread_position, - int fast_math_present) { - if (block_name == "max_thread") { - return check_max_thread(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "architecture") { - return check_architecture(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "rdc") { - return check_rdc(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "denormals") { - return check_denormals_enabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "no_denormals") { - return check_denormals_disabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == 
"warning") { - return check_warning(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "conversion_error") { - return check_conversionerror_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "conversion_no_error") { - return check_conversionerror_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "conversion_warning") { - return check_conversionwarning_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "conversion_no_warning") { - return check_conversionwarning_disabled(Combination_CO, - Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "Rpass_inline") { - return check_Rpass_inline(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "macro") { - return check_macro(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "undef_macro") { - return check_undef_macro(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "header_dir") { - return check_header_dir(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "no_fast_math") { - return check_fast_math_disabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "fast_math") { - return check_fast_math_enabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "off_ffp_contract") { - return check_ffp_contract_off(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "on_ffp_contract") { - return check_ffp_contract_on(Combination_CO, Combination_CO_size, - 
max_thread_position, fast_math_present); - } else if (block_name == "fast_ffp_contract") { - return check_ffp_contract_fast(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "no_unsafe_atomic") { - return check_unsafe_atomic_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "unsafe_atomic") { - return check_unsafe_atomic_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "no_slp_vectorize") { - return check_slp_vectorize_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "slp_vectorize") { - return check_slp_vectorize_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "infinite_num") { - return check_infinite_num_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "no_infinite_num") { - return check_infinite_num_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "NAN_num") { - return check_NAN_num_enabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "no_NAN_num") { - return check_NAN_num_disabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "finite_math") { - return check_finite_math_enabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "no_finite_math") { - return check_finite_math_disabled(Combination_CO, Combination_CO_size, - max_thread_position, fast_math_present); - } else if (block_name == "associative_math") { - return check_associative_math_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - 
fast_math_present); - } else if (block_name == "no_associative_math") { - return check_associative_math_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "signed_zeros") { - return check_signed_zeros_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "no_signed_zeros") { - return check_signed_zeros_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "trapping_math") { - return check_trapping_math_enabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else if (block_name == "no_trapping_math") { - return check_trapping_math_disabled(Combination_CO, Combination_CO_size, - max_thread_position, - fast_math_present); - } else { - WARN("BLOCK NAME '" << block_name << "' not found"); - return 0; - } -} - -picojson::array getblock_fromconfig() { - std::string str = "pwd"; - const char *cmd = str.c_str(); - CaptureStream capture(stdout); - capture.Begin(); - system(cmd); - capture.End(); - std::string wor_dir = capture.getData(); - std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); - std::string append_str = "catch/unit/rtc/RtcConfig.json"; - std::string config_path = break_dir + append_str; - std::string returnValue = ""; - std::ifstream json_file(config_path.c_str()); - if (!json_file.is_open()) { - WARN("Error loading config.jason"); - exit(0); - } - std::string json_str((std::istreambuf_iterator(json_file)), - std::istreambuf_iterator()); - picojson::value v; - std::string err = picojson::parse(v, json_str); - if (!err.empty()) { - WARN("empty config.jason"); - exit(0); - } - picojson::array& blocks = v.get(); - return blocks; -} - -std::string get_string_parameters(std::string para_name_to_retrieve, - std::string block_name) { - std::string returnValue = ""; - picojson::array blocks = getblock_fromconfig(); - for 
(picojson::value& block : blocks) { - picojson::object& block_obj = block.get(); - std::string blk_name = block_obj.at("block_name").get(); - if (blk_name == block_name) { - if (para_name_to_retrieve == "compiler_option") { - std::string compiler_opt = - block_obj.at("compiler_option").get(); - returnValue += compiler_opt; - } else if (para_name_to_retrieve == "Target_Vals") { - std::string Target_Vals = - block_obj.at("Target_Vals").get(); - returnValue += Target_Vals; - } else if (para_name_to_retrieve == "kernel_name") { - std::string ker_name = block_obj.at("kernel_name").get(); - returnValue += ker_name; - } else if (para_name_to_retrieve == "reverse_compiler_option") { - std::string reverse = - block_obj.at("reverse_compiler_option").get(); - returnValue += reverse; - } else if (para_name_to_retrieve == "ready_compiler_option") { - std::string ready_CO = - block_obj.at("ready_compiler_option").get(); - returnValue += ready_CO; - } else { - WARN("REQUESTED FIELD not present : " << para_name_to_retrieve); - } - } else { - continue; - } - } - return returnValue; -} - -picojson::array get_array_parameters(std::string para_name_to_retrieve, - std::string block_name) { - std::string returnValue = ""; - picojson::array blocks = getblock_fromconfig(); - for (picojson::value& block : blocks) { - picojson::object& block_obj = block.get(); - std::string blk_name = block_obj.at("block_name").get(); - if (blk_name == block_name) { - if (para_name_to_retrieve == "Target_Vals") { - picojson::array& Target_Vals = - block_obj.at("Target_Vals").get(); - return Target_Vals; - } else if (para_name_to_retrieve == "single_CO") { - picojson::array& single_CO = - block_obj.at("single_CO").get(); - return single_CO; - } else if (para_name_to_retrieve == "Combi_CO") { - picojson::array& Combi_CO = - block_obj.at("Combi_CO").get(); - return Combi_CO; - } else if (para_name_to_retrieve == "Input_Vals") { - picojson::array& Input_Vals = - block_obj.at("Input_Vals").get(); - return 
Input_Vals; - } else if (para_name_to_retrieve == "Expected_Results") { - picojson::array& Expected = - block_obj.at("Expected_Results").get(); - return Expected; - } else if (para_name_to_retrieve == "Expected_Results_for_no") { - picojson::array& Expected_for_no = - block_obj.at("Expected_Results_for_no").get(); - return Expected_for_no; - } else if (para_name_to_retrieve == "compiler_option") { - picojson::array& compiler_option = - block_obj.at("compiler_option").get(); - return compiler_option; - } else if (para_name_to_retrieve == "reverse_compiler_option") { - picojson::array& reverse_compiler_option = - block_obj.at("reverse_compiler_option").get(); - return reverse_compiler_option; - } else if (para_name_to_retrieve == "Headers") { - picojson::array& Headers = - block_obj.at("Headers").get(); - return Headers; - } else if (para_name_to_retrieve == "Src_headers") { - picojson::array& Src_headers = - block_obj.at("Src_headers").get(); - return Src_headers; - } else if (para_name_to_retrieve == "depending_comp_optn") { - picojson::array& depending_comp_optn = - block_obj.at("depending_comp_optn").get(); - return depending_comp_optn; - } else { - WARN("REQUESTED FIELD not present : " << para_name_to_retrieve); - return picojson::array(); - } - } else { - continue; - } - } - WARN("REQUESTED BLOCK " << block_name << " is not present "); - return picojson::array(); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sindxl +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This file has definition of functions for the following functinality: + +1) get_combi_string_vec() : Retrieve the combination string which contains +contains the combination of block name which indicate the respective compiler +option seperated by ':' from RtcConfig.jason file and returns them in the +form of vectors. + +2) split_comb_string() : The combination of blockname which are seperated by +':' has to split so that their respective compiler option can be retrieved +from the json file. This functn internally calls calling_combination_function() +for each of the combination of compiler options. This function returns a +int value i.e the total failed cases in that combination which is obtained +by calling_combination_function() function. + +3) calling_combination_function() : This function takes the combination of +blockname as the input. 
The respective compiler option for that block name is +retrieved from the json file and store the compiler options in a array. +calling_resp_function() is called which mapps the compiler option function +which has to be called with a set of required parameters +(combination of compiler options is one among them). this function returns +the status of execution ie 1 or 0 (bool). + +4) getblock_fromconfig() : This function is used to open the RtcConfig.json +file and return the blocks. + +5) get_string_parameters() and get_array_parameters() : retrieved the +parameters of the respective block name. + +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "headers/RtcUtility.h" +#include "headers/RtcFunctions.h" +#include "headers/RtcKernels.h" +#include +#include "headers/printf_common.h" + +#pragma clang diagnostic ignored "-Wunused-but-set-variable" + +std::vector get_combi_string_vec() { + picojson::array combi_string = get_array_parameters("Combi_CO", + "all_compier_options"); + std::vector combi_string_list; + for (auto& indx : combi_string) { + combi_string_list.push_back(indx.get()); + } + return combi_string_list; +} + +int split_comb_string(std::string option) { + int start_collon_index = option.find(':'); + int start_index = 0; + std::vector combi_block_name; + while (start_collon_index != std::string::npos) { + std::string singleoption = option.substr(start_index, + start_collon_index - start_index); + combi_block_name.push_back(singleoption); + start_index = start_collon_index + 1; + start_collon_index = option.find(':', start_index); + } + std::string last_option = option.substr(start_index, + option.length() - start_index); + combi_block_name.push_back(last_option); + return calling_combination_function(combi_block_name); +} + +int calling_combination_function(std::vector combi_vec_list) { + int combi_size = combi_vec_list.size(); + int fast_math_present = -1, undef_present = 0; + int max_thread_position; + std::vector 
hold_CO(combi_size, ""); + const char** Combination_CO = new const char*[combi_size]; + picojson::array undef_compiler_option = get_array_parameters( + "compiler_option", "undef_macro"); + std::vector undef_CO_vec; + for (auto& indx : undef_compiler_option) { + undef_CO_vec.push_back(indx.get()); + } + for (int i=0; i< combi_size; i++) { + if (combi_vec_list[i] == "max_thread") { + std::string ready_CO = get_string_parameters("ready_compiler_option", + combi_vec_list[i]); + hold_CO[i] = ready_CO; + if (combi_vec_list[i] == "max_thread") { + max_thread_position = i; + } + } else if (combi_vec_list[i] == "header_dir") { + std::string retrived_CO = get_string_parameters("compiler_option", + "header_dir"); + std::string str = "pwd"; + const char *cmd = str.c_str(); + CaptureStream capture(stdout); + capture.Begin(); + system(cmd); + capture.End(); + std::string wor_dir = capture.getData(); + std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); + std::string append_str = "catch/unit/rtc/headers"; + std::string CO = retrived_CO + " " + break_dir + append_str; + hold_CO[i] = CO; + } else if (combi_vec_list[i] == "architecture") { + std::string retrived_CO = get_string_parameters("compiler_option", + "architecture"); + hipDeviceProp_t prop; + HIP_CHECK(hipGetDeviceProperties(&prop, 0)); + std::string actual_architecture = prop.gcnArchName; + std::string complete_CO = retrived_CO + actual_architecture; + hold_CO[i] = complete_CO; + } else if (check_positive_CO_present(combi_vec_list[i]) == 1) { + std::string positive_CO = get_string_parameters("compiler_option", + combi_vec_list[i]); + hold_CO[i] = positive_CO; + if (combi_vec_list[i] == "fast_math") + fast_math_present = 1; + } else if (check_negative_CO_present(combi_vec_list[i]) == 1) { + std::string split_block_name = combi_vec_list[i].substr(3, + combi_vec_list[i].length() - 3); + std::string negative_CO = get_string_parameters( + "reverse_compiler_option", split_block_name); + hold_CO[i] = negative_CO; + 
if (split_block_name == "fast_math") + fast_math_present = 0; + } else if ( combi_vec_list[i] == "conversion_error" + || combi_vec_list[i] == "conversion_no_error" + || combi_vec_list[i] == "conversion_no_warning" + || combi_vec_list[i] == "conversion_warning") { + picojson::array compiler_option = get_array_parameters("compiler_option", + "error"); + std::vector CO_vec; + for (auto& indx : compiler_option) { + CO_vec.push_back(indx.get()); + } + if (combi_vec_list[i] == "conversion_error") { + hold_CO[i] = CO_vec[0]; + } else if (combi_vec_list[i] == "conversion_no_error") { + hold_CO[i] = CO_vec[1]; + } else if (combi_vec_list[i] == "conversion_warning") { + hold_CO[i] = CO_vec[2]; + } else if (combi_vec_list[i] == "conversion_no_warning") { + hold_CO[i] = CO_vec[3]; + } + } else if (combi_vec_list[i] == "off_ffp_contract" + || combi_vec_list[i] == "on_ffp_contract" + || combi_vec_list[i] == "fast_ffp_contract" + || combi_vec_list[i] == "pragmas_ffp_contract") { + picojson::array compiler_option = get_array_parameters("compiler_option", + "ffp_contract"); + std::vector CO_vec; + for (auto& indx : compiler_option) { + CO_vec.push_back(indx.get()); + } + if (combi_vec_list[i] == "off_ffp_contract") { + hold_CO[i] = CO_vec[0]; + } else if (combi_vec_list[i] == "on_ffp_contract") { + hold_CO[i] = CO_vec[1]; + } else if (combi_vec_list[i] == "fast_ffp_contract") { + hold_CO[i] = CO_vec[2]; + } else if (combi_vec_list[i] == "pragmas_ffp_contract") { + hold_CO[i] = CO_vec[3]; + } + } else if (combi_vec_list[i] =="undef_macro") { + hold_CO[i] = undef_CO_vec[1].c_str(); + undef_present = 1; + } else { + WARN("BLOCK NAME " << combi_vec_list[i] << " NOT PRESENT"); + } + Combination_CO[i] = hold_CO[i].c_str(); + } + int errors = 0; + for (int j = 0; j< combi_size; j++) { + std::string block_name = combi_vec_list[j].c_str(); + if (!calling_resp_function(block_name, Combination_CO, combi_size, + max_thread_position, fast_math_present)) { + errors++; + } + Combination_CO[j] = 
hold_CO[j].c_str(); + } + return errors; +} + +int check_positive_CO_present(std::string find_string) { + static std::vector positive_CO = {"macro", "warning", "rdc", + "denormals", "fp32_div_sqrt", + "Rpass_inline", "fast_math", + "slp_vectorize", + "amdgpu_ieee", + "unsafe_atomic", + "infinite_num", "NAN_num", + "slp_vectorize", "math_errno", + "associative_math", + "signed_zeros", "finite_math", + "trapping_math"}; + if (std::find(positive_CO.begin(), positive_CO.end(), + find_string) != positive_CO.end()) + return 1; + else + return 0; +} + +int check_negative_CO_present(std::string find_string) { + static std::vector negative_CO = {"no_fast_math", + "no_fp32_div_sqrt", + "no_denormals", + "no_slp_vectorize", + "no_amdgpu_ieee", + "no_unsafe_atomic", + "no_infinite_num", + "no_slp_vectorize", + "no_NAN_num", + "no_math_errno", + "no_associative_math", + "no_signed_zeros", + "no_finite_math", + "no_trapping_math"}; + if (std::find(negative_CO.begin(), negative_CO.end(), + find_string) != negative_CO.end()) + return 1; + else + return 0; +} + +bool calling_resp_function(const std::string block_name, + const char** Combination_CO, + int Combination_CO_size, int max_thread_position, + int fast_math_present) { + if (block_name == "max_thread") { + return check_max_thread(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "architecture") { + return check_architecture(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "rdc") { + return check_rdc(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "denormals") { + return check_denormals_enabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "no_denormals") { + return check_denormals_disabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == 
"warning") { + return check_warning(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "conversion_error") { + return check_conversionerror_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "conversion_no_error") { + return check_conversionerror_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "conversion_warning") { + return check_conversionwarning_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "conversion_no_warning") { + return check_conversionwarning_disabled(Combination_CO, + Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "Rpass_inline") { + return check_Rpass_inline(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "macro") { + return check_macro(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "undef_macro") { + return check_undef_macro(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "header_dir") { + return check_header_dir(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "no_fast_math") { + return check_fast_math_disabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "fast_math") { + return check_fast_math_enabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "off_ffp_contract") { + return check_ffp_contract_off(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "on_ffp_contract") { + return check_ffp_contract_on(Combination_CO, Combination_CO_size, + 
max_thread_position, fast_math_present); + } else if (block_name == "fast_ffp_contract") { + return check_ffp_contract_fast(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "no_unsafe_atomic") { + return check_unsafe_atomic_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "unsafe_atomic") { + return check_unsafe_atomic_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "no_slp_vectorize") { + return check_slp_vectorize_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "slp_vectorize") { + return check_slp_vectorize_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "infinite_num") { + return check_infinite_num_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "no_infinite_num") { + return check_infinite_num_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "NAN_num") { + return check_NAN_num_enabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "no_NAN_num") { + return check_NAN_num_disabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "finite_math") { + return check_finite_math_enabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "no_finite_math") { + return check_finite_math_disabled(Combination_CO, Combination_CO_size, + max_thread_position, fast_math_present); + } else if (block_name == "associative_math") { + return check_associative_math_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + 
fast_math_present); + } else if (block_name == "no_associative_math") { + return check_associative_math_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "signed_zeros") { + return check_signed_zeros_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "no_signed_zeros") { + return check_signed_zeros_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "trapping_math") { + return check_trapping_math_enabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else if (block_name == "no_trapping_math") { + return check_trapping_math_disabled(Combination_CO, Combination_CO_size, + max_thread_position, + fast_math_present); + } else { + WARN("BLOCK NAME '" << block_name << "' not found"); + return 0; + } +} + +picojson::array getblock_fromconfig() { + std::string str = "pwd"; + const char *cmd = str.c_str(); + CaptureStream capture(stdout); + capture.Begin(); + system(cmd); + capture.End(); + std::string wor_dir = capture.getData(); + std::string break_dir = wor_dir.substr(0, wor_dir.find("build")); + std::string append_str = "catch/unit/rtc/RtcConfig.json"; + std::string config_path = break_dir + append_str; + std::string returnValue = ""; + std::ifstream json_file(config_path.c_str()); + if (!json_file.is_open()) { + WARN("Error loading config.jason"); + exit(0); + } + std::string json_str((std::istreambuf_iterator(json_file)), + std::istreambuf_iterator()); + picojson::value v; + std::string err = picojson::parse(v, json_str); + if (!err.empty()) { + WARN("empty config.jason"); + exit(0); + } + picojson::array& blocks = v.get(); + return blocks; +} + +std::string get_string_parameters(std::string para_name_to_retrieve, + std::string block_name) { + std::string returnValue = ""; + picojson::array blocks = getblock_fromconfig(); + for 
(picojson::value& block : blocks) { + picojson::object& block_obj = block.get(); + std::string blk_name = block_obj.at("block_name").get(); + if (blk_name == block_name) { + if (para_name_to_retrieve == "compiler_option") { + std::string compiler_opt = + block_obj.at("compiler_option").get(); + returnValue += compiler_opt; + } else if (para_name_to_retrieve == "Target_Vals") { + std::string Target_Vals = + block_obj.at("Target_Vals").get(); + returnValue += Target_Vals; + } else if (para_name_to_retrieve == "kernel_name") { + std::string ker_name = block_obj.at("kernel_name").get(); + returnValue += ker_name; + } else if (para_name_to_retrieve == "reverse_compiler_option") { + std::string reverse = + block_obj.at("reverse_compiler_option").get(); + returnValue += reverse; + } else if (para_name_to_retrieve == "ready_compiler_option") { + std::string ready_CO = + block_obj.at("ready_compiler_option").get(); + returnValue += ready_CO; + } else { + WARN("REQUESTED FIELD not present : " << para_name_to_retrieve); + } + } else { + continue; + } + } + return returnValue; +} + +picojson::array get_array_parameters(std::string para_name_to_retrieve, + std::string block_name) { + std::string returnValue = ""; + picojson::array blocks = getblock_fromconfig(); + for (picojson::value& block : blocks) { + picojson::object& block_obj = block.get(); + std::string blk_name = block_obj.at("block_name").get(); + if (blk_name == block_name) { + if (para_name_to_retrieve == "Target_Vals") { + picojson::array& Target_Vals = + block_obj.at("Target_Vals").get(); + return Target_Vals; + } else if (para_name_to_retrieve == "single_CO") { + picojson::array& single_CO = + block_obj.at("single_CO").get(); + return single_CO; + } else if (para_name_to_retrieve == "Combi_CO") { + picojson::array& Combi_CO = + block_obj.at("Combi_CO").get(); + return Combi_CO; + } else if (para_name_to_retrieve == "Input_Vals") { + picojson::array& Input_Vals = + block_obj.at("Input_Vals").get(); + return 
Input_Vals; + } else if (para_name_to_retrieve == "Expected_Results") { + picojson::array& Expected = + block_obj.at("Expected_Results").get(); + return Expected; + } else if (para_name_to_retrieve == "Expected_Results_for_no") { + picojson::array& Expected_for_no = + block_obj.at("Expected_Results_for_no").get(); + return Expected_for_no; + } else if (para_name_to_retrieve == "compiler_option") { + picojson::array& compiler_option = + block_obj.at("compiler_option").get(); + return compiler_option; + } else if (para_name_to_retrieve == "reverse_compiler_option") { + picojson::array& reverse_compiler_option = + block_obj.at("reverse_compiler_option").get(); + return reverse_compiler_option; + } else if (para_name_to_retrieve == "Headers") { + picojson::array& Headers = + block_obj.at("Headers").get(); + return Headers; + } else if (para_name_to_retrieve == "Src_headers") { + picojson::array& Src_headers = + block_obj.at("Src_headers").get(); + return Src_headers; + } else if (para_name_to_retrieve == "depending_comp_optn") { + picojson::array& depending_comp_optn = + block_obj.at("depending_comp_optn").get(); + return depending_comp_optn; + } else { + WARN("REQUESTED FIELD not present : " << para_name_to_retrieve); + return picojson::array(); + } + } else { + continue; + } + } + WARN("REQUESTED BLOCK " << block_name << " is not present "); + return picojson::array(); +} diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h b/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h index 46f0d27810..d9bd325575 100644 --- a/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h +++ b/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h @@ -1,178 +1,178 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* -The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h. 
-*/ - -#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ -#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ -#include - -bool check_architecture(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_rdc(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_denormals_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_denormals_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_ffp_contract_off(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_ffp_contract_on(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_ffp_contract_fast(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_fast_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_fast_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_slp_vectorize_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_slp_vectorize_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_macro(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_undef_macro(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_header_dir(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_warning(const char** 
Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_Rpass_inline(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_conversionerror_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_conversionerror_disabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_conversionwarning_enabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_conversionwarning_disabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_max_thread(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_unsafe_atomic_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_unsafe_atomic_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_infinite_num_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_infinite_num_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_NAN_num_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_NAN_num_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_finite_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_finite_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int 
fast_math_present); - -bool check_associative_math_enabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_associative_math_disabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_signed_zeros_enabled(const char** Combination_CO, - int Combination_CO_size, - int max_thread_pos, - int fast_math_present); - -bool check_signed_zeros_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_trapping_math_enabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -bool check_trapping_math_disabled(const char** Combination_CO, - int Combination_CO_size, int max_thread_pos, - int fast_math_present); - -std::string checking_IR(const char* kername, const char** extra_CO_IRadded, - int extra_CO_IRadded_size, const char** Combination_CO, - int Combination_CO_size); - -#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h. +*/ + +#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ +#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ +#include + +bool check_architecture(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_rdc(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_denormals_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_denormals_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_ffp_contract_off(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_ffp_contract_on(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_ffp_contract_fast(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_fast_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_fast_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_slp_vectorize_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_slp_vectorize_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_macro(const char** Combination_CO, + int 
Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_undef_macro(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_header_dir(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_warning(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_Rpass_inline(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_conversionerror_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_conversionerror_disabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_conversionwarning_enabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_conversionwarning_disabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_max_thread(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_unsafe_atomic_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_unsafe_atomic_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_infinite_num_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_infinite_num_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_NAN_num_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_NAN_num_disabled(const 
char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_finite_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_finite_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_associative_math_enabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_associative_math_disabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_signed_zeros_enabled(const char** Combination_CO, + int Combination_CO_size, + int max_thread_pos, + int fast_math_present); + +bool check_signed_zeros_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_trapping_math_enabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +bool check_trapping_math_disabled(const char** Combination_CO, + int Combination_CO_size, int max_thread_pos, + int fast_math_present); + +std::string checking_IR(const char* kername, const char** extra_CO_IRadded, + int extra_CO_IRadded_size, const char** Combination_CO, + int Combination_CO_size); + +#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_ diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h b/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h index f3ff6aac83..392f6ddb11 100644 --- a/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h +++ b/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h @@ -1,163 +1,163 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* -RtcKernels.h contains the string's with the which includes the kernel code. -They are utilized by the compiler option functions, defined in RtcFunctions.cpp -*/ - -#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ -#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ -#include -#include -#include - -static constexpr auto max_thread_string { -R"( -extern "C" -__global__ void max_thread(int* a) { - int BD = blockDim.x; - *a = BD; -} -)"}; - -static constexpr auto denormals_string { -R"( -extern "C" -__global__ void denormals(double* base, double* power, double* result) { - float denorm = powf(*base, *power); - if (*result == 0 || *result ==1 ) - *result = (denorm==0) ? 
0 : 1; - else - *result = powf(*base, *power); -} -)"}; - -static constexpr auto warning_string { -R"( -extern "C" -__global__ void warning() { - #warning "Just printing a WARNING message onto the terminal"; -} -)"}; - -static constexpr auto fp32_div_sqrt_string { -R"( -extern "C" -__global__ void fp32_div_sqrt(float* result) { - float input = 109.6209; - *result = sqrt(input); -} -)"}; - -static constexpr auto error_string { -R"( -extern "C" -__global__ void error() { - unsigned int a = -1; - unsigned int b = +1; - signed int c = -1; - signed int d = +1; -} -)"}; - -static constexpr auto macro_string { -R"( -extern "C" -__global__ void macro(int *result) { - *result = PI; -} -)"}; - -static constexpr auto undef_macro_string { -R"( -extern "C" -__global__ void undef_macro() { - int a = Z; -} -)"}; - -static constexpr auto header_dir_string { -R"( -#include "RtcFact.h" -extern "C" -__global__ void header_dir(int* a, int* val) { - *a = fact(*val); -} -)"}; - -static constexpr auto rdc_string { -R"( -extern "C" -__global__ void rdc(float* a, float* b, float* c) { - *c = *a * *b; -} -)"}; - -static constexpr auto ffp_contract_string { -R"( -extern "C" -__global__ void ffp_contract(float* a, float* b, float* c) { - *c = *a * *b + *c; -} -)"}; - -static constexpr auto slp_vectorize_string { -R"( -extern "C" -__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) { - (*y).data.x = x.data.x + a.data.x; - (*y).data.y = x.data.y + a.data.y; -} -)"}; - -static constexpr auto unsafe_atomic_string { -R"( -extern "C" -__global__ void unsafe_atomic(float* a) { - int id = threadIdx.x + blockIdx.x * blockDim.x; - if (id < 1000) { - unsafeAtomicAdd(&a[id], 0.2f); - } -} -)"}; - -static constexpr auto amdgpu_ieee_string { -R"( -extern "C" -__global__ void amdgpu_ieee(float* a, float* b, float* c) { - *c = sqrt(*a / *b); - printf("sqrt(a * b) = %f\n", *c); -} -)"}; - -static constexpr auto associative_math_string { -R"( -extern "C" -__global__ void associative_math(int* 
check) { - double x = 0.1f; - double y = 0.2f; - double z = 0.3f; - if((x*y)*z != x*(y*z)) - *check = 1; - else *check = 0; -} -)"}; - -#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +RtcKernels.h contains the string's with the which includes the kernel code. +They are utilized by the compiler option functions, defined in RtcFunctions.cpp +*/ + +#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ +#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ +#include +#include +#include + +static constexpr auto max_thread_string { +R"( +extern "C" +__global__ void max_thread(int* a) { + int BD = blockDim.x; + *a = BD; +} +)"}; + +static constexpr auto denormals_string { +R"( +extern "C" +__global__ void denormals(double* base, double* power, double* result) { + float denorm = powf(*base, *power); + if (*result == 0 || *result ==1 ) + *result = (denorm==0) ? 
0 : 1; + else + *result = powf(*base, *power); +} +)"}; + +static constexpr auto warning_string { +R"( +extern "C" +__global__ void warning() { + #warning "Just printing a WARNING message onto the terminal"; +} +)"}; + +static constexpr auto fp32_div_sqrt_string { +R"( +extern "C" +__global__ void fp32_div_sqrt(float* result) { + float input = 109.6209; + *result = sqrt(input); +} +)"}; + +static constexpr auto error_string { +R"( +extern "C" +__global__ void error() { + unsigned int a = -1; + unsigned int b = +1; + signed int c = -1; + signed int d = +1; +} +)"}; + +static constexpr auto macro_string { +R"( +extern "C" +__global__ void macro(int *result) { + *result = PI; +} +)"}; + +static constexpr auto undef_macro_string { +R"( +extern "C" +__global__ void undef_macro() { + int a = Z; +} +)"}; + +static constexpr auto header_dir_string { +R"( +#include "RtcFact.h" +extern "C" +__global__ void header_dir(int* a, int* val) { + *a = fact(*val); +} +)"}; + +static constexpr auto rdc_string { +R"( +extern "C" +__global__ void rdc(float* a, float* b, float* c) { + *c = *a * *b; +} +)"}; + +static constexpr auto ffp_contract_string { +R"( +extern "C" +__global__ void ffp_contract(float* a, float* b, float* c) { + *c = *a * *b + *c; +} +)"}; + +static constexpr auto slp_vectorize_string { +R"( +extern "C" +__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) { + (*y).data.x = x.data.x + a.data.x; + (*y).data.y = x.data.y + a.data.y; +} +)"}; + +static constexpr auto unsafe_atomic_string { +R"( +extern "C" +__global__ void unsafe_atomic(float* a) { + int id = threadIdx.x + blockIdx.x * blockDim.x; + if (id < 1000) { + unsafeAtomicAdd(&a[id], 0.2f); + } +} +)"}; + +static constexpr auto amdgpu_ieee_string { +R"( +extern "C" +__global__ void amdgpu_ieee(float* a, float* b, float* c) { + *c = sqrt(*a / *b); + printf("sqrt(a * b) = %f\n", *c); +} +)"}; + +static constexpr auto associative_math_string { +R"( +extern "C" +__global__ void associative_math(int* 
check) { + double x = 0.1f; + double y = 0.2f; + double z = 0.3f; + if((x*y)*z != x*(y*z)) + *check = 1; + else *check = 0; +} +)"}; + +#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_ diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h b/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h index c7fdd71372..f9e1e04a89 100644 --- a/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h +++ b/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h @@ -1,53 +1,53 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* -The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h. 
-*/ - -#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ -#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ -#include -#include -#include - -std::vector get_combi_string_vec(); - -int split_comb_string(std::string option); - -int calling_combination_function(std::vector combi_vec_list); - -int check_positive_CO_present(std::string find_string); - -int check_negative_CO_present(std::string find_string); - -bool calling_resp_function(const std::string block_name, - const char** Combination_CO, - int Combination_CO_size, int max_thread_position, - int fast_math_present); - -picojson::array getblock_fromconfig(); - -std::string get_string_parameters(std::string para_name_to_retrieve, - std::string block_name); - -picojson::array get_array_parameters(std::string para_name_to_retrieve, - std::string block_name); - -#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* +The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h. +*/ + +#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ +#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ +#include +#include +#include + +std::vector get_combi_string_vec(); + +int split_comb_string(std::string option); + +int calling_combination_function(std::vector combi_vec_list); + +int check_positive_CO_present(std::string find_string); + +int check_negative_CO_present(std::string find_string); + +bool calling_resp_function(const std::string block_name, + const char** Combination_CO, + int Combination_CO_size, int max_thread_position, + int fast_math_present); + +picojson::array getblock_fromconfig(); + +std::string get_string_parameters(std::string para_name_to_retrieve, + std::string block_name); + +picojson::array get_array_parameters(std::string para_name_to_retrieve, + std::string block_name); + +#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_ diff --git a/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt b/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt index 0bff39f301..50018e5c43 100644 --- a/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt @@ -1,25 +1,25 @@ -# Common Tests - Test independent of all platforms -set(TEST_SRC - copy_coherency.cc -) -add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} - ${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o - ${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I - ${HIP_PATH}/include -I - ${CMAKE_CURRENT_SOURCE_DIR}/../../include -L - ${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH}) -# only for AMD -if(HIP_PLATFORM MATCHES "amd") - set(AMD_SRC - cache_coherency_cpu_gpu.cc - cache_coherency_gpu_gpu.cc - ) - set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) -endif() - -hip_add_exe_to_target(NAME synchronizationTests - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests 
- COMPILE_OPTIONS -std=c++14) -add_dependencies(synchronizationTests memcpyInt.hsaco) - +# Common Tests - Test independent of all platforms +set(TEST_SRC + copy_coherency.cc +) +add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} + ${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o + ${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I + ${HIP_PATH}/include -I + ${CMAKE_CURRENT_SOURCE_DIR}/../../include -L + ${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH}) +# only for AMD +if(HIP_PLATFORM MATCHES "amd") + set(AMD_SRC + cache_coherency_cpu_gpu.cc + cache_coherency_gpu_gpu.cc + ) + set(TEST_SRC ${TEST_SRC} ${AMD_SRC}) +endif() + +hip_add_exe_to_target(NAME synchronizationTests + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++14) +add_dependencies(synchronizationTests memcpyInt.hsaco) + diff --git a/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc b/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc index c33eff82da..0d4c9c0136 100644 --- a/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc +++ b/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc @@ -1,282 +1,282 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for Fine Grained CPU-GPU coherency. - -#include -#include - -typedef _Atomic(unsigned int) atomic_uint; - -// Helper function to spin on address until address equals value. -// If the address holds the value of -1, abort because the other thread failed. -__device__ int -gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __opencl_atomic_compare_exchange_strong( - reinterpret_cast(address), /*expected=*/ &compare, - /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, - /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (compare == -1) - return -1; - } while (!check); - return 0; -} - -// This kernel requires a single block, single thread dispatch. -__global__ void -gpu_kernel(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) { - for (size_t i = 0; i < N; i++) { - // Store data into A, system fence, and atomically mark flag. - // This guarantees this global write is visible by device 1. - A[i] = X[i]; - __opencl_atomic_fetch_add(reinterpret_cast(AA1), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on device 1's global write to B. - if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) { - *dresult = -1; - break; - } - - // Check device 1 properly stored Y into B. 
- bool stored_data_matches = (B[i] == Y[i]); - if (!stored_data_matches) { - // If the data does not match, alert other thread and abort. - printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n", - i, B[i], Y[i]); - __opencl_atomic_exchange(reinterpret_cast(AA2), -1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - *dresult = -1; - } - // Otherwise tell the other thread to continue. - __opencl_atomic_fetch_add(reinterpret_cast(AA2), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on kernel gpu_cache1 to finish checking X is stored in A. - if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) { - *dresult = -1; - break; - } - } - *dresult = 0; -} - -__host__ int -cpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __atomic_compare_exchange_n( - address, /*expected=*/ &compare, /*desired=*/ value, - /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); - if (compare == -1) - return -1; - } while (!check); - return 0; -} - -// This host thread runs only on a single CPU thread. 
-__host__ void -cpu_thread(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) { - for (size_t i = 0; i < N; i++) { - B[i] = Y[i]; - __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE); - if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) { - *hresult = -1; - break; - } - - bool stored_data_matches = (A[i] == X[i]); - if (!stored_data_matches) { - printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n", - i, A[i], X[i]); - __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE); - *hresult = -1; - break; - } - __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE); - if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) { - *hresult = -1; - break; - } - } - *hresult = 0; -} - -static bool cpu_to_gpu_coherency() { - int *A_d, *B_d, *X_d, *Y_d; - int *A_res, *A_h, *B_h, *X_h, *Y_h; - unsigned int hresult, dresult; - size_t N = 1024; - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices < 1) { - HipTest::HIP_SKIP_TEST("Skipping because devices < 1"); - return 0; - } - - // Skip this test if feature is not supported. - static int device0 = 0; - hipDeviceProp_t props; - HIP_CHECK(hipGetDeviceProperties(&props, device0)); - if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 && - strncmp(props.gcnArchName, "gfx940", 6) != 0) { - printf("info: skipping test on devices other than gfx90a and gfx940.\n"); - return true; - } - - // Allocate Host Side Memory. Coherent Fine-grained Memory for array B. - printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - HIP_CHECK(hipHostMalloc(&B_h, Nbytes, - (hipHostMallocCoherent | hipHostMallocMapped))); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&B_d), B_h, 0)); - X_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess); - Y_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(Y_h == 0 ? 
hipErrorOutOfMemory : hipSuccess); - - // Initialize the arrays and atomic variables. - for (size_t i = 0; i < N; i++) { - X_h[i] = 100000000 + i; - Y_h[i] = 300000000 + i; - } - - // Initialize shared atomic flags between CPU and GPU. - unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; - unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; - HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA1_d), - AA1_h, 0)); - *AA1_h = 0; - HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA2_d), - AA2_h, 0)); - *AA2_h = 0; - HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA1_d), - BA1_h, 0)); - *BA1_h = 0; - HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA2_d), - BA2_h, 0)); - *BA2_h = 0; - - // Skip the first stream, ensure stream is non-blocking. - hipStream_t stream[2]; - HIP_CHECK(hipStreamCreate(&stream[0])); - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); - - // Allocate Device Side Memory. Coherent Fine-grained Memory for array A. - printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - hipError_t status = hipExtMallocWithFlags(reinterpret_cast(&A_d), - Nbytes, hipDeviceMallocFinegrained); - REQUIRE(status == hipSuccess); - // SVM memory - host pointer is the same as device pointer to array A. - A_h = A_d; - HIP_CHECK(hipMalloc(&X_d, Nbytes)); - HIP_CHECK(hipMalloc(&Y_d, Nbytes)); - - HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice)); - - // Launch the GPU kernel. 
- const unsigned blocks = 1; - const unsigned threadsPerBlock = 1; - hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock), - 0, stream[1], - A_d, B_d, X_d, Y_d, N, - AA1_d, AA2_d, BA1_d, BA2_d, &dresult); - // Check if launch failed. - HIP_CHECK(hipGetLastError()); - REQUIRE(dresult == 0); - - // Do not sync the launched stream, instead run the cpu_thread. - std::thread host_thread(cpu_thread, - A_h, B_h, X_h, Y_h, N, - AA1_h, AA2_h, BA1_h, BA2_h, &hresult); - host_thread.detach(); - REQUIRE(hresult == 0); - // Wait for Device side to finish. - HIP_CHECK(hipStreamSynchronize(stream[1])); - - // Evaluate the resultant arrays A and B. - A_res = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess); - HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost)); - - for (size_t i = 0; i < N; i++) { - REQUIRE(A_res[i] == (100000000 + i)); - REQUIRE(B_h[i] == (300000000 + i)); - } - - // Free all the device and host memory allocated. - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(X_d)); - HIP_CHECK(hipFree(Y_d)); - HIP_CHECK(hipHostFree(AA1_h)); - HIP_CHECK(hipHostFree(AA2_h)); - HIP_CHECK(hipHostFree(BA1_h)); - HIP_CHECK(hipHostFree(BA2_h)); - HIP_CHECK(hipHostFree(B_h)); - free(X_h); - free(Y_h); - free(A_res); - - return true; -} - -/** - * Test Description - * ------------------------ - * - This test runs on devices where XGMI enables fine-grained communication - * between GPUs. This performs a message passing test. - * Array A is allocated on Device 0, and remotely on host. - * Device 0 also increments atomic ints AA1 and AA2. - * Array B is allocated on host, and remotely on Device 0. - * Host also increments atomic ints BA1 and BA2. - * Kernel will launch on Device 0, and store array X into array A. - * Host Thread will store array Y into array B. - * Kernel will validate that the correct values of array Y are stored in B. - * Host Thread will validate that the correct values of array X are stored in A. 
- - * Test source - * ------------------------ - * - catch/unit/synchronization/cache_coherency_cpu_gpu.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - * - Test to be run only on AMD. - */ - -TEST_CASE("Unit_cache_coherency_cpu_gpu") { - bool passed = true; - // Coherency between CPU and GPU sharing host and device memory. - REQUIRE(passed == cpu_to_gpu_coherency()); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Simple test for Fine Grained CPU-GPU coherency. + +#include +#include + +typedef _Atomic(unsigned int) atomic_uint; + +// Helper function to spin on address until address equals value. +// If the address holds the value of -1, abort because the other thread failed. 
+__device__ int +gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, + unsigned int value) { + unsigned int compare; + bool check = false; + do { + compare = value; + check = __opencl_atomic_compare_exchange_strong( + reinterpret_cast(address), /*expected=*/ &compare, + /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + if (compare == -1) + return -1; + } while (!check); + return 0; +} + +// This kernel requires a single block, single thread dispatch. +__global__ void +gpu_kernel(int *A, int *B, int *X, int *Y, size_t N, + unsigned int *AA1, unsigned int *AA2, + unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) { + for (size_t i = 0; i < N; i++) { + // Store data into A, system fence, and atomically mark flag. + // This guarantees this global write is visible by device 1. + A[i] = X[i]; + __opencl_atomic_fetch_add(reinterpret_cast(AA1), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + // Wait on device 1's global write to B. + if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) { + *dresult = -1; + break; + } + + // Check device 1 properly stored Y into B. + bool stored_data_matches = (B[i] == Y[i]); + if (!stored_data_matches) { + // If the data does not match, alert other thread and abort. + printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n", + i, B[i], Y[i]); + __opencl_atomic_exchange(reinterpret_cast(AA2), -1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + *dresult = -1; + } + // Otherwise tell the other thread to continue. + __opencl_atomic_fetch_add(reinterpret_cast(AA2), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + // Wait on kernel gpu_cache1 to finish checking X is stored in A. 
+ if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) { + *dresult = -1; + break; + } + } + *dresult = 0; +} + +__host__ int +cpu_spin_loop_or_abort_on_negative_one(unsigned int* address, + unsigned int value) { + unsigned int compare; + bool check = false; + do { + compare = value; + check = __atomic_compare_exchange_n( + address, /*expected=*/ &compare, /*desired=*/ value, + /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); + if (compare == -1) + return -1; + } while (!check); + return 0; +} + +// This host thread runs only on a single CPU thread. +__host__ void +cpu_thread(int *A, int *B, int *X, int *Y, size_t N, + unsigned int *AA1, unsigned int *AA2, + unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) { + for (size_t i = 0; i < N; i++) { + B[i] = Y[i]; + __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE); + if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) { + *hresult = -1; + break; + } + + bool stored_data_matches = (A[i] == X[i]); + if (!stored_data_matches) { + printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n", + i, A[i], X[i]); + __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE); + *hresult = -1; + break; + } + __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE); + if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) { + *hresult = -1; + break; + } + } + *hresult = 0; +} + +static bool cpu_to_gpu_coherency() { + int *A_d, *B_d, *X_d, *Y_d; + int *A_res, *A_h, *B_h, *X_h, *Y_h; + unsigned int hresult, dresult; + size_t N = 1024; + size_t Nbytes = N * sizeof(int); + int numDevices = 0; + + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices < 1) { + HipTest::HIP_SKIP_TEST("Skipping because devices < 1"); + return 0; + } + + // Skip this test if feature is not supported. 
+ static int device0 = 0; + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, device0)); + if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 && + strncmp(props.gcnArchName, "gfx940", 6) != 0) { + printf("info: skipping test on devices other than gfx90a and gfx940.\n"); + return true; + } + + // Allocate Host Side Memory. Coherent Fine-grained Memory for array B. + printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + HIP_CHECK(hipHostMalloc(&B_h, Nbytes, + (hipHostMallocCoherent | hipHostMallocMapped))); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&B_d), B_h, 0)); + X_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess); + Y_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Initialize the arrays and atomic variables. + for (size_t i = 0; i < N; i++) { + X_h[i] = 100000000 + i; + Y_h[i] = 300000000 + i; + } + + // Initialize shared atomic flags between CPU and GPU. + unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; + unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; + HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA1_d), + AA1_h, 0)); + *AA1_h = 0; + HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA2_d), + AA2_h, 0)); + *AA2_h = 0; + HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA1_d), + BA1_h, 0)); + *BA1_h = 0; + HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA2_d), + BA2_h, 0)); + *BA2_h = 0; + + // Skip the first stream, ensure stream is non-blocking. 
+ hipStream_t stream[2]; + HIP_CHECK(hipStreamCreate(&stream[0])); + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); + + // Allocate Device Side Memory. Coherent Fine-grained Memory for array A. + printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + hipError_t status = hipExtMallocWithFlags(reinterpret_cast(&A_d), + Nbytes, hipDeviceMallocFinegrained); + REQUIRE(status == hipSuccess); + // SVM memory - host pointer is the same as device pointer to array A. + A_h = A_d; + HIP_CHECK(hipMalloc(&X_d, Nbytes)); + HIP_CHECK(hipMalloc(&Y_d, Nbytes)); + + HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice)); + + // Launch the GPU kernel. + const unsigned blocks = 1; + const unsigned threadsPerBlock = 1; + hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock), + 0, stream[1], + A_d, B_d, X_d, Y_d, N, + AA1_d, AA2_d, BA1_d, BA2_d, &dresult); + // Check if launch failed. + HIP_CHECK(hipGetLastError()); + REQUIRE(dresult == 0); + + // Do not sync the launched stream, instead run the cpu_thread. + std::thread host_thread(cpu_thread, + A_h, B_h, X_h, Y_h, N, + AA1_h, AA2_h, BA1_h, BA2_h, &hresult); + host_thread.detach(); + REQUIRE(hresult == 0); + // Wait for Device side to finish. + HIP_CHECK(hipStreamSynchronize(stream[1])); + + // Evaluate the resultant arrays A and B. + A_res = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess); + HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < N; i++) { + REQUIRE(A_res[i] == (100000000 + i)); + REQUIRE(B_h[i] == (300000000 + i)); + } + + // Free all the device and host memory allocated. 
+ HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(X_d)); + HIP_CHECK(hipFree(Y_d)); + HIP_CHECK(hipHostFree(AA1_h)); + HIP_CHECK(hipHostFree(AA2_h)); + HIP_CHECK(hipHostFree(BA1_h)); + HIP_CHECK(hipHostFree(BA2_h)); + HIP_CHECK(hipHostFree(B_h)); + free(X_h); + free(Y_h); + free(A_res); + + return true; +} + +/** + * Test Description + * ------------------------ + * - This test runs on devices where XGMI enables fine-grained communication + * between GPUs. This performs a message passing test. + * Array A is allocated on Device 0, and remotely on host. + * Device 0 also increments atomic ints AA1 and AA2. + * Array B is allocated on host, and remotely on Device 0. + * Host also increments atomic ints BA1 and BA2. + * Kernel will launch on Device 0, and store array X into array A. + * Host Thread will store array Y into array B. + * Kernel will validate that the correct values of array Y are stored in B. + * Host Thread will validate that the correct values of array X are stored in A. + + * Test source + * ------------------------ + * - catch/unit/synchronization/cache_coherency_cpu_gpu.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + * - Test to be run only on AMD. + */ + +TEST_CASE("Unit_cache_coherency_cpu_gpu") { + bool passed = true; + // Coherency between CPU and GPU sharing host and device memory. + REQUIRE(passed == cpu_to_gpu_coherency()); +} diff --git a/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc b/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc index 3a645c2c39..42df8266ad 100644 --- a/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc +++ b/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc @@ -1,294 +1,294 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -// Simple test for Fine Grained GPU-GPU coherency. - -#include -#include - -typedef _Atomic(unsigned int) atomic_uint; - -// Helper function to spin on address until address equals value. -// If the address holds the value of -1, abort because the other thread failed. -__device__ int -gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, - unsigned int value) { - unsigned int compare; - bool check = false; - do { - compare = value; - check = __opencl_atomic_compare_exchange_strong( - reinterpret_cast(address), /*expected=*/ &compare, - /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, - /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (compare == -1) - return -1; - } while (!check); - return 0; -} - -// This kernel requires a single block, single thread dispatch. 
-__global__ void -gpu_cache0(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) { - for (size_t i = 0; i < N; i++) { - // Store data into A, system fence, and atomically mark flag. - // This guarantees this global write is visible by device 1. - A[i] = X[i]; - __opencl_atomic_fetch_add(reinterpret_cast(AA1), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on device 1's global write to B. - if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) { - *cache0_result = -1; - break; - } - - // Check device 1 properly stored Y into B. - bool stored_data_matches = (B[i] == Y[i]); - if (!stored_data_matches) { - // If the data does not match, alert other thread and abort. - printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n", - i, B[i], Y[i]); - __opencl_atomic_exchange(reinterpret_cast(AA2), -1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - *cache0_result = -1; - } - // Otherwise tell the other thread to continue. - __opencl_atomic_fetch_add(reinterpret_cast(AA2), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - // Wait on kernel gpu_cache1 to finish checking X is stored in A. - if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) { - *cache0_result = -1; - break; - } - } - *cache0_result = 0; -} - -// This kernel requires a single block, single thread dispatch. 
-__global__ void -gpu_cache1(int *A, int *B, int *X, int *Y, size_t N, - unsigned int *AA1, unsigned int *AA2, - unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) { - for (size_t i = 0; i < N; i++) { - B[i] = Y[i]; - __opencl_atomic_fetch_add(reinterpret_cast(BA1), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) { - *cache1_result = -1; - break; - } - - bool stored_data_matches = (A[i] == X[i]); - if (!stored_data_matches) { - printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n", - i, A[i], X[i]); - __opencl_atomic_exchange(reinterpret_cast(BA2), -1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - *cache1_result = -1; - } - __opencl_atomic_fetch_add(reinterpret_cast(BA2), 1, - __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); - if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) { - *cache1_result = -1; - break; - } - } - *cache1_result = 0; -} - -static bool gpu_to_gpu_coherency() { - int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1; - int *A_h, *B_h, *X_h, *Y_h; - unsigned int cache0_result, cache1_result; - size_t N = 1024; - size_t Nbytes = N * sizeof(int); - int numDevices = 0; - int numTestDevices = 2; - - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices < numTestDevices) { - HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); - return 0; - } - - // Skip this test if either device does not support this feature. - hipDeviceProp_t props0, props1; - HIP_CHECK(hipGetDeviceProperties(&props0, 0)); - HIP_CHECK(hipGetDeviceProperties(&props1, 1)); - if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 || - strncmp(props1.gcnArchName, "gfx90a", 6) != 0) && - (strncmp(props0.gcnArchName, "gfx940", 6) != 0 || - strncmp(props1.gcnArchName, "gfx940", 6) != 0)) { - printf("info: skipping test on devices other than gfx90a and gfx940.\n"); - return true; - } - - // Allocate Host Side Memory. 
- printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - A_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); - B_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess); - X_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess); - Y_h = reinterpret_cast(malloc(Nbytes)); - HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess); - - // Initialize the arrays and atomic variables. - for (size_t i = 0; i < N; i++) { - X_h[i] = 100000000 + i; - Y_h[i] = 300000000 + i; - } - - // Initialize shared atomic flags on host coherent memory. - unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; - unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; - HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA1_d), - AA1_h, 0)); - *AA1_h = 0; - HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA2_d), - AA2_h, 0)); - *AA2_h = 0; - HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA1_d), - BA1_h, 0)); - *BA1_h = 0; - HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA2_d), - BA2_h, 0)); - *BA2_h = 0; - - // Skip the first stream. - hipStream_t stream[3]; - HIP_CHECK(hipStreamCreate(&stream[0])); - - // Set-up Device 0. - HIP_CHECK(hipSetDevice(0)); - // Enable P2P access to Device 1. - HIP_CHECK(hipDeviceEnablePeerAccess(1, 0)); - HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); - // Allocating Coherent Memory for Array A_d on Device 0. 
- printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - hipError_t status = hipExtMallocWithFlags(reinterpret_cast(&A_d), - Nbytes, hipDeviceMallocFinegrained); - REQUIRE(status == hipSuccess); - HIP_CHECK(hipMalloc(&X_d0, Nbytes)); - HIP_CHECK(hipMalloc(&Y_d0, Nbytes)); - - // Set-up Device 1. - HIP_CHECK(hipSetDevice(1)); - // Enable P2P access to Device 0. - HIP_CHECK(hipDeviceEnablePeerAccess(0, 0)); - HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking)); - // Allocating Coherent Memory for Array B_d on Device 1. - printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); - status = hipExtMallocWithFlags(reinterpret_cast(&B_d), - Nbytes, hipDeviceMallocFinegrained); - REQUIRE(status == hipSuccess); - HIP_CHECK(hipMalloc(&X_d1, Nbytes)); - HIP_CHECK(hipMalloc(&Y_d1, Nbytes)); - - // Transfer initialized data onto the device arrays. - HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice)); - - // Prepare and launch the device kernels. - const unsigned blocks = 1; - const unsigned threadsPerBlock = 1; - HIP_CHECK(hipSetDevice(0)); - hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock), - 0, stream[1], - A_d, B_d, X_d0, Y_d0, N, - AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result); - // Check if launch failed. - HIP_CHECK(hipGetLastError()); - REQUIRE(cache0_result == 0); - HIP_CHECK(hipSetDevice(1)); - hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock), - 0, stream[2], - A_d, B_d, X_d1, Y_d1, N, - AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result); - HIP_CHECK(hipGetLastError()); - REQUIRE(cache1_result == 0); - - // Wait for kernels on both devices. - HIP_CHECK(hipStreamSynchronize(stream[1])); - HIP_CHECK(hipStreamSynchronize(stream[2])); - - // Evaluate the resultant arrays A and B. 
- HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); - HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost)); - - for (size_t i = 0; i < N; i++) { - REQUIRE(A_h[i] == (100000000 + i)); - REQUIRE(B_h[i] == (300000000 + i)); - } - - // Free all the device and host memory allocated. - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - HIP_CHECK(hipFree(X_d0)); - HIP_CHECK(hipFree(Y_d0)); - HIP_CHECK(hipFree(X_d1)); - HIP_CHECK(hipFree(Y_d1)); - HIP_CHECK(hipHostFree(AA1_h)); - HIP_CHECK(hipHostFree(AA2_h)); - HIP_CHECK(hipHostFree(BA1_h)); - HIP_CHECK(hipHostFree(BA2_h)); - free(A_h); - free(B_h); - free(X_h); - free(Y_h); - - return true; -} - -/** - * Test Description - * ------------------------ - * - This test runs on devices where XGMI enables fine-grained communication - * between GPUs. This performs a message passing test. - * Array A is allocated on Device 0, and remotely on Device 1. - * Device 0 also increments atomic ints AA1 and AA2. - * Array B is allocated on Device 1, and remotely on Device 0. - * Device 1 also increments atomic ints BA1 and BA2. - * Kernel 0 will launch on Device 0, and store array X into array A. - * Kernel 1 will launch on Device 1, and store array Y into array B. - * Kernel 0 will validate that the correct values of array Y are stored in B. - * Kernel 1 will validate that the correct values of array X are stored in A. - - * Test source - * ------------------------ - * - catch/unit/synchronization/cache_coherency_gpu_gpu.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - * - Test to be run only on AMD. - */ - -TEST_CASE("Unit_cache_coherency_gpu_gpu") { - bool passed = true; - // Coherency between GPUs accessing local or remote FB. - REQUIRE(passed == gpu_to_gpu_coherency()); -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Simple test for Fine Grained GPU-GPU coherency. + +#include +#include + +typedef _Atomic(unsigned int) atomic_uint; + +// Helper function to spin on address until address equals value. +// If the address holds the value of -1, abort because the other thread failed. +__device__ int +gpu_spin_loop_or_abort_on_negative_one(unsigned int* address, + unsigned int value) { + unsigned int compare; + bool check = false; + do { + compare = value; + check = __opencl_atomic_compare_exchange_strong( + reinterpret_cast(address), /*expected=*/ &compare, + /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + if (compare == -1) + return -1; + } while (!check); + return 0; +} + +// This kernel requires a single block, single thread dispatch. 
+__global__ void +gpu_cache0(int *A, int *B, int *X, int *Y, size_t N, + unsigned int *AA1, unsigned int *AA2, + unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) { + for (size_t i = 0; i < N; i++) { + // Store data into A, system fence, and atomically mark flag. + // This guarantees this global write is visible by device 1. + A[i] = X[i]; + __opencl_atomic_fetch_add(reinterpret_cast(AA1), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + // Wait on device 1's global write to B. + if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) { + *cache0_result = -1; + break; + } + + // Check device 1 properly stored Y into B. + bool stored_data_matches = (B[i] == Y[i]); + if (!stored_data_matches) { + // If the data does not match, alert other thread and abort. + printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n", + i, B[i], Y[i]); + __opencl_atomic_exchange(reinterpret_cast(AA2), -1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + *cache0_result = -1; + } + // Otherwise tell the other thread to continue. + __opencl_atomic_fetch_add(reinterpret_cast(AA2), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + // Wait on kernel gpu_cache1 to finish checking X is stored in A. + if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) { + *cache0_result = -1; + break; + } + } + *cache0_result = 0; +} + +// This kernel requires a single block, single thread dispatch. 
+__global__ void +gpu_cache1(int *A, int *B, int *X, int *Y, size_t N, + unsigned int *AA1, unsigned int *AA2, + unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) { + for (size_t i = 0; i < N; i++) { + B[i] = Y[i]; + __opencl_atomic_fetch_add(reinterpret_cast(BA1), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) { + *cache1_result = -1; + break; + } + + bool stored_data_matches = (A[i] == X[i]); + if (!stored_data_matches) { + printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n", + i, A[i], X[i]); + __opencl_atomic_exchange(reinterpret_cast(BA2), -1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + *cache1_result = -1; + } + __opencl_atomic_fetch_add(reinterpret_cast(BA2), 1, + __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES); + if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) { + *cache1_result = -1; + break; + } + } + *cache1_result = 0; +} + +static bool gpu_to_gpu_coherency() { + int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1; + int *A_h, *B_h, *X_h, *Y_h; + unsigned int cache0_result, cache1_result; + size_t N = 1024; + size_t Nbytes = N * sizeof(int); + int numDevices = 0; + int numTestDevices = 2; + + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices < numTestDevices) { + HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); + return 0; + } + + // Skip this test if either device does not support this feature. + hipDeviceProp_t props0, props1; + HIP_CHECK(hipGetDeviceProperties(&props0, 0)); + HIP_CHECK(hipGetDeviceProperties(&props1, 1)); + if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 || + strncmp(props1.gcnArchName, "gfx90a", 6) != 0) && + (strncmp(props0.gcnArchName, "gfx940", 6) != 0 || + strncmp(props1.gcnArchName, "gfx940", 6) != 0)) { + printf("info: skipping test on devices other than gfx90a and gfx940.\n"); + return true; + } + + // Allocate Host Side Memory. 
+ printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + A_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + B_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess); + X_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess); + Y_h = reinterpret_cast(malloc(Nbytes)); + HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Initialize the arrays and atomic variables. + for (size_t i = 0; i < N; i++) { + X_h[i] = 100000000 + i; + Y_h[i] = 300000000 + i; + } + + // Initialize shared atomic flags on host coherent memory. + unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h; + unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d; + HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA1_d), + AA1_h, 0)); + *AA1_h = 0; + HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&AA2_d), + AA2_h, 0)); + *AA2_h = 0; + HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA1_d), + BA1_h, 0)); + *BA1_h = 0; + HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&BA2_d), + BA2_h, 0)); + *BA2_h = 0; + + // Skip the first stream. + hipStream_t stream[3]; + HIP_CHECK(hipStreamCreate(&stream[0])); + + // Set-up Device 0. + HIP_CHECK(hipSetDevice(0)); + // Enable P2P access to Device 1. + HIP_CHECK(hipDeviceEnablePeerAccess(1, 0)); + HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); + // Allocating Coherent Memory for Array A_d on Device 0. 
+ printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + hipError_t status = hipExtMallocWithFlags(reinterpret_cast(&A_d), + Nbytes, hipDeviceMallocFinegrained); + REQUIRE(status == hipSuccess); + HIP_CHECK(hipMalloc(&X_d0, Nbytes)); + HIP_CHECK(hipMalloc(&Y_d0, Nbytes)); + + // Set-up Device 1. + HIP_CHECK(hipSetDevice(1)); + // Enable P2P access to Device 0. + HIP_CHECK(hipDeviceEnablePeerAccess(0, 0)); + HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking)); + // Allocating Coherent Memory for Array B_d on Device 1. + printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0); + status = hipExtMallocWithFlags(reinterpret_cast(&B_d), + Nbytes, hipDeviceMallocFinegrained); + REQUIRE(status == hipSuccess); + HIP_CHECK(hipMalloc(&X_d1, Nbytes)); + HIP_CHECK(hipMalloc(&Y_d1, Nbytes)); + + // Transfer initialized data onto the device arrays. + HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice)); + + // Prepare and launch the device kernels. + const unsigned blocks = 1; + const unsigned threadsPerBlock = 1; + HIP_CHECK(hipSetDevice(0)); + hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock), + 0, stream[1], + A_d, B_d, X_d0, Y_d0, N, + AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result); + // Check if launch failed. + HIP_CHECK(hipGetLastError()); + REQUIRE(cache0_result == 0); + HIP_CHECK(hipSetDevice(1)); + hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock), + 0, stream[2], + A_d, B_d, X_d1, Y_d1, N, + AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result); + HIP_CHECK(hipGetLastError()); + REQUIRE(cache1_result == 0); + + // Wait for kernels on both devices. + HIP_CHECK(hipStreamSynchronize(stream[1])); + HIP_CHECK(hipStreamSynchronize(stream[2])); + + // Evaluate the resultant arrays A and B. 
+ HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost)); + + for (size_t i = 0; i < N; i++) { + REQUIRE(A_h[i] == (100000000 + i)); + REQUIRE(B_h[i] == (300000000 + i)); + } + + // Free all the device and host memory allocated. + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(X_d0)); + HIP_CHECK(hipFree(Y_d0)); + HIP_CHECK(hipFree(X_d1)); + HIP_CHECK(hipFree(Y_d1)); + HIP_CHECK(hipHostFree(AA1_h)); + HIP_CHECK(hipHostFree(AA2_h)); + HIP_CHECK(hipHostFree(BA1_h)); + HIP_CHECK(hipHostFree(BA2_h)); + free(A_h); + free(B_h); + free(X_h); + free(Y_h); + + return true; +} + +/** + * Test Description + * ------------------------ + * - This test runs on devices where XGMI enables fine-grained communication + * between GPUs. This performs a message passing test. + * Array A is allocated on Device 0, and remotely on Device 1. + * Device 0 also increments atomic ints AA1 and AA2. + * Array B is allocated on Device 1, and remotely on Device 0. + * Device 1 also increments atomic ints BA1 and BA2. + * Kernel 0 will launch on Device 0, and store array X into array A. + * Kernel 1 will launch on Device 1, and store array Y into array B. + * Kernel 0 will validate that the correct values of array Y are stored in B. + * Kernel 1 will validate that the correct values of array X are stored in A. + + * Test source + * ------------------------ + * - catch/unit/synchronization/cache_coherency_gpu_gpu.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + * - Test to be run only on AMD. + */ + +TEST_CASE("Unit_cache_coherency_gpu_gpu") { + bool passed = true; + // Coherency between GPUs accessing local or remote FB. 
+ REQUIRE(passed == gpu_to_gpu_coherency()); +} diff --git a/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc b/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc index 1e57fa6815..ed2da3e94e 100644 --- a/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc +++ b/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc @@ -1,340 +1,340 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include - -unsigned threadsPerBlock = 256; -unsigned blocksPerCU = 6; - -class MemcpyFunction { - public: - MemcpyFunction(const char* fileName, const char* functionName) { - load(fileName, functionName); - } - void load(const char* fileName, const char* functionName); - void launch(int* dst, const int* src, size_t numElements, hipStream_t s); - - private: - hipFunction_t _function; - hipModule_t _module; -}; - - -void MemcpyFunction::load(const char* fileName, const char* functionName) { - HIP_CHECK(hipModuleLoad(&_module, fileName)); - HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName)); -} - -void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT - struct { - int* _dst; - const int* _src; - size_t _numElements; - } args; - - args._dst = dst; - args._src = src; - args._numElements = numElements; - - size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, - numElements); - HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock, - 1, 1, 0, s, NULL, - reinterpret_cast(&config))); -} - -bool g_warnOnFail = true; -int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000}; - -// Set value of array to specified 32-bit integer: -__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - ptr[i] = val; - } -} - -__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) { - int gid = (blockIdx.x * blockDim.x + threadIdx.x); - int stride = blockDim.x * gridDim.x; - for (size_t i = gid; i < numElements; i += stride) { - dst[i] = src[i]; - } -} - -// Check arrays in reverse order, to more easily detect cases where -// the 
copy is "partially" done. -void checkReverse(const int* ptr, int numElements, int expected) { - int mismatchCnt = 0; - for (int i = numElements - 1; i >= 0; i--) { - if (!g_warnOnFail) { - REQUIRE(ptr[i] == expected); - } - if (++mismatchCnt >= 10) { - break; - } - } -} - -#define ENUM_CASE_STR(x) \ - case x: \ - return #x - -enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType }; - -const char* CmdTypeStr(CmdType c) { - switch (c) { - ENUM_CASE_STR(COPY); - ENUM_CASE_STR(KERNEL); - ENUM_CASE_STR(MODULE_KERNEL); - default: - return "UNKNOWN"; - } -} - -enum SyncType { - NONE, - EVENT_QUERY, - EVENT_SYNC, - STREAM_WAIT_EVENT, - STREAM_QUERY, - STREAM_SYNC, - DEVICE_SYNC, - MAX_SyncType -}; - -const char* SyncTypeStr(SyncType s) { - switch (s) { - ENUM_CASE_STR(NONE); - ENUM_CASE_STR(EVENT_QUERY); - ENUM_CASE_STR(EVENT_SYNC); - ENUM_CASE_STR(STREAM_WAIT_EVENT); - ENUM_CASE_STR(STREAM_QUERY); - ENUM_CASE_STR(STREAM_SYNC); - ENUM_CASE_STR(DEVICE_SYNC); - default: - return "UNKNOWN"; - } -} - -void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s, - size_t numElements) { - switch (cmd) { - case COPY: - HIP_CHECK( - hipMemcpyAsync(dst, src, numElements * sizeof(int), - hipMemcpyDeviceToDevice, s)); - break; - case KERNEL: { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, - threadsPerBlock, numElements); - hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock), - 0, s, dst, src, numElements); - } break; - case MODULE_KERNEL: { - MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); - g_moduleMemcpy.launch(dst, src, numElements, s); - } break; - default: - printf("Info:unknown cmd=%d type", cmd); - } -} - -void resetInputs(int* Ad, int* Bd, int* Ch, - size_t numElements, int expected) { - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, - threadsPerBlock, numElements); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), - 0, hipStream_t(0), Ad, expected, numElements); - // poison with bad value 
to ensure is overwritten correctly - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), - 0, hipStream_t(0), Bd, 0xDEADBEEF, numElements); - hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), - 0, hipStream_t(0), Bd, 0xF000BA55, numElements); - memset(Ch, 13, numElements * sizeof(int)); - HIP_CHECK(hipDeviceSynchronize()); -} - -// Intended to test proper synchronization and cache flushing -// between CMDA and CMDB. CMD are of type CmdType. All command copy memory, -// using either hipMemcpyAsync or kernel implementations. -// Some form of synchronization is applied. Then cmdB copies from Bd to Cd. -// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy. -// Correct result at the end is that Ch contains the -// contents originally in Ad (integer 0x42) - -void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, - hipStream_t stream1, hipStream_t stream2, int numElements, - int* Ad, int* Bd, int* Cd, int* Ch, int expected) { - hipEvent_t e; - HIP_CHECK(hipEventCreateWithFlags(&e, 0)); - - resetInputs(Ad, Bd, Ch, numElements, expected); - - const size_t sizeElements = numElements * sizeof(int); - fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT - sizeElements, static_cast(sizeElements / 1024.0), - CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType)); - - /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT - fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT - return; - }*/ - - // Step A: - runCmd(cmdAType, Bd, Ad, stream1, numElements); - - // Sync in-between? 
- switch (syncType) { - case NONE: - break; - case EVENT_QUERY: { - hipError_t st = hipErrorNotReady; - HIP_CHECK(hipEventRecord(e, stream1)); - do { - st = hipEventQuery(e); - } while (st == hipErrorNotReady); - HIP_CHECK(st); - } break; - case EVENT_SYNC: - HIP_CHECK(hipEventRecord(e, stream1)); - HIP_CHECK(hipEventSynchronize(e)); - break; - case STREAM_WAIT_EVENT: - HIP_CHECK(hipEventRecord(e, stream1)); - HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); - break; - case STREAM_QUERY: { - hipError_t st = hipErrorNotReady; - do { - st = hipStreamQuery(stream1); - } while (st == hipErrorNotReady); - HIP_CHECK(st); - } break; - case STREAM_SYNC: - HIP_CHECK(hipStreamSynchronize(stream1)); - break; - case DEVICE_SYNC: - HIP_CHECK(hipDeviceSynchronize()); - break; - default: - fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType)); - return; - } - runCmd(cmdBType, Cd, Bd, stream2, numElements); - - // Copy back to host, use async copy to avoid any extra synchronization - // that might mask issues. 
- HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost, - stream2)); - HIP_CHECK(hipStreamSynchronize(stream2)); - - checkReverse(Ch, numElements, expected); - - HIP_CHECK(hipEventDestroy(e)); -} - -void testWrapper(size_t numElements) { - const size_t sizeElements = numElements * sizeof(int); - const int expected = 0x42; - int *Ad, *Bd, *Cd, *Ch; - - HIP_CHECK(hipMalloc(&Ad, sizeElements)); - HIP_CHECK(hipMalloc(&Bd, sizeElements)); - HIP_CHECK(hipMalloc(&Cd, sizeElements)); - HIP_CHECK(hipHostMalloc(&Ch, sizeElements)); - - hipStream_t stream1, stream2; - - HIP_CHECK(hipStreamCreate(&stream1)); - HIP_CHECK(hipStreamCreate(&stream2)); - HIP_CHECK(hipDeviceSynchronize()); - - runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, - Ad, Bd, Cd, Ch, expected); - - for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) { - for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) { - for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) { - switch (syncMode) { - // case NONE:: - case EVENT_QUERY: - case EVENT_SYNC: - case STREAM_WAIT_EVENT: - // case STREAM_QUERY: - case STREAM_SYNC: - case DEVICE_SYNC: - runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB), - stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); - break; - default: - break; - } - } - } - } - -#if 0 - runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2, - numElements, Ad, Bd, Cd, Ch, expected); - runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements, - Ad, Bd, Cd, Ch, expected); - runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2, - numElements, Ad, Bd, Cd, Ch, expected); - runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements, - Ad, Bd, Cd, Ch, expected); -#endif - - HIP_CHECK(hipFree(Ad)); - HIP_CHECK(hipFree(Bd)); - HIP_CHECK(hipFree(Cd)); - HIP_CHECK(hipHostFree(Ch)); - - HIP_CHECK(hipStreamDestroy(stream1)); - HIP_CHECK(hipStreamDestroy(stream2)); -} - -/** - * Test Description - * 
------------------------ - * - Test cache management (fences) and synchronization between - * kernel and copy commands. Exhaustively tests 3 command types - * (copy, kernel, module kernel), many sync types (see SyncType), followed by - * another command, across a sweep of data sizes designed to stress - * various levels of the memory hierarchy. - - * Test source - * ------------------------ - * - catch/unit/synchronization/copy_coherency.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.5 - */ - -TEST_CASE("Unit_Copy_Coherency") { - for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) { - size_t numElements = g_elementSizes[index]; - testWrapper(numElements); - } -} +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +unsigned threadsPerBlock = 256; +unsigned blocksPerCU = 6; + +class MemcpyFunction { + public: + MemcpyFunction(const char* fileName, const char* functionName) { + load(fileName, functionName); + } + void load(const char* fileName, const char* functionName); + void launch(int* dst, const int* src, size_t numElements, hipStream_t s); + + private: + hipFunction_t _function; + hipModule_t _module; +}; + + +void MemcpyFunction::load(const char* fileName, const char* functionName) { + HIP_CHECK(hipModuleLoad(&_module, fileName)); + HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName)); +} + +void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT + struct { + int* _dst; + const int* _src; + size_t _numElements; + } args; + + args._dst = dst; + args._src = src; + args._numElements = numElements; + + size_t size = sizeof(args); + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, + numElements); + HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock, + 1, 1, 0, s, NULL, + reinterpret_cast(&config))); +} + +bool g_warnOnFail = true; +int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000}; + +// Set value of array to specified 32-bit integer: +__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) { + int gid = (blockIdx.x * blockDim.x + threadIdx.x); + int stride = blockDim.x * gridDim.x; + for (size_t i = gid; i < numElements; i += stride) { + ptr[i] = val; + } +} + +__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) { + int gid = (blockIdx.x * blockDim.x + threadIdx.x); + int stride = blockDim.x * gridDim.x; + for (size_t i = gid; i < numElements; i += stride) { + dst[i] = src[i]; + } +} + +// Check arrays in reverse order, to more easily detect cases where +// the 
copy is "partially" done. +void checkReverse(const int* ptr, int numElements, int expected) { + int mismatchCnt = 0; + for (int i = numElements - 1; i >= 0; i--) { + if (!g_warnOnFail) { + REQUIRE(ptr[i] == expected); + } + if (++mismatchCnt >= 10) { + break; + } + } +} + +#define ENUM_CASE_STR(x) \ + case x: \ + return #x + +enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType }; + +const char* CmdTypeStr(CmdType c) { + switch (c) { + ENUM_CASE_STR(COPY); + ENUM_CASE_STR(KERNEL); + ENUM_CASE_STR(MODULE_KERNEL); + default: + return "UNKNOWN"; + } +} + +enum SyncType { + NONE, + EVENT_QUERY, + EVENT_SYNC, + STREAM_WAIT_EVENT, + STREAM_QUERY, + STREAM_SYNC, + DEVICE_SYNC, + MAX_SyncType +}; + +const char* SyncTypeStr(SyncType s) { + switch (s) { + ENUM_CASE_STR(NONE); + ENUM_CASE_STR(EVENT_QUERY); + ENUM_CASE_STR(EVENT_SYNC); + ENUM_CASE_STR(STREAM_WAIT_EVENT); + ENUM_CASE_STR(STREAM_QUERY); + ENUM_CASE_STR(STREAM_SYNC); + ENUM_CASE_STR(DEVICE_SYNC); + default: + return "UNKNOWN"; + } +} + +void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s, + size_t numElements) { + switch (cmd) { + case COPY: + HIP_CHECK( + hipMemcpyAsync(dst, src, numElements * sizeof(int), + hipMemcpyDeviceToDevice, s)); + break; + case KERNEL: { + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, + threadsPerBlock, numElements); + hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock), + 0, s, dst, src, numElements); + } break; + case MODULE_KERNEL: { + MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel"); + g_moduleMemcpy.launch(dst, src, numElements, s); + } break; + default: + printf("Info:unknown cmd=%d type", cmd); + } +} + +void resetInputs(int* Ad, int* Bd, int* Ch, + size_t numElements, int expected) { + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, + threadsPerBlock, numElements); + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), + 0, hipStream_t(0), Ad, expected, numElements); + // poison with bad value 
to ensure is overwritten correctly + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), + 0, hipStream_t(0), Bd, 0xDEADBEEF, numElements); + hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock), + 0, hipStream_t(0), Bd, 0xF000BA55, numElements); + memset(Ch, 13, numElements * sizeof(int)); + HIP_CHECK(hipDeviceSynchronize()); +} + +// Intended to test proper synchronization and cache flushing +// between CMDA and CMDB. CMD are of type CmdType. All command copy memory, +// using either hipMemcpyAsync or kernel implementations. +// Some form of synchronization is applied. Then cmdB copies from Bd to Cd. +// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy. +// Correct result at the end is that Ch contains the +// contents originally in Ad (integer 0x42) + +void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType, + hipStream_t stream1, hipStream_t stream2, int numElements, + int* Ad, int* Bd, int* Cd, int* Ch, int expected) { + hipEvent_t e; + HIP_CHECK(hipEventCreateWithFlags(&e, 0)); + + resetInputs(Ad, Bd, Ch, numElements, expected); + + const size_t sizeElements = numElements * sizeof(int); + fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT + sizeElements, static_cast(sizeElements / 1024.0), + CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType)); + + /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT + fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT + return; + }*/ + + // Step A: + runCmd(cmdAType, Bd, Ad, stream1, numElements); + + // Sync in-between? 
+ switch (syncType) { + case NONE: + break; + case EVENT_QUERY: { + hipError_t st = hipErrorNotReady; + HIP_CHECK(hipEventRecord(e, stream1)); + do { + st = hipEventQuery(e); + } while (st == hipErrorNotReady); + HIP_CHECK(st); + } break; + case EVENT_SYNC: + HIP_CHECK(hipEventRecord(e, stream1)); + HIP_CHECK(hipEventSynchronize(e)); + break; + case STREAM_WAIT_EVENT: + HIP_CHECK(hipEventRecord(e, stream1)); + HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); + break; + case STREAM_QUERY: { + hipError_t st = hipErrorNotReady; + do { + st = hipStreamQuery(stream1); + } while (st == hipErrorNotReady); + HIP_CHECK(st); + } break; + case STREAM_SYNC: + HIP_CHECK(hipStreamSynchronize(stream1)); + break; + case DEVICE_SYNC: + HIP_CHECK(hipDeviceSynchronize()); + break; + default: + fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType)); + return; + } + runCmd(cmdBType, Cd, Bd, stream2, numElements); + + // Copy back to host, use async copy to avoid any extra synchronization + // that might mask issues. 
+ HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost, + stream2)); + HIP_CHECK(hipStreamSynchronize(stream2)); + + checkReverse(Ch, numElements, expected); + + HIP_CHECK(hipEventDestroy(e)); +} + +void testWrapper(size_t numElements) { + const size_t sizeElements = numElements * sizeof(int); + const int expected = 0x42; + int *Ad, *Bd, *Cd, *Ch; + + HIP_CHECK(hipMalloc(&Ad, sizeElements)); + HIP_CHECK(hipMalloc(&Bd, sizeElements)); + HIP_CHECK(hipMalloc(&Cd, sizeElements)); + HIP_CHECK(hipHostMalloc(&Ch, sizeElements)); + + hipStream_t stream1, stream2; + + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + HIP_CHECK(hipDeviceSynchronize()); + + runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements, + Ad, Bd, Cd, Ch, expected); + + for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) { + for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) { + for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) { + switch (syncMode) { + // case NONE:: + case EVENT_QUERY: + case EVENT_SYNC: + case STREAM_WAIT_EVENT: + // case STREAM_QUERY: + case STREAM_SYNC: + case DEVICE_SYNC: + runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB), + stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected); + break; + default: + break; + } + } + } + } + +#if 0 + runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2, + numElements, Ad, Bd, Cd, Ch, expected); + runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements, + Ad, Bd, Cd, Ch, expected); + runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2, + numElements, Ad, Bd, Cd, Ch, expected); + runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements, + Ad, Bd, Cd, Ch, expected); +#endif + + HIP_CHECK(hipFree(Ad)); + HIP_CHECK(hipFree(Bd)); + HIP_CHECK(hipFree(Cd)); + HIP_CHECK(hipHostFree(Ch)); + + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); +} + +/** + * Test Description + * 
------------------------ + * - Test cache management (fences) and synchronization between + * kernel and copy commands. Exhaustively tests 3 command types + * (copy, kernel, module kernel), many sync types (see SyncType), followed by + * another command, across a sweep of data sizes designed to stress + * various levels of the memory hierarchy. + + * Test source + * ------------------------ + * - catch/unit/synchronization/copy_coherency.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ + +TEST_CASE("Unit_Copy_Coherency") { + for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) { + size_t numElements = g_elementSizes[index]; + testWrapper(numElements); + } +} diff --git a/projects/hip-tests/catch/unit/warp/hipShflTests.cc b/projects/hip-tests/catch/unit/warp/hipShflTests.cc index af7faa4525..ed66571bca 100644 --- a/projects/hip-tests/catch/unit/warp/hipShflTests.cc +++ b/projects/hip-tests/catch/unit/warp/hipShflTests.cc @@ -1,182 +1,182 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include - -#define WIDTH 4 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -template __global__ void matrixTranspose(T* out, T* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - T val = in[x]; - for (int i = 0; i < width; i++) { - for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); - } -} - -// CPU implementation of matrix transpose -template -void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } -static void getFactor(float* fact) { *fact = 2.5; } -static void getFactor(__half* fact) { *fact = 2.5; } -static void getFactor(double* fact) { *fact = 2.5; } -static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } - -template int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { - errors++; - } - } - return errors; -} - -template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT - errors++; - } - } - return errors; -} - -template void 
init(T* Matrix) { - // initialize the input data - T factor; - getFactor(&factor); - for (int i = 0; i < NUM; i++) { - Matrix[i] = (T)i + factor; - } -} - -template <> void init(__half* Matrix) { - // initialize the input data - __half factor; - getFactor(&factor); - for (int i = 0; i < NUM; i++) { - Matrix[i] = i + __half2float(factor); - } -} - -template static void runTest() { - T* Matrix; - T* TransposeMatrix; - T* cpuTransposeMatrix; - - T* gpuMatrix; - T* gpuTransposeMatrix; - - hipDeviceProp_t devProp; - HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); - - int errors = 0; - - Matrix = reinterpret_cast(malloc(NUM * sizeof(T))); - TransposeMatrix = reinterpret_cast(malloc(NUM * sizeof(T))); - cpuTransposeMatrix = reinterpret_cast(malloc(NUM * sizeof(T))); - - init(Matrix); - - // allocate the memory on the device side - HIP_CHECK(hipMalloc(reinterpret_cast(&gpuMatrix), NUM * sizeof(T))); - HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), NUM * sizeof(T))); - - // Memory transfer from host to device - HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice)); - - // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), - 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Memory transfer from device to host - HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost)); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix)); - // free the resources on device side - HIP_CHECK(hipFree(gpuMatrix)); - HIP_CHECK(hipFree(gpuTransposeMatrix)); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); -} - -/** - * @addtogroup __shfl __shfl - * @{ - * @ingroup ShflTest - * `T __shfl(T var, int srcLane, int width=warpSize)` - - * Contains wrap 
__shfl functions. - * @} - */ - -/** - * Test Description - * ------------------------ - * - Test case to verify __shfl warp functions for different datatypes. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipShflTests.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - */ - -TEST_CASE("Unit_hipShflTests") { - SECTION("run test for int") { runTest(); } - SECTION("run test for float") { runTest(); } - SECTION("run test for double") { runTest(); } - // Test added to support half datatype. - SECTION("run test for __half") { runTest<__half>(); } - SECTION("run test for int64_t") { runTest(); } - SECTION("run test for unsigned int") { runTest(); } - SECTION("run test for uint64_t") { runTest(); } -} - -/** -* End doxygen group ShflTest. -* @} -*/ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include +#include + +#define WIDTH 4 + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +// Device (Kernel) function, it must be void +template __global__ void matrixTranspose(T* out, T* in, const int width) { + int x = blockDim.x * blockIdx.x + threadIdx.x; + T val = in[x]; + for (int i = 0; i < width; i++) { + for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); + } +} + +// CPU implementation of matrix transpose +template +void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + +static void getFactor(int* fact) { *fact = 101; } +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } +static void getFactor(float* fact) { *fact = 2.5; } +static void getFactor(__half* fact) { *fact = 2.5; } +static void getFactor(double* fact) { *fact = 2.5; } +static void getFactor(int64_t* fact) { *fact = 303; } +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } + +template int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { + int errors = 0; + for (int i = 0; i < NUM; i++) { + if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { + errors++; + } + } + return errors; +} + +template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { + int errors = 0; + for (int i = 0; i < NUM; i++) { + if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + errors++; + } + } + return errors; +} + +template void init(T* Matrix) { + // initialize the input data + T factor; + getFactor(&factor); + for (int i = 0; i < NUM; i++) { + Matrix[i] = (T)i + factor; + } +} + +template <> void init(__half* Matrix) { + // initialize the input data + __half factor; + 
getFactor(&factor); + for (int i = 0; i < NUM; i++) { + Matrix[i] = i + __half2float(factor); + } +} + +template static void runTest() { + T* Matrix; + T* TransposeMatrix; + T* cpuTransposeMatrix; + + T* gpuMatrix; + T* gpuTransposeMatrix; + + hipDeviceProp_t devProp; + HIP_CHECK(hipGetDeviceProperties(&devProp, 0)); + + int errors = 0; + + Matrix = reinterpret_cast(malloc(NUM * sizeof(T))); + TransposeMatrix = reinterpret_cast(malloc(NUM * sizeof(T))); + cpuTransposeMatrix = reinterpret_cast(malloc(NUM * sizeof(T))); + + init(Matrix); + + // allocate the memory on the device side + HIP_CHECK(hipMalloc(reinterpret_cast(&gpuMatrix), NUM * sizeof(T))); + HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), NUM * sizeof(T))); + + // Memory transfer from host to device + HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice)); + + // Lauching kernel from host + hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), + 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); + + // Memory transfer from device to host + HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost)); + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix)); + // free the resources on device side + HIP_CHECK(hipFree(gpuMatrix)); + HIP_CHECK(hipFree(gpuTransposeMatrix)); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); +} + +/** + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl(T var, int srcLane, int width=warpSize)` - + * Contains wrap __shfl functions. + * @} + */ + +/** + * Test Description + * ------------------------ + * - Test case to verify __shfl warp functions for different datatypes. 
+ + * Test source + * ------------------------ + * - catch/unit/kernel/hipShflTests.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + */ + +TEST_CASE("Unit_hipShflTests") { + SECTION("run test for int") { runTest(); } + SECTION("run test for float") { runTest(); } + SECTION("run test for double") { runTest(); } + // Test added to support half datatype. + SECTION("run test for __half") { runTest<__half>(); } + SECTION("run test for int64_t") { runTest(); } + SECTION("run test for unsigned int") { runTest(); } + SECTION("run test for uint64_t") { runTest(); } +} + +/** +* End doxygen group ShflTest. +* @} +*/ diff --git a/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc b/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc index a06216f03d..0a95f52810 100644 --- a/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc +++ b/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc @@ -1,241 +1,241 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include - -const int size = 32; - -template __global__ void shflDownSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size / 2; i > 0; i /= 2) { - val += __shfl_down(val, i, size); - } - a[threadIdx.x] = val; -} - -template __global__ void shflUpSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size / 2; i > 0; i /= 2) { - val += __shfl_up(val, i, size); - } - a[threadIdx.x] = val; -} - -template __global__ void shflXorSum(T* a, int size) { - T val = a[threadIdx.x]; - for (int i = size / 2; i > 0; i /= 2) { - val += __shfl_xor(val, i, size); - } - a[threadIdx.x] = val; -} - -static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } -static void getFactor(float* fact) { *fact = 2.5; } -static void getFactor(double* fact) { *fact = 2.5; } -static void getFactor(__half* fact) { *fact = 2.5; } -static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } - -template T sum(T* a) { - T cpuSum = 0; - T factor; - getFactor(&factor); - for (int i = 0; i < size; i++) { - a[i] = i + factor; - cpuSum += a[i]; - } - return cpuSum; -} - -template <> __half sum(__half* a) { - __half cpuSum = 0; - __half factor; - getFactor(&factor); - for (int i = 0; i < size; i++) { - a[i] = i + __half2float(factor); - cpuSum = __half2float(cpuSum) + __half2float(a[i]); - } - return cpuSum; -} - -template bool compare(T gpuSum, T cpuSum) { - if (gpuSum != cpuSum) { - return true; - } - return false; -} - -template <> bool compare(__half gpuSum, __half cpuSum) { - if (__half2float(gpuSum) != __half2float(cpuSum)) { - return 
true; - } - return false; -} - -template static void runTestShflUp() { - const int size = 32; - T a[size]; - T cpuSum = sum(a); - T* d_a; - HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); - HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); - hipLaunchKernelGGL(shflUpSum, 1, size, 0, 0, d_a, size); - HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); - REQUIRE((compare(a[size - 1], cpuSum)) == 0); - HIP_CHECK(hipFree(d_a)); -} - -template static void runTestShflDown() { - T a[size]; - T cpuSum = sum(a); - T* d_a; - HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); - HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); - hipLaunchKernelGGL(shflDownSum, 1, size, 0, 0, d_a, size); - HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); - REQUIRE((compare(a[0], cpuSum)) == 0); - HIP_CHECK(hipFree(d_a)); -} - -template static void runTestShflXor() { - T a[size]; - T cpuSum = sum(a); - T* d_a; - HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); - HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); - hipLaunchKernelGGL(shflXorSum, 1, size, 0, 0, d_a, size); - HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); - REQUIRE((compare(a[0], cpuSum)) == 0); - HIP_CHECK(hipFree(d_a)); -} - -/** - * @addtogroup __shfl __shfl - * @{ - * @ingroup ShflTest - * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - - * Contains warp __shfl_up function - */ - -/** - * Test Description - * ------------------------ - * - Test case to verify __shfl_up warp functions for different datatypes. 
- - * Test source - * ------------------------ - * - catch/unit/kernel/hipShflUpDownTest.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - * - Gaurding this test against cuda with refernce to mentioned - * ticket SWDEV-379177 - */ - -TEST_CASE("Unit_runTestShfl_up") { - SECTION("runTestShflUp for int") { runTestShflUp(); } - SECTION("runTestShflUp for float") { runTestShflUp(); } - SECTION("runTestShflUp for double") { runTestShflUp(); } - SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); } - SECTION("runTestShflUp for int64_t") { runTestShflUp(); } - SECTION("runTestShflUp for unsigned int") { runTestShflUp(); } - SECTION("runTestShflUp for uint64_t") { runTestShflUp(); } -} -/** - * End doxygen group __shfl. - * @} - */ - -/** - * @addtogroup __shfl __shfl - * @{ - * @ingroup ShflTest - * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - - * Contains warp __shfl_down function - */ - -/** - * Test Description - * ------------------------ - * - Test case to verify __shfl_down warp functions for different datatypes. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipShflUpDownTest.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - * - Gaurding this test against cuda with refernce to mentioned - * ticket SWDEV-379177 - */ - -TEST_CASE("Unit_runTestShfl_Down") { - SECTION("runTestShflDown for int") { runTestShflDown(); } - SECTION("runTestShflDown for float") { runTestShflDown(); } - SECTION("runTestShflDown for double") { runTestShflDown(); } - SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); } - SECTION("runTestShflDown for int64_t") { runTestShflDown(); } - SECTION("runTestShflDown for unsigned int") { runTestShflDown(); } - SECTION("runTestShflDown for uint64_t") { runTestShflDown(); } -} -/** - * End doxygen group __shfl. 
- * @} - */ - -/** - * @addtogroup __shfl __shfl - * @{ - * @ingroup ShflTest - * `T __shfl_xor(T var, int laneMask, int width=warpSize)` - - * Contains warp __shfl_xor function - */ - -/** - * Test Description - * ------------------------ - * - Test case to verify __shfl_xor warp functions for different datatypes. - - * Test source - * ------------------------ - * - catch/unit/kernel/hipShflUpDownTest.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.6 - * - Gaurding this test against cuda with refernce to mentioned - * ticket SWDEV-379177 - */ - -TEST_CASE("Unit_runTestShfl_Xor") { - SECTION("runTestShflXor for int") { runTestShflXor(); } - SECTION("runTestShflXor for float") { runTestShflXor(); } - SECTION("runTestShflXor for double") { runTestShflXor(); } - SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); } - SECTION("runTestShflXor for int64_t") { runTestShflXor(); } - SECTION("runTestShflXor for unsigned int") { runTestShflXor(); } - SECTION("runTestShflXor for uint64_t") { runTestShflXor(); } -} -/** - * End doxygen group __shfl. - * @} - */ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +const int size = 32; + +template __global__ void shflDownSum(T* a, int size) { + T val = a[threadIdx.x]; + for (int i = size / 2; i > 0; i /= 2) { + val += __shfl_down(val, i, size); + } + a[threadIdx.x] = val; +} + +template __global__ void shflUpSum(T* a, int size) { + T val = a[threadIdx.x]; + for (int i = size / 2; i > 0; i /= 2) { + val += __shfl_up(val, i, size); + } + a[threadIdx.x] = val; +} + +template __global__ void shflXorSum(T* a, int size) { + T val = a[threadIdx.x]; + for (int i = size / 2; i > 0; i /= 2) { + val += __shfl_xor(val, i, size); + } + a[threadIdx.x] = val; +} + +static void getFactor(int* fact) { *fact = 101; } +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } +static void getFactor(float* fact) { *fact = 2.5; } +static void getFactor(double* fact) { *fact = 2.5; } +static void getFactor(__half* fact) { *fact = 2.5; } +static void getFactor(int64_t* fact) { *fact = 303; } +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } + +template T sum(T* a) { + T cpuSum = 0; + T factor; + getFactor(&factor); + for (int i = 0; i < size; i++) { + a[i] = i + factor; + cpuSum += a[i]; + } + return cpuSum; +} + +template <> __half sum(__half* a) { + __half cpuSum = 0; + __half factor; + getFactor(&factor); + for (int i = 0; i < size; i++) { + a[i] = i + __half2float(factor); + cpuSum = __half2float(cpuSum) + __half2float(a[i]); + } + return cpuSum; +} + +template bool compare(T gpuSum, T cpuSum) { + if (gpuSum != cpuSum) { + return true; + } + return false; +} + +template <> bool compare(__half gpuSum, __half cpuSum) { + if (__half2float(gpuSum) != __half2float(cpuSum)) { + return 
true; + } + return false; +} + +template static void runTestShflUp() { + const int size = 32; + T a[size]; + T cpuSum = sum(a); + T* d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); + HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); + hipLaunchKernelGGL(shflUpSum, 1, size, 0, 0, d_a, size); + HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); + REQUIRE((compare(a[size - 1], cpuSum)) == 0); + HIP_CHECK(hipFree(d_a)); +} + +template static void runTestShflDown() { + T a[size]; + T cpuSum = sum(a); + T* d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); + HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); + hipLaunchKernelGGL(shflDownSum, 1, size, 0, 0, d_a, size); + HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); + REQUIRE((compare(a[0], cpuSum)) == 0); + HIP_CHECK(hipFree(d_a)); +} + +template static void runTestShflXor() { + T a[size]; + T cpuSum = sum(a); + T* d_a; + HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size)); + HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault)); + hipLaunchKernelGGL(shflXorSum, 1, size, 0, 0, d_a, size); + HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault)); + REQUIRE((compare(a[0], cpuSum)) == 0); + HIP_CHECK(hipFree(d_a)); +} + +/** + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_up function + */ + +/** + * Test Description + * ------------------------ + * - Test case to verify __shfl_up warp functions for different datatypes. 
+ + * Test source + * ------------------------ + * - catch/unit/kernel/hipShflUpDownTest.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + * - Gaurding this test against cuda with refernce to mentioned + * ticket SWDEV-379177 + */ + +TEST_CASE("Unit_runTestShfl_up") { + SECTION("runTestShflUp for int") { runTestShflUp(); } + SECTION("runTestShflUp for float") { runTestShflUp(); } + SECTION("runTestShflUp for double") { runTestShflUp(); } + SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); } + SECTION("runTestShflUp for int64_t") { runTestShflUp(); } + SECTION("runTestShflUp for unsigned int") { runTestShflUp(); } + SECTION("runTestShflUp for uint64_t") { runTestShflUp(); } +} +/** + * End doxygen group __shfl. + * @} + */ + +/** + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_down function + */ + +/** + * Test Description + * ------------------------ + * - Test case to verify __shfl_down warp functions for different datatypes. + + * Test source + * ------------------------ + * - catch/unit/kernel/hipShflUpDownTest.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + * - Gaurding this test against cuda with refernce to mentioned + * ticket SWDEV-379177 + */ + +TEST_CASE("Unit_runTestShfl_Down") { + SECTION("runTestShflDown for int") { runTestShflDown(); } + SECTION("runTestShflDown for float") { runTestShflDown(); } + SECTION("runTestShflDown for double") { runTestShflDown(); } + SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); } + SECTION("runTestShflDown for int64_t") { runTestShflDown(); } + SECTION("runTestShflDown for unsigned int") { runTestShflDown(); } + SECTION("runTestShflDown for uint64_t") { runTestShflDown(); } +} +/** + * End doxygen group __shfl. 
+ * @} + */ + +/** + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_xor(T var, int laneMask, int width=warpSize)` - + * Contains warp __shfl_xor function + */ + +/** + * Test Description + * ------------------------ + * - Test case to verify __shfl_xor warp functions for different datatypes. + + * Test source + * ------------------------ + * - catch/unit/warp/hipShflUpDownTest.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.6 + * - Guarding this test against cuda with reference to mentioned + * ticket SWDEV-379177 + */ + +TEST_CASE("Unit_runTestShfl_Xor") { + SECTION("runTestShflXor for int") { runTestShflXor(); } + SECTION("runTestShflXor for float") { runTestShflXor(); } + SECTION("runTestShflXor for double") { runTestShflXor(); } + SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); } + SECTION("runTestShflXor for int64_t") { runTestShflXor(); } + SECTION("runTestShflXor for unsigned int") { runTestShflXor(); } + SECTION("runTestShflXor for uint64_t") { runTestShflXor(); } +} +/** + * End doxygen group __shfl. + * @} + */ diff --git a/projects/hip-tests/perftests/memory/hipPerfMemset.cpp b/projects/hip-tests/perftests/memory/hipPerfMemset.cpp index a2db4c690a..2df0c9727b 100644 --- a/projects/hip-tests/perftests/memory/hipPerfMemset.cpp +++ b/projects/hip-tests/perftests/memory/hipPerfMemset.cpp @@ -1,437 +1,437 @@ -/* - Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- */ - -/* HIT_START - * BUILD: %t %s ../../src/test_common.cpp - * TEST: %t - * HIT_END - */ - -#include "test_common.h" -#include -#include - -static unsigned int sizeList[] = { - 256, 512, 1024, 2048, 4096, 8192, -}; - -static unsigned int eleNumList[] = { - 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, - 0x200000, 0x400000, 0x800000, 0x1000000 -}; - -typedef struct _dataType { -char memsetval = 0x42; -char memsetD8val = 0xDE; -int16_t memsetD16val = 0xDEAD; -int memsetD32val = 0xDEADBEEF; -}dataType; - -#define NUM_ITER 1000 - -enum MemsetType { - hipMemsetTypeDefault, - hipMemsetTypeD8, - hipMemsetTypeD16, - hipMemsetTypeD32, - hipMemsetTypeMax - -}; - -using namespace std; - -class hipPerfMemset { - private: - uint64_t bufSize_; - unsigned int num_elements_; - unsigned int testNumEle_; - unsigned int _numSubTests = 0; - unsigned int _numSubTests2D = 0; - unsigned int _numSubTests3D = 0; - unsigned int num_sizes_ =0; - - public: - hipPerfMemset() { - num_elements_ = sizeof(eleNumList) / sizeof(unsigned int); - _numSubTests = num_elements_ * hipMemsetTypeMax; - - num_sizes_ = sizeof(sizeList) / sizeof(unsigned int); - _numSubTests2D = num_sizes_; - _numSubTests3D = _numSubTests2D; - }; - - ~hipPerfMemset() {}; - - void open(int deviceID); - - template - void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async); - - template - void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async); - - template - void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async); - - uint getNumTests() { - return _numSubTests; - } - - uint getNumTests2D() { - return _numSubTests2D; - } - uint getNumTests3D() { - return _numSubTests3D; - } -}; - - -void hipPerfMemset::open(int deviceId) { - int nGpu = 0; - HIPCHECK(hipGetDeviceCount(&nGpu)); - if (nGpu < 1) { - failed("No GPU!"); - } - - HIPCHECK(hipSetDevice(deviceId)); - hipDeviceProp_t props = {0}; - HIPCHECK(hipGetDeviceProperties(&props, 
deviceId)); - std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name - << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId - << std::endl; -} - -template -void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) { - - T * A_h; - T * A_d; - - testNumEle_ = eleNumList[test % num_elements_]; - - bufSize_ = testNumEle_ * sizeof(uint32_t); - - HIPCHECK(hipMalloc(&A_d, bufSize_)); - - A_h = reinterpret_cast (malloc(bufSize_)); - - hipStream_t stream; - HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); - - // Warm-up - if (async) { - HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - } else { - HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_)); - HIPCHECK(hipDeviceSynchronize()); - } - - auto start = chrono::high_resolution_clock::now(); - for (uint i = 0; i < NUM_ITER; i++) { - if (type == hipMemsetTypeDefault && !async) { - HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_)); - } - else if (type == hipMemsetTypeDefault && async) { - HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream)); - } - else if (type == hipMemsetTypeD8 && !async){ - HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_)); - } - else if (type == hipMemsetTypeD8 && async) { - HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream)); - } - else if (type == hipMemsetTypeD16 && !async) { - HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T))); - } - else if (type == hipMemsetTypeD16 && async) { - HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream)); - } - else if (type == hipMemsetTypeD32 && !async) { - HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T))); - } - else if (type == hipMemsetTypeD32 && async) { - HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream)); - } - } - if 
(async) { - HIPCHECK(hipStreamSynchronize(stream)); - } else { - HIPCHECK(hipDeviceSynchronize()); - } - - auto end = chrono::high_resolution_clock::now(); - - HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) ); - - for (int i = 0; i < bufSize_ / sizeof(T); i++) { - if (A_h[i] != memsetval) { - cout << "mismatch at index " << i << " computed: " << static_cast (A_h[i]) - << ", memsetval: " << static_cast (memsetval) << endl; - break; - } - } - - HIPCHECK(hipFree(A_d)); - free(A_h); - - auto diff = std::chrono::duration(end - start); - auto sec = diff.count(); - - auto perf = static_cast((bufSize_ * NUM_ITER * (double)(1e-09)) / sec); - - cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4) - << " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl; -} - -template -void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) { - - bufSize_ = sizeList[test % num_sizes_]; - - size_t numH = bufSize_; - size_t numW = bufSize_; - size_t pitch_A; - size_t width = numW * sizeof(char); - size_t sizeElements = width * numH; - size_t elements = numW* numH; - - T * A_h; - T * A_d; - - HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width , - numH)); - A_h = reinterpret_cast(malloc(sizeElements)); - - for (size_t i=0; i < elements; i++) { - A_h[i] = 1; - } - - hipStream_t stream; - HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); - - // Warm-up - if (async) { - HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); - HIPCHECK(hipStreamSynchronize(stream)); - } else { - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH)); - HIPCHECK(hipDeviceSynchronize()); - } - - auto start = chrono::steady_clock::now(); - - for (uint i = 0; i < NUM_ITER; i++) { - if (type == hipMemsetTypeDefault && !async) { - HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH)); - } - else if (type == hipMemsetTypeDefault && async) { - 
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); - } - } - - if (async) { - HIPCHECK(hipStreamSynchronize(stream)); - } else { - HIPCHECK(hipDeviceSynchronize()); - } - - auto end = chrono::steady_clock::now(); - - HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, - hipMemcpyDeviceToHost)); - - for (int i=0; i < elements; i++) { - if (A_h[i] != memsetval) { - cout << "mismatch at index " << i << " computed: " << static_cast (A_h[i]) - << ", memsetval: " << static_cast (memsetval) << endl; - break; - } - } - - chrono::duration diff = end - start; - - auto sec = diff.count(); - - auto perf = static_cast((sizeElements* NUM_ITER * (double)(1e-09)) / sec); - - cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] " - << " " << "(GB/s) for " << setw(5) << bufSize_ - << " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl; - - HIPCHECK(hipStreamDestroy(stream)); - HIPCHECK(hipFree(A_d)); - free(A_h); -} - -template -void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) { - - bufSize_ = sizeList[test % num_sizes_]; - - size_t numH = bufSize_; - size_t numW = bufSize_; - size_t depth = 10; - size_t width = numW * sizeof(char); - size_t sizeElements = width * numH * depth; - size_t elements = numW* numH* depth; - - hipStream_t stream; - HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); - - T *A_h; - - hipExtent extent = make_hipExtent(width, numH, depth); - hipPitchedPtr devPitchedPtr; - - HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); - A_h = (char*)malloc(sizeElements); - HIPASSERT(A_h != NULL); - - for (size_t i=0; i (A_h[i]) - << ", memsetval: " << static_cast (memsetval) << endl; - break; - } - } - - chrono::duration diff = end - start; - - auto sec = diff.count(); - - auto perf = static_cast((sizeElements * NUM_ITER * (double)(1e-09)) / sec); - - cout << " hipPerf3DMemset" << (async ? 
"Async" : " ") << "[" << test << "] " << " " - << "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5) - << bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl; - HIPCHECK(hipFree(devPitchedPtr.ptr)); - free(A_h); -} - -int main() { - hipPerfMemset hipPerfMemset; - - dataType pattern; - int deviceId = 0; - hipPerfMemset.open(deviceId); - MemsetType type; - - int numTests = hipPerfMemset.getNumTests(); - int numTests2D = hipPerfMemset.getNumTests2D(); - int numTests3D = hipPerfMemset.getNumTests3D(); - - - cout << "--------------------- 1D buffer -------------------" << endl; - bool async= false; - for (uint i = 0; i < 2 ; i++) { - cout << endl; - - for (auto testCase = 0; testCase < numTests; testCase++) { - if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) { - cout << "API: hipMemsetD8" << (async ? "Async " : " "); - hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async); - } - - else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) { - cout << "API: hipMemsetD16" << (async ? "Async" : " "); - hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async); - } - - else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) { - cout << "API: hipMemsetD32" << (async ? "Async" : " "); - hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async); - } - - else { - cout << "API: hipMemset" << (async ? 
"Async " : " "); - hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async); - } - } - async = true; - } - - cout << endl; - cout << "------------------ 2D buffer arrays ---------------" << endl; - - async = false; - for (uint i = 0; i < 2; i++) { - cout << endl; - for (uint test = 0; test < numTests2D; test++) { - hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async); - } - async = true; - } - - cout << endl; - cout << "------------------ 3D buffer arrays ---------------" << endl; - - async = false; - for (uint i = 0; i < 2; i++) { - cout << endl; - for (uint test =0; test < numTests3D; test++) { - hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async); - } - async = true; - } - - passed(); -} +/* + Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ */ + +/* HIT_START + * BUILD: %t %s ../../src/test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#include +#include + +static unsigned int sizeList[] = { + 256, 512, 1024, 2048, 4096, 8192, +}; + +static unsigned int eleNumList[] = { + 0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, + 0x200000, 0x400000, 0x800000, 0x1000000 +}; + +typedef struct _dataType { +char memsetval = 0x42; +char memsetD8val = 0xDE; +int16_t memsetD16val = 0xDEAD; +int memsetD32val = 0xDEADBEEF; +}dataType; + +#define NUM_ITER 1000 + +enum MemsetType { + hipMemsetTypeDefault, + hipMemsetTypeD8, + hipMemsetTypeD16, + hipMemsetTypeD32, + hipMemsetTypeMax + +}; + +using namespace std; + +class hipPerfMemset { + private: + uint64_t bufSize_; + unsigned int num_elements_; + unsigned int testNumEle_; + unsigned int _numSubTests = 0; + unsigned int _numSubTests2D = 0; + unsigned int _numSubTests3D = 0; + unsigned int num_sizes_ =0; + + public: + hipPerfMemset() { + num_elements_ = sizeof(eleNumList) / sizeof(unsigned int); + _numSubTests = num_elements_ * hipMemsetTypeMax; + + num_sizes_ = sizeof(sizeList) / sizeof(unsigned int); + _numSubTests2D = num_sizes_; + _numSubTests3D = _numSubTests2D; + }; + + ~hipPerfMemset() {}; + + void open(int deviceID); + + template + void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async); + + template + void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async); + + template + void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async); + + uint getNumTests() { + return _numSubTests; + } + + uint getNumTests2D() { + return _numSubTests2D; + } + uint getNumTests3D() { + return _numSubTests3D; + } +}; + + +void hipPerfMemset::open(int deviceId) { + int nGpu = 0; + HIPCHECK(hipGetDeviceCount(&nGpu)); + if (nGpu < 1) { + failed("No GPU!"); + } + + HIPCHECK(hipSetDevice(deviceId)); + hipDeviceProp_t props = {0}; + HIPCHECK(hipGetDeviceProperties(&props, 
deviceId)); + std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name + << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId + << std::endl; +} + +template +void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) { + + T * A_h; + T * A_d; + + testNumEle_ = eleNumList[test % num_elements_]; + + bufSize_ = testNumEle_ * sizeof(uint32_t); + + HIPCHECK(hipMalloc(&A_d, bufSize_)); + + A_h = reinterpret_cast (malloc(bufSize_)); + + hipStream_t stream; + HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + // Warm-up + if (async) { + HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + } else { + HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_)); + HIPCHECK(hipDeviceSynchronize()); + } + + auto start = chrono::high_resolution_clock::now(); + for (uint i = 0; i < NUM_ITER; i++) { + if (type == hipMemsetTypeDefault && !async) { + HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_)); + } + else if (type == hipMemsetTypeDefault && async) { + HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream)); + } + else if (type == hipMemsetTypeD8 && !async){ + HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_)); + } + else if (type == hipMemsetTypeD8 && async) { + HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream)); + } + else if (type == hipMemsetTypeD16 && !async) { + HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T))); + } + else if (type == hipMemsetTypeD16 && async) { + HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream)); + } + else if (type == hipMemsetTypeD32 && !async) { + HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T))); + } + else if (type == hipMemsetTypeD32 && async) { + HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream)); + } + } + if 
(async) { + HIPCHECK(hipStreamSynchronize(stream)); + } else { + HIPCHECK(hipDeviceSynchronize()); + } + + auto end = chrono::high_resolution_clock::now(); + + HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) ); + + for (int i = 0; i < bufSize_ / sizeof(T); i++) { + if (A_h[i] != memsetval) { + cout << "mismatch at index " << i << " computed: " << static_cast (A_h[i]) + << ", memsetval: " << static_cast (memsetval) << endl; + break; + } + } + + HIPCHECK(hipFree(A_d)); + free(A_h); + + auto diff = std::chrono::duration(end - start); + auto sec = diff.count(); + + auto perf = static_cast((bufSize_ * NUM_ITER * (double)(1e-09)) / sec); + + cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4) + << " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl; +} + +template +void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) { + + bufSize_ = sizeList[test % num_sizes_]; + + size_t numH = bufSize_; + size_t numW = bufSize_; + size_t pitch_A; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH; + size_t elements = numW* numH; + + T * A_h; + T * A_d; + + HIPCHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width , + numH)); + A_h = reinterpret_cast(malloc(sizeElements)); + + for (size_t i=0; i < elements; i++) { + A_h[i] = 1; + } + + hipStream_t stream; + HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + // Warm-up + if (async) { + HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); + HIPCHECK(hipStreamSynchronize(stream)); + } else { + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH)); + HIPCHECK(hipDeviceSynchronize()); + } + + auto start = chrono::steady_clock::now(); + + for (uint i = 0; i < NUM_ITER; i++) { + if (type == hipMemsetTypeDefault && !async) { + HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH)); + } + else if (type == hipMemsetTypeDefault && async) { + 
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream)); + } + } + + if (async) { + HIPCHECK(hipStreamSynchronize(stream)); + } else { + HIPCHECK(hipDeviceSynchronize()); + } + + auto end = chrono::steady_clock::now(); + + HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH, + hipMemcpyDeviceToHost)); + + for (int i=0; i < elements; i++) { + if (A_h[i] != memsetval) { + cout << "mismatch at index " << i << " computed: " << static_cast (A_h[i]) + << ", memsetval: " << static_cast (memsetval) << endl; + break; + } + } + + chrono::duration diff = end - start; + + auto sec = diff.count(); + + auto perf = static_cast((sizeElements* NUM_ITER * (double)(1e-09)) / sec); + + cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] " + << " " << "(GB/s) for " << setw(5) << bufSize_ + << " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl; + + HIPCHECK(hipStreamDestroy(stream)); + HIPCHECK(hipFree(A_d)); + free(A_h); +} + +template +void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) { + + bufSize_ = sizeList[test % num_sizes_]; + + size_t numH = bufSize_; + size_t numW = bufSize_; + size_t depth = 10; + size_t width = numW * sizeof(char); + size_t sizeElements = width * numH * depth; + size_t elements = numW* numH* depth; + + hipStream_t stream; + HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + T *A_h; + + hipExtent extent = make_hipExtent(width, numH, depth); + hipPitchedPtr devPitchedPtr; + + HIPCHECK(hipMalloc3D(&devPitchedPtr, extent)); + A_h = (char*)malloc(sizeElements); + HIPASSERT(A_h != NULL); + + for (size_t i=0; i (A_h[i]) + << ", memsetval: " << static_cast (memsetval) << endl; + break; + } + } + + chrono::duration diff = end - start; + + auto sec = diff.count(); + + auto perf = static_cast((sizeElements * NUM_ITER * (double)(1e-09)) / sec); + + cout << " hipPerf3DMemset" << (async ? 
"Async" : " ") << "[" << test << "] " << " " + << "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5) + << bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl; + HIPCHECK(hipFree(devPitchedPtr.ptr)); + free(A_h); +} + +int main() { + hipPerfMemset hipPerfMemset; + + dataType pattern; + int deviceId = 0; + hipPerfMemset.open(deviceId); + MemsetType type; + + int numTests = hipPerfMemset.getNumTests(); + int numTests2D = hipPerfMemset.getNumTests2D(); + int numTests3D = hipPerfMemset.getNumTests3D(); + + + cout << "--------------------- 1D buffer -------------------" << endl; + bool async= false; + for (uint i = 0; i < 2 ; i++) { + cout << endl; + + for (auto testCase = 0; testCase < numTests; testCase++) { + if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) { + cout << "API: hipMemsetD8" << (async ? "Async " : " "); + hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async); + } + + else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) { + cout << "API: hipMemsetD16" << (async ? "Async" : " "); + hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async); + } + + else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) { + cout << "API: hipMemsetD32" << (async ? "Async" : " "); + hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async); + } + + else { + cout << "API: hipMemset" << (async ? 
"Async " : " "); + hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async); + } + } + async = true; + } + + cout << endl; + cout << "------------------ 2D buffer arrays ---------------" << endl; + + async = false; + for (uint i = 0; i < 2; i++) { + cout << endl; + for (uint test = 0; test < numTests2D; test++) { + hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async); + } + async = true; + } + + cout << endl; + cout << "------------------ 3D buffer arrays ---------------" << endl; + + async = false; + for (uint i = 0; i < 2; i++) { + cout << endl; + for (uint test =0; test < numTests3D; test++) { + hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async); + } + async = true; + } + + passed(); +} diff --git a/projects/hip-tests/samples/README.md b/projects/hip-tests/samples/README.md index 709eee3e99..dcf178c883 100644 --- a/projects/hip-tests/samples/README.md +++ b/projects/hip-tests/samples/README.md @@ -41,4 +41,4 @@ cmake ../samples make package_samples -## Note: sample 2_Cookbook/22_cmake_hip_lang is current not included in toplevel cmake. To build this sample from toplevel cmake, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt. +## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in toplevel cmake. To build this sample from toplevel cmake, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt.