SWDEV-472723 - Correct file format and remove trailing spaces
Change-Id: Ie40c763e9391fa36d6c890cd0a171659a1502a83
[ROCm/hip-tests commit: 5d042c80fa]
이 커밋은 다음에 포함됨:
@@ -0,0 +1,20 @@
|
||||
# Set the default behavior, in case people don't have core.autocrlf set.
|
||||
* text=auto
|
||||
|
||||
# Explicitly declare text files you want to always be normalized and converted
|
||||
# to have LF line endings on checkout.
|
||||
*.c text eol=lf
|
||||
*.cpp text eol=lf
|
||||
*.cc text eol=lf
|
||||
*.h text eol=lf
|
||||
*.hpp text eol=lf
|
||||
*.txt text eol=lf
|
||||
|
||||
# Define files to support auto-remove trailing white space
|
||||
# Run the command below before adding modified file(s) to the staging area
|
||||
# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
|
||||
*.cpp filter=trimspace
|
||||
*.c filter=trimspace
|
||||
*.h filter=trimspace
|
||||
*.hpp filter=trimspace
|
||||
*.md filter=trimspace
|
||||
@@ -180,7 +180,7 @@ hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests
|
||||
## Debugging support
|
||||
Catch2 allows multiple ways in which you can debug the test case.
|
||||
- The `-b` option breaks into a debugger as soon as a failure is encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger)
|
||||
- Catch2-provided [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
|
||||
- Catch2-provided [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
|
||||
- User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
|
||||
- User can also mention filename.cc:__LineNumber__ to break into a test case via gdb.
|
||||
|
||||
|
||||
@@ -1,119 +1,119 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Test case to validate atomicInc and atomicDec functions.
|
||||
// if TestToRun=1, then atomicInc function will be tested and validated
|
||||
// if TestToRun=2, then atomicDec function will be tested and validated.
|
||||
|
||||
|
||||
// kernel function for atomicInc
|
||||
static __global__ void AtomicCheckInc(int* g_ptr) {
|
||||
atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
|
||||
}
|
||||
|
||||
// kernel function for atomicDec
|
||||
static __global__ void AtomicCheckDec(int* g_ptr) {
|
||||
atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
|
||||
}
|
||||
|
||||
// Host-side model of what AtomicCheckInc leaves in memory for a given start
// value: 0 when the start value is at or above the bound 17, otherwise
// start value + 1.
//
// The kernel operates on the value reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation as well; a negative start
// value therefore counts as "above the bound" and yields 0.
static int verifyResultInc(int value) {
  const unsigned int limit = 17U;
  const unsigned int old = value;  // same bit pattern the device sees
  if (old >= limit) {
    return 0;
  }
  return value + 1;  // here 0 <= value < 17, so no overflow is possible
}
|
||||
|
||||
// Host-side model of what AtomicCheckDec leaves in memory for a given start
// value: the bound 25 when the start value is 0 or above the bound, otherwise
// start value - 1.
//
// The kernel operates on the value reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation as well; a negative start
// value therefore counts as "above the bound" and yields 25.
static int verifyResultDec(int value) {
  const unsigned int limit = 25U;
  const unsigned int old = value;  // same bit pattern the device sees
  if (old == 0U || old > limit) {
    return 25;
  }
  return value - 1;  // here 1 <= value <= 25
}
|
||||
|
||||
// common fuction to launch atomic functions kernel.
|
||||
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
|
||||
unsigned int memSize = sizeof(int) * 1;
|
||||
int *dptr{nullptr};
|
||||
// allocate device memory
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
|
||||
// copy host memory to device
|
||||
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
|
||||
// launch kernel function
|
||||
if (TestToRun == 1) {
|
||||
AtomicCheckInc<<<1, 1>>>(dptr);
|
||||
} else if (TestToRun == 2) {
|
||||
AtomicCheckDec<<<1, 1>>>(dptr);
|
||||
}
|
||||
// copy back from device to host
|
||||
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
|
||||
// verify the results.
|
||||
if (TestToRun == 1) {
|
||||
int result = verifyResultInc(val);
|
||||
REQUIRE(result == Hptr[0]);
|
||||
} else if (TestToRun == 2) {
|
||||
int result = verifyResultDec(val);
|
||||
REQUIRE(result == Hptr[0]);
|
||||
}
|
||||
// Cleanup memory
|
||||
HIP_CHECK(hipFree(dptr));
|
||||
}
|
||||
|
||||
// Runs the atomicInc check (TestToRun = 1) for start values below, above and
// equal to the bound used by the kernel.
TEST_CASE("Unit_AtomicFunctions_Inc") {
  // One int with automatic storage replaces the former malloc/free pair: there
  // is no allocation result to check and nothing to leak when a failing
  // assertion aborts the test case.
  int hostValue = 0;
  int val;
  SECTION("Test case when value is lesser than limit") {
    val = hostValue = 10;
    launchAtomicFunction(&hostValue, val, 1);
  }
  SECTION("Test case when value is greater than limit") {
    val = hostValue = 20;
    launchAtomicFunction(&hostValue, val, 1);
  }
  SECTION("Test case when value is equal to the limit") {
    val = hostValue = 17;
    launchAtomicFunction(&hostValue, val, 1);
  }
}
|
||||
|
||||
// Runs the atomicDec check (TestToRun = 2) for start values below, above and
// equal to the bound used by the kernel.
TEST_CASE("Unit_AtomicFunctions_Dec") {
  // One int with automatic storage replaces the former malloc/free pair: there
  // is no allocation result to check and nothing to leak when a failing
  // assertion aborts the test case.
  int hostValue = 0;
  int val;
  SECTION("Test case when value is less than limit") {
    val = hostValue = 4;
    launchAtomicFunction(&hostValue, val, 2);
  }
  SECTION("Test case when value is greater than limit") {
    val = hostValue = 31;
    launchAtomicFunction(&hostValue, val, 2);
  }
  SECTION("Test case when value is equal to the limit") {
    val = hostValue = 25;
    launchAtomicFunction(&hostValue, val, 2);
  }
}
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
// Test case to validate atomicInc and atomicDec functions.
|
||||
// if TestToRun=1, then atomicInc function will be tested and validated
|
||||
// if TestToRun=2, then atomicDec function will be tested and validated.
|
||||
|
||||
|
||||
// kernel function for atomicInc
|
||||
static __global__ void AtomicCheckInc(int* g_ptr) {
|
||||
atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
|
||||
}
|
||||
|
||||
// kernel function for atomicDec
|
||||
static __global__ void AtomicCheckDec(int* g_ptr) {
|
||||
atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
|
||||
}
|
||||
|
||||
// Host-side model of what AtomicCheckInc leaves in memory for a given start
// value: 0 when the start value is at or above the bound 17, otherwise
// start value + 1.
//
// The kernel operates on the value reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation as well; a negative start
// value therefore counts as "above the bound" and yields 0.
static int verifyResultInc(int value) {
  const unsigned int limit = 17U;
  const unsigned int old = value;  // same bit pattern the device sees
  if (old >= limit) {
    return 0;
  }
  return value + 1;  // here 0 <= value < 17, so no overflow is possible
}
|
||||
|
||||
// Host-side model of what AtomicCheckDec leaves in memory for a given start
// value: the bound 25 when the start value is 0 or above the bound, otherwise
// start value - 1.
//
// The kernel operates on the value reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation as well; a negative start
// value therefore counts as "above the bound" and yields 25.
static int verifyResultDec(int value) {
  const unsigned int limit = 25U;
  const unsigned int old = value;  // same bit pattern the device sees
  if (old == 0U || old > limit) {
    return 25;
  }
  return value - 1;  // here 1 <= value <= 25
}
|
||||
|
||||
// common fuction to launch atomic functions kernel.
|
||||
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
|
||||
unsigned int memSize = sizeof(int) * 1;
|
||||
int *dptr{nullptr};
|
||||
// allocate device memory
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
|
||||
// copy host memory to device
|
||||
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
|
||||
// launch kernel function
|
||||
if (TestToRun == 1) {
|
||||
AtomicCheckInc<<<1, 1>>>(dptr);
|
||||
} else if (TestToRun == 2) {
|
||||
AtomicCheckDec<<<1, 1>>>(dptr);
|
||||
}
|
||||
// copy back from device to host
|
||||
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
|
||||
// verify the results.
|
||||
if (TestToRun == 1) {
|
||||
int result = verifyResultInc(val);
|
||||
REQUIRE(result == Hptr[0]);
|
||||
} else if (TestToRun == 2) {
|
||||
int result = verifyResultDec(val);
|
||||
REQUIRE(result == Hptr[0]);
|
||||
}
|
||||
// Cleanup memory
|
||||
HIP_CHECK(hipFree(dptr));
|
||||
}
|
||||
|
||||
// Runs the atomicInc check (TestToRun = 1) for start values below, above and
// equal to the bound used by the kernel.
TEST_CASE("Unit_AtomicFunctions_Inc") {
  // One int with automatic storage replaces the former malloc/free pair: there
  // is no allocation result to check and nothing to leak when a failing
  // assertion aborts the test case.
  int hostValue = 0;
  int val;
  SECTION("Test case when value is lesser than limit") {
    val = hostValue = 10;
    launchAtomicFunction(&hostValue, val, 1);
  }
  SECTION("Test case when value is greater than limit") {
    val = hostValue = 20;
    launchAtomicFunction(&hostValue, val, 1);
  }
  SECTION("Test case when value is equal to the limit") {
    val = hostValue = 17;
    launchAtomicFunction(&hostValue, val, 1);
  }
}
|
||||
|
||||
// Runs the atomicDec check (TestToRun = 2) for start values below, above and
// equal to the bound used by the kernel.
TEST_CASE("Unit_AtomicFunctions_Dec") {
  // One int with automatic storage replaces the former malloc/free pair: there
  // is no allocation result to check and nothing to leak when a failing
  // assertion aborts the test case.
  int hostValue = 0;
  int val;
  SECTION("Test case when value is less than limit") {
    val = hostValue = 4;
    launchAtomicFunction(&hostValue, val, 2);
  }
  SECTION("Test case when value is greater than limit") {
    val = hostValue = 31;
    launchAtomicFunction(&hostValue, val, 2);
  }
  SECTION("Test case when value is equal to the limit") {
    val = hostValue = 25;
    launchAtomicFunction(&hostValue, val, 2);
  }
}
|
||||
|
||||
@@ -1,81 +1,81 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each double-precision arithmetic intrinsic once from device code and
// discards the result. The function performs no checks of its own.
__device__ void double_precision_intrinsics() {
  // The *_rn variants are referenced unconditionally.
  __dadd_rn(0.0, 1.0);
  __ddiv_rn(0.0, 1.0);
  __dmul_rn(1.0, 2.0);
  __drcp_rn(2.0);
  __dsqrt_rn(4.0);
  __dsub_rn(2.0, 1.0);
  __fma_rn(1.0, 2.0, 3.0);

#if defined OCML_BASIC_ROUNDED_OPERATIONS
  // The *_rd, *_ru and *_rz variants are referenced only when
  // OCML_BASIC_ROUNDED_OPERATIONS is defined.
  __dadd_rd(0.0, 1.0);
  __dadd_ru(0.0, 1.0);
  __dadd_rz(0.0, 1.0);

  __ddiv_rd(0.0, 1.0);
  __ddiv_ru(0.0, 1.0);
  __ddiv_rz(0.0, 1.0);

  __dmul_rd(1.0, 2.0);
  __dmul_ru(1.0, 2.0);
  __dmul_rz(1.0, 2.0);

  __drcp_rd(2.0);
  __drcp_ru(2.0);
  __drcp_rz(2.0);

  __dsqrt_rd(4.0);
  __dsqrt_ru(4.0);
  __dsqrt_rz(4.0);

  __dsub_rd(2.0, 1.0);
  __dsub_ru(2.0, 1.0);
  __dsub_rz(2.0, 1.0);

  __fma_rd(1.0, 2.0, 3.0);
  __fma_ru(1.0, 2.0, 3.0);
  __fma_rz(1.0, 2.0, 3.0);
#endif
}
|
||||
|
||||
// Kernel entry point that forwards to double_precision_intrinsics().
// The int parameter is unnamed and not read by the body.
__global__ void compileDoublePrecisionIntrinsics(int) {
|
||||
double_precision_intrinsics();
|
||||
}
|
||||
|
||||
// Launches compileDoublePrecisionIntrinsics with a 1x1x1 grid of 1x1x1 blocks.
// The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
  const dim3 singleBlock(1, 1, 1);
  const dim3 singleThread(1, 1, 1);
  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, singleBlock,
                     singleThread, 0, 0, 1);
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each double-precision arithmetic intrinsic once from device code and
// discards the result. The function performs no checks of its own.
__device__ void double_precision_intrinsics() {
  // The *_rn variants are referenced unconditionally.
  __dadd_rn(0.0, 1.0);
  __ddiv_rn(0.0, 1.0);
  __dmul_rn(1.0, 2.0);
  __drcp_rn(2.0);
  __dsqrt_rn(4.0);
  __dsub_rn(2.0, 1.0);
  __fma_rn(1.0, 2.0, 3.0);

#if defined OCML_BASIC_ROUNDED_OPERATIONS
  // The *_rd, *_ru and *_rz variants are referenced only when
  // OCML_BASIC_ROUNDED_OPERATIONS is defined.
  __dadd_rd(0.0, 1.0);
  __dadd_ru(0.0, 1.0);
  __dadd_rz(0.0, 1.0);

  __ddiv_rd(0.0, 1.0);
  __ddiv_ru(0.0, 1.0);
  __ddiv_rz(0.0, 1.0);

  __dmul_rd(1.0, 2.0);
  __dmul_ru(1.0, 2.0);
  __dmul_rz(1.0, 2.0);

  __drcp_rd(2.0);
  __drcp_ru(2.0);
  __drcp_rz(2.0);

  __dsqrt_rd(4.0);
  __dsqrt_ru(4.0);
  __dsqrt_rz(4.0);

  __dsub_rd(2.0, 1.0);
  __dsub_ru(2.0, 1.0);
  __dsub_rz(2.0, 1.0);

  __fma_rd(1.0, 2.0, 3.0);
  __fma_ru(1.0, 2.0, 3.0);
  __fma_rz(1.0, 2.0, 3.0);
#endif
}
|
||||
|
||||
// Kernel entry point that forwards to double_precision_intrinsics().
// The int parameter is unnamed and not read by the body.
__global__ void compileDoublePrecisionIntrinsics(int) {
|
||||
double_precision_intrinsics();
|
||||
}
|
||||
|
||||
// Launches compileDoublePrecisionIntrinsics with a 1x1x1 grid of 1x1x1 blocks.
// The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
  const dim3 singleBlock(1, 1, 1);
  const dim3 singleThread(1, 1, 1);
  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, singleBlock,
                     singleThread, 0, 0, 1);
}
|
||||
|
||||
@@ -1,133 +1,133 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each listed double-precision math function once from device code and
// discards the return value. iX, fX and fY only serve as output arguments for
// the functions that need one.
__device__ void double_precision_math_functions() {
  int iX;
  double fX, fY;

  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  cospi(0.0);
  cyl_bessel_i0(0.0);
  cyl_bessel_i1(0.0);
  erf(0.0);
  erfc(0.0);
  erfcinv(2.0);
  erfcx(0.0);
  erfinv(1.0);
  exp(0.0);
  exp10(0.0);
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  isfinite(0.0);
  isinf(0.0);
  isnan(0.0);
  j0(0.0);
  j1(0.0);
  // The order argument of jn is an integer (as in the yn call below), so pass
  // an int literal instead of relying on an implicit double-to-int conversion.
  jn(-1, 1.0);
  ldexp(0.0, 0);
  lgamma(1.0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  nextafter(0.0, 0.0);
  fX = 1.0;  // one-element input vector for norm()
  norm(1, &fX);
  norm3d(1.0, 0.0, 0.0);
  norm4d(1.0, 0.0, 0.0, 0.0);
  normcdf(0.0);
  normcdfinv(1.0);
  pow(1.0, 0.0);
  rcbrt(1.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rhypot(0.0, 1.0);
  rint(1.0);
  fX = 1.0;  // one-element input vector for rnorm()
  rnorm(1, &fX);
  rnorm3d(0.0, 0.0, 1.0);
  rnorm4d(0.0, 0.0, 0.0, 1.0);
  round(0.0);
  rsqrt(1.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  signbit(1.0);
  sin(0.0);
#if HT_AMD
  // NV A100 has a bug in sincos(), so temporarily disable it
  sincos(0.0, &fX, &fY);
#endif
  sincospi(0.0, &fX, &fY);
  sinh(0.0);
  sinpi(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
}
|
||||
|
||||
// Kernel entry point that forwards to double_precision_math_functions().
// The int parameter is unnamed and not read by the body.
__global__ void compileDoublePrecisionMathOnDevice(int) {
|
||||
double_precision_math_functions();
|
||||
}
|
||||
|
||||
// Launches compileDoublePrecisionMathOnDevice with a 1x1x1 grid of 1x1x1
// blocks. The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionMathDevice") {
  const dim3 singleBlock(1, 1, 1);
  const dim3 singleThread(1, 1, 1);
  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, singleBlock,
                     singleThread, 0, 0, 1);
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each listed double-precision math function once from device code and
// discards the return value. iX, fX and fY only serve as output arguments for
// the functions that need one.
__device__ void double_precision_math_functions() {
  int iX;
  double fX, fY;

  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  cospi(0.0);
  cyl_bessel_i0(0.0);
  cyl_bessel_i1(0.0);
  erf(0.0);
  erfc(0.0);
  erfcinv(2.0);
  erfcx(0.0);
  erfinv(1.0);
  exp(0.0);
  exp10(0.0);
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  isfinite(0.0);
  isinf(0.0);
  isnan(0.0);
  j0(0.0);
  j1(0.0);
  // The order argument of jn is an integer (as in the yn call below), so pass
  // an int literal instead of relying on an implicit double-to-int conversion.
  jn(-1, 1.0);
  ldexp(0.0, 0);
  lgamma(1.0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  nextafter(0.0, 0.0);
  fX = 1.0;  // one-element input vector for norm()
  norm(1, &fX);
  norm3d(1.0, 0.0, 0.0);
  norm4d(1.0, 0.0, 0.0, 0.0);
  normcdf(0.0);
  normcdfinv(1.0);
  pow(1.0, 0.0);
  rcbrt(1.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rhypot(0.0, 1.0);
  rint(1.0);
  fX = 1.0;  // one-element input vector for rnorm()
  rnorm(1, &fX);
  rnorm3d(0.0, 0.0, 1.0);
  rnorm4d(0.0, 0.0, 0.0, 1.0);
  round(0.0);
  rsqrt(1.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  signbit(1.0);
  sin(0.0);
#if HT_AMD
  // NV A100 has a bug in sincos(), so temporarily disable it
  sincos(0.0, &fX, &fY);
#endif
  sincospi(0.0, &fX, &fY);
  sinh(0.0);
  sinpi(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
}
|
||||
|
||||
// Kernel entry point that forwards to double_precision_math_functions().
// The int parameter is unnamed and not read by the body.
__global__ void compileDoublePrecisionMathOnDevice(int) {
|
||||
double_precision_math_functions();
|
||||
}
|
||||
|
||||
// Launches compileDoublePrecisionMathOnDevice with a 1x1x1 grid of 1x1x1
// blocks. The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionMathDevice") {
  const dim3 singleBlock(1, 1, 1);
  const dim3 singleThread(1, 1, 1);
  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, singleBlock,
                     singleThread, 0, 0, 1);
}
|
||||
|
||||
@@ -1,117 +1,117 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <cmath>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each listed double-precision math function once from host code and
// discards the return value. iX, fX and fY only serve as output arguments for
// the functions that need one.
__host__ static void double_precision_math_functions() {
  int iX;
  double fX, fY;

  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  erf(0.0);
  erfc(0.0);
  exp(0.0);
#ifdef __unix__
  exp10(0.0);
#endif
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  std::isfinite(0.0);
  std::isinf(0.0);
  std::isnan(0.0);
  // The order argument of jn/_jn is an integer (as in the yn/_yn calls below),
  // so pass an int literal instead of relying on a double-to-int conversion.
#ifdef __unix__
  j0(0.0);
  j1(0.0);
  jn(-1, 1.0);
#elif _WIN64
  _j0(0.0);
  _j1(0.0);
  _jn(-1, 1.0);
#endif
  ldexp(0.0, 0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  pow(1.0, 0.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rint(1.0);
  round(0.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  std::signbit(1.0);
  sin(0.0);
  // sincos() is not part of standard <cmath>; it is referenced only under the
  // same __unix__ guard that the other non-standard calls in this list use.
#ifdef __unix__
  sincos(0.0, &fX, &fY);
#endif
  sinh(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
#ifdef __unix__
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
#elif _WIN64
  _y0(1.0);
  _y1(1.0);
  _yn(1, 1.0);
#endif
}
|
||||
|
||||
// Calls double_precision_math_functions() once on the host.
// The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionMathHost") {
|
||||
double_precision_math_functions();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <cmath>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Calls each listed double-precision math function once from host code and
// discards the return value. iX, fX and fY only serve as output arguments for
// the functions that need one.
__host__ static void double_precision_math_functions() {
  int iX;
  double fX, fY;

  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  erf(0.0);
  erfc(0.0);
  exp(0.0);
#ifdef __unix__
  exp10(0.0);
#endif
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  std::isfinite(0.0);
  std::isinf(0.0);
  std::isnan(0.0);
  // The order argument of jn/_jn is an integer (as in the yn/_yn calls below),
  // so pass an int literal instead of relying on a double-to-int conversion.
#ifdef __unix__
  j0(0.0);
  j1(0.0);
  jn(-1, 1.0);
#elif _WIN64
  _j0(0.0);
  _j1(0.0);
  _jn(-1, 1.0);
#endif
  ldexp(0.0, 0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  pow(1.0, 0.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rint(1.0);
  round(0.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  std::signbit(1.0);
  sin(0.0);
  // sincos() is not part of standard <cmath>; it is referenced only under the
  // same __unix__ guard that the other non-standard calls in this list use.
#ifdef __unix__
  sincos(0.0, &fX, &fY);
#endif
  sinh(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
#ifdef __unix__
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
#elif _WIN64
  _y0(1.0);
  _y1(1.0);
  _yn(1, 1.0);
#endif
}
|
||||
|
||||
// Calls double_precision_math_functions() once on the host.
// The test body contains no assertions.
TEST_CASE("Unit_DoublePrecisionMathHost") {
|
||||
double_precision_math_functions();
|
||||
}
|
||||
|
||||
@@ -1,128 +1,128 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
// Device-side call list for the single-precision math functions. Every routine
// is invoked once with constant arguments; most results are discarded and a
// few are stored into scratch locals. Nothing is verified here.
__device__ void FloatMathPrecise() {
  int iX;         // integer output of frexpf / remquof
  float fX, fY;   // scratch values and pointer outputs

  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  fX = ceilf(0.0f);
  fX = copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);
  cyl_bessel_i0f(0.0f);
  cyl_bessel_i1f(0.0f);
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fX = fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  fX = floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fX = fmaxf(0.0f, 0.0f);
  fX = fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &iX);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  isfinite(0.0f);
  fX = isinf(0.0f);
  fX = isnan(0.0f);
  j0f(0.0f);
  j1f(0.0f);
  // The Bessel order is an integer parameter; pass an int literal, matching
  // the ynf call at the end of this list, instead of converting from -1.0f.
  jnf(-1, 1.0f);
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &fX);
  fX = nanf("1");
  fX = nearbyintf(0.0f);
  nextafterf(0.0f, 0.0f);
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  // normf reads its input through the pointer, so give fX a defined value.
  fX = 1.0f;
  normf(1, &fX);
  powf(1.0f, 0.0f);
  rcbrtf(1.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &iX);
  rhypotf(0.0f, 1.0f);
  fY = rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  // Same as for normf: rnormf reads fX through the pointer.
  fX = 1.0f;
  rnormf(1, &fX);
  fY = roundf(0.0f);
  rsqrtf(1.0f);
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  signbit(1.0f);
  sincosf(0.0f, &fX, &fY);
  sincospif(0.0f, &fX, &fY);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  fY = truncf(0.0f);
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
|
||||
|
||||
__global__ void CompileFloatMathPrecise(int) {
|
||||
FloatMathPrecise();
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_FloatMathPrecise") {
|
||||
hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
|
||||
dim3(1, 1, 1), 0, 0, 1);
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
// Device-side call list for the single-precision math functions. Every routine
// is invoked once with constant arguments; most results are discarded and a
// few are stored into scratch locals. Nothing is verified here.
__device__ void FloatMathPrecise() {
  int iX;         // integer output of frexpf / remquof
  float fX, fY;   // scratch values and pointer outputs

  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  fX = ceilf(0.0f);
  fX = copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);
  cyl_bessel_i0f(0.0f);
  cyl_bessel_i1f(0.0f);
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fX = fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  fX = floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fX = fmaxf(0.0f, 0.0f);
  fX = fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &iX);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  isfinite(0.0f);
  fX = isinf(0.0f);
  fX = isnan(0.0f);
  j0f(0.0f);
  j1f(0.0f);
  // The Bessel order is an integer parameter; pass an int literal, matching
  // the ynf call at the end of this list, instead of converting from -1.0f.
  jnf(-1, 1.0f);
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &fX);
  fX = nanf("1");
  fX = nearbyintf(0.0f);
  nextafterf(0.0f, 0.0f);
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  // normf reads its input through the pointer, so give fX a defined value.
  fX = 1.0f;
  normf(1, &fX);
  powf(1.0f, 0.0f);
  rcbrtf(1.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &iX);
  rhypotf(0.0f, 1.0f);
  fY = rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  // Same as for normf: rnormf reads fX through the pointer.
  fX = 1.0f;
  rnormf(1, &fX);
  fY = roundf(0.0f);
  rsqrtf(1.0f);
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  signbit(1.0f);
  sincosf(0.0f, &fX, &fY);
  sincospif(0.0f, &fX, &fY);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  fY = truncf(0.0f);
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
|
||||
|
||||
__global__ void CompileFloatMathPrecise(int) {
|
||||
FloatMathPrecise();
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_FloatMathPrecise") {
|
||||
hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
|
||||
dim3(1, 1, 1), 0, 0, 1);
|
||||
}
|
||||
|
||||
@@ -1,68 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/device_functions.h>
|
||||
#include <algorithm>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Device-side call list for the integer intrinsics. Each intrinsic is invoked
// once with constant operands; return values are dropped except for min/max,
// whose results land in otherwise unused locals.
__device__ void integer_intrinsics() {
  // Shared operands for the four funnel-shift variants.
  const unsigned int hiWord = 0xfacefeed;
  const unsigned int loWord = 0xdeadbeef;

  // Bit reversal, byte permute, leading-zero count, find-first-set.
  __brev(10u);
  __brevll(static_cast<uint64_t>(10));
  __byte_perm(0u, 0u, 0);
  __clz(10);
  __clzll(static_cast<int64_t>(10));
  __ffs(10);
  __ffsll(static_cast<long long>(10));  // NOLINT

  // Funnel shifts, all with a shift amount of 0.
  __funnelshift_l(hiWord, loWord, 0);
  __funnelshift_lc(hiWord, loWord, 0);
  __funnelshift_r(hiWord, loWord, 0);
  __funnelshift_rc(hiWord, loWord, 0);

  // Signed arithmetic helpers and population counts.
  __hadd(1, 3);
  __mul24(1, 2);
  __mul64hi(static_cast<int64_t>(1), static_cast<int64_t>(2));
  __mulhi(1, 2);
  __popc(4u);
  __popcll(static_cast<uint64_t>(4));
  int smaller = min(4, 5);
  int larger = max(4, 5);
  __rhadd(1, 2);
  __sad(1, 2, 0);

  // Unsigned counterparts.
  __uhadd(1u, 3u);
  __umul24(1u, 2u);
  __umul64hi(static_cast<uint64_t>(1), static_cast<uint64_t>(2));
  __umulhi(1u, 2u);
  __urhadd(1u, 2u);
  __usad(1u, 2u, 0);

  assert(1);
}
|
||||
|
||||
__global__ void compileIntegerIntrinsics(int) {
|
||||
integer_intrinsics();
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_IntegerIntrinsics") {
|
||||
hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
|
||||
dim3(1, 1, 1), 0, 0, 1);
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/device_functions.h>
|
||||
#include <algorithm>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
// Device-side call list for the integer intrinsics. Each intrinsic is invoked
// once with constant operands; return values are dropped except for min/max,
// whose results land in otherwise unused locals.
__device__ void integer_intrinsics() {
  // Shared operands for the four funnel-shift variants.
  const unsigned int hiWord = 0xfacefeed;
  const unsigned int loWord = 0xdeadbeef;

  // Bit reversal, byte permute, leading-zero count, find-first-set.
  __brev(10u);
  __brevll(static_cast<uint64_t>(10));
  __byte_perm(0u, 0u, 0);
  __clz(10);
  __clzll(static_cast<int64_t>(10));
  __ffs(10);
  __ffsll(static_cast<long long>(10));  // NOLINT

  // Funnel shifts, all with a shift amount of 0.
  __funnelshift_l(hiWord, loWord, 0);
  __funnelshift_lc(hiWord, loWord, 0);
  __funnelshift_r(hiWord, loWord, 0);
  __funnelshift_rc(hiWord, loWord, 0);

  // Signed arithmetic helpers and population counts.
  __hadd(1, 3);
  __mul24(1, 2);
  __mul64hi(static_cast<int64_t>(1), static_cast<int64_t>(2));
  __mulhi(1, 2);
  __popc(4u);
  __popcll(static_cast<uint64_t>(4));
  int smaller = min(4, 5);
  int larger = max(4, 5);
  __rhadd(1, 2);
  __sad(1, 2, 0);

  // Unsigned counterparts.
  __uhadd(1u, 3u);
  __umul24(1u, 2u);
  __umul64hi(static_cast<uint64_t>(1), static_cast<uint64_t>(2));
  __umulhi(1u, 2u);
  __urhadd(1u, 2u);
  __usad(1u, 2u, 0);

  assert(1);
}
|
||||
|
||||
__global__ void compileIntegerIntrinsics(int) {
|
||||
integer_intrinsics();
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_IntegerIntrinsics") {
|
||||
hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
|
||||
dim3(1, 1, 1), 0, 0, 1);
|
||||
}
|
||||
|
||||
@@ -1,298 +1,298 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
using namespace std;
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Auto-Verification Code
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool verifyBitwise(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
bool verifyBitwise(T* gpuData, int len) {
|
||||
// Atomic and
|
||||
T val = 0xff;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 9th element should be 1
|
||||
val &= (2 * i + 7);
|
||||
}
|
||||
REQUIRE(val == gpuData[8]);
|
||||
|
||||
// atomic Or
|
||||
val = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 10th element should be 0xff
|
||||
val |= (1 << i);
|
||||
}
|
||||
REQUIRE(val == gpuData[9]);
|
||||
|
||||
// atomic Xor
|
||||
val = 0xff;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 11th element should be 0xff
|
||||
val ^= i;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[10]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verifySub(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename enable_if<
|
||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||
bool verifySub(T* gpuData, int len) {
|
||||
T val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val -= 10;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[1]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verifyExch(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<!is_same<T, double> {}>::type* = nullptr> // NOLINT
|
||||
bool computeExchExch(T* gpuData, int len) {
|
||||
T val = 0;
|
||||
|
||||
for (T i = 0; i < len; ++i) {
|
||||
if (i == gpuData[2]) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool VerifyIntegral(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
bool VerifyIntegral(T* gpuData, int len) {
|
||||
// atomic Max
|
||||
T val = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// fourth element should be len-1
|
||||
val = max(val, static_cast<T>(i));
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[3]);
|
||||
|
||||
// atomic Min
|
||||
val = 1 << 8;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = min(val, static_cast<T>(i));
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[4]);
|
||||
|
||||
// atomic Inc
|
||||
T limit = 17;
|
||||
val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = (val >= limit) ? 0 : val + 1;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[5]);
|
||||
|
||||
// atomic Dec
|
||||
limit = 137;
|
||||
val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = ((val == 0) || (val > limit)) ? limit : val - 1;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[6]);
|
||||
|
||||
// atomic CAS
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// eighth element should be a member of [0, len)
|
||||
if (static_cast<T>(i) == gpuData[7]) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
|
||||
}
|
||||
|
||||
// Entry point of the host-side verification. Checks the atomicAdd slot
// (gpuData[0] must equal 10 added len times), then runs the integral and
// exchange checks; returns their combined result.
template<typename T>
bool verifyData(T* gpuData, int len) {
  T val = 0;
  for (int remaining = len; remaining > 0; --remaining) {
    val += 10;
  }

  REQUIRE(val == gpuData[0]);

  // Same short-circuit order as a single `&&` expression.
  if (!VerifyIntegral(gpuData, len)) {
    return false;
  }
  return verifyExch(gpuData, len);
}
|
||||
|
||||
__device__
|
||||
void testKernelExch(...) {}
|
||||
|
||||
template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelExch(T* g_odata) {
|
||||
// access thread id
|
||||
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// Atomic exchange
|
||||
atomicExch(&g_odata[2], tid);
|
||||
}
|
||||
|
||||
__device__
|
||||
void testKernelSub(...) {}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename enable_if<
|
||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelSub(T* g_odata) {
|
||||
// Atomic subtraction (final should be 0)
|
||||
atomicSub(&g_odata[1], 10);
|
||||
}
|
||||
|
||||
__device__
|
||||
void testKernelIntegral(...) {}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelIntegral(T* g_odata) {
|
||||
// access thread id
|
||||
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// Atomic maximum
|
||||
atomicMax(&g_odata[3], tid);
|
||||
|
||||
// Atomic minimum
|
||||
atomicMin(&g_odata[4], tid);
|
||||
|
||||
// Atomic increment (modulo 17+1)
|
||||
atomicInc((unsigned int*)&g_odata[5], 17);
|
||||
|
||||
// Atomic decrement
|
||||
atomicDec((unsigned int*)&g_odata[6], 137);
|
||||
|
||||
// Atomic compare-and-swap
|
||||
atomicCAS(&g_odata[7], tid - 1, tid);
|
||||
|
||||
// Bitwise atomic instructions
|
||||
|
||||
// Atomic AND
|
||||
atomicAnd(&g_odata[8], 2 * tid + 7);
|
||||
|
||||
// Atomic OR
|
||||
atomicOr(&g_odata[9], 1 << tid);
|
||||
|
||||
// Atomic XOR
|
||||
atomicXor(&g_odata[10], tid);
|
||||
|
||||
testKernelSub(g_odata);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void testKernel(T* g_odata) {
|
||||
// Atomic addition
|
||||
atomicAdd(&g_odata[0], 10);
|
||||
testKernelIntegral(g_odata);
|
||||
testKernelExch(g_odata);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void runTest() {
|
||||
bool testResult = true;
|
||||
unsigned int numThreads = 256;
|
||||
unsigned int numBlocks = 64;
|
||||
unsigned int numData = 11;
|
||||
unsigned int memSize = sizeof(T) * numData;
|
||||
|
||||
// allocate mem for the result on host side
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
|
||||
// initialize the memory
|
||||
for (unsigned int i = 0; i < numData; i++) {
|
||||
hOData[i] = 0;
|
||||
}
|
||||
// To make the AND and XOR tests generate something other than 0...
|
||||
hOData[8] = hOData[10] = 0xff;
|
||||
|
||||
// allocate device memory for result
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(
|
||||
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
|
||||
// Compute reference solution
|
||||
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
}
|
||||
|
||||
// Runs the atomics test once per element type, each in its own section.
// The 64-bit integer and double variants are only built when HT_AMD is set.
TEST_CASE("Unit_SimpleAtomicsTest") {
  SECTION("test for int") { runTest<int>(); }
  SECTION("test for unsigned int") { runTest<unsigned int>(); }
  SECTION("test for float") { runTest<float>(); }
#if HT_AMD
  SECTION("test for unsigned long long") { runTest<uint64_t>(); }
  SECTION("test for double") { runTest<double>(); }
#endif
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
using namespace std;
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Auto-Verification Code
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool verifyBitwise(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
bool verifyBitwise(T* gpuData, int len) {
|
||||
// Atomic and
|
||||
T val = 0xff;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 9th element should be 1
|
||||
val &= (2 * i + 7);
|
||||
}
|
||||
REQUIRE(val == gpuData[8]);
|
||||
|
||||
// atomic Or
|
||||
val = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 10th element should be 0xff
|
||||
val |= (1 << i);
|
||||
}
|
||||
REQUIRE(val == gpuData[9]);
|
||||
|
||||
// atomic Xor
|
||||
val = 0xff;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// 11th element should be 0xff
|
||||
val ^= i;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[10]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verifySub(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename enable_if<
|
||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||
bool verifySub(T* gpuData, int len) {
|
||||
T val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val -= 10;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[1]);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verifyExch(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<!is_same<T, double> {}>::type* = nullptr> // NOLINT
|
||||
bool computeExchExch(T* gpuData, int len) {
|
||||
T val = 0;
|
||||
|
||||
for (T i = 0; i < len; ++i) {
|
||||
if (i == gpuData[2]) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool VerifyIntegral(...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
bool VerifyIntegral(T* gpuData, int len) {
|
||||
// atomic Max
|
||||
T val = 0;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// fourth element should be len-1
|
||||
val = max(val, static_cast<T>(i));
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[3]);
|
||||
|
||||
// atomic Min
|
||||
val = 1 << 8;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = min(val, static_cast<T>(i));
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[4]);
|
||||
|
||||
// atomic Inc
|
||||
T limit = 17;
|
||||
val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = (val >= limit) ? 0 : val + 1;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[5]);
|
||||
|
||||
// atomic Dec
|
||||
limit = 137;
|
||||
val = 0;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
val = ((val == 0) || (val > limit)) ? limit : val - 1;
|
||||
}
|
||||
|
||||
REQUIRE(val == gpuData[6]);
|
||||
|
||||
// atomic CAS
|
||||
for (int i = 0; i < len; ++i) {
|
||||
// eighth element should be a member of [0, len)
|
||||
if (static_cast<T>(i) == gpuData[7]) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
|
||||
}
|
||||
|
||||
// Entry point of the host-side verification. Checks the atomicAdd slot
// (gpuData[0] must equal 10 added len times), then runs the integral and
// exchange checks; returns their combined result.
template<typename T>
bool verifyData(T* gpuData, int len) {
  T val = 0;
  for (int remaining = len; remaining > 0; --remaining) {
    val += 10;
  }

  REQUIRE(val == gpuData[0]);

  // Same short-circuit order as a single `&&` expression.
  if (!VerifyIntegral(gpuData, len)) {
    return false;
  }
  return verifyExch(gpuData, len);
}
|
||||
|
||||
__device__
|
||||
void testKernelExch(...) {}
|
||||
|
||||
template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelExch(T* g_odata) {
|
||||
// access thread id
|
||||
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// Atomic exchange
|
||||
atomicExch(&g_odata[2], tid);
|
||||
}
|
||||
|
||||
__device__
|
||||
void testKernelSub(...) {}
|
||||
|
||||
template<
|
||||
typename T,
|
||||
typename enable_if<
|
||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelSub(T* g_odata) {
|
||||
// Atomic subtraction (final should be 0)
|
||||
atomicSub(&g_odata[1], 10);
|
||||
}
|
||||
|
||||
__device__
|
||||
void testKernelIntegral(...) {}
|
||||
|
||||
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
|
||||
__device__
|
||||
void testKernelIntegral(T* g_odata) {
|
||||
// access thread id
|
||||
const T tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// Atomic maximum
|
||||
atomicMax(&g_odata[3], tid);
|
||||
|
||||
// Atomic minimum
|
||||
atomicMin(&g_odata[4], tid);
|
||||
|
||||
// Atomic increment (modulo 17+1)
|
||||
atomicInc((unsigned int*)&g_odata[5], 17);
|
||||
|
||||
// Atomic decrement
|
||||
atomicDec((unsigned int*)&g_odata[6], 137);
|
||||
|
||||
// Atomic compare-and-swap
|
||||
atomicCAS(&g_odata[7], tid - 1, tid);
|
||||
|
||||
// Bitwise atomic instructions
|
||||
|
||||
// Atomic AND
|
||||
atomicAnd(&g_odata[8], 2 * tid + 7);
|
||||
|
||||
// Atomic OR
|
||||
atomicOr(&g_odata[9], 1 << tid);
|
||||
|
||||
// Atomic XOR
|
||||
atomicXor(&g_odata[10], tid);
|
||||
|
||||
testKernelSub(g_odata);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
__global__ void testKernel(T* g_odata) {
|
||||
// Atomic addition
|
||||
atomicAdd(&g_odata[0], 10);
|
||||
testKernelIntegral(g_odata);
|
||||
testKernelExch(g_odata);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void runTest() {
|
||||
bool testResult = true;
|
||||
unsigned int numThreads = 256;
|
||||
unsigned int numBlocks = 64;
|
||||
unsigned int numData = 11;
|
||||
unsigned int memSize = sizeof(T) * numData;
|
||||
|
||||
// allocate mem for the result on host side
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
|
||||
// initialize the memory
|
||||
for (unsigned int i = 0; i < numData; i++) {
|
||||
hOData[i] = 0;
|
||||
}
|
||||
// To make the AND and XOR tests generate something other than 0...
|
||||
hOData[8] = hOData[10] = 0xff;
|
||||
|
||||
// allocate device memory for result
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(
|
||||
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
|
||||
// Compute reference solution
|
||||
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
}
|
||||
|
||||
// Runs the atomics test once per element type, each in its own section.
// The 64-bit integer and double variants are only built when HT_AMD is set.
TEST_CASE("Unit_SimpleAtomicsTest") {
  SECTION("test for int") { runTest<int>(); }
  SECTION("test for unsigned int") { runTest<unsigned int>(); }
  SECTION("test for float") { runTest<float>(); }
#if HT_AMD
  SECTION("test for unsigned long long") { runTest<uint64_t>(); }
  SECTION("test for double") { runTest<double>(); }
#endif
}
|
||||
|
||||
@@ -1,101 +1,101 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision device intrinsic once with constant arguments.
 * Every return value is discarded. The directed-rounding variants
 * (_rd, _ru, _rz) are compiled only when OCML_BASIC_ROUNDED_OPERATIONS is
 * defined.
 */
__device__ void single_precision_intrinsics() {
  float outA, outB;  // receive the two results of __sincosf

  __cosf(0.0f);
  __exp10f(0.0f);
  __expf(0.0f);

  // addition
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fadd_rd(0.0f, 1.0f);
#endif
  __fadd_rn(0.0f, 1.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fadd_ru(0.0f, 1.0f);
  __fadd_rz(0.0f, 1.0f);
  // division
  __fdiv_rd(4.0f, 2.0f);
#endif
  __fdiv_rn(4.0f, 2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fdiv_ru(4.0f, 2.0f);
  __fdiv_rz(4.0f, 2.0f);
#endif
  __fdividef(4.0f, 2.0f);

  // fused multiply-add
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmaf_rd(1.0f, 2.0f, 3.0f);
#endif
  __fmaf_rn(1.0f, 2.0f, 3.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmaf_ru(1.0f, 2.0f, 3.0f);
  __fmaf_rz(1.0f, 2.0f, 3.0f);
  // multiplication
  __fmul_rd(1.0f, 2.0f);
#endif
  __fmul_rn(1.0f, 2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmul_ru(1.0f, 2.0f);
  __fmul_rz(1.0f, 2.0f);
  // reciprocal
  __frcp_rd(2.0f);
#endif
  __frcp_rn(2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __frcp_ru(2.0f);
  __frcp_rz(2.0f);
#endif

  // square roots
  __frsqrt_rn(4.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsqrt_rd(4.0f);
#endif
  __fsqrt_rn(4.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsqrt_ru(4.0f);
  __fsqrt_rz(4.0f);
  // subtraction
  __fsub_rd(2.0f, 1.0f);
#endif
  __fsub_rn(2.0f, 1.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsub_ru(2.0f, 1.0f);
  __fsub_rz(2.0f, 1.0f);
#endif

  __log10f(1.0f);
  __log2f(1.0f);
  __logf(1.0f);
  __powf(1.0f, 0.0f);
  __saturatef(0.1f);
  __sincosf(0.0f, &outA, &outB);
  __sinf(0.0f);
  __tanf(0.0f);
}
|
||||
|
||||
// Kernel entry point that simply forwards to single_precision_intrinsics().
// The unnamed int parameter is not used.
__global__ void compileSinglePrecisionIntrinsics(int) {
  single_precision_intrinsics();
}
|
||||
|
||||
/**
 * Launches compileSinglePrecisionIntrinsics with a single thread and checks
 * that neither the launch nor the kernel execution reports a HIP error.
 */
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // The launch returns no status of its own; query it explicitly so that a
  // failed launch fails the test instead of passing silently.
  HIP_CHECK(hipGetLastError());
  // Wait for the kernel so that execution errors surface inside this test.
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/device_functions.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision device intrinsic once with constant arguments.
 * Every return value is discarded. The directed-rounding variants
 * (_rd, _ru, _rz) are compiled only when OCML_BASIC_ROUNDED_OPERATIONS is
 * defined.
 */
__device__ void single_precision_intrinsics() {
  float outA, outB;  // receive the two results of __sincosf

  __cosf(0.0f);
  __exp10f(0.0f);
  __expf(0.0f);

  // addition
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fadd_rd(0.0f, 1.0f);
#endif
  __fadd_rn(0.0f, 1.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fadd_ru(0.0f, 1.0f);
  __fadd_rz(0.0f, 1.0f);
  // division
  __fdiv_rd(4.0f, 2.0f);
#endif
  __fdiv_rn(4.0f, 2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fdiv_ru(4.0f, 2.0f);
  __fdiv_rz(4.0f, 2.0f);
#endif
  __fdividef(4.0f, 2.0f);

  // fused multiply-add
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmaf_rd(1.0f, 2.0f, 3.0f);
#endif
  __fmaf_rn(1.0f, 2.0f, 3.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmaf_ru(1.0f, 2.0f, 3.0f);
  __fmaf_rz(1.0f, 2.0f, 3.0f);
  // multiplication
  __fmul_rd(1.0f, 2.0f);
#endif
  __fmul_rn(1.0f, 2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fmul_ru(1.0f, 2.0f);
  __fmul_rz(1.0f, 2.0f);
  // reciprocal
  __frcp_rd(2.0f);
#endif
  __frcp_rn(2.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __frcp_ru(2.0f);
  __frcp_rz(2.0f);
#endif

  // square roots
  __frsqrt_rn(4.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsqrt_rd(4.0f);
#endif
  __fsqrt_rn(4.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsqrt_ru(4.0f);
  __fsqrt_rz(4.0f);
  // subtraction
  __fsub_rd(2.0f, 1.0f);
#endif
  __fsub_rn(2.0f, 1.0f);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fsub_ru(2.0f, 1.0f);
  __fsub_rz(2.0f, 1.0f);
#endif

  __log10f(1.0f);
  __log2f(1.0f);
  __logf(1.0f);
  __powf(1.0f, 0.0f);
  __saturatef(0.1f);
  __sincosf(0.0f, &outA, &outB);
  __sinf(0.0f);
  __tanf(0.0f);
}
|
||||
|
||||
// Kernel entry point that simply forwards to single_precision_intrinsics().
// The unnamed int parameter is not used.
__global__ void compileSinglePrecisionIntrinsics(int) {
  single_precision_intrinsics();
}
|
||||
|
||||
/**
 * Launches compileSinglePrecisionIntrinsics with a single thread and checks
 * that neither the launch nor the kernel execution reports a HIP error.
 */
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // The launch returns no status of its own; query it explicitly so that a
  // failed launch fails the test instead of passing silently.
  HIP_CHECK(hipGetLastError());
  // Wait for the kernel so that execution errors surface inside this test.
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
|
||||
@@ -1,123 +1,123 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision math function once from device code, using
 * constant arguments. All return values are discarded; the locals exist only
 * to give the pointer-taking functions somewhere to write to or read from.
 */
__device__ void single_precision_math_functions() {
  int intOut;        // integer output slot (frexpf)
  float outA, outB;  // float slots (normf/rnormf input, sincos outputs)

  // inverse trigonometric / hyperbolic
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);

  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);

  // error functions
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);

  // exponentials
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);

  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);

  // classification
  isfinite(0.0f);
  isinf(0.0f);
  isnan(0.0f);

  // Bessel functions, first kind
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);

  ldexpf(0.0f, 0);
  llrintf(0.0f);
  llroundf(0.0f);

  // logarithms
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);

  lrintf(0.0f);
  lroundf(0.0f);
  nanf("1");
  nearbyintf(0.0f);

  // norms
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  outA = 1.0f;
  normf(1, &outA);

  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  rhypotf(0.0f, 1.0f);
  rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  outA = 1.0f;
  rnormf(1, &outA);
  roundf(0.0f);
  rsqrtf(1.0f);
  signbit(1.0f);

  // trigonometric / hyperbolic
  sincosf(0.0f, &outA, &outB);
  sincospif(0.0f, &outA, &outB);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);

  tgammaf(2.0f);
  truncf(0.0f);

  // Bessel functions, second kind
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
|
||||
|
||||
// Kernel entry point that simply forwards to
// single_precision_math_functions(). The unnamed int parameter is not used.
__global__ void compileSinglePrecisionMathOnDevice(int) {
  single_precision_math_functions();
}
|
||||
|
||||
/**
 * Launches compileSinglePrecisionMathOnDevice with a single thread and checks
 * that neither the launch nor the kernel execution reports a HIP error.
 */
TEST_CASE("Unit_SinglePrecisionMathDevice") {
  hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // The launch returns no status of its own; query it explicitly so that a
  // failed launch fails the test instead of passing silently.
  HIP_CHECK(hipGetLastError());
  // Wait for the kernel so that execution errors surface inside this test.
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision math function once from device code, using
 * constant arguments. All return values are discarded; the locals exist only
 * to give the pointer-taking functions somewhere to write to or read from.
 */
__device__ void single_precision_math_functions() {
  int intOut;        // integer output slot (frexpf)
  float outA, outB;  // float slots (normf/rnormf input, sincos outputs)

  // inverse trigonometric / hyperbolic
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);

  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);

  // error functions
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);

  // exponentials
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);

  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);

  // classification
  isfinite(0.0f);
  isinf(0.0f);
  isnan(0.0f);

  // Bessel functions, first kind
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);

  ldexpf(0.0f, 0);
  llrintf(0.0f);
  llroundf(0.0f);

  // logarithms
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);

  lrintf(0.0f);
  lroundf(0.0f);
  nanf("1");
  nearbyintf(0.0f);

  // norms
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  outA = 1.0f;
  normf(1, &outA);

  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  rhypotf(0.0f, 1.0f);
  rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  outA = 1.0f;
  rnormf(1, &outA);
  roundf(0.0f);
  rsqrtf(1.0f);
  signbit(1.0f);

  // trigonometric / hyperbolic
  sincosf(0.0f, &outA, &outB);
  sincospif(0.0f, &outA, &outB);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);

  tgammaf(2.0f);
  truncf(0.0f);

  // Bessel functions, second kind
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
|
||||
|
||||
// Kernel entry point that simply forwards to
// single_precision_math_functions(). The unnamed int parameter is not used.
__global__ void compileSinglePrecisionMathOnDevice(int) {
  single_precision_math_functions();
}
|
||||
|
||||
/**
 * Launches compileSinglePrecisionMathOnDevice with a single thread and checks
 * that neither the launch nor the kernel execution reports a HIP error.
 */
TEST_CASE("Unit_SinglePrecisionMathDevice") {
  hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // The launch returns no status of its own; query it explicitly so that a
  // failed launch fails the test instead of passing silently.
  HIP_CHECK(hipGetLastError());
  // Wait for the kernel so that execution errors surface inside this test.
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
|
||||
@@ -1,113 +1,113 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <cmath>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision math function once from host code, using
 * constant arguments. All return values are discarded. Calls guarded by
 * __unix__ are compiled only when that macro is defined.
 */
__host__ static void single_precision_math_functions() {
  int intOut;        // integer output slot (frexpf, remquof)
  float outA, outB;  // float output slots (modff, sincosf)

  // inverse trigonometric / hyperbolic
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);

  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  erfcf(0.0f);
  erff(0.0f);

  // exponentials
#if defined(__unix__)
  exp10f(0.0f);
#endif
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);

  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);

  // classification
  std::isfinite(0.0f);
  std::isinf(0.0f);
  std::isnan(0.0f);

  // Bessel functions, first kind
#if defined(__unix__)
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);
#endif

  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);

  // logarithms
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);

  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &outA);
  nanf("1");
  nearbyintf(0.0f);
  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &intOut);
  rintf(1.0f);
#if HT_AMD
  outA = 1.0f;
#endif
  roundf(0.0f);
  // rsqrtf(1.0f);  -- call is disabled in the original source
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  std::signbit(1.0f);

  // trigonometric / hyperbolic
#if defined(__unix__)
  sincosf(0.0f, &outA, &outB);
#endif
  sinf(0.0f);
  sinhf(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);

  tgammaf(2.0f);
  truncf(0.0f);

  // Bessel functions, second kind
#if defined(__unix__)
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
#endif
}
|
||||
|
||||
// Runs the host-side math function calls once; the test body makes no
// assertions of its own.
TEST_CASE("Unit_SinglePrecisionMathHost") { single_precision_math_functions(); }
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <cmath>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wall"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
/**
 * Calls each single-precision math function once from host code, using
 * constant arguments. All return values are discarded. Calls guarded by
 * __unix__ are compiled only when that macro is defined.
 */
__host__ static void single_precision_math_functions() {
  int intOut;        // integer output slot (frexpf, remquof)
  float outA, outB;  // float output slots (modff, sincosf)

  // inverse trigonometric / hyperbolic
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);

  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  erfcf(0.0f);
  erff(0.0f);

  // exponentials
#if defined(__unix__)
  exp10f(0.0f);
#endif
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);

  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);

  // classification
  std::isfinite(0.0f);
  std::isinf(0.0f);
  std::isnan(0.0f);

  // Bessel functions, first kind
#if defined(__unix__)
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);
#endif

  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);

  // logarithms
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);

  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &outA);
  nanf("1");
  nearbyintf(0.0f);
  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &intOut);
  rintf(1.0f);
#if HT_AMD
  outA = 1.0f;
#endif
  roundf(0.0f);
  // rsqrtf(1.0f);  -- call is disabled in the original source
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  std::signbit(1.0f);

  // trigonometric / hyperbolic
#if defined(__unix__)
  sincosf(0.0f, &outA, &outB);
#endif
  sinf(0.0f);
  sinhf(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);

  tgammaf(2.0f);
  truncf(0.0f);

  // Bessel functions, second kind
#if defined(__unix__)
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
#endif
}
|
||||
|
||||
// Runs the host-side math function calls once; the test body makes no
// assertions of its own.
TEST_CASE("Unit_SinglePrecisionMathHost") { single_precision_math_functions(); }
|
||||
|
||||
@@ -1,153 +1,153 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <complex>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
// Tolerance for error: bound applied to the device-versus-host difference
// when results are compared in test().
const double tolerance = 1e-6;

// Number of complex elements in each buffer; also used as the thread count of
// the kernel launch in test().
#define LEN 64

// List of the tested operations. Define OP(x) before expanding ALL_FUN and
// undefine it afterwards.
#define ALL_FUN \
  OP(add)       \
  OP(sub)       \
  OP(mul)       \
  OP(div)       \
  OP(abs)       \
  OP(arg)       \
  OP(sin)       \
  OP(cos)
|
||||
|
||||
// Identifies one operation: ALL_FUN expands to one enumerator CK_<name> per
// list entry, in list order.
#define OP(x) CK_##x,
enum CalcKind { ALL_FUN };
#undef OP
|
||||
|
||||
#define OP(x) case CK_##x: return #x;
|
||||
std::string getName(enum CalcKind CK) {
|
||||
switch (CK) {
|
||||
ALL_FUN
|
||||
}
|
||||
return ""; // To prevent compile warning
|
||||
}
|
||||
#undef OP
|
||||
|
||||
// Calculates function.
|
||||
// If the function has one argument, B is ignored.
|
||||
// If the function returns real number, converts it to a complex number.
|
||||
#define ONE_ARG(func) \
|
||||
case CK_##func: \
|
||||
return std::complex<FloatT>(func(A));
|
||||
|
||||
template<typename FloatT>
|
||||
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
|
||||
std::complex<FloatT> B,
|
||||
enum CalcKind CK) {
|
||||
switch (CK) {
|
||||
case CK_add:
|
||||
return A + B;
|
||||
case CK_sub:
|
||||
return A - B;
|
||||
case CK_mul:
|
||||
return A * B;
|
||||
case CK_div:
|
||||
return A / B;
|
||||
|
||||
ONE_ARG(abs)
|
||||
ONE_ARG(arg)
|
||||
ONE_ARG(sin)
|
||||
ONE_ARG(cos)
|
||||
}
|
||||
return A; // To prevent compile warning
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
__global__ void kernel(std::complex<FloatT>* A,
|
||||
std::complex<FloatT>* B, std::complex<FloatT>* C,
|
||||
enum CalcKind CK) {
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
C[tx] = calc<FloatT>(A[tx], B[tx], CK);
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
void test() {
|
||||
typedef std::complex<FloatT> ComplexT;
|
||||
|
||||
ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
|
||||
A = new ComplexT[LEN];
|
||||
B = new ComplexT[LEN];
|
||||
C = new ComplexT[LEN];
|
||||
D = new ComplexT[LEN];
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
|
||||
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
|
||||
B[i] = A[i];
|
||||
C[i] = A[i];
|
||||
}
|
||||
HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||
|
||||
// Run kernel for a calculation kind and verify by comparing with host
|
||||
// calculation result. Returns false if fails.
|
||||
auto test_fun = [&](enum CalcKind CK) {
|
||||
hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
|
||||
Ad, Bd, Cd, CK);
|
||||
HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
|
||||
bool pass = true;
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
ComplexT Expected = calc(A[i], B[i], CK);
|
||||
FloatT error = abs(C[i] - Expected);
|
||||
if (abs(Expected) > tolerance)
|
||||
error /= abs(Expected);
|
||||
pass &= error < tolerance;
|
||||
}
|
||||
return pass;
|
||||
};
|
||||
|
||||
#define OP(x) assert(test_fun(CK_##x));
|
||||
ALL_FUN
|
||||
#undef OP
|
||||
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(Cd));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
delete[] C;
|
||||
delete[] D;
|
||||
}
|
||||
|
||||
// Runs test() for float and double, one section each.
// The whole test case is compiled only when HT_AMD is non-zero.
#if HT_AMD
TEST_CASE("Unit_StdComplex") {
  SECTION("Test run with float") { test<float>(); }
  SECTION("Test run with double") { test<double>(); }
}
#endif
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <complex>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
// Tolerance for error: bound applied to the device-versus-host difference
// when results are compared in test().
const double tolerance = 1e-6;

// Number of complex elements in each buffer; also used as the thread count of
// the kernel launch in test().
#define LEN 64

// List of the tested operations. Define OP(x) before expanding ALL_FUN and
// undefine it afterwards.
#define ALL_FUN \
  OP(add)       \
  OP(sub)       \
  OP(mul)       \
  OP(div)       \
  OP(abs)       \
  OP(arg)       \
  OP(sin)       \
  OP(cos)
|
||||
|
||||
// Identifies one operation: ALL_FUN expands to one enumerator CK_<name> per
// list entry, in list order.
#define OP(x) CK_##x,
enum CalcKind { ALL_FUN };
#undef OP
|
||||
|
||||
#define OP(x) case CK_##x: return #x;
|
||||
std::string getName(enum CalcKind CK) {
|
||||
switch (CK) {
|
||||
ALL_FUN
|
||||
}
|
||||
return ""; // To prevent compile warning
|
||||
}
|
||||
#undef OP
|
||||
|
||||
// Calculates function.
|
||||
// If the function has one argument, B is ignored.
|
||||
// If the function returns real number, converts it to a complex number.
|
||||
#define ONE_ARG(func) \
|
||||
case CK_##func: \
|
||||
return std::complex<FloatT>(func(A));
|
||||
|
||||
template<typename FloatT>
|
||||
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
|
||||
std::complex<FloatT> B,
|
||||
enum CalcKind CK) {
|
||||
switch (CK) {
|
||||
case CK_add:
|
||||
return A + B;
|
||||
case CK_sub:
|
||||
return A - B;
|
||||
case CK_mul:
|
||||
return A * B;
|
||||
case CK_div:
|
||||
return A / B;
|
||||
|
||||
ONE_ARG(abs)
|
||||
ONE_ARG(arg)
|
||||
ONE_ARG(sin)
|
||||
ONE_ARG(cos)
|
||||
}
|
||||
return A; // To prevent compile warning
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
__global__ void kernel(std::complex<FloatT>* A,
|
||||
std::complex<FloatT>* B, std::complex<FloatT>* C,
|
||||
enum CalcKind CK) {
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
C[tx] = calc<FloatT>(A[tx], B[tx], CK);
|
||||
}
|
||||
|
||||
template<typename FloatT>
|
||||
void test() {
|
||||
typedef std::complex<FloatT> ComplexT;
|
||||
|
||||
ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
|
||||
A = new ComplexT[LEN];
|
||||
B = new ComplexT[LEN];
|
||||
C = new ComplexT[LEN];
|
||||
D = new ComplexT[LEN];
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
|
||||
|
||||
for (uint32_t i = 0; i < LEN; i++) {
|
||||
A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
|
||||
B[i] = A[i];
|
||||
C[i] = A[i];
|
||||
}
|
||||
HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||
|
||||
// Run kernel for a calculation kind and verify by comparing with host
|
||||
// calculation result. Returns false if fails.
|
||||
auto test_fun = [&](enum CalcKind CK) {
|
||||
hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
|
||||
Ad, Bd, Cd, CK);
|
||||
HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
|
||||
bool pass = true;
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
ComplexT Expected = calc(A[i], B[i], CK);
|
||||
FloatT error = abs(C[i] - Expected);
|
||||
if (abs(Expected) > tolerance)
|
||||
error /= abs(Expected);
|
||||
pass &= error < tolerance;
|
||||
}
|
||||
return pass;
|
||||
};
|
||||
|
||||
#define OP(x) assert(test_fun(CK_##x));
|
||||
ALL_FUN
|
||||
#undef OP
|
||||
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(Cd));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
delete[] C;
|
||||
delete[] D;
|
||||
}
|
||||
|
||||
// Runs test() for float and double, one section each.
// The whole test case is compiled only when HT_AMD is non-zero.
#if HT_AMD
TEST_CASE("Unit_StdComplex") {
  SECTION("Test run with float") { test<float>(); }
  SECTION("Test run with double") { test<double>(); }
}
#endif
|
||||
|
||||
@@ -1,220 +1,220 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
(TestCase 1)::
|
||||
1) Execute atomicAdd in multi threaded scenario by diverging the data across
|
||||
multiple threads and validate the output at the end of all operations.
|
||||
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
|
||||
across multiple threads and validate the output at the end of all operations.
|
||||
(TestCase 2)::
|
||||
3) Execute atomicAdd API and validate the result.
|
||||
4) Execute atomicAddNoRet API and validate the result.
|
||||
(TestCase 3)::
|
||||
5) atomicadd/NoRet negative scenarios (TBD).
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
/*
|
||||
* Defines initial and increment values
|
||||
*/
|
||||
// Amount each thread adds through atomicAdd/atomicAddNoRet in the kernels below.
#define INCREMENT_VALUE 10
|
||||
// Starting values passed to the host-side test helpers, one per element type
// exercised in Unit_hipTestAtomicAdd (int, float, double, uint64_t, unsigned int).
#define INT_INITIAL_VALUE 10
|
||||
#define FLOAT_INITIAL_VALUE 10.50
|
||||
#define DOUBLE_INITIAL_VALUE 200.12
|
||||
#define LONG_INITIAL_VALUE 10000
|
||||
#define UNSIGNED_INITIAL_VALUE 20
|
||||
|
||||
#if HT_NVIDIA
// CUDA does not provide atomicAddNoRet, so supply one that forwards to
// atomicAdd and discards the returned value.
template <typename T>
__device__ void atomicAddNoRet(T* x, int y) {
  const T addend = static_cast<T>(y);
  atomicAdd(x, addend);
}
#endif
|
||||
|
||||
// NOTE(review): not referenced by any test code visible in this file —
// confirm whether it is still needed.
bool p_atomicNoRet = false;
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomicnoret_manywaves(T* C_d) {
|
||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomic_manywaves(T* C_d) {
|
||||
atomicAdd(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomicnoret_simple(T* C_d) {
|
||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomic_simple(T* C_d) {
|
||||
atomicAdd(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictest_manywaves(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 10;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+
|
||||
static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictestnoret_manywaves(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 10;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+
|
||||
(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictest_simple(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 1;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictestnoret_simple(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 1;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Drives the atomicAdd / atomicAddNoRet helpers. Every helper returns true
// once its own checks have passed, which is what each REQUIRE compares against.
TEST_CASE("Unit_hipTestAtomicAdd") {
  const bool TestPassed = true;

  SECTION("atomic tests with many waves") {
    REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    // The 64-bit integer and double variants are only built when HT_AMD is set.
    REQUIRE(TestPassed ==
            atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("atomic tests with many waves and no return") {
    REQUIRE(TestPassed ==
            atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
  }

  SECTION("simple atomic tests") {
    REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    // The 64-bit integer and double variants are only built when HT_AMD is set.
    REQUIRE(TestPassed ==
            atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("Simple atomic test with no return") {
    REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
  }
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
(TestCase 1)::
|
||||
1) Execute atomicAdd in multi threaded scenario by diverging the data across
|
||||
multiple threads and validate the output at the end of all operations.
|
||||
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
|
||||
across multiple threads and validate the output at the end of all operations.
|
||||
(TestCase 2)::
|
||||
3) Execute atomicAdd API and validate the result.
|
||||
4) Execute atomicAddNoRet API and validate the result.
|
||||
(TestCase 3)::
|
||||
5) atomicadd/NoRet negative scenarios (TBD).
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
/*
|
||||
* Defines initial and increment values
|
||||
*/
|
||||
// Amount each thread adds through atomicAdd/atomicAddNoRet in the kernels below.
#define INCREMENT_VALUE 10
|
||||
// Starting values passed to the host-side test helpers, one per element type
// exercised in Unit_hipTestAtomicAdd (int, float, double, uint64_t, unsigned int).
#define INT_INITIAL_VALUE 10
|
||||
#define FLOAT_INITIAL_VALUE 10.50
|
||||
#define DOUBLE_INITIAL_VALUE 200.12
|
||||
#define LONG_INITIAL_VALUE 10000
|
||||
#define UNSIGNED_INITIAL_VALUE 20
|
||||
|
||||
#if HT_NVIDIA
// CUDA does not provide atomicAddNoRet, so supply one that forwards to
// atomicAdd and discards the returned value.
template <typename T>
__device__ void atomicAddNoRet(T* x, int y) {
  const T addend = static_cast<T>(y);
  atomicAdd(x, addend);
}
#endif
|
||||
|
||||
// NOTE(review): not referenced by any test code visible in this file —
// confirm whether it is still needed.
bool p_atomicNoRet = false;
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomicnoret_manywaves(T* C_d) {
|
||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomic_manywaves(T* C_d) {
|
||||
atomicAdd(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomicnoret_simple(T* C_d) {
|
||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void atomic_simple(T* C_d) {
|
||||
atomicAdd(C_d, INCREMENT_VALUE);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictest_manywaves(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 10;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+
|
||||
static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictestnoret_manywaves(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 10;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+
|
||||
(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictest_simple(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 1;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool atomictestnoret_simple(const T& initial_val) {
|
||||
unsigned int ThreadsperBlock = 1;
|
||||
unsigned int numBlocks = 1;
|
||||
T memSize = sizeof(T);
|
||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||
*hOData = initial_val;
|
||||
T* dOData;
|
||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||
// copy host memory to device to initialize to zero
|
||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||
|
||||
// execute the kernel
|
||||
hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
|
||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||
|
||||
// Cleanup memory
|
||||
free(hOData);
|
||||
HIP_CHECK(hipFree(dOData));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Drives the atomicAdd / atomicAddNoRet helpers. Every helper returns true
// once its own checks have passed, which is what each REQUIRE compares against.
TEST_CASE("Unit_hipTestAtomicAdd") {
  const bool TestPassed = true;

  SECTION("atomic tests with many waves") {
    REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    // The 64-bit integer and double variants are only built when HT_AMD is set.
    REQUIRE(TestPassed ==
            atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("atomic tests with many waves and no return") {
    REQUIRE(TestPassed ==
            atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
  }

  SECTION("simple atomic tests") {
    REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
    REQUIRE(TestPassed ==
            atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    // The 64-bit integer and double variants are only built when HT_AMD is set.
    REQUIRE(TestPassed ==
            atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("Simple atomic test with no return") {
    REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
  }
}
|
||||
|
||||
@@ -1,51 +1,51 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE (LEN * sizeof(int64_t))
|
||||
|
||||
// Writes the sum of the four device clock readings into this thread's slot.
static __global__ void kernel1(int64_t* Ad) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[slot] = clock() + clock64() + __clock() + __clock64();
}
|
||||
|
||||
// Replaces this thread's slot with a fresh sum of the four clock readings
// minus the value already stored there.
static __global__ void kernel2(int64_t* Ad) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[slot] = clock() + clock64() + __clock() + __clock64() - Ad[slot];
}
|
||||
|
||||
// Runs kernel1 then kernel2 over LEN zero-initialised slots and requires
// every slot to hold a non-zero value afterwards.
TEST_CASE("Unit_hipTestClock") {
  int64_t* A = new int64_t[LEN];
  for (unsigned i = 0; i < LEN; i++) {
    A[i] = 0;
  }

  int64_t* Ad = nullptr;
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  // Surface launch failures instead of inspecting stale data.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));

  // Count failures first so both buffers are released before the verdict.
  unsigned zeroSlots = 0;
  for (unsigned i = 0; i < LEN; i++) {
    if (A[i] == 0) {
      zeroSlots++;
    }
  }

  HIP_CHECK(hipFree(Ad));
  delete[] A;

  // REQUIRE rather than assert(): assert() is removed when NDEBUG is defined,
  // which would leave this test without any check.
  REQUIRE(zeroSlots == 0);
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define LEN 512
|
||||
#define SIZE (LEN * sizeof(int64_t))
|
||||
|
||||
// Writes the sum of the four device clock readings into this thread's slot.
static __global__ void kernel1(int64_t* Ad) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[slot] = clock() + clock64() + __clock() + __clock64();
}
|
||||
|
||||
// Replaces this thread's slot with a fresh sum of the four clock readings
// minus the value already stored there.
static __global__ void kernel2(int64_t* Ad) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  Ad[slot] = clock() + clock64() + __clock() + __clock64() - Ad[slot];
}
|
||||
|
||||
// Runs kernel1 then kernel2 over LEN zero-initialised slots and requires
// every slot to hold a non-zero value afterwards.
TEST_CASE("Unit_hipTestClock") {
  int64_t* A = new int64_t[LEN];
  for (unsigned i = 0; i < LEN; i++) {
    A[i] = 0;
  }

  int64_t* Ad = nullptr;
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  // Surface launch failures instead of inspecting stale data.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));

  // Count failures first so both buffers are released before the verdict.
  unsigned zeroSlots = 0;
  for (unsigned i = 0; i < LEN; i++) {
    if (A[i] == 0) {
      zeroSlots++;
    }
  }

  HIP_CHECK(hipFree(Ad));
  delete[] A;

  // REQUIRE rather than assert(): assert() is removed when NDEBUG is defined,
  // which would leave this test without any check.
  REQUIRE(zeroSlots == 0);
}
|
||||
|
||||
@@ -1,88 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipDrvGetErrorName(hipError_t hip_error)` -
|
||||
* Return hip error as text string form.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that the correct string is returned for each supported
|
||||
* device error enumeration.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  // The body runs once for every entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Receives the name reported by the API for the current enumerator.
  const char* error_string = nullptr;
  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));

  // The API must hand back a string, and it must match ErrorName().
  REQUIRE(error_string != nullptr);
  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate handling of invalid arguments:
|
||||
* -# When error enumerator is invalid (-1)
|
||||
* - AMD expected output: return "hipErrorUnknown"
|
||||
* - NVIDIA expected output: return "cudaErrorUnknown"
|
||||
* -# When nullptr is passed as store location
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  // An enumerator value of -1 must be rejected with hipErrorInvalidValue.
  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  // Exercise hipDrvGetErrorName itself with a null output pointer; this
  // section previously called hipDrvGetErrorString, so the API under test
  // was never checked for this case.
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group ErrorTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipDrvGetErrorName(hipError_t hip_error)` -
|
||||
* Return hip error as text string form.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that the correct string is returned for each supported
|
||||
* device error enumeration.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  // The body runs once for every entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Receives the name reported by the API for the current enumerator.
  const char* error_string = nullptr;
  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));

  // The API must hand back a string, and it must match ErrorName().
  REQUIRE(error_string != nullptr);
  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate handling of invalid arguments:
|
||||
* -# When error enumerator is invalid (-1)
|
||||
* - AMD expected output: return "hipErrorUnknown"
|
||||
* - NVIDIA expected output: return "cudaErrorUnknown"
|
||||
* -# When nullptr is passed as store location
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  // An enumerator value of -1 must be rejected with hipErrorInvalidValue.
  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  // Exercise hipDrvGetErrorName itself with a null output pointer; this
  // section previously called hipDrvGetErrorString, so the API under test
  // was never checked for this case.
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group ErrorTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,88 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipDrvGetErrorString(hipError_t hipError)` -
|
||||
* Return handy text string message to explain the error which occurred.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that the correct string is returned for each supported
|
||||
* device error enumeration.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
  // The body runs once for every entry of kErrorEnumerators.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Receives the message reported by the API for the current enumerator.
  const char* error_string = nullptr;
  HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));

  // The API must hand back a string, and it must match ErrorString().
  REQUIRE(error_string != nullptr);
  REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate handling of invalid arguments:
|
||||
* -# When error enumerator is invalid (-1)
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* -# When nullptr is passed as store location
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
  // An enumerator value of -1 must be rejected with hipErrorInvalidValue.
  SECTION("pass unknown value to hipError") {
    const char* error_string = nullptr;
    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  // A null output pointer must be rejected with hipErrorInvalidValue.
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
                    hipErrorInvalidValue);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group ErrorTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "error_handling_common.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
|
||||
* @{
|
||||
* @ingroup ErrorTest
|
||||
* `hipError_t hipDrvGetErrorString(hipError_t hipError, const char** errorString)` -
|
||||
* Return handy text string message to explain the error which occurred.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate that the correct string is returned for each supported
|
||||
* device error enumeration.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
|
||||
const char* error_string = nullptr;
|
||||
const auto enumerator =
|
||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||
INFO("Error: " << enumerator);
|
||||
|
||||
HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
|
||||
|
||||
REQUIRE(error_string != nullptr);
|
||||
REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validate handling of invalid arguments:
|
||||
* -# When error enumerator is invalid (-1)
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* -# When nullptr is passed as store location
|
||||
* - Expected output: return "hipErrorInvalidValue"
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.4
|
||||
*/
|
||||
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
|
||||
const char* error_string = nullptr;
|
||||
SECTION("pass unknown value to hipError") {
|
||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_AMD // segfaults on NVIDIA
|
||||
SECTION("pass nullptr to error string") {
|
||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
|
||||
hipErrorInvalidValue);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group ErrorTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
# AMD specific test
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
if(UNIX)
|
||||
set(TEST_SRC
|
||||
hipMalloc.cc
|
||||
)
|
||||
# Creating Custom object file
|
||||
add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
|
||||
add_library(malloc_gpp OBJECT IMPORTED)
|
||||
set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
|
||||
|
||||
hip_add_exe_to_target(NAME gppTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS malloc_gpp)
|
||||
|
||||
add_dependencies(gppTests malloc_custom)
|
||||
endif()
|
||||
endif()
|
||||
# AMD specific test
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
if(UNIX)
|
||||
set(TEST_SRC
|
||||
hipMalloc.cc
|
||||
)
|
||||
# Creating Custom object file
|
||||
add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
|
||||
add_library(malloc_gpp OBJECT IMPORTED)
|
||||
set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
|
||||
|
||||
hip_add_exe_to_target(NAME gppTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS malloc_gpp)
|
||||
|
||||
add_dependencies(gppTests malloc_custom)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -1,54 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "hipMalloc.h"
|
||||
/**
|
||||
* @addtogroup hipMalloc hipMalloc
|
||||
* @{
|
||||
* @ingroup MemoryTest
|
||||
* `hipError_t hipMalloc(void** ptr, size_t size)` -
|
||||
* Allocate memory on the default accelerator.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/g++/hipMalloc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMalloc_gpptest") {
|
||||
printf("calling cpp function from here\n");
|
||||
int result = MallocFunc();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group MemoryTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "hipMalloc.h"
|
||||
/**
|
||||
* @addtogroup hipMalloc hipMalloc
|
||||
* @{
|
||||
* @ingroup MemoryTest
|
||||
* `hipError_t hipMalloc(void** ptr, size_t size)` -
|
||||
* Allocate memory on the default accelerator.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/g++/hipMalloc.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMalloc_gpptest") {
|
||||
printf("calling cpp function from here\n");
|
||||
int result = MallocFunc();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group MemoryTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <iostream>
|
||||
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <iostream>
|
||||
|
||||
extern int MallocFunc();
|
||||
@@ -1,28 +1,28 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
if(UNIX)
|
||||
set(TEST_SRC
|
||||
gccTest.cc
|
||||
gpu.cpp
|
||||
)
|
||||
# Creating Custom object file
|
||||
add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o)
|
||||
add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)
|
||||
add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o)
|
||||
add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)
|
||||
|
||||
add_library(LaunchKernel_lib OBJECT IMPORTED)
|
||||
add_library(hipMalloc_lib OBJECT IMPORTED)
|
||||
|
||||
set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
|
||||
set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")
|
||||
|
||||
|
||||
hip_add_exe_to_target(NAME gccTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS LaunchKernel_lib hipMalloc_lib)
|
||||
|
||||
add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
|
||||
endif()
|
||||
endif()
|
||||
# Common Tests - Test independent of all platforms
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
if(UNIX)
|
||||
set(TEST_SRC
|
||||
gccTest.cc
|
||||
gpu.cpp
|
||||
)
|
||||
# Creating Custom object file
|
||||
add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o)
|
||||
add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)
|
||||
add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o)
|
||||
add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)
|
||||
|
||||
add_library(LaunchKernel_lib OBJECT IMPORTED)
|
||||
add_library(hipMalloc_lib OBJECT IMPORTED)
|
||||
|
||||
set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
|
||||
set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")
|
||||
|
||||
|
||||
hip_add_exe_to_target(NAME gccTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
LINKER_LIBS LaunchKernel_lib hipMalloc_lib)
|
||||
|
||||
add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -1,64 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
extern "C" {
|
||||
#include "LaunchKernel.h"
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - calling launchKernel which is c function from catch2
|
||||
* and compile with gcc compiler and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/gcc/gccTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_LaunchKernelgccTests") {
|
||||
printf("Calling launchKernel files from here\n");
|
||||
int result = launchKernel();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calling hipMalloc which is c file from catch2 and compile
|
||||
* with gcc compiler and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/gcc/gccTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMallocgccTests") {
|
||||
printf("Calling hipMalloc files from here\n");
|
||||
int result = hipMallocfunc();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
/*
|
||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
* */
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
extern "C" {
|
||||
#include "LaunchKernel.h"
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - calling launchKernel which is c function from catch2
|
||||
* and compile with gcc compiler and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/gcc/gccTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_LaunchKernelgccTests") {
|
||||
printf("Calling launchKernel files from here\n");
|
||||
int result = launchKernel();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calling hipMalloc which is c file from catch2 and compile
|
||||
* with gcc compiler and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/gcc/gccTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipMallocgccTests") {
|
||||
printf("Calling hipMalloc files from here\n");
|
||||
int result = hipMallocfunc();
|
||||
REQUIRE(result == 1);
|
||||
}
|
||||
|
||||
@@ -1,176 +1,176 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
|
||||
unsigned threadsPerBlock = 256;
|
||||
|
||||
template <unsigned batch, typename T>
|
||||
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
|
||||
T tmp;
|
||||
if (groupElements < batch)
|
||||
return;
|
||||
// sdata[tid] += sdata[tid - batch/2] does not work when block size is
|
||||
// greater than wave size because one wave may complete before another
|
||||
// wave.
|
||||
if (tid >= batch/2 && tid < groupElements)
|
||||
tmp = sdata[tid - batch/2];
|
||||
__syncthreads();
|
||||
if (tid >= batch/2 && tid < groupElements)
|
||||
sdata[tid] += tmp;
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
|
||||
size_t numElements, size_t groupElements) {
|
||||
// declare dynamic shared memory
|
||||
extern __shared__ double sdata0[];
|
||||
T* sdata = reinterpret_cast<T *>(sdata0);
|
||||
|
||||
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t tid = threadIdx.x;
|
||||
|
||||
// initialize dynamic shared memory
|
||||
if (tid < groupElements) {
|
||||
sdata[tid] = static_cast<T>(tid);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// prefix sum inside dynamic shared memory
|
||||
sum<512>(sdata, groupElements, tid);
|
||||
sum<256>(sdata, groupElements, tid);
|
||||
sum<128>(sdata, groupElements, tid);
|
||||
sum<64>(sdata, groupElements, tid);
|
||||
sum<32>(sdata, groupElements, tid);
|
||||
sum<16>(sdata, groupElements, tid);
|
||||
sum<8>(sdata, groupElements, tid);
|
||||
sum<4>(sdata, groupElements, tid);
|
||||
sum<2>(sdata, groupElements, tid);
|
||||
C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void testExternShared(size_t N, unsigned groupElements) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
T *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = N/threadsPerBlock;
|
||||
assert(N == blocks * threadsPerBlock);
|
||||
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// calculate the amount of dynamic shared memory required
|
||||
size_t groupMemBytes = groupElements * sizeof(T);
|
||||
|
||||
// launch kernel with dynamic shared memory
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
|
||||
dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
|
||||
N, groupElements);
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
// verify
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
size_t tid = (i % min(threadsPerBlock, groupElements));
|
||||
T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
|
||||
T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
|
||||
REQUIRE(C_h[i] == expected);
|
||||
}
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - launch kernel with dynamic shared memory for float and double
|
||||
* datatypes and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipDynamicShared.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipDynamicShared") {
|
||||
SECTION("test case with float for least size") {
|
||||
testExternShared<float>(1024, 4);
|
||||
testExternShared<float>(1024, 8);
|
||||
testExternShared<float>(1024, 16);
|
||||
testExternShared<float>(1024, 32);
|
||||
testExternShared<float>(1024, 64);
|
||||
}
|
||||
|
||||
SECTION("test case with float for max size") {
|
||||
testExternShared<float>(65536, 4);
|
||||
testExternShared<float>(65536, 8);
|
||||
testExternShared<float>(65536, 16);
|
||||
testExternShared<float>(65536, 32);
|
||||
testExternShared<float>(65536, 64);
|
||||
}
|
||||
|
||||
SECTION("test case with double for least size") {
|
||||
testExternShared<double>(1024, 4);
|
||||
testExternShared<double>(1024, 8);
|
||||
testExternShared<double>(1024, 16);
|
||||
testExternShared<double>(1024, 32);
|
||||
testExternShared<double>(1024, 64);
|
||||
}
|
||||
|
||||
SECTION("test case with double for max size") {
|
||||
testExternShared<double>(65536, 4);
|
||||
testExternShared<double>(65536, 8);
|
||||
testExternShared<double>(65536, 16);
|
||||
testExternShared<double>(65536, 32);
|
||||
testExternShared<double>(65536, 64);
|
||||
}
|
||||
|
||||
SECTION("test case with float for max LDS size") {
|
||||
int maxLDS = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&maxLDS,
|
||||
hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
|
||||
testExternShared<float>(1024, maxLDS/sizeof(float));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
|
||||
unsigned threadsPerBlock = 256;
|
||||
|
||||
template <unsigned batch, typename T>
|
||||
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
|
||||
T tmp;
|
||||
if (groupElements < batch)
|
||||
return;
|
||||
// sdata[tid] += sdata[tid - batch/2] does not work when block size is
|
||||
// greater than wave size because one wave may complete before another
|
||||
// wave.
|
||||
if (tid >= batch/2 && tid < groupElements)
|
||||
tmp = sdata[tid - batch/2];
|
||||
__syncthreads();
|
||||
if (tid >= batch/2 && tid < groupElements)
|
||||
sdata[tid] += tmp;
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
|
||||
size_t numElements, size_t groupElements) {
|
||||
// declare dynamic shared memory
|
||||
extern __shared__ double sdata0[];
|
||||
T* sdata = reinterpret_cast<T *>(sdata0);
|
||||
|
||||
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t tid = threadIdx.x;
|
||||
|
||||
// initialize dynamic shared memory
|
||||
if (tid < groupElements) {
|
||||
sdata[tid] = static_cast<T>(tid);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// prefix sum inside dynamic shared memory
|
||||
sum<512>(sdata, groupElements, tid);
|
||||
sum<256>(sdata, groupElements, tid);
|
||||
sum<128>(sdata, groupElements, tid);
|
||||
sum<64>(sdata, groupElements, tid);
|
||||
sum<32>(sdata, groupElements, tid);
|
||||
sum<16>(sdata, groupElements, tid);
|
||||
sum<8>(sdata, groupElements, tid);
|
||||
sum<4>(sdata, groupElements, tid);
|
||||
sum<2>(sdata, groupElements, tid);
|
||||
C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void testExternShared(size_t N, unsigned groupElements) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
T *A_d, *B_d, *C_d;
|
||||
T *A_h, *B_h, *C_h;
|
||||
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
unsigned blocks = N/threadsPerBlock;
|
||||
assert(N == blocks * threadsPerBlock);
|
||||
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// calculate the amount of dynamic shared memory required
|
||||
size_t groupMemBytes = groupElements * sizeof(T);
|
||||
|
||||
// launch kernel with dynamic shared memory
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
|
||||
dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
|
||||
N, groupElements);
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
// verify
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
size_t tid = (i % min(threadsPerBlock, groupElements));
|
||||
T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
|
||||
T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
|
||||
REQUIRE(C_h[i] == expected);
|
||||
}
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - launch kernel with dynamic shared memory for float and double
|
||||
* datatypes and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipDynamicShared.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipDynamicShared") {
  // Shared-memory slot counts exercised, in this order, by each of the four
  // fixed-size sections below.
  const unsigned groupSizes[] = {4, 8, 16, 32, 64};

  SECTION("test case with float for least size") {
    for (unsigned groupElements : groupSizes) {
      testExternShared<float>(1024, groupElements);
    }
  }

  SECTION("test case with float for max size") {
    for (unsigned groupElements : groupSizes) {
      testExternShared<float>(65536, groupElements);
    }
  }

  SECTION("test case with double for least size") {
    for (unsigned groupElements : groupSizes) {
      testExternShared<double>(1024, groupElements);
    }
  }

  SECTION("test case with double for max size") {
    for (unsigned groupElements : groupSizes) {
      testExternShared<double>(65536, groupElements);
    }
  }

  SECTION("test case with float for max LDS size") {
    // Ask device 0 for its per-block shared memory limit and request all of
    // it, expressed as a count of floats.
    int maxLDS = 0;
    HIP_CHECK(hipDeviceGetAttribute(&maxLDS,
                                    hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
    testExternShared<float>(1024, maxLDS/sizeof(float));
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,94 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#define LEN (16 * 1024)
|
||||
#define SIZE (LEN * sizeof(float))
|
||||
|
||||
// Writes Ad[k] + 1.0f to Bd[k], staging every value in dynamic shared memory
// first. A thread starts at its own threadIdx.x and advances in steps of 64
// for LEN / 64 iterations; the launch visible in this file uses 64 threads.
__global__ void vectorAdd(float* Ad, float* Bd) {
  extern __shared__ float sBd[];
  const int lane = threadIdx.x;
  for (int chunk = 0; chunk < LEN / 64; ++chunk) {
    const int idx = lane + chunk * 64;
    sBd[idx] = Ad[idx] + 1.0f;
    Bd[idx] = sBd[idx];
  }
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Assign max dynamic shared memory to kernel function and
|
||||
* verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipDynamicShared2.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipDynamicShared2") {
  float *A, *B, *Ad, *Bd;
  A = new float[LEN];
  B = new float[LEN];
  for (int i = 0; i < LEN; i++) {
    A[i] = 1.0f;
    B[i] = 1.0f;
  }
  HIP_CHECK(hipMalloc(&Ad, SIZE));
  HIP_CHECK(hipMalloc(&Bd, SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  // Raise the kernel's dynamic shared memory limit to SIZE bytes before
  // launching it with exactly that much dynamic shared memory.
  hipError_t ret = hipFuncSetAttribute(
      reinterpret_cast<const void*>(&vectorAdd),
      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);

  REQUIRE(ret == hipSuccess);
  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  // Every input is 1.0f and the kernel adds 1.0f, so each output is exactly
  // 2.0f. REQUIRE is used instead of assert() because assert() is compiled
  // out under NDEBUG, which would leave this test verifying nothing.
  for (int i = 0; i < LEN; i++) {
    REQUIRE(B[i] == 2.0f);
  }
  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));

  delete[] A;
  delete[] B;
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#define LEN (16 * 1024)
|
||||
#define SIZE (LEN * sizeof(float))
|
||||
|
||||
// Writes Ad[k] + 1.0f to Bd[k], staging every value in dynamic shared memory
// first. A thread starts at its own threadIdx.x and advances in steps of 64
// for LEN / 64 iterations; the launch visible in this file uses 64 threads.
__global__ void vectorAdd(float* Ad, float* Bd) {
  extern __shared__ float sBd[];
  const int lane = threadIdx.x;
  for (int chunk = 0; chunk < LEN / 64; ++chunk) {
    const int idx = lane + chunk * 64;
    sBd[idx] = Ad[idx] + 1.0f;
    Bd[idx] = sBd[idx];
  }
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Assign max dynamic shared memory to kernel function and
|
||||
* verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipDynamicShared2.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipDynamicShared2") {
  float *A, *B, *Ad, *Bd;
  A = new float[LEN];
  B = new float[LEN];
  for (int i = 0; i < LEN; i++) {
    A[i] = 1.0f;
    B[i] = 1.0f;
  }
  HIP_CHECK(hipMalloc(&Ad, SIZE));
  HIP_CHECK(hipMalloc(&Bd, SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  // Raise the kernel's dynamic shared memory limit to SIZE bytes before
  // launching it with exactly that much dynamic shared memory.
  hipError_t ret = hipFuncSetAttribute(
      reinterpret_cast<const void*>(&vectorAdd),
      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);

  REQUIRE(ret == hipSuccess);
  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  // Every input is 1.0f and the kernel adds 1.0f, so each output is exactly
  // 2.0f. REQUIRE is used instead of assert() because assert() is compiled
  // out under NDEBUG, which would leave this test verifying nothing.
  for (int i = 0; i < LEN; i++) {
    REQUIRE(B[i] == 2.0f);
  }
  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));

  delete[] A;
  delete[] B;
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,59 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
|
||||
// Kernel with an empty body; `param` is accepted but never read.
__global__ void Empty(int param) {}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - pass empty Kernel function.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipEmptyKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipEmptyKernel") {
  // One block of one thread, no dynamic shared memory, default stream; the
  // trailing 0 is the kernel's int argument.
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
  // hipLaunchKernelGGL returns void, so a rejected launch is only visible
  // through hipGetLastError().
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||
|
||||
// Kernel with an empty body; `param` is accepted but never read.
__global__ void Empty(int param) {}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - pass empty Kernel function.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipEmptyKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipEmptyKernel") {
  // One block of one thread, no dynamic shared memory, default stream; the
  // trailing 0 is the kernel's int argument.
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
  // hipLaunchKernelGGL returns void, so a rejected launch is only visible
  // through hipGetLastError().
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,138 +1,138 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
static unsigned threadsPerBlock = 256;
|
||||
static unsigned blocksPerCU = 6;
|
||||
|
||||
// Aggregate of ten doubles that is passed by value to sKernel.
// Member order matters: the test brace-initializes it positionally.
struct _t {
  double _a;
  double _b;
  double _c;
  double _d;
  double _e;
  double _f;
  double _g;
  double _h;
  double _i;
  double _j;
};

// Short alias for `struct _t`.
typedef struct _t _T;
|
||||
|
||||
// Adds the ten members of `s`, from _a through _j in that order, and stores
// the total through `a`.
__global__ void sKernel(_T s, double *a) {
  double total = s._a;
  total = total + s._b;
  total = total + s._c;
  total = total + s._d;
  total = total + s._e;
  total = total + s._f;
  total = total + s._g;
  total = total + s._h;
  total = total + s._i;
  total = total + s._j;
  *a = total;
}
|
||||
|
||||
// Adds six arguments of mixed integer and floating-point types and stores
// the result, as a double, through `res`.
__global__ void mKernel(char f, int16_t a, int b, double c,
                        int16_t d, int e, double* res) {
  // a + b is evaluated in integer arithmetic; everything from c onwards is
  // accumulated as double, left to right.
  const int integerHead = a + b;
  double total = integerHead + c;
  total = total + d;
  total = total + e;
  total = total + f;
  *res = total;
}
|
||||
|
||||
void testMixData() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
int a = 1, e = 10;
|
||||
int16_t b = 2, d = 4;
|
||||
double c = 3.0;
|
||||
char ff = 10;
|
||||
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
|
||||
b, a, c, d, e, d_m);
|
||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(m == 30.0);
|
||||
HIP_CHECK(hipFree(d_m));
|
||||
}
|
||||
|
||||
void testStruct() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
|
||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(m == 55.0);
|
||||
HIP_CHECK(hipFree(d_m));
|
||||
}
|
||||
|
||||
/**
 * Runs HipTest::vectorADD over N ints through hipExtLaunchKernelGGL and
 * validates the result with HipTest::checkVectorADD.
 *
 * @param N  number of int elements in each of the A, B and C arrays.
 */
void test(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;

  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  // No dynamic shared memory, default stream, no start/stop events, no flags.
  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
                        static_cast<const int*>(A_d),
                        static_cast<const int*>(B_d), C_d, N);
  // The launch call returns nothing; pick up a rejected launch explicitly.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release what initArrays allocated; previously these buffers were leaked.
  // NOTE(review): `false` assumes initArrays defaults to non-pinned host
  // memory - confirm against its declaration.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
||||
Args... args)` -
|
||||
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify sample array with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
* - Test case to verify struct data with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipExtLaunchKernelGGL") {
  // Vector add over 4 Mi ints.
  SECTION("test run") {
    test(4 * 1024 * 1024);
  }

  // Struct passed by value as a kernel argument.
  SECTION("testStruct run") {
    testStruct();
  }

  // Kernel arguments of mixed scalar types.
  SECTION("testMixData run") {
    testMixData();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
static unsigned threadsPerBlock = 256;
|
||||
static unsigned blocksPerCU = 6;
|
||||
|
||||
// Aggregate of ten doubles that is passed by value to sKernel.
// Member order matters: the test brace-initializes it positionally.
struct _t {
  double _a;
  double _b;
  double _c;
  double _d;
  double _e;
  double _f;
  double _g;
  double _h;
  double _i;
  double _j;
};

// Short alias for `struct _t`.
typedef struct _t _T;
|
||||
|
||||
// Adds the ten members of `s`, from _a through _j in that order, and stores
// the total through `a`.
__global__ void sKernel(_T s, double *a) {
  double total = s._a;
  total = total + s._b;
  total = total + s._c;
  total = total + s._d;
  total = total + s._e;
  total = total + s._f;
  total = total + s._g;
  total = total + s._h;
  total = total + s._i;
  total = total + s._j;
  *a = total;
}
|
||||
|
||||
// Adds six arguments of mixed integer and floating-point types and stores
// the result, as a double, through `res`.
__global__ void mKernel(char f, int16_t a, int b, double c,
                        int16_t d, int e, double* res) {
  // a + b is evaluated in integer arithmetic; everything from c onwards is
  // accumulated as double, left to right.
  const int integerHead = a + b;
  double total = integerHead + c;
  total = total + d;
  total = total + e;
  total = total + f;
  *res = total;
}
|
||||
|
||||
void testMixData() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
int a = 1, e = 10;
|
||||
int16_t b = 2, d = 4;
|
||||
double c = 3.0;
|
||||
char ff = 10;
|
||||
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
|
||||
b, a, c, d, e, d_m);
|
||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(m == 30.0);
|
||||
HIP_CHECK(hipFree(d_m));
|
||||
}
|
||||
|
||||
void testStruct() {
|
||||
double m = 0;
|
||||
double *d_m;
|
||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
|
||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(m == 55.0);
|
||||
HIP_CHECK(hipFree(d_m));
|
||||
}
|
||||
|
||||
/**
 * Runs HipTest::vectorADD over N ints through hipExtLaunchKernelGGL and
 * validates the result with HipTest::checkVectorADD.
 *
 * @param N  number of int elements in each of the A, B and C arrays.
 */
void test(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;

  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  // No dynamic shared memory, default stream, no start/stop events, no flags.
  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
                        static_cast<const int*>(A_d),
                        static_cast<const int*>(B_d), C_d, N);
  // The launch call returns nothing; pick up a rejected launch explicitly.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release what initArrays allocated; previously these buffers were leaked.
  // NOTE(review): `false` assumes initArrays defaults to non-pinned host
  // memory - confirm against its declaration.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
||||
Args... args)` -
|
||||
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify sample array with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
* - Test case to verify struct data with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
|
||||
* and verify the results.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipExtLaunchKernelGGL") {
  // Vector add over 4 Mi ints.
  SECTION("test run") {
    test(4 * 1024 * 1024);
  }

  // Struct passed by value as a kernel argument.
  SECTION("testStruct run") {
    testStruct();
  }

  // Kernel arguments of mixed scalar types.
  SECTION("testMixData run") {
    testMixData();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,122 +1,122 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
static unsigned threadsPerBlock = 256;
|
||||
static unsigned blocksPerCU = 6;
|
||||
|
||||
// __device__ maps to __attribute__((hc))
|
||||
// Device-only helper that returns its argument plus one.
// NOTE(review): no call to this function is visible in this file - confirm
// whether it is still needed.
__device__ int foo(int i) { return i + 1; }
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
double foo = __hiloint2double(A_d[i], B_d[i]);
|
||||
C_d[i] = __double2loint(foo) + __double2hiint(foo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs vectorADD2 over N ints with hipLaunchKernelGGL and validates the
 * result with HipTest::checkVectorADD.
 *
 * @param N  number of int elements in each of the A, B and C arrays.
 * @return   always 0.
 */
int test_gl2(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, A_d, B_d, C_d, N);
  // The launch call returns nothing; pick up a rejected launch explicitly.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release what initArrays allocated; previously these buffers were leaked.
  // NOTE(review): `false` assumes initArrays defaults to non-pinned host
  // memory - confirm against its declaration.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
|
||||
|
||||
#if __HIP__
/**
 * Same check as the hipLaunchKernelGGL variant, but vectorADD2 is launched
 * with the <<<grid, block>>> syntax. Only compiled when __HIP__ is set.
 *
 * @param N  number of int elements in each of the A, B and C arrays.
 * @return   always 0.
 */
int test_triple_chevron(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
  // A chevron launch yields no status; pick up a rejected launch explicitly.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release what initArrays allocated; previously these buffers were leaked.
  // NOTE(review): `false` assumes initArrays defaults to non-pinned host
  // memory - confirm against its declaration.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
#endif
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify the Grid_Launch syntax.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipGridLaunch.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGridLaunch") {
  // Element count shared by both launch styles: 4 Mi ints.
  const size_t N = 4 * 1024 * 1024;

  // Launch through the hipLaunchKernelGGL entry point.
  SECTION("Test test_gl2") { test_gl2(N); }

#if __HIP__
  // Launch through the <<< >>> syntax, available only when __HIP__ is set.
  SECTION("Test triple_chevron") { test_triple_chevron(N); }
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Test the Grid_Launch syntax.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
static unsigned threadsPerBlock = 256;
|
||||
static unsigned blocksPerCU = 6;
|
||||
|
||||
// __device__ maps to __attribute__((hc))
|
||||
// Device-only helper that returns its argument plus one.
// NOTE(review): no call to this function is visible in this file - confirm
// whether it is still needed.
__device__ int foo(int i) { return i + 1; }
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
double foo = __hiloint2double(A_d[i], B_d[i]);
|
||||
C_d[i] = __double2loint(foo) + __double2hiint(foo);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs vectorADD2 over N ints with hipLaunchKernelGGL and validates the
 * result with HipTest::checkVectorADD.
 *
 * @param N  number of int elements in each of the A, B and C arrays.
 * @return   always 0.
 */
int test_gl2(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, A_d, B_d, C_d, N);
  // The launch call returns nothing; pick up a rejected launch explicitly.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release what initArrays allocated; previously these buffers were leaked.
  // NOTE(review): `false` assumes initArrays defaults to non-pinned host
  // memory - confirm against its declaration.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
|
||||
|
||||
#if __HIP__
|
||||
// Launches vectorADD2 with the <<<grid, block>>> syntax over N ints and
// validates the result on the host.
//
// N: number of int elements in each of the three vectors.
// Returns 0; failures are reported through HIP_CHECK and the HipTest checker.
int test_triple_chevron(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d = nullptr, *B_d = nullptr, *C_d = nullptr;
  int *A_h = nullptr, *B_h = nullptr, *C_h = nullptr;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
  // The launch itself yields no status, so query it explicitly.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release everything initArrays allocated so repeated runs do not leak.
  // NOTE(review): `false` assumes initArrays defaulted to non-pinned host
  // buffers - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify the Grid_Launch syntax.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipGridLaunch.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipGridLaunch") {
  // Same element count for both launch spellings.
  const size_t elementCount = 4 * 1024 * 1024;

  SECTION("Test test_gl2") {
    test_gl2(elementCount);
  }

  // The <<<...>>> variant only exists when __HIP__ is set.
#if __HIP__
  SECTION("Test triple_chevron") {
    test_triple_chevron(elementCount);
  }
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,111 +1,111 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
#pragma clang diagnostic ignored "-Wuninitialized"
|
||||
|
||||
// Simple tests for variable type qualifiers:
|
||||
__device__ int deviceVar;
|
||||
|
||||
// TODO-HCC __constant__ not working yet.
|
||||
__constant__ int constantVar1;
|
||||
|
||||
__constant__ __device__ int constantVar2;
|
||||
|
||||
// Test HOST space:
|
||||
// Host-only function: writes "foo!" and a newline to stdout.
__host__ void foo() {
  printf("foo!\n");
}
|
||||
|
||||
// Device function marked __noinline__; returns a + 1.
__device__ __noinline__ int sum1_noinline(int a) {
  return 1 + a;
}

// Device function marked __forceinline__; returns a + 1.
__device__ __forceinline__ int sum1_forceinline(int a) {
  return 1 + a;
}
|
||||
|
||||
|
||||
// Callable from host and device. The addition is done in double (1.0 is a
// double literal) and the sum is narrowed back to float for the return value.
__device__ __host__ float PlusOne(float x) {
  const double bumped = x + 1.0;
  return static_cast<float>(bumped);
}
|
||||
|
||||
// Kernel: thread t of a block writes c[t] = a[t] + PlusOne(b[t]) when t < N.
// Only threadIdx.x is used as the index; the block index is not considered.
__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
  const unsigned lane = threadIdx.x;
  if (lane >= N) {
    return;
  }
  c[lane] = a[lane] + PlusOne(b[lane]);
}
|
||||
|
||||
void callMyKernel() {
|
||||
float *a, *b, *c;
|
||||
const unsigned blockSize = 256;
|
||||
unsigned N = blockSize;
|
||||
hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
|
||||
0, 0, a, b, c, N);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
|
||||
#ifdef NOT_YET
|
||||
int a = __shfl_up(x, 1);
|
||||
#endif
|
||||
float x = 1.0;
|
||||
#ifdef NOT_YET
|
||||
float fastZ = __sin(x);
|
||||
#endif
|
||||
__syncthreads();
|
||||
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Collection of code to make sure that various features
|
||||
* in the hip kernel language compile.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipLanguageExtensions.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipLanguageExtensions") {
  // Nothing is launched here; the assertion below is unconditionally true.
  const bool alwaysTrue = true;
  REQUIRE(alwaysTrue);
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#include <hip/math_functions.h>
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
#pragma clang diagnostic ignored "-Wuninitialized"
|
||||
|
||||
// Simple tests for variable type qualifiers:
|
||||
__device__ int deviceVar;
|
||||
|
||||
// TODO-HCC __constant__ not working yet.
|
||||
__constant__ int constantVar1;
|
||||
|
||||
__constant__ __device__ int constantVar2;
|
||||
|
||||
// Test HOST space:
|
||||
// Host-only function: writes "foo!" and a newline to stdout.
__host__ void foo() {
  printf("foo!\n");
}
|
||||
|
||||
// Device function marked __noinline__; returns a + 1.
__device__ __noinline__ int sum1_noinline(int a) {
  return 1 + a;
}

// Device function marked __forceinline__; returns a + 1.
__device__ __forceinline__ int sum1_forceinline(int a) {
  return 1 + a;
}
|
||||
|
||||
|
||||
// Callable from host and device. The addition is done in double (1.0 is a
// double literal) and the sum is narrowed back to float for the return value.
__device__ __host__ float PlusOne(float x) {
  const double bumped = x + 1.0;
  return static_cast<float>(bumped);
}
|
||||
|
||||
// Kernel: thread t of a block writes c[t] = a[t] + PlusOne(b[t]) when t < N.
// Only threadIdx.x is used as the index; the block index is not considered.
__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
  const unsigned lane = threadIdx.x;
  if (lane >= N) {
    return;
  }
  c[lane] = a[lane] + PlusOne(b[lane]);
}
|
||||
|
||||
void callMyKernel() {
|
||||
float *a, *b, *c;
|
||||
const unsigned blockSize = 256;
|
||||
unsigned N = blockSize;
|
||||
hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
|
||||
0, 0, a, b, c, N);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
|
||||
#ifdef NOT_YET
|
||||
int a = __shfl_up(x, 1);
|
||||
#endif
|
||||
float x = 1.0;
|
||||
#ifdef NOT_YET
|
||||
float fastZ = __sin(x);
|
||||
#endif
|
||||
__syncthreads();
|
||||
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Collection of code to make sure that various features
|
||||
* in the hip kernel language compile.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipLanguageExtensions.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipLanguageExtensions") {
  // Nothing is launched here; the assertion below is unconditionally true.
  const bool alwaysTrue = true;
  REQUIRE(alwaysTrue);
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
Diff 로드
@@ -1,464 +1,464 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
// Host-side drivers for the functor-as-kernel-argument tests. Every method
// is declared here and defined further down in this file.
class HipFunctorTests {
 public:
  // Class functor passed to the kernel by value.
  void TestForSimpleClassFunctor();
  // Class functor with a templated call operator, passed by value.
  void TestForClassTemplateFunctor();
  // Pointer to a class functor object passed to the kernel.
  void TestForClassObjPtrFunctor();
  // Class object that holds a functor as a member, passed by value.
  void TestForFunctorContainInClassObj();
  // Struct functor passed to the kernel by value.
  void TestForSimpleStructFunctor();
  // Pointer to a struct functor object passed to the kernel.
  void TestForStructObjPtrFunctor();
  // Struct functor with a templated call operator, passed by value.
  void TestForStructTemplateFunctor();
  // Struct object that holds a functor as a member, passed by value.
  void TestForFunctorContainInStructObj();
};
|
||||
|
||||
// Grid dimension of every launch in this file; also the element count of the
// per-test result arrays.
static const int BLOCK_DIM_SIZE = 1024;
// Block dimension of every launch in this file.
static const int THREADS_PER_BLOCK = 1;
|
||||
|
||||
// class functor tests
|
||||
|
||||
// Simple doubler Functor
|
||||
class DoublerFunctor{
|
||||
public:
|
||||
__device__ int operator()(int x) { return x * 2;}
|
||||
};
|
||||
|
||||
// simple doubler functor passed to kernel
|
||||
// Kernel: calls the by-value functor with 5 and records, at this thread's
// global index, whether the answer was 10.
__global__ void DoublerFunctorKernel(DoublerFunctor doubler_,
                                     bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubledCorrectly = (doubler_(5) == 10);
  deviceResult[slot] = doubledCorrectly;
}
|
||||
|
||||
void HipFunctorTests::TestForSimpleClassFunctor(void) {
|
||||
DoublerFunctor doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForSimpleClassFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// pointer functor passed to kernel
|
||||
// Kernel: calls the functor reached through the pointer with 5 and records,
// at this thread's global index, whether the answer was 10.
__global__ void PtrDoublerFunctorKernel(DoublerFunctor* doubler_,
                                        bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  DoublerFunctor& target = *doubler_;
  deviceResult[slot] = (target(5) == 10);
}
|
||||
|
||||
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
|
||||
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||
|
||||
// Validation part of TestForClassObjPtrFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
delete[] ptrdoubler;
|
||||
}
|
||||
|
||||
class compare {
|
||||
public:
|
||||
template<typename T1, typename T2>
|
||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||
return v1 > v2;
|
||||
}
|
||||
};
|
||||
|
||||
// template functor passed to kernel
|
||||
// Kernel: invokes the templated functor with double, int and char arguments
// and stores, at this thread's global index, whether ALL three comparisons
// held. Combining them keeps a failure in the double or int instantiation
// from being overwritten by a later, passing comparison.
__global__ void TemplateFunctorKernel(compare compare_, bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
|
||||
|
||||
void HipFunctorTests::TestForClassTemplateFunctor(void) {
|
||||
compare comparefunctor;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||
|
||||
// Validation part of TestForClassTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
|
||||
// Doubler calculator
|
||||
class DoublerCalculator {
|
||||
public:
|
||||
int a, result;
|
||||
// fucntor contained in class object
|
||||
DoublerFunctor doubler;
|
||||
};
|
||||
|
||||
// doubler functor conatined in class obj passed to kernel
|
||||
__global__ void DoublerCalculatorFunctorKernel(
|
||||
DoublerCalculator doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_.doubler(doubler_.a);
|
||||
deviceResult[x] = (doubler_.result == result);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
|
||||
DoublerCalculator Doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
Doubler.a = 5;
|
||||
Doubler.result = 10;
|
||||
// pass comparefunctor to hipLaunchParm
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// Struct functor tests
|
||||
|
||||
// Simple doubler Functor
|
||||
struct sDoublerFunctor {
|
||||
public:
|
||||
__device__ int operator()(int x) { return x * 2;}
|
||||
};
|
||||
|
||||
|
||||
// simple sturct doubler functor passed to kernel
|
||||
__global__ void structDoublerFunctorKernel(
|
||||
sDoublerFunctor doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_(5);
|
||||
deviceResult[x] = (result == 10);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForSimpleStructFunctor(void) {
|
||||
sDoublerFunctor doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForSimpleStructFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// ptr functor passed to kernel
|
||||
// Kernel: calls the struct functor reached through the pointer with 5 and
// records, at this thread's global index, whether the answer was 10.
__global__ void structPtrDoublerFunctorKernel(sDoublerFunctor* doubler_,
                                              bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  sDoublerFunctor& target = *doubler_;
  deviceResult[slot] = (target(5) == 10);
}
|
||||
|
||||
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
|
||||
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructObjPtrFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
delete[] ptrdoubler;
|
||||
}
|
||||
|
||||
struct sCompare {
|
||||
public:
|
||||
template< typename T1, typename T2 >
|
||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||
return v1 > v2;
|
||||
}
|
||||
};
|
||||
|
||||
// template functor passed to kernel
|
||||
// Kernel: invokes the templated struct functor with double, int and char
// arguments and stores, at this thread's global index, whether ALL three
// comparisons held. Combining them keeps a failure in the double or int
// instantiation from being overwritten by a later, passing comparison.
__global__ void structTemplateFunctorKernel(sCompare compare_,
                                            bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
|
||||
|
||||
void HipFunctorTests::TestForStructTemplateFunctor(void) {
|
||||
sCompare comparefunctor;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
// pass comparefunctor to hipLaunchKernelGGL
|
||||
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// Doubler calculator struct
|
||||
struct sDoublerCalculator {
|
||||
public:
|
||||
int a, result;
|
||||
// fucntor contained in class object
|
||||
DoublerFunctor doubler;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// doubler functor contained in struct passed to kernel
|
||||
__global__ void DoublerCalculatorFunctorKernel(
|
||||
sDoublerCalculator doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_.doubler(doubler_.a);
|
||||
deviceResult[x] = (doubler_.result == result);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
|
||||
sDoublerCalculator Doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
Doubler.a = 5;
|
||||
Doubler.result = 10;
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
// pass comparefunctor to hipLaunchKernelGGL
|
||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test that a class functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a templated class functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a class functor object ptr can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a class object containing functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct functor object ptr can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a templated struct functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct object containing functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipLaunchParmFunctor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipLaunchParmFunctor") {
  // One driver object; each section runs exactly one of its tests.
  HipFunctorTests suite;

  SECTION("test for simple class functor") {
    suite.TestForSimpleClassFunctor();
  }
  SECTION("test for class objptr functor") {
    suite.TestForClassObjPtrFunctor();
  }
  SECTION("test for class templete functor") {
    suite.TestForClassTemplateFunctor();
  }
  SECTION("test for simple struct functor") {
    suite.TestForSimpleStructFunctor();
  }
  SECTION("test for struct objptr functor") {
    suite.TestForStructObjPtrFunctor();
  }
  SECTION("test for struct templete functor") {
    suite.TestForStructTemplateFunctor();
  }
  SECTION("test for functor contain in classobj") {
    suite.TestForFunctorContainInClassObj();
  }
  SECTION("test for functor contain in structobj") {
    suite.TestForFunctorContainInStructObj();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
// Host-side drivers for the functor-as-kernel-argument tests. Every method
// is declared here and defined further down in this file.
class HipFunctorTests {
 public:
  // Class functor passed to the kernel by value.
  void TestForSimpleClassFunctor();
  // Class functor with a templated call operator, passed by value.
  void TestForClassTemplateFunctor();
  // Pointer to a class functor object passed to the kernel.
  void TestForClassObjPtrFunctor();
  // Class object that holds a functor as a member, passed by value.
  void TestForFunctorContainInClassObj();
  // Struct functor passed to the kernel by value.
  void TestForSimpleStructFunctor();
  // Pointer to a struct functor object passed to the kernel.
  void TestForStructObjPtrFunctor();
  // Struct functor with a templated call operator, passed by value.
  void TestForStructTemplateFunctor();
  // Struct object that holds a functor as a member, passed by value.
  void TestForFunctorContainInStructObj();
};
|
||||
|
||||
// Grid dimension of every launch in this file; also the element count of the
// per-test result arrays.
static const int BLOCK_DIM_SIZE = 1024;
// Block dimension of every launch in this file.
static const int THREADS_PER_BLOCK = 1;
|
||||
|
||||
// class functor tests
|
||||
|
||||
// Simple doubler Functor
|
||||
class DoublerFunctor{
|
||||
public:
|
||||
__device__ int operator()(int x) { return x * 2;}
|
||||
};
|
||||
|
||||
// simple doubler functor passed to kernel
|
||||
__global__ void DoublerFunctorKernel(
|
||||
DoublerFunctor doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_(5);
|
||||
deviceResult[x] = (result == 10);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForSimpleClassFunctor(void) {
|
||||
DoublerFunctor doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForSimpleClassFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// pointer functor passed to kernel
|
||||
__global__ void PtrDoublerFunctorKernel(
|
||||
DoublerFunctor *doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = (*doubler_)(5);
|
||||
deviceResult[x] = (result == 10);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
|
||||
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||
|
||||
// Validation part of TestForClassObjPtrFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
delete[] ptrdoubler;
|
||||
}
|
||||
|
||||
class compare {
|
||||
public:
|
||||
template<typename T1, typename T2>
|
||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||
return v1 > v2;
|
||||
}
|
||||
};
|
||||
|
||||
// template functor passed to kernel
|
||||
__global__ void TemplateFunctorKernel(
|
||||
compare compare_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
deviceResult[x] = compare_(2.2, 2.1);
|
||||
deviceResult[x] = compare_(2, 1);
|
||||
deviceResult[x] = compare_('b', 'a');
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForClassTemplateFunctor(void) {
|
||||
compare comparefunctor;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||
|
||||
// Validation part of TestForClassTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
|
||||
// Doubler calculator
|
||||
class DoublerCalculator {
|
||||
public:
|
||||
int a, result;
|
||||
// fucntor contained in class object
|
||||
DoublerFunctor doubler;
|
||||
};
|
||||
|
||||
// doubler functor conatined in class obj passed to kernel
|
||||
__global__ void DoublerCalculatorFunctorKernel(
|
||||
DoublerCalculator doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_.doubler(doubler_.a);
|
||||
deviceResult[x] = (doubler_.result == result);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
|
||||
DoublerCalculator Doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
Doubler.a = 5;
|
||||
Doubler.result = 10;
|
||||
// pass comparefunctor to hipLaunchParm
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// Struct functor tests
|
||||
|
||||
// Simple doubler Functor
|
||||
struct sDoublerFunctor {
|
||||
public:
|
||||
__device__ int operator()(int x) { return x * 2;}
|
||||
};
|
||||
|
||||
|
||||
// simple sturct doubler functor passed to kernel
|
||||
__global__ void structDoublerFunctorKernel(
|
||||
sDoublerFunctor doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_(5);
|
||||
deviceResult[x] = (result == 10);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForSimpleStructFunctor(void) {
|
||||
sDoublerFunctor doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForSimpleStructFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// ptr functor passed to kernel
|
||||
__global__ void structPtrDoublerFunctorKernel(
|
||||
sDoublerFunctor *doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = (*doubler_)(5);
|
||||
deviceResult[x] = (result == 10);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
|
||||
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructObjPtrFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
delete[] ptrdoubler;
|
||||
}
|
||||
|
||||
struct sCompare {
|
||||
public:
|
||||
template< typename T1, typename T2 >
|
||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||
return v1 > v2;
|
||||
}
|
||||
};
|
||||
|
||||
// template functor passed to kernel
|
||||
__global__ void structTemplateFunctorKernel(
|
||||
sCompare compare_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
deviceResult[x] = compare_(2.2, 2.1);
|
||||
deviceResult[x] = compare_(2, 1);
|
||||
deviceResult[x] = compare_('b', 'a');
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForStructTemplateFunctor(void) {
|
||||
sCompare comparefunctor;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
// pass comparefunctor to hipLaunchKernelGGL
|
||||
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
// Doubler calculator struct
|
||||
struct sDoublerCalculator {
|
||||
public:
|
||||
int a, result;
|
||||
// fucntor contained in class object
|
||||
DoublerFunctor doubler;
|
||||
};
|
||||
|
||||
|
||||
|
||||
// doubler functor contained in struct passed to kernel
|
||||
__global__ void DoublerCalculatorFunctorKernel(
|
||||
sDoublerCalculator doubler_,
|
||||
bool* deviceResult) {
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int result = doubler_.doubler(doubler_.a);
|
||||
deviceResult[x] = (doubler_.result == result);
|
||||
}
|
||||
|
||||
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
|
||||
sDoublerCalculator Doubler;
|
||||
bool *deviceResults, *hostResults;
|
||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||
// initialize to false, will be set to
|
||||
// true if the functor is called in device code
|
||||
hostResults[k] = false;
|
||||
}
|
||||
|
||||
Doubler.a = 5;
|
||||
Doubler.result = 10;
|
||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
// pass comparefunctor to hipLaunchKernelGGL
|
||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||
|
||||
// Validation part of TestForStructTemplateFunctor
|
||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||
REQUIRE(hostResults[k] == true);
|
||||
HIP_CHECK(hipHostFree(hostResults));
|
||||
HIP_CHECK(hipFree(deviceResults));
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||
* @{
|
||||
* @ingroup KernelTest
|
||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||
* Method to invoke kernel functions
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test that a class functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a templated class functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a class functor object ptr can be passed to hiplaunchparam
|
||||
* and can be used in kernel.
|
||||
* - Test that a class object containing functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct functor object ptr can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a templated struct functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
* - Test that a struct object containing functor can be passed to hiplaunchparam
|
||||
* and can be used in kernel
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipLaunchParmFunctor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
// Runs every HipFunctorTests driver, each in its own Catch2 section.
TEST_CASE("Unit_hipLaunchParmFunctor") {
  HipFunctorTests suite;

  // Class-based functors.
  SECTION("test for simple class functor") { suite.TestForSimpleClassFunctor(); }
  SECTION("test for class objptr functor") { suite.TestForClassObjPtrFunctor(); }
  SECTION("test for class templete functor") { suite.TestForClassTemplateFunctor(); }

  // Struct-based functors.
  SECTION("test for simple struct functor") { suite.TestForSimpleStructFunctor(); }
  SECTION("test for struct objptr functor") { suite.TestForStructObjPtrFunctor(); }
  SECTION("test for struct templete functor") { suite.TestForStructTemplateFunctor(); }

  // Functors held as members of another object.
  SECTION("test for functor contain in classobj") {
    suite.TestForFunctorContainInClassObj();
  }
  SECTION("test for functor contain in structobj") {
    suite.TestForFunctorContainInStructObj();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -119,7 +119,7 @@ void verify_linked_lists_on_device(hipStream_t stream, Node* pNodes,
|
||||
unsigned int* pNumCorrect, unsigned int numLists,
|
||||
unsigned int ListLength) {
|
||||
*pNumCorrect = 0; // reset numCorrect to zero
|
||||
|
||||
|
||||
verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect,
|
||||
ListLength);
|
||||
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
|
||||
# /catch/unit/p2p folder and its dependent files.
|
||||
# Sources built on every platform.
set(TEST_SRC hipDeviceGetP2PAttribute.cc)

# Sources added only when HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
  set(AMD_SRC hipP2pLinkTypeAndHopFunc.cc)
  list(APPEND TEST_SRC ${AMD_SRC})
endif()

# hipDeviceGetP2PAttribute.cc is compiled with -std=c++17.
set_source_files_properties(hipDeviceGetP2PAttribute.cc
                            PROPERTIES COMPILE_FLAGS -std=c++17)

# Helper executable; EXCLUDE_FROM_ALL keeps it out of the default build, so it
# is attached to build_tests explicitly at the end of this file.
add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL
               hipDeviceGetP2PAttribute_exe.cc)

hip_add_exe_to_target(NAME p2pTests
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests)

add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
|
||||
# Common Tests - Test independent of all platforms
|
||||
# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
|
||||
# /catch/unit/p2p folder and its dependent files.
|
||||
# Sources built on every platform.
set(TEST_SRC hipDeviceGetP2PAttribute.cc)

# Sources added only when HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
  set(AMD_SRC hipP2pLinkTypeAndHopFunc.cc)
  list(APPEND TEST_SRC ${AMD_SRC})
endif()

# hipDeviceGetP2PAttribute.cc is compiled with -std=c++17.
set_source_files_properties(hipDeviceGetP2PAttribute.cc
                            PROPERTIES COMPILE_FLAGS -std=c++17)

# Helper executable; EXCLUDE_FROM_ALL keeps it out of the default build, so it
# is attached to build_tests explicitly at the end of this file.
add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL
               hipDeviceGetP2PAttribute_exe.cc)

hip_add_exe_to_target(NAME p2pTests
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests)

add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
|
||||
|
||||
@@ -1,356 +1,356 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hipP2pLinkTypeAndHopFunc.h"
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
#include <vector>
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
#ifdef __linux__
|
||||
void getDeviceCount(int *pdevCnt) {
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
// disable visible_devices env from shell
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bool testMaskedDevice(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
// disable visible_devices env from shell
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
uint32_t linktype;
|
||||
uint32_t hopcount;
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
|
||||
VISIBLE_DEVICE+count, &linktype, &hopcount);
|
||||
REQUIRE(err == hipSuccess);
|
||||
}
|
||||
close(fd[0]);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("Info:fork() failed\n");
|
||||
testResult = false;
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool testhipInvalidDevice(int numDevices) {
|
||||
hipError_t ret;
|
||||
uint32_t linktype;
|
||||
uint32_t hopcount;
|
||||
SECTION("Invalid device number case 1") {
|
||||
ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
SECTION("Invalid device number case 2") {
|
||||
ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
SECTION("Invalid device number case 3") {
|
||||
ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
SECTION("Invalid device number case 4") {
|
||||
ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
SECTION("Invalid device number case 5") {
|
||||
ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
bool testhipInvalidLinkType() {
|
||||
uint32_t hopcount;
|
||||
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
|
||||
&hopcount));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool testhipInvalidHopcount() {
|
||||
uint32_t linktype;
|
||||
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool testhipSameDevice(int numGPUs) {
|
||||
hipError_t ret;
|
||||
uint32_t linktype = 0;
|
||||
uint32_t hopcount = 0;
|
||||
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
|
||||
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
|
||||
bool TestPassed = true;
|
||||
// Get the unique pair of devices
|
||||
for (int x = 0; x < numDevices; x++) {
|
||||
for (int y = x+1; y < numDevices; y++) {
|
||||
uint32_t linktype1 = 0, linktype2 = 0;
|
||||
uint32_t hopcount1 = 0, hopcount2 = 0;
|
||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
|
||||
&linktype1, &hopcount1));
|
||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
|
||||
&linktype2, &hopcount2));
|
||||
if (hopcount1 != hopcount2) {
|
||||
TestPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal Function
|
||||
*/
|
||||
bool validateLinkType(uint32_t linktype_Hip,
|
||||
RSMI_IO_LINK_TYPE linktype_RocmSmi) {
|
||||
bool TestPassed = false;
|
||||
|
||||
if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
|
||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
|
||||
TestPassed = true;
|
||||
} else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
|
||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
|
||||
TestPassed = true;
|
||||
} else {
|
||||
printf("linktype Hip = %u, linktype RocmSmi = %u\n",
|
||||
linktype_Hip, linktype_RocmSmi);
|
||||
TestPassed = false;
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
bool testhipLinkTypeHopcountDevice(int numDevices) {
|
||||
bool TestPassed = true;
|
||||
// Opening and initializing rocm-smi library
|
||||
void *lib_rocm_smi_hdl;
|
||||
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
|
||||
RSMI_IO_LINK_TYPE*);
|
||||
rsmi_status_t (*fntopo_init)(uint64_t);
|
||||
rsmi_status_t (*fntopo_shut_down)();
|
||||
|
||||
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
|
||||
RTLD_LAZY);
|
||||
REQUIRE(lib_rocm_smi_hdl);
|
||||
|
||||
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
|
||||
REQUIRE(fnsym);
|
||||
|
||||
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
|
||||
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
|
||||
|
||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
|
||||
REQUIRE(fnsym);
|
||||
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
|
||||
|
||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
|
||||
REQUIRE(fnsym);
|
||||
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
|
||||
|
||||
uint64_t init_flags = 0;
|
||||
rsmi_status_t retsmi_init;
|
||||
retsmi_init = fntopo_init(init_flags);
|
||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
|
||||
|
||||
// Use rocm-smi API rsmi_topo_get_link_type() to validate
|
||||
struct devicePair {
|
||||
int device1;
|
||||
int device2;
|
||||
};
|
||||
std::vector<struct devicePair> devicePairList;
|
||||
// Get the unique pair of devices
|
||||
for (int x = 0; x < numDevices; x++) {
|
||||
for (int y = x+1; y < numDevices; y++) {
|
||||
devicePairList.push_back({x, y});
|
||||
}
|
||||
}
|
||||
for (auto pos=devicePairList.begin();
|
||||
pos != devicePairList.end(); pos++) {
|
||||
uint32_t linktype1 = 0;
|
||||
uint32_t hopcount1 = 0;
|
||||
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
|
||||
uint64_t hopcount2 = 0;
|
||||
rsmi_status_t retsmi;
|
||||
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
|
||||
(*pos).device2, &linktype1, &hopcount1));
|
||||
retsmi = fntopo_get_link_type((*pos).device1,
|
||||
(*pos).device2, &hopcount2, &linktype2);
|
||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
|
||||
|
||||
// Validate linktype
|
||||
TestPassed = validateLinkType(linktype1, linktype2);
|
||||
}
|
||||
fntopo_shut_down();
|
||||
dlclose(lib_rocm_smi_hdl);
|
||||
return TestPassed;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
|
||||
* @{
|
||||
* @ingroup p2pTest
|
||||
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
|
||||
* Returns the link type and hop count between two devices
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
|
||||
* 1)Test Scenario to verify when device1 is visible and device2 is masked
|
||||
* 2)Test Scenario to verify Invalid Device Number(s)
|
||||
* 3)Test Scenario to verify when linktype = NULL
|
||||
* 4)Test Scenario to verify when hopcount = NULL
|
||||
* 5)Test Scenario to verify when device1 = device2
|
||||
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
|
||||
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
|
||||
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
|
||||
* GPUs with the output of rocm_smi tool.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
// Runs the hipExtGetLinkTypeAndHopCount scenarios. The invalid-device section
// runs on every platform; the remaining sections are compiled only on Linux.
// The test is skipped when fewer than two devices are reported.
TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
  bool TestPassed = true;
  int numDevices = 0;
  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testhipInvalidDevice") {
    TestPassed = testhipInvalidDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

#ifdef __linux__
  // Re-query the count through getDeviceCount, which removes the
  // *_VISIBLE_DEVICES variables from the environment before asking.
  getDeviceCount(&numDevices);
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testMaskedDevice") {
    TestPassed = testMaskedDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

  SECTION("Test running for testhipInvalidLinkType") {
    TestPassed = testhipInvalidLinkType();
    REQUIRE(TestPassed == true);
  }

  SECTION("Test running for testhipInvalidHopcount") {
    TestPassed = testhipInvalidHopcount();
    REQUIRE(TestPassed == true);
  }

  SECTION("Test running for testhipSameDevice") {
    TestPassed = testhipSameDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

  SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
    TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
    REQUIRE(TestPassed == true);
  }

  SECTION("Test running for testhipLinkTypeHopcountDevice") {
    TestPassed = testhipLinkTypeHopcountDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
#else
  printf("This test is skipped due to non linux environment.\n");
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group p2pTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hipP2pLinkTypeAndHopFunc.h"
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
#include <vector>
|
||||
#define MAX_SIZE 30
|
||||
#define VISIBLE_DEVICE 0
|
||||
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
#ifdef __linux__
|
||||
/**
 * Queries the GPU device count from a forked child process.
 *
 * ROCR_VISIBLE_DEVICES and HIP_VISIBLE_DEVICES are removed from the
 * environment before forking; the child then reports the value returned by
 * hipGetDeviceCount() to the parent through a pipe.
 *
 * @param pdevCnt  Receives the device count reported by the child. Set to 1
 *                 when the pipe or the child process cannot be created and
 *                 to 0 when the child terminates without delivering a count.
 */
void getDeviceCount(int *pdevCnt) {
  int fd[2] = {-1, -1};
  int val = 0;

  // Without a pipe there is no channel to the child; use the same fallback
  // value as a failed fork().
  if (pipe(fd) != 0) {
    *pdevCnt = 1;
    return;
  }
  // disable visible_devices env from shell
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");

  pid_t childpid = fork();
  if (childpid > 0) {  // Parent
    close(fd[1]);
    // Blocks until the child writes the count or closes its end of the pipe.
    ssize_t got = read(fd[0], &val, sizeof(val));
    close(fd[0]);
    // Reap exactly the child created above.
    waitpid(childpid, NULL, 0);
    // A failed or short read means no complete count was delivered.
    *pdevCnt = (got == static_cast<ssize_t>(sizeof(val))) ? val : 0;
  } else if (childpid == 0) {  // Child
    int devCnt = 1;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor
    const bool sent =
        write(fd[1], &devCnt, sizeof(devCnt)) ==
        static_cast<ssize_t>(sizeof(devCnt));
    close(fd[1]);
    exit(sent ? 0 : 1);
  } else {  // fork() failed
    // Release both pipe ends; nobody else will ever use them.
    close(fd[0]);
    close(fd[1]);
    *pdevCnt = 1;
  }
}
|
||||
|
||||
bool testMaskedDevice(int actualNumGPUs) {
|
||||
bool testResult = true;
|
||||
int fd[2];
|
||||
pipe(fd);
|
||||
|
||||
pid_t cPid;
|
||||
cPid = fork();
|
||||
if (cPid == 0) { // child
|
||||
hipError_t err;
|
||||
char visibleDeviceString[MAX_SIZE] = {};
|
||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||
// disable visible_devices env from shell
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||
uint32_t linktype;
|
||||
uint32_t hopcount;
|
||||
for (int count = 1;
|
||||
count < actualNumGPUs; count++) {
|
||||
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
|
||||
VISIBLE_DEVICE+count, &linktype, &hopcount);
|
||||
REQUIRE(err == hipSuccess);
|
||||
}
|
||||
close(fd[0]);
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
|
||||
} else if (cPid > 0) { // parent
|
||||
close(fd[1]);
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
close(fd[0]);
|
||||
wait(NULL);
|
||||
|
||||
} else {
|
||||
printf("Info:fork() failed\n");
|
||||
testResult = false;
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
 * Negative test: calls hipExtGetLinkTypeAndHopCount() with the device
 * ordinals -1 and numDevices in either or both positions and requires every
 * call to return something other than hipSuccess.
 *
 * @param numDevices  Ordinal passed as the too-large device id.
 * @return true; a violated expectation is reported through REQUIRE.
 */
bool testhipInvalidDevice(int numDevices) {
  uint32_t linkTypeOut;
  uint32_t hopCountOut;
  hipError_t ret;

  SECTION("Invalid device number case 1") {  // device1 == -1
    ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 2") {  // device1 == numDevices
    ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linkTypeOut,
                                       &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 3") {  // device2 == -1
    ret = hipExtGetLinkTypeAndHopCount(0, -1, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 4") {  // device2 == numDevices
    ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linkTypeOut,
                                       &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 5") {  // both ordinals out of range
    ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linkTypeOut,
                                       &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  return true;
}
|
||||
|
||||
#ifdef __linux__
|
||||
/**
 * Negative test: passes nullptr as the linktype output pointer for the
 * device pair (0, 1) and requires the call not to return hipSuccess.
 *
 * @return true; a violated expectation is reported through REQUIRE.
 */
bool testhipInvalidLinkType() {
  uint32_t hopCountOut;
  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
                                                     &hopCountOut));
  return true;
}
|
||||
|
||||
/**
 * Negative test: passes nullptr as the hopcount output pointer for the
 * device pair (0, 1) and requires the call not to return hipSuccess.
 *
 * @return true; a violated expectation is reported through REQUIRE.
 */
bool testhipInvalidHopcount() {
  uint32_t linkTypeOut;
  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linkTypeOut,
                                                     nullptr));
  return true;
}
|
||||
|
||||
bool testhipSameDevice(int numGPUs) {
|
||||
hipError_t ret;
|
||||
uint32_t linktype = 0;
|
||||
uint32_t hopcount = 0;
|
||||
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
|
||||
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
|
||||
bool TestPassed = true;
|
||||
// Get the unique pair of devices
|
||||
for (int x = 0; x < numDevices; x++) {
|
||||
for (int y = x+1; y < numDevices; y++) {
|
||||
uint32_t linktype1 = 0, linktype2 = 0;
|
||||
uint32_t hopcount1 = 0, hopcount2 = 0;
|
||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
|
||||
&linktype1, &hopcount1));
|
||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
|
||||
&linktype2, &hopcount2));
|
||||
if (hopcount1 != hopcount2) {
|
||||
TestPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal Function
|
||||
*/
|
||||
bool validateLinkType(uint32_t linktype_Hip,
|
||||
RSMI_IO_LINK_TYPE linktype_RocmSmi) {
|
||||
bool TestPassed = false;
|
||||
|
||||
if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
|
||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
|
||||
TestPassed = true;
|
||||
} else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
|
||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
|
||||
TestPassed = true;
|
||||
} else {
|
||||
printf("linktype Hip = %u, linktype RocmSmi = %u\n",
|
||||
linktype_Hip, linktype_RocmSmi);
|
||||
TestPassed = false;
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
bool testhipLinkTypeHopcountDevice(int numDevices) {
|
||||
bool TestPassed = true;
|
||||
// Opening and initializing rocm-smi library
|
||||
void *lib_rocm_smi_hdl;
|
||||
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
|
||||
RSMI_IO_LINK_TYPE*);
|
||||
rsmi_status_t (*fntopo_init)(uint64_t);
|
||||
rsmi_status_t (*fntopo_shut_down)();
|
||||
|
||||
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
|
||||
RTLD_LAZY);
|
||||
REQUIRE(lib_rocm_smi_hdl);
|
||||
|
||||
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
|
||||
REQUIRE(fnsym);
|
||||
|
||||
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
|
||||
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
|
||||
|
||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
|
||||
REQUIRE(fnsym);
|
||||
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
|
||||
|
||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
|
||||
REQUIRE(fnsym);
|
||||
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
|
||||
|
||||
uint64_t init_flags = 0;
|
||||
rsmi_status_t retsmi_init;
|
||||
retsmi_init = fntopo_init(init_flags);
|
||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
|
||||
|
||||
// Use rocm-smi API rsmi_topo_get_link_type() to validate
|
||||
struct devicePair {
|
||||
int device1;
|
||||
int device2;
|
||||
};
|
||||
std::vector<struct devicePair> devicePairList;
|
||||
// Get the unique pair of devices
|
||||
for (int x = 0; x < numDevices; x++) {
|
||||
for (int y = x+1; y < numDevices; y++) {
|
||||
devicePairList.push_back({x, y});
|
||||
}
|
||||
}
|
||||
for (auto pos=devicePairList.begin();
|
||||
pos != devicePairList.end(); pos++) {
|
||||
uint32_t linktype1 = 0;
|
||||
uint32_t hopcount1 = 0;
|
||||
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
|
||||
uint64_t hopcount2 = 0;
|
||||
rsmi_status_t retsmi;
|
||||
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
|
||||
(*pos).device2, &linktype1, &hopcount1));
|
||||
retsmi = fntopo_get_link_type((*pos).device1,
|
||||
(*pos).device2, &hopcount2, &linktype2);
|
||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
|
||||
|
||||
// Validate linktype
|
||||
TestPassed = validateLinkType(linktype1, linktype2);
|
||||
}
|
||||
fntopo_shut_down();
|
||||
dlclose(lib_rocm_smi_hdl);
|
||||
return TestPassed;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
|
||||
* @{
|
||||
* @ingroup p2pTest
|
||||
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
|
||||
* Returns the link type and hop count between two devices
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
|
||||
* 1)Test Scenario to verify when device1 is visible and device2 is masked
|
||||
* 2)Test Scenario to verify Invalid Device Number(s)
|
||||
* 3)Test Scenario to verify when linktype = NULL
|
||||
* 4)Test Scenario to verify when hopcount = NULL
|
||||
* 5)Test Scenario to verify when device1 = device2
|
||||
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
|
||||
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
|
||||
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
|
||||
* GPUs with the output of rocm_smi tool.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
// Driver for the hipExtGetLinkTypeAndHopCount scenarios. Each scenario runs
// in its own SECTION; the test is skipped when fewer than two devices are
// reported. Everything after the invalid-device scenario is compiled only on
// Linux.
TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
  bool TestPassed = true;
  int numDevices = 0;

  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testhipInvalidDevice") {
    TestPassed = testhipInvalidDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
#ifdef __linux__
  // Re-query the count from a child process that has the *_VISIBLE_DEVICES
  // variables removed from its environment.
  getDeviceCount(&numDevices);
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testMaskedDevice") {
    TestPassed = testMaskedDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipInvalidLinkType") {
    TestPassed = testhipInvalidLinkType();
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipInvalidHopcount") {
    TestPassed = testhipInvalidHopcount();
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipSameDevice") {
    TestPassed = testhipSameDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
    TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipLinkTypeHopcountDevice") {
    TestPassed = testhipLinkTypeHopcountDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
#else
  printf("This test is skipped due to non linux environment.\n");
#endif
}
|
||||
|
||||
/**
|
||||
* End doxygen group p2pTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,110 +1,110 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
/**
|
||||
* rocm_smi.h enums
|
||||
*/
|
||||
typedef enum {
  RSMI_STATUS_SUCCESS = 0x0,              //!< Operation was successful
  RSMI_STATUS_INVALID_ARGS = 0x1,         //!< Passed-in arguments are not
                                          //!< valid
  RSMI_STATUS_NOT_SUPPORTED = 0x2,        //!< Requested information or action
                                          //!< is not available for the given
                                          //!< input on the given system
  RSMI_STATUS_FILE_ERROR = 0x3,           //!< Problem accessing a file; this
                                          //!< may be because the operation is
                                          //!< not supported by the running
                                          //!< Linux kernel version
  RSMI_STATUS_PERMISSION = 0x4,           //!< Permission denied (EACCES file
                                          //!< error); many functions require
                                          //!< root access to run
  RSMI_STATUS_OUT_OF_RESOURCES = 0x5,     //!< Unable to acquire memory or
                                          //!< another resource
  RSMI_STATUS_INTERNAL_EXCEPTION = 0x6,   //!< An internal exception was caught
  RSMI_STATUS_INPUT_OUT_OF_BOUNDS = 0x7,  //!< Provided input is outside the
                                          //!< allowable or safe range
  RSMI_STATUS_INIT_ERROR = 0x8,           //!< Error while rsmi initialized its
                                          //!< internal data structures
  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,  //!< Alias of
                                                       //!< INIT_ERROR
  RSMI_STATUS_NOT_YET_IMPLEMENTED = 0x9,  //!< Requested function is not yet
                                          //!< implemented in the current
                                          //!< system for the current devices
  RSMI_STATUS_NOT_FOUND = 0xA,            //!< An item was searched for but not
                                          //!< found
  RSMI_STATUS_INSUFFICIENT_SIZE = 0xB,    //!< Not enough resources were
                                          //!< available for the operation
  RSMI_STATUS_INTERRUPT = 0xC,            //!< An interrupt occurred during
                                          //!< execution of the function
  RSMI_STATUS_UNEXPECTED_SIZE = 0xD,      //!< An unexpected amount of data was
                                          //!< read
  RSMI_STATUS_NO_DATA = 0xE,              //!< No data was found for a given
                                          //!< input
  RSMI_STATUS_UNEXPECTED_DATA = 0xF,      //!< Data read or provided to the
                                          //!< function is not what was expected
  RSMI_STATUS_BUSY = 0x10,                //!< A resource or mutex could not be
                                          //!< acquired because it is already
                                          //!< in use
  RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11,   //!< An internal reference counter
                                          //!< exceeded INT32_MAX

  RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF,  //!< An unknown error occurred
} rsmi_status_t;
|
||||
|
||||
/**
|
||||
* Types for IO Link returned from rocm_smi
|
||||
*/
|
||||
typedef enum _RSMI_IO_LINK_TYPE {
  RSMI_IOLINK_TYPE_UNDEFINED = 0,       //!< Unknown link type
  RSMI_IOLINK_TYPE_PCIEXPRESS = 1,      //!< PCI Express
  RSMI_IOLINK_TYPE_XGMI = 2,            //!< XGMI
  RSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3,  //!< Number of IO link types
  RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF    //!< Max of IO link types
} RSMI_IO_LINK_TYPE;
|
||||
|
||||
/**
|
||||
* Types for IO Link returned from rocm runtime
|
||||
*/
|
||||
typedef enum {
  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,  //!< Hyper-transport bus type
  HSA_AMD_LINK_INFO_TYPE_QPI = 1,             //!< QPI bus type
  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,            //!< PCIe bus type
  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,       //!< Infiniband bus type
  HSA_AMD_LINK_INFO_TYPE_XGMI = 4             //!< xGMI link type
} hsa_amd_link_info_type_t;
|
||||
|
||||
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
/**
|
||||
* rocm_smi.h enums
|
||||
*/
|
||||
typedef enum {
  RSMI_STATUS_SUCCESS = 0x0,              //!< Operation was successful
  RSMI_STATUS_INVALID_ARGS = 0x1,         //!< Passed-in arguments are not
                                          //!< valid
  RSMI_STATUS_NOT_SUPPORTED = 0x2,        //!< Requested information or action
                                          //!< is not available for the given
                                          //!< input on the given system
  RSMI_STATUS_FILE_ERROR = 0x3,           //!< Problem accessing a file; this
                                          //!< may be because the operation is
                                          //!< not supported by the running
                                          //!< Linux kernel version
  RSMI_STATUS_PERMISSION = 0x4,           //!< Permission denied (EACCES file
                                          //!< error); many functions require
                                          //!< root access to run
  RSMI_STATUS_OUT_OF_RESOURCES = 0x5,     //!< Unable to acquire memory or
                                          //!< another resource
  RSMI_STATUS_INTERNAL_EXCEPTION = 0x6,   //!< An internal exception was caught
  RSMI_STATUS_INPUT_OUT_OF_BOUNDS = 0x7,  //!< Provided input is outside the
                                          //!< allowable or safe range
  RSMI_STATUS_INIT_ERROR = 0x8,           //!< Error while rsmi initialized its
                                          //!< internal data structures
  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,  //!< Alias of
                                                       //!< INIT_ERROR
  RSMI_STATUS_NOT_YET_IMPLEMENTED = 0x9,  //!< Requested function is not yet
                                          //!< implemented in the current
                                          //!< system for the current devices
  RSMI_STATUS_NOT_FOUND = 0xA,            //!< An item was searched for but not
                                          //!< found
  RSMI_STATUS_INSUFFICIENT_SIZE = 0xB,    //!< Not enough resources were
                                          //!< available for the operation
  RSMI_STATUS_INTERRUPT = 0xC,            //!< An interrupt occurred during
                                          //!< execution of the function
  RSMI_STATUS_UNEXPECTED_SIZE = 0xD,      //!< An unexpected amount of data was
                                          //!< read
  RSMI_STATUS_NO_DATA = 0xE,              //!< No data was found for a given
                                          //!< input
  RSMI_STATUS_UNEXPECTED_DATA = 0xF,      //!< Data read or provided to the
                                          //!< function is not what was expected
  RSMI_STATUS_BUSY = 0x10,                //!< A resource or mutex could not be
                                          //!< acquired because it is already
                                          //!< in use
  RSMI_STATUS_REFCOUNT_OVERFLOW = 0x11,   //!< An internal reference counter
                                          //!< exceeded INT32_MAX

  RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF,  //!< An unknown error occurred
} rsmi_status_t;
|
||||
|
||||
/**
|
||||
* Types for IO Link returned from rocm_smi
|
||||
*/
|
||||
typedef enum _RSMI_IO_LINK_TYPE {
  RSMI_IOLINK_TYPE_UNDEFINED = 0,       //!< Unknown link type
  RSMI_IOLINK_TYPE_PCIEXPRESS = 1,      //!< PCI Express
  RSMI_IOLINK_TYPE_XGMI = 2,            //!< XGMI
  RSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3,  //!< Number of IO link types
  RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF    //!< Max of IO link types
} RSMI_IO_LINK_TYPE;
|
||||
|
||||
/**
|
||||
* Types for IO Link returned from rocm runtime
|
||||
*/
|
||||
typedef enum {
  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,  //!< Hyper-transport bus type
  HSA_AMD_LINK_INFO_TYPE_QPI = 1,             //!< QPI bus type
  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,            //!< PCIe bus type
  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,       //!< Infiniband bus type
  HSA_AMD_LINK_INFO_TYPE_XGMI = 4             //!< xGMI link type
} hsa_amd_link_info_type_t;
|
||||
|
||||
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||
|
||||
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
Diff 로드
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
Diff 로드
@@ -1,178 +1,178 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
#include <string>
|
||||
|
||||
// Declarations only. Every check_* function shares one signature:
//   (const char** Combination_CO, int Combination_CO_size,
//    int max_thread_pos, int fast_math_present) -> bool
// NOTE(review): the meaning of the parameters and of the return value is not
// visible in this header - confirm against RtcFunctions.cpp.
bool check_architecture(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_rdc(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_off(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_on(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_fast(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_undef_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_header_dir(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_warning(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_Rpass_inline(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_max_thread(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_associative_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_associative_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

// Different signature from the check_* family: returns a std::string.
std::string checking_IR(
    const char* kername,
    const char** extra_CO_IRadded, int extra_CO_IRadded_size,
    const char** Combination_CO, int Combination_CO_size);
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
#include <string>
|
||||
|
||||
// Declarations only. Every check_* function shares one signature:
//   (const char** Combination_CO, int Combination_CO_size,
//    int max_thread_pos, int fast_math_present) -> bool
// NOTE(review): the meaning of the parameters and of the return value is not
// visible in this header - confirm against RtcFunctions.cpp.
bool check_architecture(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_rdc(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_off(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_on(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_fast(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_undef_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_header_dir(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_warning(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_Rpass_inline(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_max_thread(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_associative_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
|
||||
|
||||
bool check_associative_math_disabled(const char** Combination_CO,
|
||||
int Combination_CO_size,
|
||||
int max_thread_pos,
|
||||
int fast_math_present);
|
||||
|
||||
bool check_signed_zeros_enabled(const char** Combination_CO,
|
||||
int Combination_CO_size,
|
||||
int max_thread_pos,
|
||||
int fast_math_present);
|
||||
|
||||
bool check_signed_zeros_disabled(const char** Combination_CO,
|
||||
int Combination_CO_size, int max_thread_pos,
|
||||
int fast_math_present);
|
||||
|
||||
bool check_trapping_math_enabled(const char** Combination_CO,
|
||||
int Combination_CO_size, int max_thread_pos,
|
||||
int fast_math_present);
|
||||
|
||||
bool check_trapping_math_disabled(const char** Combination_CO,
|
||||
int Combination_CO_size, int max_thread_pos,
|
||||
int fast_math_present);
|
||||
|
||||
std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
|
||||
int extra_CO_IRadded_size, const char** Combination_CO,
|
||||
int Combination_CO_size);
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||
|
||||
@@ -1,163 +1,163 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
RtcKernels.h contains the strings that hold the kernel source code.
|
||||
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
#include <hip/hiprtc.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <math.h>
|
||||
|
||||
static constexpr auto max_thread_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void max_thread(int* a) {
|
||||
int BD = blockDim.x;
|
||||
*a = BD;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto denormals_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void denormals(double* base, double* power, double* result) {
|
||||
float denorm = powf(*base, *power);
|
||||
if (*result == 0 || *result ==1 )
|
||||
*result = (denorm==0) ? 0 : 1;
|
||||
else
|
||||
*result = powf(*base, *power);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto warning_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void warning() {
|
||||
#warning "Just printing a WARNING message onto the terminal";
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto fp32_div_sqrt_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void fp32_div_sqrt(float* result) {
|
||||
float input = 109.6209;
|
||||
*result = sqrt(input);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto error_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void error() {
|
||||
unsigned int a = -1;
|
||||
unsigned int b = +1;
|
||||
signed int c = -1;
|
||||
signed int d = +1;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto macro_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void macro(int *result) {
|
||||
*result = PI;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto undef_macro_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void undef_macro() {
|
||||
int a = Z;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto header_dir_string {
|
||||
R"(
|
||||
#include "RtcFact.h"
|
||||
extern "C"
|
||||
__global__ void header_dir(int* a, int* val) {
|
||||
*a = fact(*val);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto rdc_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void rdc(float* a, float* b, float* c) {
|
||||
*c = *a * *b;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto ffp_contract_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void ffp_contract(float* a, float* b, float* c) {
|
||||
*c = *a * *b + *c;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto slp_vectorize_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
|
||||
(*y).data.x = x.data.x + a.data.x;
|
||||
(*y).data.y = x.data.y + a.data.y;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto unsafe_atomic_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void unsafe_atomic(float* a) {
|
||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (id < 1000) {
|
||||
unsafeAtomicAdd(&a[id], 0.2f);
|
||||
}
|
||||
}
|
||||
)"};
|
||||
|
||||
// Kernel source for the amdgpu_ieee test: stores sqrt(a / b) into *c and
// prints it. The printed label now matches the division actually performed
// (it previously claimed "a * b").
static constexpr auto amdgpu_ieee_string {
R"(
extern "C"
__global__ void amdgpu_ieee(float* a, float* b, float* c) {
  *c = sqrt(*a / *b);
  printf("sqrt(a / b) = %f\n", *c);
}
)"};
|
||||
|
||||
static constexpr auto associative_math_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void associative_math(int* check) {
|
||||
double x = 0.1f;
|
||||
double y = 0.2f;
|
||||
double z = 0.3f;
|
||||
if((x*y)*z != x*(y*z))
|
||||
*check = 1;
|
||||
else *check = 0;
|
||||
}
|
||||
)"};
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
RtcKernels.h contains the strings that hold the kernel source code.
|
||||
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
#include <hip/hiprtc.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <math.h>
|
||||
|
||||
static constexpr auto max_thread_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void max_thread(int* a) {
|
||||
int BD = blockDim.x;
|
||||
*a = BD;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto denormals_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void denormals(double* base, double* power, double* result) {
|
||||
float denorm = powf(*base, *power);
|
||||
if (*result == 0 || *result ==1 )
|
||||
*result = (denorm==0) ? 0 : 1;
|
||||
else
|
||||
*result = powf(*base, *power);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto warning_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void warning() {
|
||||
#warning "Just printing a WARNING message onto the terminal";
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto fp32_div_sqrt_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void fp32_div_sqrt(float* result) {
|
||||
float input = 109.6209;
|
||||
*result = sqrt(input);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto error_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void error() {
|
||||
unsigned int a = -1;
|
||||
unsigned int b = +1;
|
||||
signed int c = -1;
|
||||
signed int d = +1;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto macro_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void macro(int *result) {
|
||||
*result = PI;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto undef_macro_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void undef_macro() {
|
||||
int a = Z;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto header_dir_string {
|
||||
R"(
|
||||
#include "RtcFact.h"
|
||||
extern "C"
|
||||
__global__ void header_dir(int* a, int* val) {
|
||||
*a = fact(*val);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto rdc_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void rdc(float* a, float* b, float* c) {
|
||||
*c = *a * *b;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto ffp_contract_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void ffp_contract(float* a, float* b, float* c) {
|
||||
*c = *a * *b + *c;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto slp_vectorize_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
|
||||
(*y).data.x = x.data.x + a.data.x;
|
||||
(*y).data.y = x.data.y + a.data.y;
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto unsafe_atomic_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void unsafe_atomic(float* a) {
|
||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (id < 1000) {
|
||||
unsafeAtomicAdd(&a[id], 0.2f);
|
||||
}
|
||||
}
|
||||
)"};
|
||||
|
||||
// Kernel source for the amdgpu_ieee test: stores sqrt(a / b) into *c and
// prints it. The printed label now matches the division actually performed
// (it previously claimed "a * b").
static constexpr auto amdgpu_ieee_string {
R"(
extern "C"
__global__ void amdgpu_ieee(float* a, float* b, float* c) {
  *c = sqrt(*a / *b);
  printf("sqrt(a / b) = %f\n", *c);
}
)"};
|
||||
|
||||
static constexpr auto associative_math_string {
|
||||
R"(
|
||||
extern "C"
|
||||
__global__ void associative_math(int* check) {
|
||||
double x = 0.1f;
|
||||
double y = 0.2f;
|
||||
double z = 0.3f;
|
||||
if((x*y)*z != x*(y*z))
|
||||
*check = 1;
|
||||
else *check = 0;
|
||||
}
|
||||
)"};
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||
|
||||
@@ -1,53 +1,53 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
#include <picojson.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
std::vector<std::string> get_combi_string_vec();
|
||||
|
||||
int split_comb_string(std::string option);
|
||||
|
||||
int calling_combination_function(std::vector<std::string> combi_vec_list);
|
||||
|
||||
int check_positive_CO_present(std::string find_string);
|
||||
|
||||
int check_negative_CO_present(std::string find_string);
|
||||
|
||||
bool calling_resp_function(const std::string block_name,
|
||||
const char** Combination_CO,
|
||||
int Combination_CO_size, int max_thread_position,
|
||||
int fast_math_present);
|
||||
|
||||
picojson::array getblock_fromconfig();
|
||||
|
||||
std::string get_string_parameters(std::string para_name_to_retrieve,
|
||||
std::string block_name);
|
||||
|
||||
picojson::array get_array_parameters(std::string para_name_to_retrieve,
|
||||
std::string block_name);
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
|
||||
*/
|
||||
|
||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
#include <picojson.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
std::vector<std::string> get_combi_string_vec();
|
||||
|
||||
int split_comb_string(std::string option);
|
||||
|
||||
int calling_combination_function(std::vector<std::string> combi_vec_list);
|
||||
|
||||
int check_positive_CO_present(std::string find_string);
|
||||
|
||||
int check_negative_CO_present(std::string find_string);
|
||||
|
||||
bool calling_resp_function(const std::string block_name,
|
||||
const char** Combination_CO,
|
||||
int Combination_CO_size, int max_thread_position,
|
||||
int fast_math_present);
|
||||
|
||||
picojson::array getblock_fromconfig();
|
||||
|
||||
std::string get_string_parameters(std::string para_name_to_retrieve,
|
||||
std::string block_name);
|
||||
|
||||
picojson::array get_array_parameters(std::string para_name_to_retrieve,
|
||||
std::string block_name);
|
||||
|
||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||
|
||||
@@ -1,25 +1,25 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
copy_coherency.cc
|
||||
)
|
||||
add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
|
||||
${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
|
||||
${HIP_PATH}/include -I
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
|
||||
${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
# only for AMD
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(AMD_SRC
|
||||
cache_coherency_cpu_gpu.cc
|
||||
cache_coherency_gpu_gpu.cc
|
||||
)
|
||||
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
|
||||
endif()
|
||||
|
||||
hip_add_exe_to_target(NAME synchronizationTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++14)
|
||||
add_dependencies(synchronizationTests memcpyInt.hsaco)
|
||||
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
copy_coherency.cc
|
||||
)
|
||||
add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
|
||||
${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
|
||||
${HIP_PATH}/include -I
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
|
||||
${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
|
||||
# only for AMD
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(AMD_SRC
|
||||
cache_coherency_cpu_gpu.cc
|
||||
cache_coherency_gpu_gpu.cc
|
||||
)
|
||||
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
|
||||
endif()
|
||||
|
||||
hip_add_exe_to_target(NAME synchronizationTests
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++14)
|
||||
add_dependencies(synchronizationTests memcpyInt.hsaco)
|
||||
|
||||
|
||||
@@ -1,282 +1,282 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Simple test for Fine Grained CPU-GPU coherency.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
typedef _Atomic(unsigned int) atomic_uint;
|
||||
|
||||
// Helper function to spin on address until address equals value.
|
||||
// If the address holds the value of -1, abort because the other thread failed.
|
||||
__device__ int
|
||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||
unsigned int value) {
|
||||
unsigned int compare;
|
||||
bool check = false;
|
||||
do {
|
||||
compare = value;
|
||||
check = __opencl_atomic_compare_exchange_strong(
|
||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (compare == -1)
|
||||
return -1;
|
||||
} while (!check);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Store data into A, system fence, and atomically mark flag.
|
||||
// This guarantees this global write is visible by device 1.
|
||||
A[i] = X[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on device 1's global write to B.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||
*dresult = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check device 1 properly stored Y into B.
|
||||
bool stored_data_matches = (B[i] == Y[i]);
|
||||
if (!stored_data_matches) {
|
||||
// If the data does not match, alert other thread and abort.
|
||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||
i, B[i], Y[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*dresult = -1;
|
||||
}
|
||||
// Otherwise tell the other thread to continue.
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||
*dresult = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*dresult = 0;
|
||||
}
|
||||
|
||||
// Host-side busy-wait: polls *address until it holds `value`.
// Returns 0 once `value` is observed, or -1 as soon as the location is seen
// holding -1 (all bits set), which means the other side reported a failure.
__host__ int
cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  for (;;) {
    // The compare-exchange replaces `value` with `value`, so memory is never
    // modified; on a mismatch the current contents are copied into `observed`.
    unsigned int observed = value;
    const bool matched = __atomic_compare_exchange_n(
        address, /*expected=*/ &observed, /*desired=*/ value,
        /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
    if (observed == -1) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
|
||||
|
||||
// This host thread runs only on a single CPU thread.
|
||||
__host__ void
|
||||
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
B[i] = Y[i];
|
||||
__atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
|
||||
if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
bool stored_data_matches = (A[i] == X[i]);
|
||||
if (!stored_data_matches) {
|
||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||
i, A[i], X[i]);
|
||||
__atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
__atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
|
||||
if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*hresult = 0;
|
||||
}
|
||||
|
||||
static bool cpu_to_gpu_coherency() {
|
||||
int *A_d, *B_d, *X_d, *Y_d;
|
||||
int *A_res, *A_h, *B_h, *X_h, *Y_h;
|
||||
unsigned int hresult, dresult;
|
||||
size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int numDevices = 0;
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices < 1) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Skip this test if feature is not supported.
|
||||
static int device0 = 0;
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, device0));
|
||||
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
|
||||
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
|
||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
|
||||
(hipHostMallocCoherent | hipHostMallocMapped)));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
|
||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
|
||||
// Initialize the arrays and atomic variables.
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
X_h[i] = 100000000 + i;
|
||||
Y_h[i] = 300000000 + i;
|
||||
}
|
||||
|
||||
// Initialize shared atomic flags between CPU and GPU.
|
||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||
AA1_h, 0));
|
||||
*AA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||
AA2_h, 0));
|
||||
*AA2_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||
BA1_h, 0));
|
||||
*BA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||
BA2_h, 0));
|
||||
*BA2_h = 0;
|
||||
|
||||
// Skip the first stream, ensure stream is non-blocking.
|
||||
hipStream_t stream[2];
|
||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
|
||||
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
|
||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
// SVM memory - host pointer is the same as device pointer to array A.
|
||||
A_h = A_d;
|
||||
HIP_CHECK(hipMalloc(&X_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
|
||||
|
||||
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Launch the GPU kernel.
|
||||
const unsigned blocks = 1;
|
||||
const unsigned threadsPerBlock = 1;
|
||||
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[1],
|
||||
A_d, B_d, X_d, Y_d, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
|
||||
// Check if launch failed.
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(dresult == 0);
|
||||
|
||||
// Do not sync the launched stream, instead run the cpu_thread.
|
||||
std::thread host_thread(cpu_thread,
|
||||
A_h, B_h, X_h, Y_h, N,
|
||||
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
|
||||
host_thread.detach();
|
||||
REQUIRE(hresult == 0);
|
||||
// Wait for Device side to finish.
|
||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||
|
||||
// Evaluate the resultant arrays A and B.
|
||||
A_res = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(A_res[i] == (100000000 + i));
|
||||
REQUIRE(B_h[i] == (300000000 + i));
|
||||
}
|
||||
|
||||
// Free all the device and host memory allocated.
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(X_d));
|
||||
HIP_CHECK(hipFree(Y_d));
|
||||
HIP_CHECK(hipHostFree(AA1_h));
|
||||
HIP_CHECK(hipHostFree(AA2_h));
|
||||
HIP_CHECK(hipHostFree(BA1_h));
|
||||
HIP_CHECK(hipHostFree(BA2_h));
|
||||
HIP_CHECK(hipHostFree(B_h));
|
||||
free(X_h);
|
||||
free(Y_h);
|
||||
free(A_res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This test runs on devices where XGMI enables fine-grained communication
|
||||
* between GPUs. This performs a message passing test.
|
||||
* Array A is allocated on Device 0, and remotely on host.
|
||||
* Device 0 also increments atomic ints AA1 and AA2.
|
||||
* Array B is allocated on host, and remotely on Device 0.
|
||||
* Host also increments atomic ints BA1 and BA2.
|
||||
* Kernel will launch on Device 0, and store array X into array A.
|
||||
* Host Thread will store array Y into array B.
|
||||
* Kernel will validate that the correct values of array Y are stored in B.
|
||||
* Host Thread will validate that the correct values of array X are stored in A.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
* - Test to be run only on AMD.
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_cache_coherency_cpu_gpu") {
  // Coherency between CPU and GPU sharing host and device memory.
  // The helper returns true when the scenario passed (or was skipped).
  const bool passed = cpu_to_gpu_coherency();
  REQUIRE(passed == true);
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Simple test for Fine Grained CPU-GPU coherency.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
typedef _Atomic(unsigned int) atomic_uint;
|
||||
|
||||
// Helper function to spin on address until address equals value.
|
||||
// If the address holds the value of -1, abort because the other thread failed.
|
||||
__device__ int
|
||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||
unsigned int value) {
|
||||
unsigned int compare;
|
||||
bool check = false;
|
||||
do {
|
||||
compare = value;
|
||||
check = __opencl_atomic_compare_exchange_strong(
|
||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (compare == -1)
|
||||
return -1;
|
||||
} while (!check);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Store data into A, system fence, and atomically mark flag.
|
||||
// This guarantees this global write is visible by device 1.
|
||||
A[i] = X[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on device 1's global write to B.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||
*dresult = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check device 1 properly stored Y into B.
|
||||
bool stored_data_matches = (B[i] == Y[i]);
|
||||
if (!stored_data_matches) {
|
||||
// If the data does not match, alert other thread and abort.
|
||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||
i, B[i], Y[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*dresult = -1;
|
||||
}
|
||||
// Otherwise tell the other thread to continue.
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||
*dresult = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*dresult = 0;
|
||||
}
|
||||
|
||||
// Host-side counterpart of the device spin helper.
// Spins until the word at `address` equals `value`, sampling it with an
// acquire compare-exchange whose expected and desired operands are both
// `value` (so the word is never modified).
// Returns 0 once the word matches `value`, or -1 as soon as the word is seen
// holding -1 (all bits set).
__host__ int
cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  for (;;) {
    unsigned int observed = value;
    const bool matched = __atomic_compare_exchange_n(
        address, /*expected=*/ &observed, /*desired=*/ value,
        /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
    // The abort marker takes priority over a match, so waiting for the
    // value -1 itself also yields -1.
    if (observed == ~0u) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
|
||||
|
||||
// This host thread runs only on a single CPU thread.
|
||||
__host__ void
|
||||
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
B[i] = Y[i];
|
||||
__atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
|
||||
if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
bool stored_data_matches = (A[i] == X[i]);
|
||||
if (!stored_data_matches) {
|
||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||
i, A[i], X[i]);
|
||||
__atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
__atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
|
||||
if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||
*hresult = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*hresult = 0;
|
||||
}
|
||||
|
||||
static bool cpu_to_gpu_coherency() {
|
||||
int *A_d, *B_d, *X_d, *Y_d;
|
||||
int *A_res, *A_h, *B_h, *X_h, *Y_h;
|
||||
unsigned int hresult, dresult;
|
||||
size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int numDevices = 0;
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices < 1) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Skip this test if feature is not supported.
|
||||
static int device0 = 0;
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, device0));
|
||||
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
|
||||
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
|
||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
|
||||
(hipHostMallocCoherent | hipHostMallocMapped)));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
|
||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
|
||||
// Initialize the arrays and atomic variables.
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
X_h[i] = 100000000 + i;
|
||||
Y_h[i] = 300000000 + i;
|
||||
}
|
||||
|
||||
// Initialize shared atomic flags between CPU and GPU.
|
||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||
AA1_h, 0));
|
||||
*AA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||
AA2_h, 0));
|
||||
*AA2_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||
BA1_h, 0));
|
||||
*BA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||
BA2_h, 0));
|
||||
*BA2_h = 0;
|
||||
|
||||
// Skip the first stream, ensure stream is non-blocking.
|
||||
hipStream_t stream[2];
|
||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
|
||||
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
|
||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
// SVM memory - host pointer is the same as device pointer to array A.
|
||||
A_h = A_d;
|
||||
HIP_CHECK(hipMalloc(&X_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
|
||||
|
||||
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Launch the GPU kernel.
|
||||
const unsigned blocks = 1;
|
||||
const unsigned threadsPerBlock = 1;
|
||||
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[1],
|
||||
A_d, B_d, X_d, Y_d, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
|
||||
// Check if launch failed.
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(dresult == 0);
|
||||
|
||||
// Do not sync the launched stream, instead run the cpu_thread.
|
||||
std::thread host_thread(cpu_thread,
|
||||
A_h, B_h, X_h, Y_h, N,
|
||||
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
|
||||
host_thread.detach();
|
||||
REQUIRE(hresult == 0);
|
||||
// Wait for Device side to finish.
|
||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||
|
||||
// Evaluate the resultant arrays A and B.
|
||||
A_res = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(A_res[i] == (100000000 + i));
|
||||
REQUIRE(B_h[i] == (300000000 + i));
|
||||
}
|
||||
|
||||
// Free all the device and host memory allocated.
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(X_d));
|
||||
HIP_CHECK(hipFree(Y_d));
|
||||
HIP_CHECK(hipHostFree(AA1_h));
|
||||
HIP_CHECK(hipHostFree(AA2_h));
|
||||
HIP_CHECK(hipHostFree(BA1_h));
|
||||
HIP_CHECK(hipHostFree(BA2_h));
|
||||
HIP_CHECK(hipHostFree(B_h));
|
||||
free(X_h);
|
||||
free(Y_h);
|
||||
free(A_res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This test runs on devices where XGMI enables fine-grained communication
|
||||
* between GPUs. This performs a message passing test.
|
||||
* Array A is allocated on Device 0, and remotely on host.
|
||||
* Device 0 also increments atomic ints AA1 and AA2.
|
||||
* Array B is allocated on host, and remotely on Device 0.
|
||||
* Host also increments atomic ints BA1 and BA2.
|
||||
* Kernel will launch on Device 0, and store array X into array A.
|
||||
* Host Thread will store array Y into array B.
|
||||
* Kernel will validate that the correct values of array Y are stored in B.
|
||||
* Host Thread will validate that the correct values of array X are stored in A.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
* - Test to be run only on AMD.
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_cache_coherency_cpu_gpu") {
  // Checks coherency between the CPU and a GPU that share host and device
  // memory; the test passes when the helper returns true.
  const bool passed = true;
  REQUIRE(passed == cpu_to_gpu_coherency());
}
|
||||
|
||||
@@ -1,294 +1,294 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Simple test for Fine Grained GPU-GPU coherency.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
typedef _Atomic(unsigned int) atomic_uint;
|
||||
|
||||
// Helper function to spin on address until address equals value.
|
||||
// If the address holds the value of -1, abort because the other thread failed.
|
||||
__device__ int
|
||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||
unsigned int value) {
|
||||
unsigned int compare;
|
||||
bool check = false;
|
||||
do {
|
||||
compare = value;
|
||||
check = __opencl_atomic_compare_exchange_strong(
|
||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (compare == -1)
|
||||
return -1;
|
||||
} while (!check);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Store data into A, system fence, and atomically mark flag.
|
||||
// This guarantees this global write is visible by device 1.
|
||||
A[i] = X[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on device 1's global write to B.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||
*cache0_result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check device 1 properly stored Y into B.
|
||||
bool stored_data_matches = (B[i] == Y[i]);
|
||||
if (!stored_data_matches) {
|
||||
// If the data does not match, alert other thread and abort.
|
||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||
i, B[i], Y[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*cache0_result = -1;
|
||||
}
|
||||
// Otherwise tell the other thread to continue.
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||
*cache0_result = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*cache0_result = 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
B[i] = Y[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||
*cache1_result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
bool stored_data_matches = (A[i] == X[i]);
|
||||
if (!stored_data_matches) {
|
||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||
i, A[i], X[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*cache1_result = -1;
|
||||
}
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||
*cache1_result = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*cache1_result = 0;
|
||||
}
|
||||
|
||||
static bool gpu_to_gpu_coherency() {
|
||||
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
|
||||
int *A_h, *B_h, *X_h, *Y_h;
|
||||
unsigned int cache0_result, cache1_result;
|
||||
size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int numDevices = 0;
|
||||
int numTestDevices = 2;
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices < numTestDevices) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Skip this test if either device does not support this feature.
|
||||
hipDeviceProp_t props0, props1;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
|
||||
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
|
||||
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
|
||||
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
|
||||
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
|
||||
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
|
||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate Host Side Memory.
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
|
||||
// Initialize the arrays and atomic variables.
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
X_h[i] = 100000000 + i;
|
||||
Y_h[i] = 300000000 + i;
|
||||
}
|
||||
|
||||
// Initialize shared atomic flags on host coherent memory.
|
||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||
AA1_h, 0));
|
||||
*AA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||
AA2_h, 0));
|
||||
*AA2_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||
BA1_h, 0));
|
||||
*BA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||
BA2_h, 0));
|
||||
*BA2_h = 0;
|
||||
|
||||
// Skip the first stream.
|
||||
hipStream_t stream[3];
|
||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||
|
||||
// Set-up Device 0.
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
// Enable P2P access to Device 1.
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
// Allocating Coherent Memory for Array A_d on Device 0.
|
||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
|
||||
|
||||
// Set-up Device 1.
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
// Enable P2P access to Device 0.
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
|
||||
// Allocating Coherent Memory for Array B_d on Device 1.
|
||||
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
|
||||
|
||||
// Transfer initialized data onto the device arrays.
|
||||
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Prepare and launch the device kernels.
|
||||
const unsigned blocks = 1;
|
||||
const unsigned threadsPerBlock = 1;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[1],
|
||||
A_d, B_d, X_d0, Y_d0, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
|
||||
// Check if launch failed.
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(cache0_result == 0);
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[2],
|
||||
A_d, B_d, X_d1, Y_d1, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(cache1_result == 0);
|
||||
|
||||
// Wait for kernels on both devices.
|
||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream[2]));
|
||||
|
||||
// Evaluate the resultant arrays A and B.
|
||||
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(A_h[i] == (100000000 + i));
|
||||
REQUIRE(B_h[i] == (300000000 + i));
|
||||
}
|
||||
|
||||
// Free all the device and host memory allocated.
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(X_d0));
|
||||
HIP_CHECK(hipFree(Y_d0));
|
||||
HIP_CHECK(hipFree(X_d1));
|
||||
HIP_CHECK(hipFree(Y_d1));
|
||||
HIP_CHECK(hipHostFree(AA1_h));
|
||||
HIP_CHECK(hipHostFree(AA2_h));
|
||||
HIP_CHECK(hipHostFree(BA1_h));
|
||||
HIP_CHECK(hipHostFree(BA2_h));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(X_h);
|
||||
free(Y_h);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This test runs on devices where XGMI enables fine-grained communication
|
||||
* between GPUs. This performs a message passing test.
|
||||
* Array A is allocated on Device 0, and remotely on Device 1.
|
||||
* Device 0 also increments atomic ints AA1 and AA2.
|
||||
* Array B is allocated on Device 1, and remotely on Device 0.
|
||||
* Device 1 also increments atomic ints BA1 and BA2.
|
||||
* Kernel 0 will launch on Device 0, and store array X into array A.
|
||||
* Kernel 1 will launch on Device 1, and store array Y into array B.
|
||||
* Kernel 0 will validate that the correct values of array Y are stored in B.
|
||||
* Kernel 1 will validate that the correct values of array X are stored in A.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
* - Test to be run only on AMD.
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
  // Checks coherency between GPUs accessing local or remote FB; the test
  // passes when the helper returns true.
  const bool passed = true;
  REQUIRE(passed == gpu_to_gpu_coherency());
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Simple test for Fine Grained GPU-GPU coherency.
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
typedef _Atomic(unsigned int) atomic_uint;
|
||||
|
||||
// Helper function to spin on address until address equals value.
|
||||
// If the address holds the value of -1, abort because the other thread failed.
|
||||
__device__ int
|
||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||
unsigned int value) {
|
||||
unsigned int compare;
|
||||
bool check = false;
|
||||
do {
|
||||
compare = value;
|
||||
check = __opencl_atomic_compare_exchange_strong(
|
||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (compare == -1)
|
||||
return -1;
|
||||
} while (!check);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Store data into A, system fence, and atomically mark flag.
|
||||
// This guarantees this global write is visible by device 1.
|
||||
A[i] = X[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on device 1's global write to B.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||
*cache0_result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
// Check device 1 properly stored Y into B.
|
||||
bool stored_data_matches = (B[i] == Y[i]);
|
||||
if (!stored_data_matches) {
|
||||
// If the data does not match, alert other thread and abort.
|
||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||
i, B[i], Y[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*cache0_result = -1;
|
||||
}
|
||||
// Otherwise tell the other thread to continue.
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||
*cache0_result = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*cache0_result = 0;
|
||||
}
|
||||
|
||||
// This kernel requires a single block, single thread dispatch.
|
||||
__global__ void
|
||||
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
|
||||
unsigned int *AA1, unsigned int *AA2,
|
||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
B[i] = Y[i];
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||
*cache1_result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
bool stored_data_matches = (A[i] == X[i]);
|
||||
if (!stored_data_matches) {
|
||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||
i, A[i], X[i]);
|
||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
*cache1_result = -1;
|
||||
}
|
||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
|
||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||
if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||
*cache1_result = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*cache1_result = 0;
|
||||
}
|
||||
|
||||
static bool gpu_to_gpu_coherency() {
|
||||
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
|
||||
int *A_h, *B_h, *X_h, *Y_h;
|
||||
unsigned int cache0_result, cache1_result;
|
||||
size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int numDevices = 0;
|
||||
int numTestDevices = 2;
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (numDevices < numTestDevices) {
|
||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Skip this test if either device does not support this feature.
|
||||
hipDeviceProp_t props0, props1;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
|
||||
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
|
||||
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
|
||||
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
|
||||
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
|
||||
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
|
||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate Host Side Memory.
|
||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||
|
||||
// Initialize the arrays and atomic variables.
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
X_h[i] = 100000000 + i;
|
||||
Y_h[i] = 300000000 + i;
|
||||
}
|
||||
|
||||
// Initialize shared atomic flags on host coherent memory.
|
||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||
AA1_h, 0));
|
||||
*AA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||
AA2_h, 0));
|
||||
*AA2_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||
BA1_h, 0));
|
||||
*BA1_h = 0;
|
||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||
BA2_h, 0));
|
||||
*BA2_h = 0;
|
||||
|
||||
// Skip the first stream.
|
||||
hipStream_t stream[3];
|
||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||
|
||||
// Set-up Device 0.
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
// Enable P2P access to Device 1.
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||
// Allocating Coherent Memory for Array A_d on Device 0.
|
||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
|
||||
|
||||
// Set-up Device 1.
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
// Enable P2P access to Device 0.
|
||||
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
|
||||
// Allocating Coherent Memory for Array B_d on Device 1.
|
||||
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
|
||||
Nbytes, hipDeviceMallocFinegrained);
|
||||
REQUIRE(status == hipSuccess);
|
||||
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
|
||||
|
||||
// Transfer initialized data onto the device arrays.
|
||||
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
// Prepare and launch the device kernels.
|
||||
const unsigned blocks = 1;
|
||||
const unsigned threadsPerBlock = 1;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[1],
|
||||
A_d, B_d, X_d0, Y_d0, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
|
||||
// Check if launch failed.
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(cache0_result == 0);
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[2],
|
||||
A_d, B_d, X_d1, Y_d1, N,
|
||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
REQUIRE(cache1_result == 0);
|
||||
|
||||
// Wait for kernels on both devices.
|
||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream[2]));
|
||||
|
||||
// Evaluate the resultant arrays A and B.
|
||||
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(A_h[i] == (100000000 + i));
|
||||
REQUIRE(B_h[i] == (300000000 + i));
|
||||
}
|
||||
|
||||
// Free all the device and host memory allocated.
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(B_d));
|
||||
HIP_CHECK(hipFree(X_d0));
|
||||
HIP_CHECK(hipFree(Y_d0));
|
||||
HIP_CHECK(hipFree(X_d1));
|
||||
HIP_CHECK(hipFree(Y_d1));
|
||||
HIP_CHECK(hipHostFree(AA1_h));
|
||||
HIP_CHECK(hipHostFree(AA2_h));
|
||||
HIP_CHECK(hipHostFree(BA1_h));
|
||||
HIP_CHECK(hipHostFree(BA2_h));
|
||||
free(A_h);
|
||||
free(B_h);
|
||||
free(X_h);
|
||||
free(Y_h);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - This test runs on devices where XGMI enables fine-grained communication
|
||||
* between GPUs. This performs a message passing test.
|
||||
* Array A is allocated on Device 0, and remotely on Device 1.
|
||||
* Device 0 also increments atomic ints AA1 and AA2.
|
||||
* Array B is allocated on Device 1, and remotely on Device 0.
|
||||
* Device 1 also increments atomic ints BA1 and BA2.
|
||||
* Kernel 0 will launch on Device 0, and store array X into array A.
|
||||
* Kernel 1 will launch on Device 1, and store array Y into array B.
|
||||
* Kernel 0 will validate that the correct values of array Y are stored in B.
|
||||
* Kernel 1 will validate that the correct values of array X are stored in A.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
* - Test to be run only on AMD.
|
||||
*/
|
||||
|
||||
// Runs the two-GPU coherency scenario and fails the test case unless the
// helper reports success.
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
  // Coherency between GPUs accessing local or remote FB.
  const bool coherent = gpu_to_gpu_coherency();
  REQUIRE(coherent);
}
|
||||
|
||||
@@ -1,340 +1,340 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
unsigned threadsPerBlock = 256;
|
||||
unsigned blocksPerCU = 6;
|
||||
|
||||
class MemcpyFunction {
|
||||
public:
|
||||
MemcpyFunction(const char* fileName, const char* functionName) {
|
||||
load(fileName, functionName);
|
||||
}
|
||||
void load(const char* fileName, const char* functionName);
|
||||
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
|
||||
|
||||
private:
|
||||
hipFunction_t _function;
|
||||
hipModule_t _module;
|
||||
};
|
||||
|
||||
|
||||
// Loads the code object at `fileName` into _module, then resolves
// `functionName` inside it and stores the handle in _function.
// Either step failing is reported through HIP_CHECK.
void MemcpyFunction::load(const char* fileName,
                          const char* functionName) {
  HIP_CHECK(
      hipModuleLoad(&_module, fileName));
  HIP_CHECK(
      hipModuleGetFunction(&_function, _module, functionName));
}
|
||||
|
||||
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
|
||||
struct {
|
||||
int* _dst;
|
||||
const int* _src;
|
||||
size_t _numElements;
|
||||
} args;
|
||||
|
||||
args._dst = dst;
|
||||
args._src = src;
|
||||
args._numElements = numElements;
|
||||
|
||||
size_t size = sizeof(args);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
|
||||
numElements);
|
||||
HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
|
||||
1, 1, 0, s, NULL,
|
||||
reinterpret_cast<void**>(&config)));
|
||||
}
|
||||
|
||||
bool g_warnOnFail = true;
|
||||
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
|
||||
|
||||
// Set value of array to specified 32-bit integer:
|
||||
// Writes `val` into ptr[0 .. numElements). Each thread starts at its global
// index and advances by the total number of launched threads, so the whole
// range is covered whatever grid size the caller picked.
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
  // Index math stays in size_t: the loop counter is size_t, and routing the
  // start/stride through a signed int would sign-extend into a huge value if
  // the product ever exceeded INT_MAX.
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t step = static_cast<size_t>(blockDim.x) * gridDim.x;
  for (size_t i = first; i < numElements; i += step) {
    ptr[i] = val;
  }
}
|
||||
|
||||
// Copies src[0 .. numElements) to dst. Each thread starts at its global index
// and advances by the total number of launched threads.
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
  // Index math stays in size_t to match the loop counter; a signed int
  // intermediate would be sign-extended if it ever exceeded INT_MAX.
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t step = static_cast<size_t>(blockDim.x) * gridDim.x;
  for (size_t i = first; i < numElements; i += step) {
    dst[i] = src[i];
  }
}
|
||||
|
||||
// Check arrays in reverse order, to more easily detect cases where
|
||||
// the copy is "partially" done.
|
||||
void checkReverse(const int* ptr, int numElements, int expected) {
|
||||
int mismatchCnt = 0;
|
||||
for (int i = numElements - 1; i >= 0; i--) {
|
||||
if (!g_warnOnFail) {
|
||||
REQUIRE(ptr[i] == expected);
|
||||
}
|
||||
if (++mismatchCnt >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Expands to a switch case that returns the enumerator's own spelling.
#define ENUM_CASE_STR(x) \
  case x:                \
    return #x

// Ways of moving data between device buffers; MAX_CmdType is the count.
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };

// Returns the enumerator name for `c`, or "UNKNOWN" for any other value
// (MAX_CmdType included).
const char* CmdTypeStr(CmdType c) {
  if (c == COPY) {
    return "COPY";
  }
  if (c == KERNEL) {
    return "KERNEL";
  }
  if (c == MODULE_KERNEL) {
    return "MODULE_KERNEL";
  }
  return "UNKNOWN";
}
|
||||
|
||||
enum SyncType {
|
||||
NONE,
|
||||
EVENT_QUERY,
|
||||
EVENT_SYNC,
|
||||
STREAM_WAIT_EVENT,
|
||||
STREAM_QUERY,
|
||||
STREAM_SYNC,
|
||||
DEVICE_SYNC,
|
||||
MAX_SyncType
|
||||
};
|
||||
|
||||
// Returns the enumerator name for `s`, or "UNKNOWN" for any other value
// (MAX_SyncType included).
const char* SyncTypeStr(SyncType s) {
  switch (s) {
    case NONE:
      return "NONE";
    case EVENT_QUERY:
      return "EVENT_QUERY";
    case EVENT_SYNC:
      return "EVENT_SYNC";
    case STREAM_WAIT_EVENT:
      return "STREAM_WAIT_EVENT";
    case STREAM_QUERY:
      return "STREAM_QUERY";
    case STREAM_SYNC:
      return "STREAM_SYNC";
    case DEVICE_SYNC:
      return "DEVICE_SYNC";
    default:
      break;
  }
  return "UNKNOWN";
}
|
||||
|
||||
// Enqueues one transfer of `numElements` ints from `src` to `dst` on stream
// `s`, using the mechanism selected by `cmd`:
//   COPY          - hipMemcpyAsync, device to device
//   KERNEL        - memcpyIntKernel launched directly
//   MODULE_KERNEL - memcpyIntKernel loaded from "memcpyInt.hsaco"
// Any other value only prints an informational message.
// The call does not wait for the transfer to finish.
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
            size_t numElements) {
  switch (cmd) {
    case COPY:
      HIP_CHECK(
          hipMemcpyAsync(dst, src, numElements * sizeof(int),
                         hipMemcpyDeviceToDevice, s));
      break;
    case KERNEL: {
      unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
                                              threadsPerBlock, numElements);
      hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
                         0, s, dst, src, numElements);
      // The launch itself returns nothing; a rejected launch is only visible
      // through the last-error state.
      HIP_CHECK(hipGetLastError());
    } break;
    case MODULE_KERNEL: {
      // Load the code object once and reuse it. Constructing the wrapper on
      // every call reloaded the module each time and never released it.
      // NOTE(review): assumes the device is not reset between calls.
      static MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
      g_moduleMemcpy.launch(dst, src, numElements, s);
    } break;
    default:
      printf("Info:unknown cmd=%d type", static_cast<int>(cmd));
  }
}
|
||||
|
||||
void resetInputs(int* Ad, int* Bd, int* Ch,
|
||||
size_t numElements, int expected) {
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
||||
threadsPerBlock, numElements);
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Ad, expected, numElements);
|
||||
// poison with bad value to ensure is overwritten correctly
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
|
||||
memset(Ch, 13, numElements * sizeof(int));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// Intended to test proper synchronization and cache flushing
|
||||
// between CMDA and CMDB. Both are of type CmdType. All commands copy memory,
|
||||
// using either hipMemcpyAsync or kernel implementations.
|
||||
// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
|
||||
// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
|
||||
// Correct result at the end is that Ch contains the
|
||||
// contents originally in Ad (integer 0x42)
|
||||
|
||||
// Runs one scenario: cmdA copies Ad -> Bd on stream1, the selected
// synchronization is applied, cmdB copies Bd -> Cd on stream2, and Cd is then
// copied to the host buffer Ch and checked against `expected`.
//
// cmdAType / cmdBType  mechanism used for each copy (see runCmd)
// syncType             synchronization applied between the two commands
// stream1 / stream2    streams for cmdA and for cmdB plus the final copy
// numElements          number of ints in each buffer
// Ad, Bd, Cd           device buffers; Ch is the host result buffer
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
                 hipStream_t stream1, hipStream_t stream2, int numElements,
                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
  hipEvent_t e;
  HIP_CHECK(hipEventCreateWithFlags(&e, 0));

  resetInputs(Ad, Bd, Ch, numElements, expected);

  const size_t sizeElements = numElements * sizeof(int);

  // resetInputs does not touch Cd, so it may still hold a previous run's
  // (correct) output and hide a cmdB that copied nothing. Overwrite it with a
  // byte pattern (0xA5A5A5A5 per int) before starting.
  HIP_CHECK(hipMemset(Cd, 0xA5, sizeElements));
  HIP_CHECK(hipDeviceSynchronize());

  // The size is reported in MB, so divide by 1024 twice.
  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n",  // NOLINT
          sizeElements, static_cast<double>(sizeElements / (1024.0 * 1024.0)),
          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));

  // Step A:
  runCmd(cmdAType, Bd, Ad, stream1, numElements);

  // Sync in-between?
  switch (syncType) {
    case NONE:
      break;
    case EVENT_QUERY: {
      // Busy-poll the event recorded after cmdA until it completes.
      hipError_t st = hipErrorNotReady;
      HIP_CHECK(hipEventRecord(e, stream1));
      do {
        st = hipEventQuery(e);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case EVENT_SYNC:
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipEventSynchronize(e));
      break;
    case STREAM_WAIT_EVENT:
      // Device-side ordering only: stream2 waits, the host does not.
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
      break;
    case STREAM_QUERY: {
      // Busy-poll stream1 until all of its work completes.
      hipError_t st = hipErrorNotReady;
      do {
        st = hipStreamQuery(stream1);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case STREAM_SYNC:
      HIP_CHECK(hipStreamSynchronize(stream1));
      break;
    case DEVICE_SYNC:
      HIP_CHECK(hipDeviceSynchronize());
      break;
    default:
      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
      // Release the event before bailing out; it was leaked here before.
      HIP_CHECK(hipEventDestroy(e));
      return;
  }

  // Step B:
  runCmd(cmdBType, Cd, Bd, stream2, numElements);

  // Copy back to host, use async copy to avoid any extra synchronization
  // that might mask issues.
  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
                           stream2));
  HIP_CHECK(hipStreamSynchronize(stream2));

  checkReverse(Ch, numElements, expected);

  HIP_CHECK(hipEventDestroy(e));
}
|
||||
|
||||
// Allocates three device buffers, one host buffer and two streams for
// `numElements` ints, runs one fixed scenario followed by every
// (cmdA, cmdB, sync) combination except the NONE and STREAM_QUERY sync
// types, then releases everything.
void testWrapper(size_t numElements) {
  const int expected = 0x42;
  const size_t sizeElements = numElements * sizeof(int);

  int *Ad, *Bd, *Cd, *Ch;
  HIP_CHECK(hipMalloc(&Ad, sizeElements));
  HIP_CHECK(hipMalloc(&Bd, sizeElements));
  HIP_CHECK(hipMalloc(&Cd, sizeElements));
  HIP_CHECK(hipHostMalloc(&Ch, sizeElements));

  hipStream_t stream1, stream2;
  HIP_CHECK(hipStreamCreate(&stream1));
  HIP_CHECK(hipStreamCreate(&stream2));
  HIP_CHECK(hipDeviceSynchronize());

  // Fixed scenario first.
  runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
              Ad, Bd, Cd, Ch, expected);

  // Full sweep over command pairs and the enabled sync types.
  for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
    for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
      for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
        const bool disabled = (syncMode == NONE) || (syncMode == STREAM_QUERY);
        if (disabled) {
          continue;
        }
        runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
                    stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
      }
    }
  }

  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  HIP_CHECK(hipFree(Cd));
  HIP_CHECK(hipHostFree(Ch));

  HIP_CHECK(hipStreamDestroy(stream1));
  HIP_CHECK(hipStreamDestroy(stream2));
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test cache management (fences) and synchronization between
|
||||
* kernel and copy commands. Exhaustively tests 3 command types
|
||||
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
|
||||
* another command, across a sweep of data sizes designed to stress
|
||||
* various levels of the memory hierarchy.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/copy_coherency.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
// Runs the full command/sync sweep once for each entry of g_elementSizes.
TEST_CASE("Unit_Copy_Coherency") {
  for (const int elementCount : g_elementSizes) {
    size_t numElements = elementCount;
    testWrapper(numElements);
  }
}
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
unsigned threadsPerBlock = 256;
|
||||
unsigned blocksPerCU = 6;
|
||||
|
||||
class MemcpyFunction {
|
||||
public:
|
||||
MemcpyFunction(const char* fileName, const char* functionName) {
|
||||
load(fileName, functionName);
|
||||
}
|
||||
void load(const char* fileName, const char* functionName);
|
||||
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
|
||||
|
||||
private:
|
||||
hipFunction_t _function;
|
||||
hipModule_t _module;
|
||||
};
|
||||
|
||||
|
||||
void MemcpyFunction::load(const char* fileName, const char* functionName) {
|
||||
HIP_CHECK(hipModuleLoad(&_module, fileName));
|
||||
HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
|
||||
}
|
||||
|
||||
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
|
||||
struct {
|
||||
int* _dst;
|
||||
const int* _src;
|
||||
size_t _numElements;
|
||||
} args;
|
||||
|
||||
args._dst = dst;
|
||||
args._src = src;
|
||||
args._numElements = numElements;
|
||||
|
||||
size_t size = sizeof(args);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
|
||||
numElements);
|
||||
HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
|
||||
1, 1, 0, s, NULL,
|
||||
reinterpret_cast<void**>(&config)));
|
||||
}
|
||||
|
||||
bool g_warnOnFail = true;
|
||||
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
|
||||
|
||||
// Set value of array to specified 32-bit integer:
|
||||
// Writes `val` into ptr[0 .. numElements). Each thread starts at its global
// index and advances by the total number of launched threads, so the whole
// range is covered whatever grid size the caller picked.
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
  // Index math stays in size_t: the loop counter is size_t, and routing the
  // start/stride through a signed int would sign-extend into a huge value if
  // the product ever exceeded INT_MAX.
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t step = static_cast<size_t>(blockDim.x) * gridDim.x;
  for (size_t i = first; i < numElements; i += step) {
    ptr[i] = val;
  }
}
|
||||
|
||||
// Copies src[0 .. numElements) to dst. Each thread starts at its global index
// and advances by the total number of launched threads.
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
  // Index math stays in size_t to match the loop counter; a signed int
  // intermediate would be sign-extended if it ever exceeded INT_MAX.
  const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  const size_t step = static_cast<size_t>(blockDim.x) * gridDim.x;
  for (size_t i = first; i < numElements; i += step) {
    dst[i] = src[i];
  }
}
|
||||
|
||||
// Check arrays in reverse order, to more easily detect cases where
|
||||
// the copy is "partially" done.
|
||||
void checkReverse(const int* ptr, int numElements, int expected) {
|
||||
int mismatchCnt = 0;
|
||||
for (int i = numElements - 1; i >= 0; i--) {
|
||||
if (!g_warnOnFail) {
|
||||
REQUIRE(ptr[i] == expected);
|
||||
}
|
||||
if (++mismatchCnt >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define ENUM_CASE_STR(x) \
|
||||
case x: \
|
||||
return #x
|
||||
|
||||
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };
|
||||
|
||||
const char* CmdTypeStr(CmdType c) {
|
||||
switch (c) {
|
||||
ENUM_CASE_STR(COPY);
|
||||
ENUM_CASE_STR(KERNEL);
|
||||
ENUM_CASE_STR(MODULE_KERNEL);
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
enum SyncType {
|
||||
NONE,
|
||||
EVENT_QUERY,
|
||||
EVENT_SYNC,
|
||||
STREAM_WAIT_EVENT,
|
||||
STREAM_QUERY,
|
||||
STREAM_SYNC,
|
||||
DEVICE_SYNC,
|
||||
MAX_SyncType
|
||||
};
|
||||
|
||||
const char* SyncTypeStr(SyncType s) {
|
||||
switch (s) {
|
||||
ENUM_CASE_STR(NONE);
|
||||
ENUM_CASE_STR(EVENT_QUERY);
|
||||
ENUM_CASE_STR(EVENT_SYNC);
|
||||
ENUM_CASE_STR(STREAM_WAIT_EVENT);
|
||||
ENUM_CASE_STR(STREAM_QUERY);
|
||||
ENUM_CASE_STR(STREAM_SYNC);
|
||||
ENUM_CASE_STR(DEVICE_SYNC);
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueues one transfer of `numElements` ints from `src` to `dst` on stream
// `s`, using the mechanism selected by `cmd`:
//   COPY          - hipMemcpyAsync, device to device
//   KERNEL        - memcpyIntKernel launched directly
//   MODULE_KERNEL - memcpyIntKernel loaded from "memcpyInt.hsaco"
// Any other value only prints an informational message.
// The call does not wait for the transfer to finish.
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
            size_t numElements) {
  switch (cmd) {
    case COPY:
      HIP_CHECK(
          hipMemcpyAsync(dst, src, numElements * sizeof(int),
                         hipMemcpyDeviceToDevice, s));
      break;
    case KERNEL: {
      unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
                                              threadsPerBlock, numElements);
      hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
                         0, s, dst, src, numElements);
      // The launch itself returns nothing; a rejected launch is only visible
      // through the last-error state.
      HIP_CHECK(hipGetLastError());
    } break;
    case MODULE_KERNEL: {
      // Load the code object once and reuse it. Constructing the wrapper on
      // every call reloaded the module each time and never released it.
      // NOTE(review): assumes the device is not reset between calls.
      static MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
      g_moduleMemcpy.launch(dst, src, numElements, s);
    } break;
    default:
      printf("Info:unknown cmd=%d type", static_cast<int>(cmd));
  }
}
|
||||
|
||||
void resetInputs(int* Ad, int* Bd, int* Ch,
|
||||
size_t numElements, int expected) {
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
||||
threadsPerBlock, numElements);
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Ad, expected, numElements);
|
||||
// poison with bad value to ensure is overwritten correctly
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
|
||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
|
||||
memset(Ch, 13, numElements * sizeof(int));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// Intended to test proper synchronization and cache flushing
|
||||
// between CMDA and CMDB. Both are of type CmdType. All commands copy memory,
|
||||
// using either hipMemcpyAsync or kernel implementations.
|
||||
// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
|
||||
// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
|
||||
// Correct result at the end is that Ch contains the
|
||||
// contents originally in Ad (integer 0x42)
|
||||
|
||||
// Runs one scenario: cmdA copies Ad -> Bd on stream1, the selected
// synchronization is applied, cmdB copies Bd -> Cd on stream2, and Cd is then
// copied to the host buffer Ch and checked against `expected`.
//
// cmdAType / cmdBType  mechanism used for each copy (see runCmd)
// syncType             synchronization applied between the two commands
// stream1 / stream2    streams for cmdA and for cmdB plus the final copy
// numElements          number of ints in each buffer
// Ad, Bd, Cd           device buffers; Ch is the host result buffer
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
                 hipStream_t stream1, hipStream_t stream2, int numElements,
                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
  hipEvent_t e;
  HIP_CHECK(hipEventCreateWithFlags(&e, 0));

  resetInputs(Ad, Bd, Ch, numElements, expected);

  const size_t sizeElements = numElements * sizeof(int);

  // resetInputs does not touch Cd, so it may still hold a previous run's
  // (correct) output and hide a cmdB that copied nothing. Overwrite it with a
  // byte pattern (0xA5A5A5A5 per int) before starting.
  HIP_CHECK(hipMemset(Cd, 0xA5, sizeElements));
  HIP_CHECK(hipDeviceSynchronize());

  // The size is reported in MB, so divide by 1024 twice.
  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n",  // NOLINT
          sizeElements, static_cast<double>(sizeElements / (1024.0 * 1024.0)),
          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));

  // Step A:
  runCmd(cmdAType, Bd, Ad, stream1, numElements);

  // Sync in-between?
  switch (syncType) {
    case NONE:
      break;
    case EVENT_QUERY: {
      // Busy-poll the event recorded after cmdA until it completes.
      hipError_t st = hipErrorNotReady;
      HIP_CHECK(hipEventRecord(e, stream1));
      do {
        st = hipEventQuery(e);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case EVENT_SYNC:
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipEventSynchronize(e));
      break;
    case STREAM_WAIT_EVENT:
      // Device-side ordering only: stream2 waits, the host does not.
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
      break;
    case STREAM_QUERY: {
      // Busy-poll stream1 until all of its work completes.
      hipError_t st = hipErrorNotReady;
      do {
        st = hipStreamQuery(stream1);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case STREAM_SYNC:
      HIP_CHECK(hipStreamSynchronize(stream1));
      break;
    case DEVICE_SYNC:
      HIP_CHECK(hipDeviceSynchronize());
      break;
    default:
      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
      // Release the event before bailing out; it was leaked here before.
      HIP_CHECK(hipEventDestroy(e));
      return;
  }

  // Step B:
  runCmd(cmdBType, Cd, Bd, stream2, numElements);

  // Copy back to host, use async copy to avoid any extra synchronization
  // that might mask issues.
  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
                           stream2));
  HIP_CHECK(hipStreamSynchronize(stream2));

  checkReverse(Ch, numElements, expected);

  HIP_CHECK(hipEventDestroy(e));
}
|
||||
|
||||
void testWrapper(size_t numElements) {
|
||||
const size_t sizeElements = numElements * sizeof(int);
|
||||
const int expected = 0x42;
|
||||
int *Ad, *Bd, *Cd, *Ch;
|
||||
|
||||
HIP_CHECK(hipMalloc(&Ad, sizeElements));
|
||||
HIP_CHECK(hipMalloc(&Bd, sizeElements));
|
||||
HIP_CHECK(hipMalloc(&Cd, sizeElements));
|
||||
HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
|
||||
|
||||
hipStream_t stream1, stream2;
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
|
||||
Ad, Bd, Cd, Ch, expected);
|
||||
|
||||
for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
|
||||
for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
|
||||
for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
|
||||
switch (syncMode) {
|
||||
// case NONE::
|
||||
case EVENT_QUERY:
|
||||
case EVENT_SYNC:
|
||||
case STREAM_WAIT_EVENT:
|
||||
// case STREAM_QUERY:
|
||||
case STREAM_SYNC:
|
||||
case DEVICE_SYNC:
|
||||
runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
|
||||
stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
|
||||
numElements, Ad, Bd, Cd, Ch, expected);
|
||||
runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
|
||||
Ad, Bd, Cd, Ch, expected);
|
||||
runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
|
||||
numElements, Ad, Bd, Cd, Ch, expected);
|
||||
runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
|
||||
Ad, Bd, Cd, Ch, expected);
|
||||
#endif
|
||||
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipFree(Cd));
|
||||
HIP_CHECK(hipHostFree(Ch));
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test cache management (fences) and synchronization between
|
||||
* kernel and copy commands. Exhaustively tests 3 command types
|
||||
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
|
||||
* another command, across a sweep of data sizes designed to stress
|
||||
* various levels of the memory hierarchy.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/synchronization/copy_coherency.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.5
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_Copy_Coherency") {
|
||||
for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) {
|
||||
size_t numElements = g_elementSizes[index];
|
||||
testWrapper(numElements);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,182 +1,182 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
#define WIDTH 4
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
// Each thread loads one input element into `val`, then for every output slot
// (row, col) takes part in a __shfl that reads `val` from source index
// col * width + row and stores the result to out[row * width + col].
// Every participating thread performs every store.
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
  const int tid = blockDim.x * blockIdx.x + threadIdx.x;
  T val = in[tid];
  for (int row = 0; row < width; row++) {
    for (int col = 0; col < width; col++) {
      out[row * width + col] = __shfl(val, col * width + row);
    }
  }
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
// Host reference: writes the transpose of the width x width row-major matrix
// `input` into `output`. Elements are visited in source row-major order.
template <typename T>
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int srcRow = 0; srcRow < width; ++srcRow) {
    for (unsigned int srcCol = 0; srcCol < width; ++srcCol) {
      const unsigned int from = srcRow * width + srcCol;
      const unsigned int to = srcCol * width + srcRow;
      output[to] = input[from];
    }
  }
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
// Returns how many of the first NUM elements differ between the two arrays
// (0 means they match).
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const bool differs = TransposeMatrix[idx] != cpuTransposeMatrix[idx];
    mismatches += differs ? 1 : 0;
  }
  return mismatches;
}
|
||||
|
||||
// __half specialization: both sides are converted to float before comparing.
// Returns how many of the first NUM elements differ.
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const float gpuValue = __half2float(TransposeMatrix[idx]);
    const float cpuValue = __half2float(cpuTransposeMatrix[idx]);
    if (gpuValue != cpuValue) {
      ++mismatches;
    }
  }
  return mismatches;
}
|
||||
|
||||
// Fills Matrix[0..NUM) with (index + per-type offset from getFactor()).
template <typename T> void init(T* Matrix) {
  T offset;
  getFactor(&offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = static_cast<T>(idx) + offset;
  }
}
|
||||
|
||||
// __half specialization: the sum (index + offset) is computed in float and
// then assigned to the __half element.
template <> void init(__half* Matrix) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = idx + offsetAsFloat;
  }
}
|
||||
|
||||
template <typename T> static void runTest() {
|
||||
T* Matrix;
|
||||
T* TransposeMatrix;
|
||||
T* cpuTransposeMatrix;
|
||||
|
||||
T* gpuMatrix;
|
||||
T* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
int errors = 0;
|
||||
|
||||
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
|
||||
init(Matrix);
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
|
||||
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
|
||||
// free the resources on device side
|
||||
HIP_CHECK(hipFree(gpuMatrix));
|
||||
HIP_CHECK(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl(T var, int srcLane, int width=warpSize)` -
|
||||
* Contains warp __shfl functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflTests.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTest<T>().
TEST_CASE("Unit_hipShflTests") {
  SECTION("run test for int") {
    runTest<int>();
  }
  SECTION("run test for float") {
    runTest<float>();
  }
  SECTION("run test for double") {
    runTest<double>();
  }
  // Test added to support half datatype.
  SECTION("run test for __half") {
    runTest<__half>();
  }
  SECTION("run test for int64_t") {
    runTest<int64_t>();
  }
  SECTION("run test for unsigned int") {
    runTest<unsigned int>();
  }
  SECTION("run test for uint64_t") {
    runTest<uint64_t>();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group ShflTest.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
#define WIDTH 4
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
T val = in[x];
|
||||
for (int i = 0; i < width; i++) {
|
||||
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
|
||||
}
|
||||
}
|
||||
|
||||
// Host reference transpose: reads `input` as a width x width row-major
// matrix and stores its transpose in `output`.
template <typename T>
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const T* srcRow = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = srcRow[col];
    }
  }
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
// Counts the indices in [0, NUM) at which the two buffers differ.
// A return value of 0 means the buffers are element-wise equal.
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    mismatches += (TransposeMatrix[idx] != cpuTransposeMatrix[idx]) ? 1 : 0;
  }
  return mismatches;
}
|
||||
|
||||
// __half specialization: elements are converted to float before comparing.
// Returns the number of differing positions in [0, NUM).
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const float gpuValue = __half2float(TransposeMatrix[idx]);
    const float cpuValue = __half2float(cpuTransposeMatrix[idx]);
    if (gpuValue != cpuValue) {  // NOLINT
      ++mismatches;
    }
  }
  return mismatches;
}
|
||||
|
||||
// Fills Matrix[0..NUM) with (index + per-type offset from getFactor()).
template <typename T> void init(T* Matrix) {
  T offset;
  getFactor(&offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = static_cast<T>(idx) + offset;
  }
}
|
||||
|
||||
// __half specialization: the sum (index + offset) is computed in float and
// then assigned to the __half element.
template <> void init(__half* Matrix) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = idx + offsetAsFloat;
  }
}
|
||||
|
||||
template <typename T> static void runTest() {
|
||||
T* Matrix;
|
||||
T* TransposeMatrix;
|
||||
T* cpuTransposeMatrix;
|
||||
|
||||
T* gpuMatrix;
|
||||
T* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
int errors = 0;
|
||||
|
||||
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||
|
||||
init(Matrix);
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
|
||||
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
|
||||
// free the resources on device side
|
||||
HIP_CHECK(hipFree(gpuMatrix));
|
||||
HIP_CHECK(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl(T var, int srcLane, int width=warpSize)` -
|
||||
* Contains warp __shfl functions.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflTests.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTest<T>().
TEST_CASE("Unit_hipShflTests") {
  SECTION("run test for int") {
    runTest<int>();
  }
  SECTION("run test for float") {
    runTest<float>();
  }
  SECTION("run test for double") {
    runTest<double>();
  }
  // Test added to support half datatype.
  SECTION("run test for __half") {
    runTest<__half>();
  }
  SECTION("run test for int64_t") {
    runTest<int64_t>();
  }
  SECTION("run test for unsigned int") {
    runTest<unsigned int>();
  }
  SECTION("run test for uint64_t") {
    runTest<uint64_t>();
  }
}
|
||||
|
||||
/**
|
||||
* End doxygen group ShflTest.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,241 +1,241 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
const int size = 32;
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_down for deltas size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflDownSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int delta = size / 2;
  while (delta > 0) {
    acc += __shfl_down(acc, delta, size);
    delta /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_up for deltas size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflUpSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int delta = size / 2;
  while (delta > 0) {
    acc += __shfl_up(acc, delta, size);
    delta /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_xor for masks size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflXorSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int mask = size / 2;
  while (mask > 0) {
    acc += __shfl_xor(acc, mask, size);
    mask /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
// Fills a[0..size) with (index + per-type offset from getFactor()) and
// returns the sum of the stored values, accumulated in T.
template <typename T> T sum(T* a) {
  T offset;
  getFactor(&offset);
  T total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offset;
    total += a[idx];
  }
  return total;
}
|
||||
|
||||
// __half specialization: each addition is carried out in float and the
// result is stored back into a __half.
template <> __half sum(__half* a) {
  __half offset;
  getFactor(&offset);
  __half total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + __half2float(offset);
    total = __half2float(total) + __half2float(a[idx]);
  }
  return total;
}
|
||||
|
||||
// Returns true when the two sums differ and false when they are equal
// (callers treat a result of 0 / false as success).
template <typename T> bool compare(T gpuSum, T cpuSum) {
  return gpuSum != cpuSum;
}
|
||||
|
||||
// __half specialization: both sums are converted to float before the
// comparison. Returns true on mismatch.
template <> bool compare(__half gpuSum, __half cpuSum) {
  const float gpuValue = __half2float(gpuSum);
  const float cpuValue = __half2float(cpuSum);
  return gpuValue != cpuValue;
}
|
||||
|
||||
template <typename T> static void runTestShflUp() {
|
||||
const int size = 32;
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T> static void runTestShflDown() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T> static void runTestShflXor() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_up function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_up warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflUp<T>().
TEST_CASE("Unit_runTestShfl_up") {
  SECTION("runTestShflUp for int") {
    runTestShflUp<int>();
  }
  SECTION("runTestShflUp for float") {
    runTestShflUp<float>();
  }
  SECTION("runTestShflUp for double") {
    runTestShflUp<double>();
  }
  SECTION("runTestShflUp for __half") {
    runTestShflUp<__half>();
  }
  SECTION("runTestShflUp for int64_t") {
    runTestShflUp<int64_t>();
  }
  SECTION("runTestShflUp for unsigned int") {
    runTestShflUp<unsigned int>();
  }
  SECTION("runTestShflUp for uint64_t") {
    runTestShflUp<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_down function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_down warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflDown<T>().
TEST_CASE("Unit_runTestShfl_Down") {
  SECTION("runTestShflDown for int") {
    runTestShflDown<int>();
  }
  SECTION("runTestShflDown for float") {
    runTestShflDown<float>();
  }
  SECTION("runTestShflDown for double") {
    runTestShflDown<double>();
  }
  SECTION("runTestShflDown for __half") {
    runTestShflDown<__half>();
  }
  SECTION("runTestShflDown for int64_t") {
    runTestShflDown<int64_t>();
  }
  SECTION("runTestShflDown for unsigned int") {
    runTestShflDown<unsigned int>();
  }
  SECTION("runTestShflDown for uint64_t") {
    runTestShflDown<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
||||
* Contains warp __shfl_xor function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_xor warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflXor<T>().
TEST_CASE("Unit_runTestShfl_Xor") {
  SECTION("runTestShflXor for int") {
    runTestShflXor<int>();
  }
  SECTION("runTestShflXor for float") {
    runTestShflXor<float>();
  }
  SECTION("runTestShflXor for double") {
    runTestShflXor<double>();
  }
  SECTION("runTestShflXor for __half") {
    runTestShflXor<__half>();
  }
  SECTION("runTestShflXor for int64_t") {
    runTestShflXor<int64_t>();
  }
  SECTION("runTestShflXor for unsigned int") {
    runTestShflXor<unsigned int>();
  }
  SECTION("runTestShflXor for uint64_t") {
    runTestShflXor<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
const int size = 32;
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_down for deltas size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflDownSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int delta = size / 2;
  while (delta > 0) {
    acc += __shfl_down(acc, delta, size);
    delta /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_up for deltas size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflUpSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int delta = size / 2;
  while (delta > 0) {
    acc += __shfl_up(acc, delta, size);
    delta /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
// Kernel: each thread loads a[threadIdx.x], repeatedly adds the value
// returned by __shfl_xor for masks size/2, size/4, ..., 1, and writes the
// accumulated value back to its own slot.
template <typename T> __global__ void shflXorSum(T* a, int size) {
  const unsigned int tid = threadIdx.x;
  T acc = a[tid];
  int mask = size / 2;
  while (mask > 0) {
    acc += __shfl_xor(acc, mask, size);
    mask /= 2;
  }
  a[tid] = acc;
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
// Fills a[0..size) with (index + per-type offset from getFactor()) and
// returns the sum of the stored values, accumulated in T.
template <typename T> T sum(T* a) {
  T offset;
  getFactor(&offset);
  T total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offset;
    total += a[idx];
  }
  return total;
}
|
||||
|
||||
// __half specialization: each addition is carried out in float and the
// result is stored back into a __half.
template <> __half sum(__half* a) {
  __half offset;
  getFactor(&offset);
  __half total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + __half2float(offset);
    total = __half2float(total) + __half2float(a[idx]);
  }
  return total;
}
|
||||
|
||||
// Returns true when the two sums differ and false when they are equal
// (callers treat a result of 0 / false as success).
template <typename T> bool compare(T gpuSum, T cpuSum) {
  return gpuSum != cpuSum;
}
|
||||
|
||||
// __half specialization: both sums are converted to float before the
// comparison. Returns true on mismatch.
template <> bool compare(__half gpuSum, __half cpuSum) {
  const float gpuValue = __half2float(gpuSum);
  const float cpuValue = __half2float(cpuSum);
  return gpuValue != cpuValue;
}
|
||||
|
||||
template <typename T> static void runTestShflUp() {
|
||||
const int size = 32;
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T> static void runTestShflDown() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T> static void runTestShflXor() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
|
||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_up function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_up warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflUp<T>().
TEST_CASE("Unit_runTestShfl_up") {
  SECTION("runTestShflUp for int") {
    runTestShflUp<int>();
  }
  SECTION("runTestShflUp for float") {
    runTestShflUp<float>();
  }
  SECTION("runTestShflUp for double") {
    runTestShflUp<double>();
  }
  SECTION("runTestShflUp for __half") {
    runTestShflUp<__half>();
  }
  SECTION("runTestShflUp for int64_t") {
    runTestShflUp<int64_t>();
  }
  SECTION("runTestShflUp for unsigned int") {
    runTestShflUp<unsigned int>();
  }
  SECTION("runTestShflUp for uint64_t") {
    runTestShflUp<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_down function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_down warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflDown<T>().
TEST_CASE("Unit_runTestShfl_Down") {
  SECTION("runTestShflDown for int") {
    runTestShflDown<int>();
  }
  SECTION("runTestShflDown for float") {
    runTestShflDown<float>();
  }
  SECTION("runTestShflDown for double") {
    runTestShflDown<double>();
  }
  SECTION("runTestShflDown for __half") {
    runTestShflDown<__half>();
  }
  SECTION("runTestShflDown for int64_t") {
    runTestShflDown<int64_t>();
  }
  SECTION("runTestShflDown for unsigned int") {
    runTestShflDown<unsigned int>();
  }
  SECTION("runTestShflDown for uint64_t") {
    runTestShflDown<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
||||
* Contains warp __shfl_xor function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify __shfl_xor warp functions for different datatypes.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
* - Guarding this test against CUDA with reference to the mentioned
|
||||
* ticket SWDEV-379177
|
||||
*/
|
||||
|
||||
// One Catch2 section per element type exercised through runTestShflXor<T>().
TEST_CASE("Unit_runTestShfl_Xor") {
  SECTION("runTestShflXor for int") {
    runTestShflXor<int>();
  }
  SECTION("runTestShflXor for float") {
    runTestShflXor<float>();
  }
  SECTION("runTestShflXor for double") {
    runTestShflXor<double>();
  }
  SECTION("runTestShflXor for __half") {
    runTestShflXor<__half>();
  }
  SECTION("runTestShflXor for int64_t") {
    runTestShflXor<int64_t>();
  }
  SECTION("runTestShflXor for unsigned int") {
    runTestShflXor<unsigned int>();
  }
  SECTION("runTestShflXor for uint64_t") {
    runTestShflXor<uint64_t>();
  }
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
* @}
|
||||
*/
|
||||
|
||||
@@ -1,437 +1,437 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
|
||||
// Size table; its element count determines the number of 2D/3D sub-tests
// in hipPerfMemset.
static unsigned int sizeList[] = {
  256,
  512,
  1024,
  2048,
  4096,
  8192,
};

// Element-count table (powers of two from 2^8 to 2^24); its element count
// determines the number of 1D sub-tests per memset type in hipPerfMemset.
static unsigned int eleNumList[] = {
  1u << 8,   // 0x100
  1u << 10,  // 0x400
  1u << 12,  // 0x1000
  1u << 14,  // 0x4000
  1u << 16,  // 0x10000
  1u << 17,  // 0x20000
  1u << 18,  // 0x40000
  1u << 19,  // 0x80000
  1u << 20,  // 0x100000
  1u << 21,  // 0x200000
  1u << 22,  // 0x400000
  1u << 23,  // 0x800000
  1u << 24   // 0x1000000
};
|
||||
|
||||
// Default fill patterns, one per memset width. The literals that do not fit
// the signed member type are converted explicitly; the stored bit patterns
// are the same as with the implicit conversion.
typedef struct _dataType {
  char memsetval = static_cast<char>(0x42);
  char memsetD8val = static_cast<char>(0xDE);
  int16_t memsetD16val = static_cast<int16_t>(0xDEAD);
  int memsetD32val = static_cast<int>(0xDEADBEEF);
} dataType;
|
||||
|
||||
#define NUM_ITER 1000
|
||||
|
||||
// Memset variants selectable per sub-test. hipMemsetTypeMax is the number of
// variants and is used as a multiplier when counting 1D sub-tests.
enum MemsetType {
  hipMemsetTypeDefault = 0,
  hipMemsetTypeD8 = 1,
  hipMemsetTypeD16 = 2,
  hipMemsetTypeD32 = 3,
  hipMemsetTypeMax = 4
};
|
||||
|
||||
using namespace std;
|
||||
|
||||
class hipPerfMemset {
|
||||
private:
|
||||
uint64_t bufSize_;
|
||||
unsigned int num_elements_;
|
||||
unsigned int testNumEle_;
|
||||
unsigned int _numSubTests = 0;
|
||||
unsigned int _numSubTests2D = 0;
|
||||
unsigned int _numSubTests3D = 0;
|
||||
unsigned int num_sizes_ =0;
|
||||
|
||||
public:
|
||||
hipPerfMemset() {
|
||||
num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
|
||||
_numSubTests = num_elements_ * hipMemsetTypeMax;
|
||||
|
||||
num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
|
||||
_numSubTests2D = num_sizes_;
|
||||
_numSubTests3D = _numSubTests2D;
|
||||
};
|
||||
|
||||
~hipPerfMemset() {};
|
||||
|
||||
void open(int deviceID);
|
||||
|
||||
template<typename T>
|
||||
void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
template<typename T>
|
||||
void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
template<typename T>
|
||||
void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
uint getNumTests() {
|
||||
return _numSubTests;
|
||||
}
|
||||
|
||||
uint getNumTests2D() {
|
||||
return _numSubTests2D;
|
||||
}
|
||||
uint getNumTests3D() {
|
||||
return _numSubTests3D;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void hipPerfMemset::open(int deviceId) {
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
failed("No GPU!");
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
T * A_h;
|
||||
T * A_d;
|
||||
|
||||
testNumEle_ = eleNumList[test % num_elements_];
|
||||
|
||||
bufSize_ = testNumEle_ * sizeof(uint32_t);
|
||||
|
||||
HIPCHECK(hipMalloc(&A_d, bufSize_));
|
||||
|
||||
A_h = reinterpret_cast<T*> (malloc(bufSize_));
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto start = chrono::high_resolution_clock::now();
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD8 && !async){
|
||||
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
|
||||
}
|
||||
else if (type == hipMemsetTypeD8 && async) {
|
||||
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD16 && !async) {
|
||||
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||
}
|
||||
else if (type == hipMemsetTypeD16 && async) {
|
||||
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD32 && !async) {
|
||||
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||
}
|
||||
else if (type == hipMemsetTypeD32 && async) {
|
||||
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||
}
|
||||
}
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::high_resolution_clock::now();
|
||||
|
||||
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
|
||||
|
||||
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
|
||||
auto diff = std::chrono::duration<double>(end - start);
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
|
||||
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
bufSize_ = sizeList[test % num_sizes_];
|
||||
|
||||
size_t numH = bufSize_;
|
||||
size_t numW = bufSize_;
|
||||
size_t pitch_A;
|
||||
size_t width = numW * sizeof(char);
|
||||
size_t sizeElements = width * numH;
|
||||
size_t elements = numW* numH;
|
||||
|
||||
T * A_h;
|
||||
T * A_d;
|
||||
|
||||
HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
|
||||
numH));
|
||||
A_h = reinterpret_cast<char*>(malloc(sizeElements));
|
||||
|
||||
for (size_t i=0; i < elements; i++) {
|
||||
A_h[i] = 1;
|
||||
}
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto start = chrono::steady_clock::now();
|
||||
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||
}
|
||||
}
|
||||
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::steady_clock::now();
|
||||
|
||||
HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i < elements; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chrono::duration<double> diff = end - start;
|
||||
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
|
||||
<< " " << "(GB/s) for " << setw(5) << bufSize_
|
||||
<< " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;
|
||||
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
bufSize_ = sizeList[test % num_sizes_];
|
||||
|
||||
size_t numH = bufSize_;
|
||||
size_t numW = bufSize_;
|
||||
size_t depth = 10;
|
||||
size_t width = numW * sizeof(char);
|
||||
size_t sizeElements = width * numH * depth;
|
||||
size_t elements = numW* numH* depth;
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
T *A_h;
|
||||
|
||||
hipExtent extent = make_hipExtent(width, numH, depth);
|
||||
hipPitchedPtr devPitchedPtr;
|
||||
|
||||
HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
|
||||
A_h = (char*)malloc(sizeElements);
|
||||
HIPASSERT(A_h != NULL);
|
||||
|
||||
for (size_t i=0; i<elements; i++) {
|
||||
A_h[i] = 1;
|
||||
}
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
auto start = chrono::steady_clock::now();
|
||||
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
||||
}
|
||||
}
|
||||
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::steady_clock::now();
|
||||
|
||||
hipMemcpy3DParms myparms = {0};
|
||||
myparms.srcPos = make_hipPos(0,0,0);
|
||||
myparms.dstPos = make_hipPos(0,0,0);
|
||||
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
|
||||
myparms.srcPtr = devPitchedPtr;
|
||||
myparms.extent = extent;
|
||||
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIPCHECK(hipMemcpy3D(&myparms));
|
||||
|
||||
for (int i=0; i<elements; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chrono::duration<double> diff = end - start;
|
||||
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
|
||||
<< "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
|
||||
<< bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;
|
||||
HIPCHECK(hipFree(devPitchedPtr.ptr));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
int main() {
|
||||
hipPerfMemset hipPerfMemset;
|
||||
|
||||
dataType pattern;
|
||||
int deviceId = 0;
|
||||
hipPerfMemset.open(deviceId);
|
||||
MemsetType type;
|
||||
|
||||
int numTests = hipPerfMemset.getNumTests();
|
||||
int numTests2D = hipPerfMemset.getNumTests2D();
|
||||
int numTests3D = hipPerfMemset.getNumTests3D();
|
||||
|
||||
|
||||
cout << "--------------------- 1D buffer -------------------" << endl;
|
||||
bool async= false;
|
||||
for (uint i = 0; i < 2 ; i++) {
|
||||
cout << endl;
|
||||
|
||||
for (auto testCase = 0; testCase < numTests; testCase++) {
|
||||
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
|
||||
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
|
||||
}
|
||||
|
||||
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
|
||||
}
|
||||
|
||||
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
|
||||
}
|
||||
|
||||
else {
|
||||
cout << "API: hipMemset" << (async ? "Async " : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
cout << "------------------ 2D buffer arrays ---------------" << endl;
|
||||
|
||||
async = false;
|
||||
for (uint i = 0; i < 2; i++) {
|
||||
cout << endl;
|
||||
for (uint test = 0; test < numTests2D; test++) {
|
||||
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
cout << "------------------ 3D buffer arrays ---------------" << endl;
|
||||
|
||||
async = false;
|
||||
for (uint i = 0; i < 2; i++) {
|
||||
cout << endl;
|
||||
for (uint test =0; test < numTests3D; test++) {
|
||||
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
/*
|
||||
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../src/test_common.cpp
|
||||
* TEST: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
|
||||
// Edge lengths used by the 2D and 3D memset tests: run2D/run3D read one entry
// and use it as both the width and the height of the surface, in bytes.
static unsigned int sizeList[] = {256, 512, 1024, 2048, 4096, 8192};
|
||||
|
||||
// Element counts for the 1D memset tests. run1D multiplies the selected count
// by sizeof(uint32_t) to get the buffer size in bytes.
static unsigned int eleNumList[] = {
    0x100,   0x400,    0x1000,   0x4000,   0x10000,  0x20000,  0x40000,
    0x80000, 0x100000, 0x200000, 0x400000, 0x800000, 0x1000000};
|
||||
|
||||
// Fill patterns handed to the memset runners, one per element width. The
// member type also selects the template argument T of the runner it is
// passed to.
typedef struct _dataType {
  char memsetval = 0x42;          // byte pattern (hipMemset, D8, 2D and 3D runs)
  char memsetD8val = 0xDE;        // 8-bit pattern; not referenced by the visible main()
  int16_t memsetD16val = 0xDEAD;  // 16-bit pattern for the D16 runs
  int memsetD32val = 0xDEADBEEF;  // 32-bit pattern for the D32 runs
} dataType;
|
||||
|
||||
// Number of memset calls issued inside each timed region.
#define NUM_ITER 1000

// Selects which hipMemset* entry point a run exercises. hipMemsetTypeMax is
// not a variant of its own; the class uses it as the number of variants.
enum MemsetType {
  hipMemsetTypeDefault,
  hipMemsetTypeD8,
  hipMemsetTypeD16,
  hipMemsetTypeD32,
  hipMemsetTypeMax
};
|
||||
|
||||
using namespace std;
|
||||
|
||||
class hipPerfMemset {
|
||||
private:
|
||||
uint64_t bufSize_;
|
||||
unsigned int num_elements_;
|
||||
unsigned int testNumEle_;
|
||||
unsigned int _numSubTests = 0;
|
||||
unsigned int _numSubTests2D = 0;
|
||||
unsigned int _numSubTests3D = 0;
|
||||
unsigned int num_sizes_ =0;
|
||||
|
||||
public:
|
||||
hipPerfMemset() {
|
||||
num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
|
||||
_numSubTests = num_elements_ * hipMemsetTypeMax;
|
||||
|
||||
num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
|
||||
_numSubTests2D = num_sizes_;
|
||||
_numSubTests3D = _numSubTests2D;
|
||||
};
|
||||
|
||||
~hipPerfMemset() {};
|
||||
|
||||
void open(int deviceID);
|
||||
|
||||
template<typename T>
|
||||
void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
template<typename T>
|
||||
void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
template<typename T>
|
||||
void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||
|
||||
uint getNumTests() {
|
||||
return _numSubTests;
|
||||
}
|
||||
|
||||
uint getNumTests2D() {
|
||||
return _numSubTests2D;
|
||||
}
|
||||
uint getNumTests3D() {
|
||||
return _numSubTests3D;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void hipPerfMemset::open(int deviceId) {
|
||||
int nGpu = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
failed("No GPU!");
|
||||
}
|
||||
|
||||
HIPCHECK(hipSetDevice(deviceId));
|
||||
hipDeviceProp_t props = {0};
|
||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
T * A_h;
|
||||
T * A_d;
|
||||
|
||||
testNumEle_ = eleNumList[test % num_elements_];
|
||||
|
||||
bufSize_ = testNumEle_ * sizeof(uint32_t);
|
||||
|
||||
HIPCHECK(hipMalloc(&A_d, bufSize_));
|
||||
|
||||
A_h = reinterpret_cast<T*> (malloc(bufSize_));
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto start = chrono::high_resolution_clock::now();
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD8 && !async){
|
||||
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
|
||||
}
|
||||
else if (type == hipMemsetTypeD8 && async) {
|
||||
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD16 && !async) {
|
||||
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||
}
|
||||
else if (type == hipMemsetTypeD16 && async) {
|
||||
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||
}
|
||||
else if (type == hipMemsetTypeD32 && !async) {
|
||||
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||
}
|
||||
else if (type == hipMemsetTypeD32 && async) {
|
||||
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||
}
|
||||
}
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::high_resolution_clock::now();
|
||||
|
||||
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
|
||||
|
||||
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
|
||||
auto diff = std::chrono::duration<double>(end - start);
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
|
||||
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
bufSize_ = sizeList[test % num_sizes_];
|
||||
|
||||
size_t numH = bufSize_;
|
||||
size_t numW = bufSize_;
|
||||
size_t pitch_A;
|
||||
size_t width = numW * sizeof(char);
|
||||
size_t sizeElements = width * numH;
|
||||
size_t elements = numW* numH;
|
||||
|
||||
T * A_h;
|
||||
T * A_d;
|
||||
|
||||
HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
|
||||
numH));
|
||||
A_h = reinterpret_cast<char*>(malloc(sizeElements));
|
||||
|
||||
for (size_t i=0; i < elements; i++) {
|
||||
A_h[i] = 1;
|
||||
}
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto start = chrono::steady_clock::now();
|
||||
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||
}
|
||||
}
|
||||
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::steady_clock::now();
|
||||
|
||||
HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
for (int i=0; i < elements; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chrono::duration<double> diff = end - start;
|
||||
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
|
||||
<< " " << "(GB/s) for " << setw(5) << bufSize_
|
||||
<< " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;
|
||||
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
HIPCHECK(hipFree(A_d));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||
|
||||
bufSize_ = sizeList[test % num_sizes_];
|
||||
|
||||
size_t numH = bufSize_;
|
||||
size_t numW = bufSize_;
|
||||
size_t depth = 10;
|
||||
size_t width = numW * sizeof(char);
|
||||
size_t sizeElements = width * numH * depth;
|
||||
size_t elements = numW* numH* depth;
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||
|
||||
T *A_h;
|
||||
|
||||
hipExtent extent = make_hipExtent(width, numH, depth);
|
||||
hipPitchedPtr devPitchedPtr;
|
||||
|
||||
HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
|
||||
A_h = (char*)malloc(sizeElements);
|
||||
HIPASSERT(A_h != NULL);
|
||||
|
||||
for (size_t i=0; i<elements; i++) {
|
||||
A_h[i] = 1;
|
||||
}
|
||||
|
||||
// Warm-up
|
||||
if (async) {
|
||||
HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
auto start = chrono::steady_clock::now();
|
||||
|
||||
for (uint i = 0; i < NUM_ITER; i++) {
|
||||
if (type == hipMemsetTypeDefault && !async) {
|
||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||
}
|
||||
else if (type == hipMemsetTypeDefault && async) {
|
||||
HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
||||
}
|
||||
}
|
||||
|
||||
if (async) {
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
} else {
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
auto end = chrono::steady_clock::now();
|
||||
|
||||
hipMemcpy3DParms myparms = {0};
|
||||
myparms.srcPos = make_hipPos(0,0,0);
|
||||
myparms.dstPos = make_hipPos(0,0,0);
|
||||
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
|
||||
myparms.srcPtr = devPitchedPtr;
|
||||
myparms.extent = extent;
|
||||
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
|
||||
HIPCHECK(hipMemcpy3D(&myparms));
|
||||
|
||||
for (int i=0; i<elements; i++) {
|
||||
if (A_h[i] != memsetval) {
|
||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
chrono::duration<double> diff = end - start;
|
||||
|
||||
auto sec = diff.count();
|
||||
|
||||
auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
|
||||
|
||||
cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
|
||||
<< "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
|
||||
<< bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;
|
||||
HIPCHECK(hipFree(devPitchedPtr.ptr));
|
||||
free(A_h);
|
||||
}
|
||||
|
||||
int main() {
|
||||
hipPerfMemset hipPerfMemset;
|
||||
|
||||
dataType pattern;
|
||||
int deviceId = 0;
|
||||
hipPerfMemset.open(deviceId);
|
||||
MemsetType type;
|
||||
|
||||
int numTests = hipPerfMemset.getNumTests();
|
||||
int numTests2D = hipPerfMemset.getNumTests2D();
|
||||
int numTests3D = hipPerfMemset.getNumTests3D();
|
||||
|
||||
|
||||
cout << "--------------------- 1D buffer -------------------" << endl;
|
||||
bool async= false;
|
||||
for (uint i = 0; i < 2 ; i++) {
|
||||
cout << endl;
|
||||
|
||||
for (auto testCase = 0; testCase < numTests; testCase++) {
|
||||
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
|
||||
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
|
||||
}
|
||||
|
||||
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
|
||||
}
|
||||
|
||||
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
|
||||
}
|
||||
|
||||
else {
|
||||
cout << "API: hipMemset" << (async ? "Async " : " ");
|
||||
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
cout << "------------------ 2D buffer arrays ---------------" << endl;
|
||||
|
||||
async = false;
|
||||
for (uint i = 0; i < 2; i++) {
|
||||
cout << endl;
|
||||
for (uint test = 0; test < numTests2D; test++) {
|
||||
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
cout << "------------------ 3D buffer arrays ---------------" << endl;
|
||||
|
||||
async = false;
|
||||
for (uint i = 0; i < 2; i++) {
|
||||
cout << endl;
|
||||
for (uint test =0; test < numTests3D; test++) {
|
||||
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||
}
|
||||
async = true;
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -41,4 +41,4 @@ cmake ../samples
|
||||
|
||||
make package_samples
|
||||
|
||||
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment line 43 inside samples/2_Cookbook/CMakeLists.txt.
|
||||
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment line 43 inside samples/2_Cookbook/CMakeLists.txt.
|
||||
|
||||
새 이슈에서 참조
사용자 차단