SWDEV-472723 - Correct file format and remove trailing spaces

Change-Id: Ie40c763e9391fa36d6c890cd0a171659a1502a83


[ROCm/hip-tests commit: 5d042c80fa]
Цей коміт міститься в:
Julia Jiang
2024-07-10 16:06:00 -04:00
зафіксовано Julia Jiang
джерело f9bb3c5f74
коміт 13c5e7a3e4
47 змінених файлів з 10766 додано та 10746 видалено
+20
Переглянути файл
@@ -0,0 +1,20 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto
# Explicitly declare text files you want to always be normalized and converted
# to have LF line endings on checkout.
*.c text eol=lf
*.cpp text eol=lf
*.cc text eol=lf
*.h text eol=lf
*.hpp text eol=lf
*.txt text eol=lf
# Define files to support auto-remove trailing white space
# Need to run the command below, before add modified file(s) to the staging area
# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
# Every pattern below must name the same filter ("trimspace") that the command
# above configures; a differently named filter is not defined and does nothing.
*.cpp filter=trimspace
*.c filter=trimspace
*.h filter=trimspace
*.hpp filter=trimspace
*.md filter=trimspace
+1 -1
Переглянути файл
@@ -180,7 +180,7 @@ hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests
## Debugging support
Catch2 allows multiple ways in which you can debug the test case.
- The `-b` option breaks into a debugger as soon as a failure is encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger)
- Catch2 provides [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
- Catch2 provides [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
- User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
- User can also mention filename.cc:__LineNumber__ to break into a test case via gdb.
+119 -119
Переглянути файл
@@ -1,119 +1,119 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
// Test case to validate atomicInc and atomicDec functions.
// if TestToRun=1, then atomicInc function will be tested and validated
// if TestToRun=2, then atomicDec function will be tested and validated.
// Kernel for the atomicInc test: applies atomicInc with a limit of 17 to the
// first element of g_ptr, viewed as an unsigned int.
static __global__ void AtomicCheckInc(int* g_ptr) {
  unsigned int* counter = reinterpret_cast<unsigned int*>(g_ptr);
  atomicInc(counter, 17);
}
// Kernel for the atomicDec test: applies atomicDec with a limit of 25 to the
// first element of g_ptr, viewed as an unsigned int.
static __global__ void AtomicCheckDec(int* g_ptr) {
  unsigned int* counter = reinterpret_cast<unsigned int*>(g_ptr);
  atomicDec(counter, 25);
}
// Host-side model of the atomicInc kernel (limit 17).
// value: the int stored in memory before the kernel ran.
// Returns the value expected in memory afterwards: 0 when the old value is
// greater than or equal to the limit, otherwise old + 1.
// The kernel operates on the memory reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation; a negative input is a
// large unsigned value and therefore wraps to 0 instead of being incremented.
static int verifyResultInc(int value) {
  constexpr unsigned int kLimit = 17;
  const unsigned int old = static_cast<unsigned int>(value);
  const unsigned int updated = (old >= kLimit) ? 0u : old + 1u;
  return static_cast<int>(updated);
}
// Host-side model of the atomicDec kernel (limit 25).
// value: the int stored in memory before the kernel ran.
// Returns the value expected in memory afterwards: the limit when the old
// value is 0 or greater than the limit, otherwise old - 1.
// The kernel operates on the memory reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation; a negative input is a
// large unsigned value and therefore resets to the limit.
static int verifyResultDec(int value) {
  constexpr unsigned int kLimit = 25;
  const unsigned int old = static_cast<unsigned int>(value);
  const unsigned int updated = (old == 0u || old > kLimit) ? kLimit : old - 1u;
  return static_cast<int>(updated);
}
// common fuction to launch atomic functions kernel.
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
unsigned int memSize = sizeof(int) * 1;
int *dptr{nullptr};
// allocate device memory
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
// copy host memory to device
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
// launch kernel function
if (TestToRun == 1) {
AtomicCheckInc<<<1, 1>>>(dptr);
} else if (TestToRun == 2) {
AtomicCheckDec<<<1, 1>>>(dptr);
}
// copy back from device to host
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
// verify the results.
if (TestToRun == 1) {
int result = verifyResultInc(val);
REQUIRE(result == Hptr[0]);
} else if (TestToRun == 2) {
int result = verifyResultDec(val);
REQUIRE(result == Hptr[0]);
}
// Cleanup memory
HIP_CHECK(hipFree(dptr));
}
// Runs the atomicInc check (TestToRun = 1) with a start value below, above
// and equal to the kernel's limit of 17.
TEST_CASE("Unit_AtomicFunctions_Inc") {
  // The host value lives on the stack. The previous malloc() result was
  // dereferenced without a null check and was not freed when a failing check
  // left the test case before reaching free().
  int hostValue{0};
  int *Hptr = &hostValue;
  int val;
  SECTION("Test case when value is lesser than limit") {
    val = Hptr[0] = 10;
    launchAtomicFunction(Hptr, val, 1);
  }
  SECTION("Test case when value is greater than limit") {
    val = Hptr[0] = 20;
    launchAtomicFunction(Hptr, val, 1);
  }
  SECTION("Test case when value is equal to the limit") {
    val = Hptr[0] = 17;
    launchAtomicFunction(Hptr, val, 1);
  }
}
// Runs the atomicDec check (TestToRun = 2) with a start value below, above
// and equal to the kernel's limit of 25.
TEST_CASE("Unit_AtomicFunctions_Dec") {
  // The host value lives on the stack. The previous malloc() result was
  // dereferenced without a null check and was not freed when a failing check
  // left the test case before reaching free().
  int hostValue{0};
  int *Hptr = &hostValue;
  int val;
  SECTION("Test case when value is less than limit") {
    val = Hptr[0] = 4;
    launchAtomicFunction(Hptr, val, 2);
  }
  SECTION("Test case when value is greater than limit") {
    val = Hptr[0] = 31;
    launchAtomicFunction(Hptr, val, 2);
  }
  SECTION("Test case when value is equal to the limit") {
    val = Hptr[0] = 25;
    launchAtomicFunction(Hptr, val, 2);
  }
}
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
// Test case to validate atomicInc and atomicDec functions.
// if TestToRun=1, then atomicInc function will be tested and validated
// if TestToRun=2, then atomicDec function will be tested and validated.
// Kernel for the atomicInc test: applies atomicInc with a limit of 17 to the
// first element of g_ptr, viewed as an unsigned int.
static __global__ void AtomicCheckInc(int* g_ptr) {
  unsigned int* counter = reinterpret_cast<unsigned int*>(g_ptr);
  atomicInc(counter, 17);
}
// Kernel for the atomicDec test: applies atomicDec with a limit of 25 to the
// first element of g_ptr, viewed as an unsigned int.
static __global__ void AtomicCheckDec(int* g_ptr) {
  unsigned int* counter = reinterpret_cast<unsigned int*>(g_ptr);
  atomicDec(counter, 25);
}
// Host-side model of the atomicInc kernel (limit 17).
// value: the int stored in memory before the kernel ran.
// Returns the value expected in memory afterwards: 0 when the old value is
// greater than or equal to the limit, otherwise old + 1.
// The kernel operates on the memory reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation; a negative input is a
// large unsigned value and therefore wraps to 0 instead of being incremented.
static int verifyResultInc(int value) {
  constexpr unsigned int kLimit = 17;
  const unsigned int old = static_cast<unsigned int>(value);
  const unsigned int updated = (old >= kLimit) ? 0u : old + 1u;
  return static_cast<int>(updated);
}
// Host-side model of the atomicDec kernel (limit 25).
// value: the int stored in memory before the kernel ran.
// Returns the value expected in memory afterwards: the limit when the old
// value is 0 or greater than the limit, otherwise old - 1.
// The kernel operates on the memory reinterpreted as unsigned int, so the
// comparison is done on the unsigned representation; a negative input is a
// large unsigned value and therefore resets to the limit.
static int verifyResultDec(int value) {
  constexpr unsigned int kLimit = 25;
  const unsigned int old = static_cast<unsigned int>(value);
  const unsigned int updated = (old == 0u || old > kLimit) ? kLimit : old - 1u;
  return static_cast<int>(updated);
}
// common fuction to launch atomic functions kernel.
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
unsigned int memSize = sizeof(int) * 1;
int *dptr{nullptr};
// allocate device memory
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
// copy host memory to device
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
// launch kernel function
if (TestToRun == 1) {
AtomicCheckInc<<<1, 1>>>(dptr);
} else if (TestToRun == 2) {
AtomicCheckDec<<<1, 1>>>(dptr);
}
// copy back from device to host
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
// verify the results.
if (TestToRun == 1) {
int result = verifyResultInc(val);
REQUIRE(result == Hptr[0]);
} else if (TestToRun == 2) {
int result = verifyResultDec(val);
REQUIRE(result == Hptr[0]);
}
// Cleanup memory
HIP_CHECK(hipFree(dptr));
}
// Runs the atomicInc check (TestToRun = 1) with a start value below, above
// and equal to the kernel's limit of 17.
TEST_CASE("Unit_AtomicFunctions_Inc") {
  // The host value lives on the stack. The previous malloc() result was
  // dereferenced without a null check and was not freed when a failing check
  // left the test case before reaching free().
  int hostValue{0};
  int *Hptr = &hostValue;
  int val;
  SECTION("Test case when value is lesser than limit") {
    val = Hptr[0] = 10;
    launchAtomicFunction(Hptr, val, 1);
  }
  SECTION("Test case when value is greater than limit") {
    val = Hptr[0] = 20;
    launchAtomicFunction(Hptr, val, 1);
  }
  SECTION("Test case when value is equal to the limit") {
    val = Hptr[0] = 17;
    launchAtomicFunction(Hptr, val, 1);
  }
}
// Runs the atomicDec check (TestToRun = 2) with a start value below, above
// and equal to the kernel's limit of 25.
TEST_CASE("Unit_AtomicFunctions_Dec") {
  // The host value lives on the stack. The previous malloc() result was
  // dereferenced without a null check and was not freed when a failing check
  // left the test case before reaching free().
  int hostValue{0};
  int *Hptr = &hostValue;
  int val;
  SECTION("Test case when value is less than limit") {
    val = Hptr[0] = 4;
    launchAtomicFunction(Hptr, val, 2);
  }
  SECTION("Test case when value is greater than limit") {
    val = Hptr[0] = 31;
    launchAtomicFunction(Hptr, val, 2);
  }
  SECTION("Test case when value is equal to the limit") {
    val = Hptr[0] = 25;
    launchAtomicFunction(Hptr, val, 2);
  }
}
+81 -81
Переглянути файл
@@ -1,81 +1,81 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision rounding intrinsic once and discards the result.
// The round-to-nearest (_rn) variants are always called; the _rd, _ru and _rz
// variants are called only when OCML_BASIC_ROUNDED_OPERATIONS is defined.
__device__ void double_precision_intrinsics() {
  // addition
  __dadd_rn(0.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dadd_rd(0.0, 1.0);
  __dadd_ru(0.0, 1.0);
  __dadd_rz(0.0, 1.0);
#endif

  // division
  __ddiv_rn(0.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __ddiv_rd(0.0, 1.0);
  __ddiv_ru(0.0, 1.0);
  __ddiv_rz(0.0, 1.0);
#endif

  // multiplication
  __dmul_rn(1.0, 2.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dmul_rd(1.0, 2.0);
  __dmul_ru(1.0, 2.0);
  __dmul_rz(1.0, 2.0);
#endif

  // reciprocal
  __drcp_rn(2.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __drcp_rd(2.0);
  __drcp_ru(2.0);
  __drcp_rz(2.0);
#endif

  // square root
  __dsqrt_rn(4.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dsqrt_rd(4.0);
  __dsqrt_ru(4.0);
  __dsqrt_rz(4.0);
#endif

  // subtraction
  __dsub_rn(2.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dsub_rd(2.0, 1.0);
  __dsub_ru(2.0, 1.0);
  __dsub_rz(2.0, 1.0);
#endif

  // fused multiply-add
  __fma_rn(1.0, 2.0, 3.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fma_rd(1.0, 2.0, 3.0);
  __fma_ru(1.0, 2.0, 3.0);
  __fma_rz(1.0, 2.0, 3.0);
#endif
}
// Kernel wrapper around double_precision_intrinsics(); the int argument is
// not used.
__global__ void compileDoublePrecisionIntrinsics(int /*unused*/) {
  double_precision_intrinsics();
}
// Launches compileDoublePrecisionIntrinsics with a single thread and fails if
// the launch or the kernel execution reports an error.
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision rounding intrinsic once and discards the result.
// The round-to-nearest (_rn) variants are always called; the _rd, _ru and _rz
// variants are called only when OCML_BASIC_ROUNDED_OPERATIONS is defined.
__device__ void double_precision_intrinsics() {
  // addition
  __dadd_rn(0.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dadd_rd(0.0, 1.0);
  __dadd_ru(0.0, 1.0);
  __dadd_rz(0.0, 1.0);
#endif

  // division
  __ddiv_rn(0.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __ddiv_rd(0.0, 1.0);
  __ddiv_ru(0.0, 1.0);
  __ddiv_rz(0.0, 1.0);
#endif

  // multiplication
  __dmul_rn(1.0, 2.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dmul_rd(1.0, 2.0);
  __dmul_ru(1.0, 2.0);
  __dmul_rz(1.0, 2.0);
#endif

  // reciprocal
  __drcp_rn(2.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __drcp_rd(2.0);
  __drcp_ru(2.0);
  __drcp_rz(2.0);
#endif

  // square root
  __dsqrt_rn(4.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dsqrt_rd(4.0);
  __dsqrt_ru(4.0);
  __dsqrt_rz(4.0);
#endif

  // subtraction
  __dsub_rn(2.0, 1.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __dsub_rd(2.0, 1.0);
  __dsub_ru(2.0, 1.0);
  __dsub_rz(2.0, 1.0);
#endif

  // fused multiply-add
  __fma_rn(1.0, 2.0, 3.0);
#ifdef OCML_BASIC_ROUNDED_OPERATIONS
  __fma_rd(1.0, 2.0, 3.0);
  __fma_ru(1.0, 2.0, 3.0);
  __fma_rz(1.0, 2.0, 3.0);
#endif
}
// Kernel wrapper around double_precision_intrinsics(); the int argument is
// not used.
__global__ void compileDoublePrecisionIntrinsics(int /*unused*/) {
  double_precision_intrinsics();
}
// Launches compileDoublePrecisionIntrinsics with a single thread and fails if
// the launch or the kernel execution reports an error.
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
+133 -133
Переглянути файл
@@ -1,133 +1,133 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision device math function once with a constant
// argument. Return values are discarded; the locals only receive the
// pointer outputs of the functions that need them.
__device__ void double_precision_math_functions() {
  int intOut;
  double dblOutA;
  double dblOutB;

  // a - c
  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  cospi(0.0);
  cyl_bessel_i0(0.0);
  cyl_bessel_i1(0.0);

  // e - f
  erf(0.0);
  erfc(0.0);
  erfcinv(2.0);
  erfcx(0.0);
  erfinv(1.0);
  exp(0.0);
  exp10(0.0);
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &intOut);

  // h - l
  hypot(1.0, 0.0);
  ilogb(1.0);
  isfinite(0.0);
  isinf(0.0);
  isnan(0.0);
  j0(0.0);
  j1(0.0);
  jn(-1.0, 1.0);
  ldexp(0.0, 0);
  lgamma(1.0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);

  // m - p
  modf(0.0, &dblOutA);
  nan("1");
  nearbyint(0.0);
  nextafter(0.0, 0.0);
  dblOutA = 1.0;  // input vector of length 1 for norm()
  norm(1, &dblOutA);
  norm3d(1.0, 0.0, 0.0);
  norm4d(1.0, 0.0, 0.0, 0.0);
  normcdf(0.0);
  normcdfinv(1.0);
  pow(1.0, 0.0);

  // r
  rcbrt(1.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &intOut);
  rhypot(0.0, 1.0);
  rint(1.0);
  dblOutA = 1.0;  // input vector of length 1 for rnorm()
  rnorm(1, &dblOutA);
  rnorm3d(0.0, 0.0, 1.0);
  rnorm4d(0.0, 0.0, 0.0, 1.0);
  round(0.0);
  rsqrt(1.0);

  // s
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  signbit(1.0);
  sin(0.0);
#if HT_AMD
  // NV A100 has a bug in sincos(), so temporarily disable it
  sincos(0.0, &dblOutA, &dblOutB);
#endif
  sincospi(0.0, &dblOutA, &dblOutB);
  sinh(0.0);
  sinpi(0.0);
  sqrt(0.0);

  // t - y
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
}
// Kernel wrapper around double_precision_math_functions(); the int argument
// is not used.
__global__ void compileDoublePrecisionMathOnDevice(int /*unused*/) {
  double_precision_math_functions();
}
// Launches compileDoublePrecisionMathOnDevice with a single thread and fails
// if the launch or the kernel execution reports an error.
TEST_CASE("Unit_DoublePrecisionMathDevice") {
  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision device math function once with a constant
// argument. Return values are discarded; the locals only receive the
// pointer outputs of the functions that need them.
__device__ void double_precision_math_functions() {
  int intOut;
  double dblOutA;
  double dblOutB;

  // a - c
  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  cospi(0.0);
  cyl_bessel_i0(0.0);
  cyl_bessel_i1(0.0);

  // e - f
  erf(0.0);
  erfc(0.0);
  erfcinv(2.0);
  erfcx(0.0);
  erfinv(1.0);
  exp(0.0);
  exp10(0.0);
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &intOut);

  // h - l
  hypot(1.0, 0.0);
  ilogb(1.0);
  isfinite(0.0);
  isinf(0.0);
  isnan(0.0);
  j0(0.0);
  j1(0.0);
  jn(-1.0, 1.0);
  ldexp(0.0, 0);
  lgamma(1.0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);

  // m - p
  modf(0.0, &dblOutA);
  nan("1");
  nearbyint(0.0);
  nextafter(0.0, 0.0);
  dblOutA = 1.0;  // input vector of length 1 for norm()
  norm(1, &dblOutA);
  norm3d(1.0, 0.0, 0.0);
  norm4d(1.0, 0.0, 0.0, 0.0);
  normcdf(0.0);
  normcdfinv(1.0);
  pow(1.0, 0.0);

  // r
  rcbrt(1.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &intOut);
  rhypot(0.0, 1.0);
  rint(1.0);
  dblOutA = 1.0;  // input vector of length 1 for rnorm()
  rnorm(1, &dblOutA);
  rnorm3d(0.0, 0.0, 1.0);
  rnorm4d(0.0, 0.0, 0.0, 1.0);
  round(0.0);
  rsqrt(1.0);

  // s
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  signbit(1.0);
  sin(0.0);
#if HT_AMD
  // NV A100 has a bug in sincos(), so temporarily disable it
  sincos(0.0, &dblOutA, &dblOutB);
#endif
  sincospi(0.0, &dblOutA, &dblOutB);
  sinh(0.0);
  sinpi(0.0);
  sqrt(0.0);

  // t - y
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
}
// Kernel wrapper around double_precision_math_functions(); the int argument
// is not used.
__global__ void compileDoublePrecisionMathOnDevice(int /*unused*/) {
  double_precision_math_functions();
}
// Launches compileDoublePrecisionMathOnDevice with a single thread and fails
// if the launch or the kernel execution reports an error.
TEST_CASE("Unit_DoublePrecisionMathDevice") {
  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
+117 -117
Переглянути файл
@@ -1,117 +1,117 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <cmath>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision math function once on the host with a constant
// argument; return values are discarded. Functions that are not part of
// standard <cmath> are guarded: exp10 and sincos are called only when
// __unix__ is defined, and the Bessel functions use the j0/y0 names under
// __unix__ and the _j0/_y0 names under _WIN64.
__host__ static void double_precision_math_functions() {
  int iX;
  double fX, fY;
  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  erf(0.0);
  erfc(0.0);
  exp(0.0);
#ifdef __unix__
  exp10(0.0);
#endif
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  std::isfinite(0.0);
  std::isinf(0.0);
  std::isnan(0.0);
#ifdef __unix__
  j0(0.0);
  j1(0.0);
  jn(-1.0, 1.0);
#elif _WIN64
  _j0(0.0);
  _j1(0.0);
  _jn(-1.0, 1.0);
#endif
  ldexp(0.0, 0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  fX = 1.0;
  pow(1.0, 0.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rint(1.0);
  round(0.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  std::signbit(1.0);
  sin(0.0);
  // The guard was spelled "_unix__", a macro that is never defined, so the
  // sincos call was silently left out on every platform.
#ifdef __unix__
  sincos(0.0, &fX, &fY);
#endif
  sinh(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
#ifdef __unix__
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
#elif _WIN64
  _y0(1.0);
  _y1(1.0);
  _yn(1, 1.0);
#endif
}
// Runs the host-side double-precision math calls once; the test case
// contains no assertions of its own.
TEST_CASE("Unit_DoublePrecisionMathHost") {
  double_precision_math_functions();
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <cmath>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Calls each double-precision math function once on the host with a constant
// argument; return values are discarded. Functions that are not part of
// standard <cmath> are guarded: exp10 and sincos are called only when
// __unix__ is defined, and the Bessel functions use the j0/y0 names under
// __unix__ and the _j0/_y0 names under _WIN64.
__host__ static void double_precision_math_functions() {
  int iX;
  double fX, fY;
  acos(1.0);
  acosh(1.0);
  asin(0.0);
  asinh(0.0);
  atan(0.0);
  atan2(0.0, 1.0);
  atanh(0.0);
  cbrt(0.0);
  ceil(0.0);
  copysign(1.0, -2.0);
  cos(0.0);
  cosh(0.0);
  erf(0.0);
  erfc(0.0);
  exp(0.0);
#ifdef __unix__
  exp10(0.0);
#endif
  exp2(0.0);
  expm1(0.0);
  fabs(1.0);
  fdim(1.0, 0.0);
  floor(0.0);
  fma(1.0, 2.0, 3.0);
  fmax(0.0, 0.0);
  fmin(0.0, 0.0);
  fmod(0.0, 1.0);
  frexp(0.0, &iX);
  hypot(1.0, 0.0);
  ilogb(1.0);
  std::isfinite(0.0);
  std::isinf(0.0);
  std::isnan(0.0);
#ifdef __unix__
  j0(0.0);
  j1(0.0);
  jn(-1.0, 1.0);
#elif _WIN64
  _j0(0.0);
  _j1(0.0);
  _jn(-1.0, 1.0);
#endif
  ldexp(0.0, 0);
  llrint(0.0);
  llround(0.0);
  log(1.0);
  log10(1.0);
  log1p(-1.0);
  log2(1.0);
  logb(1.0);
  lrint(0.0);
  lround(0.0);
  modf(0.0, &fX);
  nan("1");
  nearbyint(0.0);
  fX = 1.0;
  pow(1.0, 0.0);
  remainder(2.0, 1.0);
  remquo(1.0, 2.0, &iX);
  rint(1.0);
  round(0.0);
  scalbln(0.0, 1);
  scalbn(0.0, 1);
  std::signbit(1.0);
  sin(0.0);
  // The guard was spelled "_unix__", a macro that is never defined, so the
  // sincos call was silently left out on every platform.
#ifdef __unix__
  sincos(0.0, &fX, &fY);
#endif
  sinh(0.0);
  sqrt(0.0);
  tan(0.0);
  tanh(0.0);
  tgamma(2.0);
  trunc(0.0);
#ifdef __unix__
  y0(1.0);
  y1(1.0);
  yn(1, 1.0);
#elif _WIN64
  _y0(1.0);
  _y1(1.0);
  _yn(1, 1.0);
#endif
}
// Runs the host-side double-precision math calls once; the test case
// contains no assertions of its own.
TEST_CASE("Unit_DoublePrecisionMathHost") {
  double_precision_math_functions();
}
+128 -128
Переглянути файл
@@ -1,128 +1,128 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/math_functions.h>
// Calls each single-precision device math function once with a constant
// argument. Most return values are discarded; a few are stored in the
// locals, which also receive the pointer outputs.
__device__ void FloatMathPrecise() {
  int intOut;
  float fltA;
  float fltB;

  // a - c
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  fltA = ceilf(0.0f);
  fltA = copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);
  cyl_bessel_i0f(0.0f);
  cyl_bessel_i1f(0.0f);

  // e - f
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fltA = fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  fltA = floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fltA = fmaxf(0.0f, 0.0f);
  fltA = fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);

  // h - l
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  isfinite(0.0f);
  fltA = isinf(0.0f);
  fltA = isnan(0.0f);
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);

  // m - p
  modff(0.0f, &fltA);
  fltA = nanf("1");
  fltA = nearbyintf(0.0f);
  nextafterf(0.0f, 0.0f);
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  fltA = 1.0f;  // input vector of length 1 for normf()
  normf(1, &fltA);
  powf(1.0f, 0.0f);

  // r
  rcbrtf(1.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &intOut);
  rhypotf(0.0f, 1.0f);
  fltB = rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  fltA = 1.0f;  // input vector of length 1 for rnormf()
  rnormf(1, &fltA);
  fltB = roundf(0.0f);
  rsqrtf(1.0f);

  // s
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  signbit(1.0f);
  sincosf(0.0f, &fltA, &fltB);
  sincospif(0.0f, &fltA, &fltB);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);

  // t - y
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  fltB = truncf(0.0f);
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
// Kernel wrapper around FloatMathPrecise(); the int argument is not used.
__global__ void CompileFloatMathPrecise(int /*unused*/) {
  FloatMathPrecise();
}
// Launches CompileFloatMathPrecise with a single thread and fails if the
// launch or the kernel execution reports an error.
TEST_CASE("Unit_FloatMathPrecise") {
  hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/math_functions.h>
// Calls each single-precision device math function once with a constant
// argument. Most return values are discarded; a few are stored in the
// locals, which also receive the pointer outputs.
__device__ void FloatMathPrecise() {
  int intOut;
  float fltA;
  float fltB;

  // a - c
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  fltA = ceilf(0.0f);
  fltA = copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);
  cyl_bessel_i0f(0.0f);
  cyl_bessel_i1f(0.0f);

  // e - f
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fltA = fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  fltA = floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fltA = fmaxf(0.0f, 0.0f);
  fltA = fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &intOut);

  // h - l
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  isfinite(0.0f);
  fltA = isinf(0.0f);
  fltA = isnan(0.0f);
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);

  // m - p
  modff(0.0f, &fltA);
  fltA = nanf("1");
  fltA = nearbyintf(0.0f);
  nextafterf(0.0f, 0.0f);
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  fltA = 1.0f;  // input vector of length 1 for normf()
  normf(1, &fltA);
  powf(1.0f, 0.0f);

  // r
  rcbrtf(1.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &intOut);
  rhypotf(0.0f, 1.0f);
  fltB = rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  fltA = 1.0f;  // input vector of length 1 for rnormf()
  rnormf(1, &fltA);
  fltB = roundf(0.0f);
  rsqrtf(1.0f);

  // s
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  signbit(1.0f);
  sincosf(0.0f, &fltA, &fltB);
  sincospif(0.0f, &fltA, &fltB);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);

  // t - y
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  fltB = truncf(0.0f);
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
// Kernel wrapper around FloatMathPrecise(); the int argument is not used.
__global__ void CompileFloatMathPrecise(int /*unused*/) {
  FloatMathPrecise();
}
// Launches CompileFloatMathPrecise with a single thread and fails if the
// launch or the kernel execution reports an error.
TEST_CASE("Unit_FloatMathPrecise") {
  hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Without these checks the test case passed even when the kernel could not
  // be launched, and could return before the kernel had run.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
+68 -68
Переглянути файл
@@ -1,68 +1,68 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/device_functions.h>
#include <algorithm>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for the integer device intrinsics: each intrinsic
// is called once with constant arguments and its result is discarded. Only
// the min/max results are stored, in locals that are never read afterwards.
__device__ void integer_intrinsics() {
__brev((unsigned int)10);
__brevll((uint64_t)10);
__byte_perm((unsigned int)0, (unsigned int)0, 0);
__clz(static_cast<int>(10));
__clzll((int64_t)10);
__ffs(static_cast<int>(10));
__ffsll((long long)(10)); // NOLINT
__funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__hadd(static_cast<int>(1), static_cast<int>(3));
__mul24(static_cast<int>(1), static_cast<int>(2));
__mul64hi((int64_t)1, (int64_t)2);
__mulhi(static_cast<int>(1), static_cast<int>(2));
__popc((unsigned int)4);
__popcll((uint64_t)4);
// a and b are intentionally unused beyond this point; the file-level
// "-Wunused-variable" pragma silences the resulting warning.
int a = min(static_cast<int>(4), static_cast<int>(5));
int b = max(static_cast<int>(4), static_cast<int>(5));
__rhadd(static_cast<int>(1), static_cast<int>(2));
__sad(static_cast<int>(1), static_cast<int>(2), 0);
__uhadd((unsigned int)1, (unsigned int)3);
__umul24((unsigned int)1, (unsigned int)2);
__umul64hi((uint64_t)1, (uint64_t)2);
__umulhi((unsigned int)1, (unsigned int)2);
__urhadd((unsigned int)1, (unsigned int)2);
__usad((unsigned int)1, (unsigned int)2, 0);
// Always-true assertion: it only exercises assert() in device code.
assert(1);
}
// Kernel entry point whose only job is to call integer_intrinsics().
// The int parameter is unnamed and never read.
__global__ void compileIntegerIntrinsics(int) {
integer_intrinsics();
}
// Launches compileIntegerIntrinsics on a single thread (1x1x1 grid, 1x1x1
// block) and fails the test if the launch or the kernel execution reports a
// HIP error.
TEST_CASE("Unit_IntegerIntrinsics") {
  hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/device_functions.h>
#include <algorithm>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for the integer device intrinsics: each intrinsic
// is called once with constant arguments and its result is discarded. Only
// the min/max results are stored, in locals that are never read afterwards.
__device__ void integer_intrinsics() {
__brev((unsigned int)10);
__brevll((uint64_t)10);
__byte_perm((unsigned int)0, (unsigned int)0, 0);
__clz(static_cast<int>(10));
__clzll((int64_t)10);
__ffs(static_cast<int>(10));
__ffsll((long long)(10)); // NOLINT
__funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
__hadd(static_cast<int>(1), static_cast<int>(3));
__mul24(static_cast<int>(1), static_cast<int>(2));
__mul64hi((int64_t)1, (int64_t)2);
__mulhi(static_cast<int>(1), static_cast<int>(2));
__popc((unsigned int)4);
__popcll((uint64_t)4);
// a and b are intentionally unused beyond this point; the file-level
// "-Wunused-variable" pragma silences the resulting warning.
int a = min(static_cast<int>(4), static_cast<int>(5));
int b = max(static_cast<int>(4), static_cast<int>(5));
__rhadd(static_cast<int>(1), static_cast<int>(2));
__sad(static_cast<int>(1), static_cast<int>(2), 0);
__uhadd((unsigned int)1, (unsigned int)3);
__umul24((unsigned int)1, (unsigned int)2);
__umul64hi((uint64_t)1, (uint64_t)2);
__umulhi((unsigned int)1, (unsigned int)2);
__urhadd((unsigned int)1, (unsigned int)2);
__usad((unsigned int)1, (unsigned int)2, 0);
// Always-true assertion: it only exercises assert() in device code.
assert(1);
}
// Kernel entry point whose only job is to call integer_intrinsics().
// The int parameter is unnamed and never read.
__global__ void compileIntegerIntrinsics(int) {
integer_intrinsics();
}
// Launches compileIntegerIntrinsics on a single thread (1x1x1 grid, 1x1x1
// block) and fails the test if the launch or the kernel execution reports a
// HIP error.
TEST_CASE("Unit_IntegerIntrinsics") {
  hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
+298 -298
Переглянути файл
@@ -1,298 +1,298 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <string.h>
#include <math.h>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <algorithm>
#include <type_traits>
using namespace std;
////////////////////////////////////////////////////////////////////////////////
// Auto-Verification Code
////////////////////////////////////////////////////////////////////////////////
// Fallback picked when the element type is not integral: the bitwise atomics
// are not exercised for such types, so there is nothing to compare.
bool verifyBitwise(...) {
  return true;
}

// Integral overload: replays the AND / OR / XOR sequences on the host and
// compares each result with the matching slot (8, 9, 10) of gpuData.
// Always returns true; mismatches are reported through REQUIRE.
template<typename T,
         typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
bool verifyBitwise(T* gpuData, int len) {
  T val = 0xff;
  int step = 0;

  // AND reference: start from 0xff and fold in (2 * step + 7).
  while (step < len) {
    val &= (2 * step + 7);
    ++step;
  }
  REQUIRE(val == gpuData[8]);

  // OR reference: start from 0 and fold in (1 << step).
  val = 0;
  step = 0;
  while (step < len) {
    val |= (1 << step);
    ++step;
  }
  REQUIRE(val == gpuData[9]);

  // XOR reference: start from 0xff and fold in the step index itself.
  val = 0xff;
  step = 0;
  while (step < len) {
    val ^= step;
    ++step;
  }
  REQUIRE(val == gpuData[10]);

  return true;
}
// Fallback for element types other than int / unsigned int: subtraction is
// not exercised for them, so the check trivially succeeds.
bool verifySub(...) {
  return true;
}

// int / unsigned int overload: subtracts 10 from zero `len` times on the host
// and requires the result to equal gpuData[1].
template<
    typename T,
    typename std::enable_if<
        std::is_same<T, int>::value ||
        std::is_same<T, unsigned int>::value>::type* = nullptr>
bool verifySub(T* gpuData, int len) {
  T val = 0;
  // Count down instead of up; the body still runs max(len, 0) times.
  for (int remaining = len; remaining > 0; --remaining) {
    val -= 10;
  }
  REQUIRE(val == gpuData[1]);
  return true;
}
// Exchange check used by verifyData: accepts any arguments and always
// reports success, i.e. the exchanged value is not actually inspected.
// NOTE(review): computeExchExch in this file has the shape of a typed
// overload for this check but is never called — confirm whether it was
// meant to be named verifyExch.
bool verifyExch(...) {
return true;
}
// Checks the atomic-exchange slot: returns true when gpuData[2] equals some
// index in [0, len), false otherwise. Disabled for T = double.
//
// Fixes over the previous version: control used to fall off the end of this
// non-void function when no index matched (undefined behaviour); the unused
// local and the unreachable `break` after `return` are gone as well.
// NOTE(review): nothing in this file calls this function — verifyData calls
// verifyExch, which resolves to the variadic stub. Confirm the intended name.
template<typename T, typename std::enable_if<!std::is_same<T, double>::value>::type* = nullptr>  // NOLINT
bool computeExchExch(T* gpuData, int len) {
  for (T i = 0; i < len; ++i) {
    if (i == gpuData[2]) {
      return true;
    }
  }
  return false;
}
bool VerifyIntegral(...) {
return true;
}
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
bool VerifyIntegral(T* gpuData, int len) {
// atomic Max
T val = 0;
for (int i = 0; i < len; ++i) {
// fourth element should be len-1
val = max(val, static_cast<T>(i));
}
REQUIRE(val == gpuData[3]);
// atomic Min
val = 1 << 8;
for (int i = 0; i < len; ++i) {
val = min(val, static_cast<T>(i));
}
REQUIRE(val == gpuData[4]);
// atomic Inc
T limit = 17;
val = 0;
for (int i = 0; i < len; ++i) {
val = (val >= limit) ? 0 : val + 1;
}
REQUIRE(val == gpuData[5]);
// atomic Dec
limit = 137;
val = 0;
for (int i = 0; i < len; ++i) {
val = ((val == 0) || (val > limit)) ? limit : val - 1;
}
REQUIRE(val == gpuData[6]);
// atomic CAS
for (int i = 0; i < len; ++i) {
// eighth element should be a member of [0, len)
if (static_cast<T>(i) == gpuData[7]) {
return true;
break;
}
}
return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
}
// Top-level host check: rebuilds the atomicAdd result (10 added `len` times
// to zero), requires it to equal gpuData[0], then runs the integral and
// exchange checks. verifyExch is only evaluated if VerifyIntegral succeeded.
template<typename T>
bool verifyData(T* gpuData, int len) {
  T val = 0;
  int added = 0;
  while (added < len) {
    val += 10;
    ++added;
  }
  REQUIRE(val == gpuData[0]);

  if (!VerifyIntegral(gpuData, len)) {
    return false;
  }
  return verifyExch(gpuData, len);
}
// Fallback selected when T is double: no exchange is issued.
__device__
void testKernelExch(...) {}

// Overload for every element type except double: each thread atomically
// stores its flattened thread index into slot 2 of g_odata.
template<typename T,
         typename std::enable_if<!std::is_same<T, double>::value>::type* = nullptr>
__device__
void testKernelExch(T* g_odata) {
  // Flattened global thread index, converted to the element type.
  const T globalTid = blockDim.x * blockIdx.x + threadIdx.x;
  T* const exchSlot = &g_odata[2];
  atomicExch(exchSlot, globalTid);
}
// Fallback for element types other than int / unsigned int: no subtraction.
__device__
void testKernelSub(...) {}

// int / unsigned int overload: each thread atomically subtracts 10 from
// slot 1 of g_odata.
template<
    typename T,
    typename std::enable_if<
        std::is_same<T, int>::value ||
        std::is_same<T, unsigned int>::value>::type* = nullptr>
__device__
void testKernelSub(T* g_odata) {
  T* const subSlot = &g_odata[1];
  atomicSub(subSlot, 10);
}
// Fallback for non-integral element types: no integral-only atomics issued.
__device__
void testKernelIntegral(...) {}

// Integral overload: each thread applies one atomic of every kind to slots
// 3..10 of g_odata and then forwards to testKernelSub.
template<typename T,
         typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
__device__
void testKernelIntegral(T* g_odata) {
  // Flattened global thread index, converted to the element type.
  const T gid = blockDim.x * blockIdx.x + threadIdx.x;

  // Slots 3 and 4: maximum / minimum over all thread indices.
  atomicMax(&g_odata[3], gid);
  atomicMin(&g_odata[4], gid);

  // Slots 5 and 6: increment / decrement with limit arguments 17 and 137;
  // both operate on the slot viewed as unsigned int.
  atomicInc(reinterpret_cast<unsigned int*>(&g_odata[5]), 17);
  atomicDec(reinterpret_cast<unsigned int*>(&g_odata[6]), 137);

  // Slot 7: compare-and-swap from (gid - 1) to gid.
  atomicCAS(&g_odata[7], gid - 1, gid);

  // Slots 8..10: bitwise AND / OR / XOR.
  atomicAnd(&g_odata[8], 2 * gid + 7);
  atomicOr(&g_odata[9], 1 << gid);
  atomicXor(&g_odata[10], gid);

  testKernelSub(g_odata);
}
// Kernel run by every thread: atomically adds 10 to slot 0 of g_odata, then
// runs the integral-only and exchange atomics (each resolves to a no-op
// overload for element types that do not support it).
template<typename T>
__global__ void testKernel(T* g_odata) {
  T* const addSlot = &g_odata[0];
  atomicAdd(addSlot, 10);

  testKernelIntegral(g_odata);
  testKernelExch(g_odata);
}
template<typename T>
static void runTest() {
bool testResult = true;
unsigned int numThreads = 256;
unsigned int numBlocks = 64;
unsigned int numData = 11;
unsigned int memSize = sizeof(T) * numData;
// allocate mem for the result on host side
T* hOData = reinterpret_cast<T*>(malloc(memSize));
// initialize the memory
for (unsigned int i = 0; i < numData; i++) {
hOData[i] = 0;
}
// To make the AND and XOR tests generate something other than 0...
hOData[8] = hOData[10] = 0xff;
// allocate device memory for result
T* dOData;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
// copy host memory to device to initialize to zero
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
// execute the kernel
hipLaunchKernelGGL(
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
// Copy result from device to host
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
// Compute reference solution
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
// Cleanup memory
free(hOData);
HIP_CHECK(hipFree(dOData));
}
// Runs runTest<T>() once per element type, each inside its own SECTION.
// The uint64_t and double sections are compiled only when HT_AMD is non-zero.
TEST_CASE("Unit_SimpleAtomicsTest") {
SECTION("test for int") {
runTest<int>();
}
SECTION("test for unsigned int") {
runTest<unsigned int>();
}
SECTION("test for float") {
runTest<float>();
}
#if HT_AMD
SECTION("test for unsigned long long") {
runTest<uint64_t>();
}
SECTION("test for double") {
runTest<double>();
}
#endif
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <string.h>
#include <math.h>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <algorithm>
#include <type_traits>
using namespace std;
////////////////////////////////////////////////////////////////////////////////
// Auto-Verification Code
////////////////////////////////////////////////////////////////////////////////
// Fallback picked when the element type is not integral: the bitwise atomics
// are not exercised for such types, so there is nothing to compare.
bool verifyBitwise(...) {
  return true;
}

// Integral overload: replays the AND / OR / XOR sequences on the host and
// compares each result with the matching slot (8, 9, 10) of gpuData.
// Always returns true; mismatches are reported through REQUIRE.
template<typename T,
         typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
bool verifyBitwise(T* gpuData, int len) {
  T val = 0xff;
  int step = 0;

  // AND reference: start from 0xff and fold in (2 * step + 7).
  while (step < len) {
    val &= (2 * step + 7);
    ++step;
  }
  REQUIRE(val == gpuData[8]);

  // OR reference: start from 0 and fold in (1 << step).
  val = 0;
  step = 0;
  while (step < len) {
    val |= (1 << step);
    ++step;
  }
  REQUIRE(val == gpuData[9]);

  // XOR reference: start from 0xff and fold in the step index itself.
  val = 0xff;
  step = 0;
  while (step < len) {
    val ^= step;
    ++step;
  }
  REQUIRE(val == gpuData[10]);

  return true;
}
// Fallback for element types other than int / unsigned int: subtraction is
// not exercised for them, so the check trivially succeeds.
bool verifySub(...) {
  return true;
}

// int / unsigned int overload: subtracts 10 from zero `len` times on the host
// and requires the result to equal gpuData[1].
template<
    typename T,
    typename std::enable_if<
        std::is_same<T, int>::value ||
        std::is_same<T, unsigned int>::value>::type* = nullptr>
bool verifySub(T* gpuData, int len) {
  T val = 0;
  // Count down instead of up; the body still runs max(len, 0) times.
  for (int remaining = len; remaining > 0; --remaining) {
    val -= 10;
  }
  REQUIRE(val == gpuData[1]);
  return true;
}
// Exchange check used by verifyData: accepts any arguments and always
// reports success, i.e. the exchanged value is not actually inspected.
// NOTE(review): computeExchExch in this file has the shape of a typed
// overload for this check but is never called — confirm whether it was
// meant to be named verifyExch.
bool verifyExch(...) {
return true;
}
// Checks the atomic-exchange slot: returns true when gpuData[2] equals some
// index in [0, len), false otherwise. Disabled for T = double.
//
// Fixes over the previous version: control used to fall off the end of this
// non-void function when no index matched (undefined behaviour); the unused
// local and the unreachable `break` after `return` are gone as well.
// NOTE(review): nothing in this file calls this function — verifyData calls
// verifyExch, which resolves to the variadic stub. Confirm the intended name.
template<typename T, typename std::enable_if<!std::is_same<T, double>::value>::type* = nullptr>  // NOLINT
bool computeExchExch(T* gpuData, int len) {
  for (T i = 0; i < len; ++i) {
    if (i == gpuData[2]) {
      return true;
    }
  }
  return false;
}
bool VerifyIntegral(...) {
return true;
}
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
bool VerifyIntegral(T* gpuData, int len) {
// atomic Max
T val = 0;
for (int i = 0; i < len; ++i) {
// fourth element should be len-1
val = max(val, static_cast<T>(i));
}
REQUIRE(val == gpuData[3]);
// atomic Min
val = 1 << 8;
for (int i = 0; i < len; ++i) {
val = min(val, static_cast<T>(i));
}
REQUIRE(val == gpuData[4]);
// atomic Inc
T limit = 17;
val = 0;
for (int i = 0; i < len; ++i) {
val = (val >= limit) ? 0 : val + 1;
}
REQUIRE(val == gpuData[5]);
// atomic Dec
limit = 137;
val = 0;
for (int i = 0; i < len; ++i) {
val = ((val == 0) || (val > limit)) ? limit : val - 1;
}
REQUIRE(val == gpuData[6]);
// atomic CAS
for (int i = 0; i < len; ++i) {
// eighth element should be a member of [0, len)
if (static_cast<T>(i) == gpuData[7]) {
return true;
break;
}
}
return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
}
// Top-level host check: rebuilds the atomicAdd result (10 added `len` times
// to zero), requires it to equal gpuData[0], then runs the integral and
// exchange checks. verifyExch is only evaluated if VerifyIntegral succeeded.
template<typename T>
bool verifyData(T* gpuData, int len) {
  T val = 0;
  int added = 0;
  while (added < len) {
    val += 10;
    ++added;
  }
  REQUIRE(val == gpuData[0]);

  if (!VerifyIntegral(gpuData, len)) {
    return false;
  }
  return verifyExch(gpuData, len);
}
// Fallback selected when T is double: no exchange is issued.
__device__
void testKernelExch(...) {}

// Overload for every element type except double: each thread atomically
// stores its flattened thread index into slot 2 of g_odata.
template<typename T,
         typename std::enable_if<!std::is_same<T, double>::value>::type* = nullptr>
__device__
void testKernelExch(T* g_odata) {
  // Flattened global thread index, converted to the element type.
  const T globalTid = blockDim.x * blockIdx.x + threadIdx.x;
  T* const exchSlot = &g_odata[2];
  atomicExch(exchSlot, globalTid);
}
// Fallback for element types other than int / unsigned int: no subtraction.
__device__
void testKernelSub(...) {}

// int / unsigned int overload: each thread atomically subtracts 10 from
// slot 1 of g_odata.
template<
    typename T,
    typename std::enable_if<
        std::is_same<T, int>::value ||
        std::is_same<T, unsigned int>::value>::type* = nullptr>
__device__
void testKernelSub(T* g_odata) {
  T* const subSlot = &g_odata[1];
  atomicSub(subSlot, 10);
}
// Fallback for non-integral element types: no integral-only atomics issued.
__device__
void testKernelIntegral(...) {}

// Integral overload: each thread applies one atomic of every kind to slots
// 3..10 of g_odata and then forwards to testKernelSub.
template<typename T,
         typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
__device__
void testKernelIntegral(T* g_odata) {
  // Flattened global thread index, converted to the element type.
  const T gid = blockDim.x * blockIdx.x + threadIdx.x;

  // Slots 3 and 4: maximum / minimum over all thread indices.
  atomicMax(&g_odata[3], gid);
  atomicMin(&g_odata[4], gid);

  // Slots 5 and 6: increment / decrement with limit arguments 17 and 137;
  // both operate on the slot viewed as unsigned int.
  atomicInc(reinterpret_cast<unsigned int*>(&g_odata[5]), 17);
  atomicDec(reinterpret_cast<unsigned int*>(&g_odata[6]), 137);

  // Slot 7: compare-and-swap from (gid - 1) to gid.
  atomicCAS(&g_odata[7], gid - 1, gid);

  // Slots 8..10: bitwise AND / OR / XOR.
  atomicAnd(&g_odata[8], 2 * gid + 7);
  atomicOr(&g_odata[9], 1 << gid);
  atomicXor(&g_odata[10], gid);

  testKernelSub(g_odata);
}
// Kernel run by every thread: atomically adds 10 to slot 0 of g_odata, then
// runs the integral-only and exchange atomics (each resolves to a no-op
// overload for element types that do not support it).
template<typename T>
__global__ void testKernel(T* g_odata) {
  T* const addSlot = &g_odata[0];
  atomicAdd(addSlot, 10);

  testKernelIntegral(g_odata);
  testKernelExch(g_odata);
}
template<typename T>
static void runTest() {
bool testResult = true;
unsigned int numThreads = 256;
unsigned int numBlocks = 64;
unsigned int numData = 11;
unsigned int memSize = sizeof(T) * numData;
// allocate mem for the result on host side
T* hOData = reinterpret_cast<T*>(malloc(memSize));
// initialize the memory
for (unsigned int i = 0; i < numData; i++) {
hOData[i] = 0;
}
// To make the AND and XOR tests generate something other than 0...
hOData[8] = hOData[10] = 0xff;
// allocate device memory for result
T* dOData;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
// copy host memory to device to initialize to zero
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
// execute the kernel
hipLaunchKernelGGL(
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
// Copy result from device to host
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
// Compute reference solution
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
// Cleanup memory
free(hOData);
HIP_CHECK(hipFree(dOData));
}
// Runs runTest<T>() once per element type, each inside its own SECTION.
// The uint64_t and double sections are compiled only when HT_AMD is non-zero.
TEST_CASE("Unit_SimpleAtomicsTest") {
SECTION("test for int") {
runTest<int>();
}
SECTION("test for unsigned int") {
runTest<unsigned int>();
}
SECTION("test for float") {
runTest<float>();
}
#if HT_AMD
SECTION("test for unsigned long long") {
runTest<uint64_t>();
}
SECTION("test for double") {
runTest<double>();
}
#endif
}
+101 -101
Переглянути файл
@@ -1,101 +1,101 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/device_functions.h>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for the single-precision device intrinsics: each
// intrinsic is called once with constant arguments and its result discarded.
// The calls wrapped in "#if defined OCML_BASIC_ROUNDED_OPERATIONS" (the _rd,
// _ru and _rz variants) are compiled only when that macro is defined; the
// _rn variants are always compiled.
__device__ void single_precision_intrinsics() {
// Output slots for __sincosf at the end of the function.
float fX, fY;
__cosf(0.0f);
__exp10f(0.0f);
__expf(0.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fadd_rd(0.0f, 1.0f);
#endif
__fadd_rn(0.0f, 1.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fadd_ru(0.0f, 1.0f);
__fadd_rz(0.0f, 1.0f);
__fdiv_rd(4.0f, 2.0f);
#endif
__fdiv_rn(4.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fdiv_ru(4.0f, 2.0f);
__fdiv_rz(4.0f, 2.0f);
#endif
__fdividef(4.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmaf_rd(1.0f, 2.0f, 3.0f);
#endif
__fmaf_rn(1.0f, 2.0f, 3.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmaf_ru(1.0f, 2.0f, 3.0f);
__fmaf_rz(1.0f, 2.0f, 3.0f);
__fmul_rd(1.0f, 2.0f);
#endif
__fmul_rn(1.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmul_ru(1.0f, 2.0f);
__fmul_rz(1.0f, 2.0f);
__frcp_rd(2.0f);
#endif
__frcp_rn(2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__frcp_ru(2.0f);
__frcp_rz(2.0f);
#endif
__frsqrt_rn(4.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsqrt_rd(4.0f);
#endif
__fsqrt_rn(4.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsqrt_ru(4.0f);
__fsqrt_rz(4.0f);
__fsub_rd(2.0f, 1.0f);
#endif
__fsub_rn(2.0f, 1.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsub_ru(2.0f, 1.0f);
__fsub_rz(2.0f, 1.0f);
#endif
__log10f(1.0f);
__log2f(1.0f);
__logf(1.0f);
__powf(1.0f, 0.0f);
__saturatef(0.1f);
__sincosf(0.0f, &fX, &fY);
__sinf(0.0f);
__tanf(0.0f);
}
// Kernel entry point whose only job is to call single_precision_intrinsics().
// The int parameter is unnamed and never read.
__global__ void compileSinglePrecisionIntrinsics(int) {
single_precision_intrinsics();
}
// Launches compileSinglePrecisionIntrinsics on a single thread (1x1x1 grid,
// 1x1x1 block) and fails the test if the launch or the kernel execution
// reports a HIP error.
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/device_functions.h>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for the single-precision device intrinsics: each
// intrinsic is called once with constant arguments and its result discarded.
// The calls wrapped in "#if defined OCML_BASIC_ROUNDED_OPERATIONS" (the _rd,
// _ru and _rz variants) are compiled only when that macro is defined; the
// _rn variants are always compiled.
__device__ void single_precision_intrinsics() {
// Output slots for __sincosf at the end of the function.
float fX, fY;
__cosf(0.0f);
__exp10f(0.0f);
__expf(0.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fadd_rd(0.0f, 1.0f);
#endif
__fadd_rn(0.0f, 1.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fadd_ru(0.0f, 1.0f);
__fadd_rz(0.0f, 1.0f);
__fdiv_rd(4.0f, 2.0f);
#endif
__fdiv_rn(4.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fdiv_ru(4.0f, 2.0f);
__fdiv_rz(4.0f, 2.0f);
#endif
__fdividef(4.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmaf_rd(1.0f, 2.0f, 3.0f);
#endif
__fmaf_rn(1.0f, 2.0f, 3.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmaf_ru(1.0f, 2.0f, 3.0f);
__fmaf_rz(1.0f, 2.0f, 3.0f);
__fmul_rd(1.0f, 2.0f);
#endif
__fmul_rn(1.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmul_ru(1.0f, 2.0f);
__fmul_rz(1.0f, 2.0f);
__frcp_rd(2.0f);
#endif
__frcp_rn(2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__frcp_ru(2.0f);
__frcp_rz(2.0f);
#endif
__frsqrt_rn(4.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsqrt_rd(4.0f);
#endif
__fsqrt_rn(4.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsqrt_ru(4.0f);
__fsqrt_rz(4.0f);
__fsub_rd(2.0f, 1.0f);
#endif
__fsub_rn(2.0f, 1.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fsub_ru(2.0f, 1.0f);
__fsub_rz(2.0f, 1.0f);
#endif
__log10f(1.0f);
__log2f(1.0f);
__logf(1.0f);
__powf(1.0f, 0.0f);
__saturatef(0.1f);
__sincosf(0.0f, &fX, &fY);
__sinf(0.0f);
__tanf(0.0f);
}
// Kernel entry point whose only job is to call single_precision_intrinsics().
// The int parameter is unnamed and never read.
__global__ void compileSinglePrecisionIntrinsics(int) {
single_precision_intrinsics();
}
// Launches compileSinglePrecisionIntrinsics on a single thread (1x1x1 grid,
// 1x1x1 block) and fails the test if the launch or the kernel execution
// reports a HIP error.
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
  hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
+123 -123
Переглянути файл
@@ -1,123 +1,123 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/math_functions.h>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for single-precision math in device code: every
// function below is called once with constant arguments and its return value
// is discarded; nothing is verified here.
__device__ void single_precision_math_functions() {
// iX receives the integer output of frexpf.
int iX;
// fX/fY are passed by pointer to normf, rnormf, sincosf and sincospif.
float fX, fY;
acosf(1.0f);
acoshf(1.0f);
asinf(0.0f);
asinhf(0.0f);
atan2f(0.0f, 1.0f);
atanf(0.0f);
atanhf(0.0f);
cbrtf(0.0f);
ceilf(0.0f);
copysignf(1.0f, -2.0f);
cosf(0.0f);
coshf(0.0f);
cospif(0.0f);
erfcf(0.0f);
erfcinvf(2.0f);
erfcxf(0.0f);
erff(0.0f);
erfinvf(1.0f);
exp10f(0.0f);
exp2f(0.0f);
expf(0.0f);
expm1f(0.0f);
fabsf(1.0f);
fdimf(1.0f, 0.0f);
fdividef(0.0f, 1.0f);
floorf(0.0f);
fmaf(1.0f, 2.0f, 3.0f);
fmaxf(0.0f, 0.0f);
fminf(0.0f, 0.0f);
fmodf(0.0f, 1.0f);
frexpf(0.0f, &iX);
hypotf(1.0f, 0.0f);
ilogbf(1.0f);
isfinite(0.0f);
isinf(0.0f);
isnan(0.0f);
j0f(0.0f);
j1f(0.0f);
// NOTE(review): the order argument is written as a float literal here,
// whereas ynf below is given an int — confirm this is intentional.
jnf(-1.0f, 1.0f);
ldexpf(0.0f, 0);
llrintf(0.0f);
llroundf(0.0f);
log10f(1.0f);
log1pf(-1.0f);
log2f(1.0f);
logbf(1.0f);
logf(1.0f);
lrintf(0.0f);
lroundf(0.0f);
nanf("1");
nearbyintf(0.0f);
norm3df(1.0f, 0.0f, 0.0f);
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
normcdff(0.0f);
normcdfinvf(1.0f);
// fX is given a defined value before its address is passed to normf
// (presumably read as a one-element input — confirm against the HIP math API).
fX = 1.0f;
normf(1, &fX);
powf(1.0f, 0.0f);
remainderf(2.0f, 1.0f);
rhypotf(0.0f, 1.0f);
rintf(1.0f);
rnorm3df(0.0f, 0.0f, 1.0f);
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
// Same pattern as normf above: reset fX before passing its address.
fX = 1.0f;
rnormf(1, &fX);
roundf(0.0f);
rsqrtf(1.0f);
signbit(1.0f);
sincosf(0.0f, &fX, &fY);
sincospif(0.0f, &fX, &fY);
sinf(0.0f);
sinhf(0.0f);
sinpif(0.0f);
sqrtf(0.0f);
tanf(0.0f);
tanhf(0.0f);
tgammaf(2.0f);
truncf(0.0f);
y0f(1.0f);
y1f(1.0f);
ynf(1, 1.0f);
}
// Kernel entry point whose only job is to call
// single_precision_math_functions(). The int parameter is unnamed and unused.
__global__ void compileSinglePrecisionMathOnDevice(int) {
single_precision_math_functions();
}
// Launches compileSinglePrecisionMathOnDevice on a single thread (1x1x1 grid,
// 1x1x1 block) and fails the test if the launch or the kernel execution
// reports a HIP error.
TEST_CASE("Unit_SinglePrecisionMathDevice") {
  hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/math_functions.h>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
// Compile-coverage helper for single-precision math in device code: every
// function below is called once with constant arguments and its return value
// is discarded; nothing is verified here.
__device__ void single_precision_math_functions() {
// iX receives the integer output of frexpf.
int iX;
// fX/fY are passed by pointer to normf, rnormf, sincosf and sincospif.
float fX, fY;
acosf(1.0f);
acoshf(1.0f);
asinf(0.0f);
asinhf(0.0f);
atan2f(0.0f, 1.0f);
atanf(0.0f);
atanhf(0.0f);
cbrtf(0.0f);
ceilf(0.0f);
copysignf(1.0f, -2.0f);
cosf(0.0f);
coshf(0.0f);
cospif(0.0f);
erfcf(0.0f);
erfcinvf(2.0f);
erfcxf(0.0f);
erff(0.0f);
erfinvf(1.0f);
exp10f(0.0f);
exp2f(0.0f);
expf(0.0f);
expm1f(0.0f);
fabsf(1.0f);
fdimf(1.0f, 0.0f);
fdividef(0.0f, 1.0f);
floorf(0.0f);
fmaf(1.0f, 2.0f, 3.0f);
fmaxf(0.0f, 0.0f);
fminf(0.0f, 0.0f);
fmodf(0.0f, 1.0f);
frexpf(0.0f, &iX);
hypotf(1.0f, 0.0f);
ilogbf(1.0f);
isfinite(0.0f);
isinf(0.0f);
isnan(0.0f);
j0f(0.0f);
j1f(0.0f);
// NOTE(review): the order argument is written as a float literal here,
// whereas ynf below is given an int — confirm this is intentional.
jnf(-1.0f, 1.0f);
ldexpf(0.0f, 0);
llrintf(0.0f);
llroundf(0.0f);
log10f(1.0f);
log1pf(-1.0f);
log2f(1.0f);
logbf(1.0f);
logf(1.0f);
lrintf(0.0f);
lroundf(0.0f);
nanf("1");
nearbyintf(0.0f);
norm3df(1.0f, 0.0f, 0.0f);
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
normcdff(0.0f);
normcdfinvf(1.0f);
// fX is given a defined value before its address is passed to normf
// (presumably read as a one-element input — confirm against the HIP math API).
fX = 1.0f;
normf(1, &fX);
powf(1.0f, 0.0f);
remainderf(2.0f, 1.0f);
rhypotf(0.0f, 1.0f);
rintf(1.0f);
rnorm3df(0.0f, 0.0f, 1.0f);
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
// Same pattern as normf above: reset fX before passing its address.
fX = 1.0f;
rnormf(1, &fX);
roundf(0.0f);
rsqrtf(1.0f);
signbit(1.0f);
sincosf(0.0f, &fX, &fY);
sincospif(0.0f, &fX, &fY);
sinf(0.0f);
sinhf(0.0f);
sinpif(0.0f);
sqrtf(0.0f);
tanf(0.0f);
tanhf(0.0f);
tgammaf(2.0f);
truncf(0.0f);
y0f(1.0f);
y1f(1.0f);
ynf(1, 1.0f);
}
// Kernel entry point whose only job is to call
// single_precision_math_functions(). The int parameter is unnamed and unused.
__global__ void compileSinglePrecisionMathOnDevice(int) {
single_precision_math_functions();
}
// Launches compileSinglePrecisionMathOnDevice on a single thread (1x1x1 grid,
// 1x1x1 block) and fails the test if the launch or the kernel execution
// reports a HIP error.
TEST_CASE("Unit_SinglePrecisionMathDevice") {
  hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
                     dim3(1, 1, 1), 0, 0, 1);
  // Surface launch-time failures (bad configuration, missing code object...).
  HIP_CHECK(hipGetLastError());
  // Kernel launches are asynchronous: wait so that execution faults are
  // reported inside this test case instead of being silently dropped.
  HIP_CHECK(hipDeviceSynchronize());
}
+113 -113
Переглянути файл
@@ -1,113 +1,113 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <cmath>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
/**
 * Calls each listed single-precision host math routine once with constant
 * arguments. Every return value is discarded, so the function only shows that
 * the calls compile, link and return on the host.
 *
 * Calls guarded by `#ifdef __unix__` (exp10f, j0f/j1f/jnf, sincosf,
 * y0f/y1f/ynf) are compiled only when that macro is defined.
 */
__host__ static void single_precision_math_functions() {
  // Out-parameters: iX receives frexpf/remquof results, fX/fY receive
  // modff/sincosf results. Initialized so they never hold indeterminate values.
  int iX = 0;
  float fX = 0.0f, fY = 0.0f;
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  erfcf(0.0f);
  erff(0.0f);
#ifdef __unix__
  exp10f(0.0f);
#endif
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &iX);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  std::isfinite(0.0f);
  std::isinf(0.0f);
  std::isnan(0.0f);
#ifdef __unix__
  j0f(0.0f);
  j1f(0.0f);
  // The order argument of jnf is an int; pass an integer literal instead of
  // relying on an implicit float-to-int conversion.
  jnf(-1, 1.0f);
#endif
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &fX);
  nanf("1");
  nearbyintf(0.0f);
  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &iX);
  rintf(1.0f);
#if HT_AMD
  // NOTE(review): no call visible in this function reads this value before it
  // is overwritten; presumably left over from a removed call. Confirm.
  fX = 1.0f;
#endif
  roundf(0.0f);
  /// rsqrtf(1.0f);
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  std::signbit(1.0f);
#ifdef __unix__
  sincosf(0.0f, &fX, &fY);
#endif
  sinf(0.0f);
  sinhf(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  truncf(0.0f);
#ifdef __unix__
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
#endif
}
// Runs the host-side single-precision math calls once. The test body contains
// no assertions; it passes when the calls compile, link and return.
TEST_CASE("Unit_SinglePrecisionMathHost") {
single_precision_math_functions();
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <cmath>
#pragma GCC diagnostic ignored "-Wall"
#pragma clang diagnostic ignored "-Wunused-variable"
/**
 * Calls each listed single-precision host math routine once with constant
 * arguments. Every return value is discarded, so the function only shows that
 * the calls compile, link and return on the host.
 *
 * Calls guarded by `#ifdef __unix__` (exp10f, j0f/j1f/jnf, sincosf,
 * y0f/y1f/ynf) are compiled only when that macro is defined.
 */
__host__ static void single_precision_math_functions() {
  // Out-parameters: iX receives frexpf/remquof results, fX/fY receive
  // modff/sincosf results. Initialized so they never hold indeterminate values.
  int iX = 0;
  float fX = 0.0f, fY = 0.0f;
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);
  cbrtf(0.0f);
  ceilf(0.0f);
  copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  erfcf(0.0f);
  erff(0.0f);
#ifdef __unix__
  exp10f(0.0f);
#endif
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);
  fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fmaxf(0.0f, 0.0f);
  fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &iX);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);
  std::isfinite(0.0f);
  std::isinf(0.0f);
  std::isnan(0.0f);
#ifdef __unix__
  j0f(0.0f);
  j1f(0.0f);
  // The order argument of jnf is an int; pass an integer literal instead of
  // relying on an implicit float-to-int conversion.
  jnf(-1, 1.0f);
#endif
  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);
  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &fX);
  nanf("1");
  nearbyintf(0.0f);
  powf(1.0f, 0.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &iX);
  rintf(1.0f);
#if HT_AMD
  // NOTE(review): no call visible in this function reads this value before it
  // is overwritten; presumably left over from a removed call. Confirm.
  fX = 1.0f;
#endif
  roundf(0.0f);
  /// rsqrtf(1.0f);
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  std::signbit(1.0f);
#ifdef __unix__
  sincosf(0.0f, &fX, &fY);
#endif
  sinf(0.0f);
  sinhf(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  truncf(0.0f);
#ifdef __unix__
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
#endif
}
// Runs the host-side single-precision math calls once. The test body contains
// no assertions; it passes when the calls compile, link and return.
TEST_CASE("Unit_SinglePrecisionMathHost") {
single_precision_math_functions();
}
+153 -153
Переглянути файл
@@ -1,153 +1,153 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <complex>
#pragma clang diagnostic ignored "-Wunused-variable"
// Tolerance for error: test() accepts an element when its error is below this
// value. The error is divided by |expected| first (making it relative) only
// when |expected| is larger than this value.
const double tolerance = 1e-6;
// Number of complex elements in every buffer; test() also launches one block
// of this many threads.
#define LEN 64
// X-macro listing every operation under test. Each use site defines OP(x),
// expands ALL_FUN, and then undefines OP again.
#define ALL_FUN \
OP(add) \
OP(sub) \
OP(mul) \
OP(div) \
OP(abs) \
OP(arg) \
OP(sin) \
OP(cos)
// Expands ALL_FUN into one enumerator per operation (CK_add, CK_sub, ...,
// CK_cos), in the order listed in ALL_FUN.
#define OP(x) CK_##x,
enum CalcKind {
ALL_FUN
};
#undef OP
// Returns the textual name of an operation: ALL_FUN expands to one
// `case CK_x: return "x";` per operation (the name is produced by
// stringifying the macro argument). Any other value yields an empty string.
#define OP(x) case CK_##x: return #x;
std::string getName(enum CalcKind CK) {
switch (CK) {
ALL_FUN
}
return ""; // To prevent compile warning
}
#undef OP
// Calculates the operation selected by CK.
// If the operation takes one argument, B is ignored.
// If the operation returns a real number, it is converted to a complex number.
//
// ONE_ARG(func) emits the `case CK_<func>` label for a unary operation: it
// calls func(A) unqualified and wraps the result in std::complex<FloatT>.
// It is only meaningful inside calc(), where A and FloatT are in scope.
#define ONE_ARG(func) \
case CK_##func: \
return std::complex<FloatT>(func(A));
// Callable from both host and device code, so test() can compute its
// reference values with the same function the kernel runs.
template<typename FloatT>
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
std::complex<FloatT> B,
enum CalcKind CK) {
switch (CK) {
// Binary operations use both operands.
case CK_add:
return A + B;
case CK_sub:
return A - B;
case CK_mul:
return A * B;
case CK_div:
return A / B;
// Unary operations use A only.
ONE_ARG(abs)
ONE_ARG(arg)
ONE_ARG(sin)
ONE_ARG(cos)
}
return A; // To prevent compile warning
}
// Element-wise kernel: each thread evaluates calc() for the element at its
// global thread index and stores the result in C. There is no bounds check,
// so the launch must not create more threads than the buffers have elements.
template <typename FloatT>
__global__ void kernel(std::complex<FloatT>* A, std::complex<FloatT>* B,
                       std::complex<FloatT>* C, enum CalcKind CK) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  C[idx] = calc<FloatT>(A[idx], B[idx], CK);
}
/**
 * Runs every operation listed in ALL_FUN on the device for LEN complex values
 * and compares each result against calc() evaluated on the host.
 *
 * Inputs are A[i] = B[i] = (i + 1, i + 2). An element passes when its error is
 * below `tolerance`; the error is relative when |expected| exceeds `tolerance`
 * and absolute otherwise.
 *
 * Mismatches are reported with CHECK rather than assert, so they are still
 * detected when NDEBUG is defined, every operation is reported individually,
 * and the buffers are released even after a mismatch.
 *
 * @tparam FloatT Underlying floating-point type of std::complex.
 */
template <typename FloatT>
void test() {
  typedef std::complex<FloatT> ComplexT;

  // Host buffers: inputs A and B, device results C.
  ComplexT* A = new ComplexT[LEN];
  ComplexT* B = new ComplexT[LEN];
  ComplexT* C = new ComplexT[LEN];

  // Device counterparts.
  ComplexT* Ad = nullptr;
  ComplexT* Bd = nullptr;
  ComplexT* Cd = nullptr;
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT) * LEN));
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT) * LEN));
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT) * LEN));

  for (uint32_t i = 0; i < LEN; i++) {
    A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
    B[i] = A[i];
    C[i] = A[i];
  }
  HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT) * LEN, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT) * LEN, hipMemcpyHostToDevice));

  // Run kernel for a calculation kind and verify by comparing with host
  // calculation result. Returns false if any element is out of tolerance.
  auto test_fun = [&](enum CalcKind CK) {
    hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
                       Ad, Bd, Cd, CK);
    HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT) * LEN, hipMemcpyDeviceToHost));
    bool pass = true;
    for (int i = 0; i < LEN; i++) {
      ComplexT Expected = calc(A[i], B[i], CK);
      FloatT error = abs(C[i] - Expected);
      if (abs(Expected) > tolerance)
        error /= abs(Expected);
      pass &= error < tolerance;
    }
    return pass;
  };

  // One CHECK per operation; INFO names the operation in failure reports.
#define OP(x)                               \
  {                                         \
    INFO("Operation: " << getName(CK_##x)); \
    CHECK(test_fun(CK_##x));                \
  }
  ALL_FUN
#undef OP

  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  HIP_CHECK(hipFree(Cd));
  delete[] A;
  delete[] B;
  delete[] C;
}
// Device-side std::complex arithmetic test, run once for float and once for
// double. The whole test case is compiled only when HT_AMD is non-zero.
#if HT_AMD
TEST_CASE("Unit_StdComplex") {
SECTION("Test run with float") {
test<float>();
}
SECTION("Test run with double") {
test<double>();
}
}
#endif
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <complex>
#pragma clang diagnostic ignored "-Wunused-variable"
// Tolerance for error: test() accepts an element when its error is below this
// value. The error is divided by |expected| first (making it relative) only
// when |expected| is larger than this value.
const double tolerance = 1e-6;
// Number of complex elements in every buffer; test() also launches one block
// of this many threads.
#define LEN 64
// X-macro listing every operation under test. Each use site defines OP(x),
// expands ALL_FUN, and then undefines OP again.
#define ALL_FUN \
OP(add) \
OP(sub) \
OP(mul) \
OP(div) \
OP(abs) \
OP(arg) \
OP(sin) \
OP(cos)
// Expands ALL_FUN into one enumerator per operation (CK_add, CK_sub, ...,
// CK_cos), in the order listed in ALL_FUN.
#define OP(x) CK_##x,
enum CalcKind {
ALL_FUN
};
#undef OP
// Returns the textual name of an operation: ALL_FUN expands to one
// `case CK_x: return "x";` per operation (the name is produced by
// stringifying the macro argument). Any other value yields an empty string.
#define OP(x) case CK_##x: return #x;
std::string getName(enum CalcKind CK) {
switch (CK) {
ALL_FUN
}
return ""; // To prevent compile warning
}
#undef OP
// Calculates the operation selected by CK.
// If the operation takes one argument, B is ignored.
// If the operation returns a real number, it is converted to a complex number.
//
// ONE_ARG(func) emits the `case CK_<func>` label for a unary operation: it
// calls func(A) unqualified and wraps the result in std::complex<FloatT>.
// It is only meaningful inside calc(), where A and FloatT are in scope.
#define ONE_ARG(func) \
case CK_##func: \
return std::complex<FloatT>(func(A));
// Callable from both host and device code, so test() can compute its
// reference values with the same function the kernel runs.
template<typename FloatT>
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
std::complex<FloatT> B,
enum CalcKind CK) {
switch (CK) {
// Binary operations use both operands.
case CK_add:
return A + B;
case CK_sub:
return A - B;
case CK_mul:
return A * B;
case CK_div:
return A / B;
// Unary operations use A only.
ONE_ARG(abs)
ONE_ARG(arg)
ONE_ARG(sin)
ONE_ARG(cos)
}
return A; // To prevent compile warning
}
// Element-wise kernel: each thread evaluates calc() for the element at its
// global thread index and stores the result in C. There is no bounds check,
// so the launch must not create more threads than the buffers have elements.
template <typename FloatT>
__global__ void kernel(std::complex<FloatT>* A, std::complex<FloatT>* B,
                       std::complex<FloatT>* C, enum CalcKind CK) {
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  C[idx] = calc<FloatT>(A[idx], B[idx], CK);
}
/**
 * Runs every operation listed in ALL_FUN on the device for LEN complex values
 * and compares each result against calc() evaluated on the host.
 *
 * Inputs are A[i] = B[i] = (i + 1, i + 2). An element passes when its error is
 * below `tolerance`; the error is relative when |expected| exceeds `tolerance`
 * and absolute otherwise.
 *
 * Mismatches are reported with CHECK rather than assert, so they are still
 * detected when NDEBUG is defined, every operation is reported individually,
 * and the buffers are released even after a mismatch.
 *
 * @tparam FloatT Underlying floating-point type of std::complex.
 */
template <typename FloatT>
void test() {
  typedef std::complex<FloatT> ComplexT;

  // Host buffers: inputs A and B, device results C.
  ComplexT* A = new ComplexT[LEN];
  ComplexT* B = new ComplexT[LEN];
  ComplexT* C = new ComplexT[LEN];

  // Device counterparts.
  ComplexT* Ad = nullptr;
  ComplexT* Bd = nullptr;
  ComplexT* Cd = nullptr;
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT) * LEN));
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT) * LEN));
  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT) * LEN));

  for (uint32_t i = 0; i < LEN; i++) {
    A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
    B[i] = A[i];
    C[i] = A[i];
  }
  HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT) * LEN, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT) * LEN, hipMemcpyHostToDevice));

  // Run kernel for a calculation kind and verify by comparing with host
  // calculation result. Returns false if any element is out of tolerance.
  auto test_fun = [&](enum CalcKind CK) {
    hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
                       Ad, Bd, Cd, CK);
    HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT) * LEN, hipMemcpyDeviceToHost));
    bool pass = true;
    for (int i = 0; i < LEN; i++) {
      ComplexT Expected = calc(A[i], B[i], CK);
      FloatT error = abs(C[i] - Expected);
      if (abs(Expected) > tolerance)
        error /= abs(Expected);
      pass &= error < tolerance;
    }
    return pass;
  };

  // One CHECK per operation; INFO names the operation in failure reports.
#define OP(x)                               \
  {                                         \
    INFO("Operation: " << getName(CK_##x)); \
    CHECK(test_fun(CK_##x));                \
  }
  ALL_FUN
#undef OP

  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  HIP_CHECK(hipFree(Cd));
  delete[] A;
  delete[] B;
  delete[] C;
}
// Device-side std::complex arithmetic test, run once for float and once for
// double. The whole test case is compiled only when HT_AMD is non-zero.
#if HT_AMD
TEST_CASE("Unit_StdComplex") {
SECTION("Test run with float") {
test<float>();
}
SECTION("Test run with double") {
test<double>();
}
}
#endif
+220 -220
Переглянути файл
@@ -1,220 +1,220 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
(TestCase 1)::
1) Execute atomicAdd in multi threaded scenario by diverging the data across
multiple threads and validate the output at the end of all operations.
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
across multiple threads and validate the output at the end of all operations.
(TestCase 2)::
3) Execute atomicAdd API and validate the result.
4) Execute atomicAddNoRet API and validate the result.
(TestCase 3)::
5) atomicadd/NoRet negative scenarios (TBD).
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
/*
* Defines initial and increment values
*
* INCREMENT_VALUE is the amount every kernel thread adds. Each *_INITIAL_VALUE
* macro is the starting value the test case passes for one element type
* (LONG_INITIAL_VALUE is used with uint64_t).
*/
#define INCREMENT_VALUE 10
#define INT_INITIAL_VALUE 10
#define FLOAT_INITIAL_VALUE 10.50
#define DOUBLE_INITIAL_VALUE 200.12
#define LONG_INITIAL_VALUE 10000
#define UNSIGNED_INITIAL_VALUE 20
#if HT_NVIDIA
// atomicAddNoRet is unavailable in cuda
// Shim compiled only when HT_NVIDIA is non-zero: converts y to T, forwards to
// atomicAdd, and discards atomicAdd's return value.
template <typename T>
__device__ void atomicAddNoRet(T* x, int y) {
atomicAdd(x, static_cast<T>(y));
}
#endif
// NOTE(review): not referenced by any code visible in this file; presumably a
// leftover option flag. Confirm before removing.
bool p_atomicNoRet = false;
// Every launched thread adds INCREMENT_VALUE to *C_d via atomicAddNoRet.
// Used by atomictestnoret_manywaves.
template <typename T>
__global__ void atomicnoret_manywaves(T* C_d) {
atomicAddNoRet(C_d, INCREMENT_VALUE);
}
// Every launched thread adds INCREMENT_VALUE to *C_d via atomicAdd; the value
// returned by atomicAdd is ignored. Used by atomictest_manywaves.
template <typename T>
__global__ void atomic_manywaves(T* C_d) {
atomicAdd(C_d, INCREMENT_VALUE);
}
// Adds INCREMENT_VALUE to *C_d via atomicAddNoRet. The body is the same as
// atomicnoret_manywaves; atomictestnoret_simple launches it with one thread.
template <typename T>
__global__ void atomicnoret_simple(T* C_d) {
atomicAddNoRet(C_d, INCREMENT_VALUE);
}
// Adds INCREMENT_VALUE to *C_d via atomicAdd. The body is the same as
// atomic_manywaves; atomictest_simple launches it with one thread.
template <typename T>
__global__ void atomic_simple(T* C_d) {
atomicAdd(C_d, INCREMENT_VALUE);
}
/**
 * Seeds one device value with initial_val, launches atomic_manywaves with
 * 10 threads in a single block (each adds INCREMENT_VALUE via atomicAdd) and
 * verifies the accumulated total.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong total is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictest_manywaves(const T& initial_val) {
  const unsigned int ThreadsperBlock = 10;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val +
          static_cast<T>(INCREMENT_VALUE * (ThreadsperBlock * numBlocks)));
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomicnoret_manywaves
 * with 10 threads in a single block (each adds INCREMENT_VALUE via
 * atomicAddNoRet) and verifies the accumulated total.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong total is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictestnoret_manywaves(const T& initial_val) {
  const unsigned int ThreadsperBlock = 10;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  // Expected total is converted to T, matching atomictest_manywaves.
  REQUIRE(result == initial_val +
          static_cast<T>(INCREMENT_VALUE * (ThreadsperBlock * numBlocks)));
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomic_simple with a
 * single thread (one atomicAdd of INCREMENT_VALUE) and verifies the result.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong result is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictest_simple(const T& initial_val) {
  const unsigned int ThreadsperBlock = 1;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val + INCREMENT_VALUE);
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomicnoret_simple with a
 * single thread (one atomicAddNoRet of INCREMENT_VALUE) and verifies the
 * result.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong result is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictestnoret_simple(const T& initial_val) {
  const unsigned int ThreadsperBlock = 1;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val + INCREMENT_VALUE);
  return true;
}
/**
 * Exercises atomicAdd and atomicAddNoRet through the helper templates above,
 * once with 10 threads ("many waves") and once with a single thread.
 * The uint64_t and double variants are compiled only when HT_AMD is non-zero;
 * the no-return variants are run for float only.
 */
TEST_CASE("Unit_hipTestAtomicAdd") {
  // Every helper is expected to report success.
  const bool expected = true;

  SECTION("atomic tests with many waves") {
    REQUIRE(expected == atomictest_manywaves<int>(INT_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    REQUIRE(expected == atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("atomic tests with many waves and no return") {
    REQUIRE(expected == atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
  }

  SECTION("simple atomic tests") {
    REQUIRE(expected == atomictest_simple<int>(INT_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    REQUIRE(expected == atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("Simple atomic test with no return") {
    REQUIRE(expected == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
  }
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
(TestCase 1)::
1) Execute atomicAdd in multi threaded scenario by diverging the data across
multiple threads and validate the output at the end of all operations.
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
across multiple threads and validate the output at the end of all operations.
(TestCase 2)::
3) Execute atomicAdd API and validate the result.
4) Execute atomicAddNoRet API and validate the result.
(TestCase 3)::
5) atomicadd/NoRet negative scenarios (TBD).
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
/*
* Defines initial and increment values
*
* INCREMENT_VALUE is the amount every kernel thread adds. Each *_INITIAL_VALUE
* macro is the starting value the test case passes for one element type
* (LONG_INITIAL_VALUE is used with uint64_t).
*/
#define INCREMENT_VALUE 10
#define INT_INITIAL_VALUE 10
#define FLOAT_INITIAL_VALUE 10.50
#define DOUBLE_INITIAL_VALUE 200.12
#define LONG_INITIAL_VALUE 10000
#define UNSIGNED_INITIAL_VALUE 20
#if HT_NVIDIA
// atomicAddNoRet is unavailable in cuda
// Shim compiled only when HT_NVIDIA is non-zero: converts y to T, forwards to
// atomicAdd, and discards atomicAdd's return value.
template <typename T>
__device__ void atomicAddNoRet(T* x, int y) {
atomicAdd(x, static_cast<T>(y));
}
#endif
// NOTE(review): not referenced by any code visible in this file; presumably a
// leftover option flag. Confirm before removing.
bool p_atomicNoRet = false;
// Every launched thread adds INCREMENT_VALUE to *C_d via atomicAddNoRet.
// Used by atomictestnoret_manywaves.
template <typename T>
__global__ void atomicnoret_manywaves(T* C_d) {
atomicAddNoRet(C_d, INCREMENT_VALUE);
}
// Every launched thread adds INCREMENT_VALUE to *C_d via atomicAdd; the value
// returned by atomicAdd is ignored. Used by atomictest_manywaves.
template <typename T>
__global__ void atomic_manywaves(T* C_d) {
atomicAdd(C_d, INCREMENT_VALUE);
}
// Adds INCREMENT_VALUE to *C_d via atomicAddNoRet. The body is the same as
// atomicnoret_manywaves; atomictestnoret_simple launches it with one thread.
template <typename T>
__global__ void atomicnoret_simple(T* C_d) {
atomicAddNoRet(C_d, INCREMENT_VALUE);
}
// Adds INCREMENT_VALUE to *C_d via atomicAdd. The body is the same as
// atomic_manywaves; atomictest_simple launches it with one thread.
template <typename T>
__global__ void atomic_simple(T* C_d) {
atomicAdd(C_d, INCREMENT_VALUE);
}
/**
 * Seeds one device value with initial_val, launches atomic_manywaves with
 * 10 threads in a single block (each adds INCREMENT_VALUE via atomicAdd) and
 * verifies the accumulated total.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong total is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictest_manywaves(const T& initial_val) {
  const unsigned int ThreadsperBlock = 10;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val +
          static_cast<T>(INCREMENT_VALUE * (ThreadsperBlock * numBlocks)));
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomicnoret_manywaves
 * with 10 threads in a single block (each adds INCREMENT_VALUE via
 * atomicAddNoRet) and verifies the accumulated total.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong total is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictestnoret_manywaves(const T& initial_val) {
  const unsigned int ThreadsperBlock = 10;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  // Expected total is converted to T, matching atomictest_manywaves.
  REQUIRE(result == initial_val +
          static_cast<T>(INCREMENT_VALUE * (ThreadsperBlock * numBlocks)));
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomic_simple with a
 * single thread (one atomicAdd of INCREMENT_VALUE) and verifies the result.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong result is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictest_simple(const T& initial_val) {
  const unsigned int ThreadsperBlock = 1;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val + INCREMENT_VALUE);
  return true;
}
/**
 * Seeds one device value with initial_val, launches atomicnoret_simple with a
 * single thread (one atomicAddNoRet of INCREMENT_VALUE) and verifies the
 * result.
 *
 * @tparam T Element type the atomic operates on.
 * @param initial_val Value stored on the device before the kernel runs.
 * @return true whenever the function returns; a wrong result is reported
 *         through REQUIRE instead of the return value.
 */
template <typename T>
bool atomictestnoret_simple(const T& initial_val) {
  const unsigned int ThreadsperBlock = 1;
  const unsigned int numBlocks = 1;
  // Byte count of one element. Kept as the type sizeof yields rather than T,
  // so a floating-point T is never used to hold a size.
  const auto memSize = sizeof(T);

  T* hOData = reinterpret_cast<T*>(malloc(memSize));
  REQUIRE(hOData != nullptr);
  *hOData = initial_val;

  T* dOData = nullptr;
  HIP_CHECK(hipMalloc(&dOData, memSize));
  // copy host memory to device to seed it with initial_val
  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));

  // execute the kernel
  hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
                     dim3(ThreadsperBlock), 0, 0, dOData);

  // Copy result from device to host
  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
  const T result = hOData[0];

  // Cleanup memory before asserting so a failed REQUIRE does not leak it
  free(hOData);
  HIP_CHECK(hipFree(dOData));

  REQUIRE(result == initial_val + INCREMENT_VALUE);
  return true;
}
/**
 * Exercises atomicAdd and atomicAddNoRet through the helper templates above,
 * once with 10 threads ("many waves") and once with a single thread.
 * The uint64_t and double variants are compiled only when HT_AMD is non-zero;
 * the no-return variants are run for float only.
 */
TEST_CASE("Unit_hipTestAtomicAdd") {
  // Every helper is expected to report success.
  const bool expected = true;

  SECTION("atomic tests with many waves") {
    REQUIRE(expected == atomictest_manywaves<int>(INT_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    REQUIRE(expected == atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(expected == atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("atomic tests with many waves and no return") {
    REQUIRE(expected == atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
  }

  SECTION("simple atomic tests") {
    REQUIRE(expected == atomictest_simple<int>(INT_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
#if HT_AMD
    REQUIRE(expected == atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
    REQUIRE(expected == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
#endif
  }

  SECTION("Simple atomic test with no return") {
    REQUIRE(expected == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
  }
}
+51 -51
Переглянути файл
@@ -1,51 +1,51 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Number of int64_t elements in the buffers; the test case also launches one
// block of this many threads.
#define LEN 512
// Size in bytes of a buffer of LEN int64_t elements.
#define SIZE (LEN * sizeof(int64_t))
// First pass: each thread stores the sum of the four device clock reads
// (clock, clock64, __clock, __clock64) in its own slot of Ad.
static __global__ void kernel1(int64_t* Ad) {
  const int gid = blockIdx.x * blockDim.x + threadIdx.x;
  const auto stamp = clock() + clock64() + __clock() + __clock64();
  Ad[gid] = stamp;
}
// Second pass: each thread takes a fresh sum of the four device clock reads
// and replaces its slot with (fresh sum - value already stored in the slot).
static __global__ void kernel2(int64_t* Ad) {
  const int gid = blockIdx.x * blockDim.x + threadIdx.x;
  const auto stamp = clock() + clock64() + __clock() + __clock64();
  Ad[gid] = stamp - Ad[gid];
}
// Runs kernel1 and then kernel2 over LEN zero-initialized slots and verifies
// that every slot ends up non-zero, i.e. the summed clock reads differed
// between the two kernels for every thread.
//
// The check uses REQUIRE instead of assert, so it is not compiled out when
// NDEBUG is defined, and both buffers are released before the final check.
TEST_CASE("Unit_hipTestClock") {
  int64_t* A = new int64_t[LEN];
  int64_t* Ad = nullptr;
  for (unsigned i = 0; i < LEN; i++) {
    A[i] = 0;
  }

  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  HIP_CHECK(hipGetLastError());
  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
  HIP_CHECK(hipFree(Ad));

  // Find the first slot that is still zero, if any.
  bool all_nonzero = true;
  unsigned first_zero = 0;
  for (unsigned i = 0; i < LEN; i++) {
    if (A[i] == 0) {
      all_nonzero = false;
      first_zero = i;
      break;
    }
  }
  delete[] A;

  INFO("First zero clock difference (if any) at index " << first_zero);
  REQUIRE(all_nonzero);
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
// Number of int64_t elements in the buffers; the test case also launches one
// block of this many threads.
#define LEN 512
// Size in bytes of a buffer of LEN int64_t elements.
#define SIZE (LEN * sizeof(int64_t))
// First pass: each thread stores the sum of the four device clock reads
// (clock, clock64, __clock, __clock64) in its own slot of Ad.
static __global__ void kernel1(int64_t* Ad) {
  const int gid = blockIdx.x * blockDim.x + threadIdx.x;
  const auto stamp = clock() + clock64() + __clock() + __clock64();
  Ad[gid] = stamp;
}
// Second pass: each thread takes a fresh sum of the four device clock reads
// and replaces its slot with (fresh sum - value already stored in the slot).
static __global__ void kernel2(int64_t* Ad) {
  const int gid = blockIdx.x * blockDim.x + threadIdx.x;
  const auto stamp = clock() + clock64() + __clock() + __clock64();
  Ad[gid] = stamp - Ad[gid];
}
// Runs kernel1 and then kernel2 over LEN zero-initialized slots and verifies
// that every slot ends up non-zero, i.e. the summed clock reads differed
// between the two kernels for every thread.
//
// The check uses REQUIRE instead of assert, so it is not compiled out when
// NDEBUG is defined, and both buffers are released before the final check.
TEST_CASE("Unit_hipTestClock") {
  int64_t* A = new int64_t[LEN];
  int64_t* Ad = nullptr;
  for (unsigned i = 0; i < LEN; i++) {
    A[i] = 0;
  }

  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  HIP_CHECK(hipGetLastError());
  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
  HIP_CHECK(hipFree(Ad));

  // Find the first slot that is still zero, if any.
  bool all_nonzero = true;
  unsigned first_zero = 0;
  for (unsigned i = 0; i < LEN; i++) {
    if (A[i] == 0) {
      all_nonzero = false;
      first_zero = i;
      break;
    }
  }
  delete[] A;

  INFO("First zero clock difference (if any) at index " << first_zero);
  REQUIRE(all_nonzero);
}
+88 -88
Переглянути файл
@@ -1,88 +1,88 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "error_handling_common.hh"
/**
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorName(hipError_t hip_error)` -
* Return hip error as text string form.
*/
/**
* Test Description
* ------------------------
* - Validate that the correct string is returned for each supported
* device error enumeration.
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorName.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  // One run per entry of kErrorEnumerators.
  const auto error_code =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << error_code);

  // The API must succeed, store a non-null string, and that string must match
  // the name ErrorName() gives for the same enumerator.
  const char* name = nullptr;
  HIP_CHECK(hipDrvGetErrorName(error_code, &name));
  REQUIRE(name != nullptr);
  REQUIRE(strcmp(name, ErrorName(error_code)) == 0);
}
/**
* Test Description
* ------------------------
* - Validate handling of invalid arguments:
* -# When error enumerator is invalid (-1)
* - Expected output: return "hipErrorInvalidValue"
* -# When nullptr is passed as store location
* - Expected output: return "hipErrorInvalidValue"
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorName.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Negative test: hipDrvGetErrorName must reject invalid arguments with
// hipErrorInvalidValue.
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  // An enumerator value outside the known range (-1).
  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  // Null output location. This must call hipDrvGetErrorName (not
  // hipDrvGetErrorString) so that the API under test is the one exercised.
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
/**
* End doxygen group ErrorTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "error_handling_common.hh"
/**
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorName(hipError_t hip_error)` -
* Return hip error as text string form.
*/
/**
* Test Description
* ------------------------
* - Validate that the correct string is returned for each supported
* device error enumeration.
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorName.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Positive test: for each entry of kErrorEnumerators, hipDrvGetErrorName must
// succeed and yield a non-null string equal to ErrorName(enumerator).
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
  // One run of the test body per generated enumerator.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Output slot for the API; starts null so the non-null check is meaningful.
  const char* reported_name = nullptr;
  HIP_CHECK(hipDrvGetErrorName(enumerator, &reported_name));

  REQUIRE(reported_name != nullptr);
  REQUIRE(strcmp(reported_name, ErrorName(enumerator)) == 0);
}
/**
* Test Description
* ------------------------
* - Validate handling of invalid arguments:
* -# When error enumerator is invalid (-1)
* - Expected output: return "hipErrorInvalidValue"
* -# When nullptr is passed as store location
* - Expected output: return "hipErrorInvalidValue"
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorName.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Negative test: hipDrvGetErrorName must reject invalid arguments with
// hipErrorInvalidValue.
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
  const char* error_string = nullptr;

  // An enumerator value outside the known range (-1).
  SECTION("pass unknown value to hipError") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
                    hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  // Null output location. This must call hipDrvGetErrorName (not
  // hipDrvGetErrorString) so that the API under test is the one exercised.
  SECTION("pass nullptr to error string") {
    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
  }
#endif
}
/**
* End doxygen group ErrorTest.
* @}
*/
+88 -88
Переглянути файл
@@ -1,88 +1,88 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "error_handling_common.hh"
/**
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorString(hipError_t hipError)` -
* Return handy text string message to explain the error which occurred.
*/
/**
* Test Description
* ------------------------
* - Validate that the correct string is returned for each supported
* device error enumeration.
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorString.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Positive test: for each entry of kErrorEnumerators, hipDrvGetErrorString
// must succeed and yield a non-null string equal to ErrorString(enumerator).
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
  // One run of the test body per generated enumerator.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Output slot for the API; starts null so the non-null check is meaningful.
  const char* reported_message = nullptr;
  HIP_CHECK(hipDrvGetErrorString(enumerator, &reported_message));

  REQUIRE(reported_message != nullptr);
  REQUIRE(strcmp(reported_message, ErrorString(enumerator)) == 0);
}
/**
* Test Description
* ------------------------
* - Validate handling of invalid arguments:
* -# When error enumerator is invalid (-1)
* - Expected output: return "hipErrorInvalidValue"
* -# When nullptr is passed as store location
* - Expected output: return "hipErrorInvalidValue"
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorString.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Negative test: hipDrvGetErrorString must report hipErrorInvalidValue for an
// out-of-range enumerator and (AMD only) for a null output location.
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
  const char* message = nullptr;

  SECTION("pass unknown value to hipError") {
    const auto bogus_error = static_cast<hipError_t>(-1);
    HIP_CHECK_ERROR((hipDrvGetErrorString(bogus_error, &message)), hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  SECTION("pass nullptr to error string") {
    const auto zero_error = static_cast<hipError_t>(0);
    HIP_CHECK_ERROR((hipDrvGetErrorString(zero_error, nullptr)), hipErrorInvalidValue);
  }
#endif
}
/**
* End doxygen group ErrorTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "error_handling_common.hh"
/**
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
* @{
* @ingroup ErrorTest
* `hipDrvGetErrorString(hipError_t hipError)` -
* Return handy text string message to explain the error which occurred.
*/
/**
* Test Description
* ------------------------
* - Validate that the correct string is returned for each supported
* device error enumeration.
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorString.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Positive test: for each entry of kErrorEnumerators, hipDrvGetErrorString
// must succeed and yield a non-null string equal to ErrorString(enumerator).
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
  // One run of the test body per generated enumerator.
  const auto enumerator =
      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
  INFO("Error: " << enumerator);

  // Output slot for the API; starts null so the non-null check is meaningful.
  const char* reported_message = nullptr;
  HIP_CHECK(hipDrvGetErrorString(enumerator, &reported_message));

  REQUIRE(reported_message != nullptr);
  REQUIRE(strcmp(reported_message, ErrorString(enumerator)) == 0);
}
/**
* Test Description
* ------------------------
* - Validate handling of invalid arguments:
* -# When error enumerator is invalid (-1)
* - Expected output: return "hipErrorInvalidValue"
* -# When nullptr is passed as store location
* - Expected output: return "hipErrorInvalidValue"
* Test source
* ------------------------
* - unit/errorHandling/hipDrvGetErrorString.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.4
*/
// Negative test: hipDrvGetErrorString must report hipErrorInvalidValue for an
// out-of-range enumerator and (AMD only) for a null output location.
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
  const char* message = nullptr;

  SECTION("pass unknown value to hipError") {
    const auto bogus_error = static_cast<hipError_t>(-1);
    HIP_CHECK_ERROR((hipDrvGetErrorString(bogus_error, &message)), hipErrorInvalidValue);
  }
#if HT_AMD  // segfaults on NVIDIA
  SECTION("pass nullptr to error string") {
    const auto zero_error = static_cast<hipError_t>(0);
    HIP_CHECK_ERROR((hipDrvGetErrorString(zero_error, nullptr)), hipErrorInvalidValue);
  }
#endif
}
/**
* End doxygen group ErrorTest.
* @}
*/
+19 -19
Переглянути файл
@@ -1,19 +1,19 @@
# AMD specific test (UNIX only): hipMalloc.cpp is compiled with plain g++ and
# the resulting object is linked into the gppTests executable.
if(HIP_PLATFORM MATCHES "amd" AND UNIX)
  set(TEST_SRC hipMalloc.cc)

  # Build malloc.o outside the normal toolchain by invoking g++ directly.
  add_custom_target(malloc_custom
                    COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp
                            -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o
                    BYPRODUCTS malloc.o)

  # Expose the prebuilt object to the linker as an imported object library.
  add_library(malloc_gpp OBJECT IMPORTED)
  set_property(TARGET malloc_gpp
               PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")

  hip_add_exe_to_target(NAME gppTests
                        TEST_SRC ${TEST_SRC}
                        TEST_TARGET_NAME build_tests
                        LINKER_LIBS malloc_gpp)

  # The object must exist before gppTests is linked.
  add_dependencies(gppTests malloc_custom)
endif()
# AMD specific test (UNIX only): hipMalloc.cpp is compiled with plain g++ and
# the resulting object is linked into the gppTests executable.
if(HIP_PLATFORM MATCHES "amd" AND UNIX)
  set(TEST_SRC hipMalloc.cc)

  # Build malloc.o outside the normal toolchain by invoking g++ directly.
  add_custom_target(malloc_custom
                    COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp
                            -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o
                    BYPRODUCTS malloc.o)

  # Expose the prebuilt object to the linker as an imported object library.
  add_library(malloc_gpp OBJECT IMPORTED)
  set_property(TARGET malloc_gpp
               PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")

  hip_add_exe_to_target(NAME gppTests
                        TEST_SRC ${TEST_SRC}
                        TEST_TARGET_NAME build_tests
                        LINKER_LIBS malloc_gpp)

  # The object must exist before gppTests is linked.
  add_dependencies(gppTests malloc_custom)
endif()
+54 -54
Переглянути файл
@@ -1,54 +1,54 @@
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <hip_test_common.hh>
#include "hipMalloc.h"
/**
* @addtogroup hipMalloc hipMalloc
* @{
* @ingroup MemoryTest
* `hipError_t hipMalloc(void** ptr, size_t size)` -
* Allocate memory on the default accelerator.
* @}
*/
/**
* Test Description
* ------------------------
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
* Test source
* ------------------------
* - catch/unit/g++/hipMalloc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Invokes the externally compiled MallocFunc() and requires it to return 1.
TEST_CASE("Unit_hipMalloc_gpptest") {
  printf("calling cpp function from here\n");

  const int status = MallocFunc();
  REQUIRE(status == 1);
}
/**
* End doxygen group MemoryTest.
* @}
*/
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <hip_test_common.hh>
#include "hipMalloc.h"
/**
* @addtogroup hipMalloc hipMalloc
* @{
* @ingroup MemoryTest
* `hipError_t hipMalloc(void** ptr, size_t size)` -
* Allocate memory on the default accelerator.
* @}
*/
/**
* Test Description
* ------------------------
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
* Test source
* ------------------------
* - catch/unit/g++/hipMalloc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Invokes the externally compiled MallocFunc() and requires it to return 1.
TEST_CASE("Unit_hipMalloc_gpptest") {
  printf("calling cpp function from here\n");

  const int status = MallocFunc();
  REQUIRE(status == 1);
}
/**
* End doxygen group MemoryTest.
* @}
*/
+21 -21
Переглянути файл
@@ -1,22 +1,22 @@
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <iostream>
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <iostream>
// Declaration of MallocFunc, whose definition lives in another translation
// unit. Takes no arguments and returns an int.
extern int MallocFunc();
+28 -28
Переглянути файл
@@ -1,28 +1,28 @@
# AMD specific test (UNIX only): C sources compiled with plain gcc are linked
# into the gccTests executable.
if(HIP_PLATFORM MATCHES "amd")
  if(UNIX)
    set(TEST_SRC
        gccTest.cc
        gpu.cpp
    )

    # Build the C objects by invoking gcc directly. DEPENDS ties each object
    # to its source so the object is regenerated when the source changes.
    add_custom_command(OUTPUT LaunchKernel.o
                       COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c
                               -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o
                       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c)
    add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)

    add_custom_command(OUTPUT hipMalloc.o
                       COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c
                               -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o
                       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c)
    add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)

    # Expose the prebuilt objects to the linker as imported object libraries.
    add_library(LaunchKernel_lib OBJECT IMPORTED)
    add_library(hipMalloc_lib OBJECT IMPORTED)
    set_property(TARGET LaunchKernel_lib
                 PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
    set_property(TARGET hipMalloc_lib
                 PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")

    hip_add_exe_to_target(NAME gccTests
                          TEST_SRC ${TEST_SRC}
                          TEST_TARGET_NAME build_tests
                          LINKER_LIBS LaunchKernel_lib hipMalloc_lib)

    # Both objects must exist before gccTests is linked.
    add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
  endif()
endif()
# AMD specific test (UNIX only): C sources compiled with plain gcc are linked
# into the gccTests executable.
if(HIP_PLATFORM MATCHES "amd")
  if(UNIX)
    set(TEST_SRC
        gccTest.cc
        gpu.cpp
    )

    # Build the C objects by invoking gcc directly. DEPENDS ties each object
    # to its source so the object is regenerated when the source changes.
    add_custom_command(OUTPUT LaunchKernel.o
                       COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c
                               -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o
                       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c)
    add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)

    add_custom_command(OUTPUT hipMalloc.o
                       COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c
                               -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o
                       DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c)
    add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)

    # Expose the prebuilt objects to the linker as imported object libraries.
    add_library(LaunchKernel_lib OBJECT IMPORTED)
    add_library(hipMalloc_lib OBJECT IMPORTED)
    set_property(TARGET LaunchKernel_lib
                 PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
    set_property(TARGET hipMalloc_lib
                 PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")

    hip_add_exe_to_target(NAME gccTests
                          TEST_SRC ${TEST_SRC}
                          TEST_TARGET_NAME build_tests
                          LINKER_LIBS LaunchKernel_lib hipMalloc_lib)

    # Both objects must exist before gccTests is linked.
    add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
  endif()
endif()
+64 -64
Переглянути файл
@@ -1,64 +1,64 @@
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <hip_test_common.hh>
extern "C" {
#include "LaunchKernel.h"
}
/**
* Test Description
* ------------------------
* - calling launchKernel which is c function from catch2
* and compile with gcc compiler and verify the results.
* Test source
* ------------------------
* - catch/unit/gcc/gccTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Calls the C function launchKernel() and requires it to return 1.
TEST_CASE("Unit_LaunchKernelgccTests") {
  printf("Calling launchKernel files from here\n");

  const int status = launchKernel();
  REQUIRE(status == 1);
}
/**
* Test Description
* ------------------------
* - Calling hipMalloc which is c file from catch2 and compile
* with gcc compiler and verify the results.
* Test source
* ------------------------
* - catch/unit/gcc/gccTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Calls the C function hipMallocfunc() and requires it to return 1.
TEST_CASE("Unit_hipMallocgccTests") {
  printf("Calling hipMalloc files from here\n");

  const int status = hipMallocfunc();
  REQUIRE(status == 1);
}
/*
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* */
#include <hip_test_common.hh>
extern "C" {
#include "LaunchKernel.h"
}
/**
* Test Description
* ------------------------
* - calling launchKernel which is c function from catch2
* and compile with gcc compiler and verify the results.
* Test source
* ------------------------
* - catch/unit/gcc/gccTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Calls the C function launchKernel() and requires it to return 1.
TEST_CASE("Unit_LaunchKernelgccTests") {
  printf("Calling launchKernel files from here\n");

  const int status = launchKernel();
  REQUIRE(status == 1);
}
/**
* Test Description
* ------------------------
* - Calling hipMalloc which is c file from catch2 and compile
* with gcc compiler and verify the results.
* Test source
* ------------------------
* - catch/unit/gcc/gccTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
// Calls the C function hipMallocfunc() and requires it to return 1.
TEST_CASE("Unit_hipMallocgccTests") {
  printf("Calling hipMalloc files from here\n");

  const int status = hipMallocfunc();
  REQUIRE(status == 1);
}
+176 -176
Переглянути файл
@@ -1,176 +1,176 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma clang diagnostic ignored "-Wunused-parameter"
// Block size used for the kernel launch in testExternShared, which asserts
// that the element count N is an exact multiple of this value.
unsigned threadsPerBlock = 256;
// One step of the in-place accumulation over sdata: every thread with
// batch/2 <= tid < groupElements adds sdata[tid - batch/2] into sdata[tid].
// The step is skipped entirely when groupElements < batch.
template <unsigned batch, typename T>
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
  if (groupElements < batch) {
    return;
  }

  constexpr unsigned kStride = batch / 2;
  const bool participates = (tid >= kStride) && (tid < groupElements);

  // Read and write are separated by a barrier: a direct
  // sdata[tid] += sdata[tid - batch/2] does not work when the block size
  // exceeds the wave size, because one wave may complete before another.
  T carried;
  if (participates) {
    carried = sdata[tid - kStride];
  }
  __syncthreads();

  if (participates) {
    sdata[tid] += carried;
  }
  __syncthreads();
}
// Kernel exercising dynamically sized shared memory: the first groupElements
// threads of a block seed the buffer with their thread index, the sum<> steps
// accumulate it in place, and each thread writes
// A_d[gid] + B_d[gid] + buffer[tid % groupElements] to C_d[gid].
template <typename T>
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
                                       size_t numElements, size_t groupElements) {
  // Dynamic shared memory, declared as double[] and viewed as T[].
  extern __shared__ double sdata0[];
  T* const scratch = reinterpret_cast<T*>(sdata0);

  const size_t localId = threadIdx.x;
  const size_t globalId = (blockIdx.x * blockDim.x + threadIdx.x);

  // Seed the shared buffer.
  if (localId < groupElements) {
    scratch[localId] = static_cast<T>(localId);
  }
  __syncthreads();

  // Accumulation steps, from the widest stride down to the narrowest.
  sum<512>(scratch, groupElements, localId);
  sum<256>(scratch, groupElements, localId);
  sum<128>(scratch, groupElements, localId);
  sum<64>(scratch, groupElements, localId);
  sum<32>(scratch, groupElements, localId);
  sum<16>(scratch, groupElements, localId);
  sum<8>(scratch, groupElements, localId);
  sum<4>(scratch, groupElements, localId);
  sum<2>(scratch, groupElements, localId);

  const T accumulated = scratch[localId % groupElements];
  C_d[globalId] = A_d[globalId] + B_d[globalId] + accumulated;
}
// Host driver: allocates and fills N-element arrays, launches
// testExternSharedKernel<T> with groupElements * sizeof(T) bytes of dynamic
// shared memory, then checks every output element on the host.
template <typename T>
void testExternShared(size_t N, unsigned groupElements) {
  const size_t Nbytes = N * sizeof(T);

  T *A_d, *B_d, *C_d;
  T *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);

  // The grid covers N exactly only when N is a multiple of the block size.
  const unsigned gridBlocks = N / threadsPerBlock;
  assert(N == gridBlocks * threadsPerBlock);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  // Amount of dynamic shared memory requested for the launch.
  const size_t ldsBytes = groupElements * sizeof(T);

  hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(gridBlocks),
                     dim3(threadsPerBlock), ldsBytes, 0, A_d, B_d, C_d,
                     N, groupElements);

  HIP_CHECK(hipDeviceSynchronize());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));

  // Verify: the shared-memory term equals 0 + 1 + ... + tid, where tid wraps
  // at the smaller of the block size and groupElements.
  const size_t wrap = min(threadsPerBlock, groupElements);
  for (size_t i = 0; i < N; ++i) {
    const size_t tid = i % wrap;
    const T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
    const T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
    REQUIRE(C_h[i] == expected);
  }

  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - launch kernel with dynamic shared memory for float and double
* datatypes and verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipDynamicShared.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// Runs testExternShared for float and double with 1024 and 65536 elements
// over group sizes 4..64, plus one float run whose group size is derived from
// the device's maximum shared memory per block.
TEST_CASE("Unit_hipDynamicShared") {
  constexpr unsigned kGroupSizes[] = {4, 8, 16, 32, 64};
  constexpr size_t kLeastSize = 1024;
  constexpr size_t kMaxSize = 65536;

  SECTION("test case with float for least size") {
    for (const unsigned groupSize : kGroupSizes) {
      testExternShared<float>(kLeastSize, groupSize);
    }
  }

  SECTION("test case with float for max size") {
    for (const unsigned groupSize : kGroupSizes) {
      testExternShared<float>(kMaxSize, groupSize);
    }
  }

  SECTION("test case with double for least size") {
    for (const unsigned groupSize : kGroupSizes) {
      testExternShared<double>(kLeastSize, groupSize);
    }
  }

  SECTION("test case with double for max size") {
    for (const unsigned groupSize : kGroupSizes) {
      testExternShared<double>(kMaxSize, groupSize);
    }
  }

  SECTION("test case with float for max LDS size") {
    // Query device 0 for its per-block shared memory limit.
    int maxLDS = 0;
    HIP_CHECK(hipDeviceGetAttribute(&maxLDS,
                                    hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
    testExternShared<float>(kLeastSize, maxLDS/sizeof(float));
  }
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma clang diagnostic ignored "-Wunused-parameter"
// Block size used for the kernel launch in testExternShared, which asserts
// that the element count N is an exact multiple of this value.
unsigned threadsPerBlock = 256;
// One step of the in-place accumulation over sdata: every thread with
// batch/2 <= tid < groupElements adds sdata[tid - batch/2] into sdata[tid].
// The step is skipped entirely when groupElements < batch.
template <unsigned batch, typename T>
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
  if (groupElements < batch) {
    return;
  }

  constexpr unsigned kStride = batch / 2;
  const bool participates = (tid >= kStride) && (tid < groupElements);

  // Read and write are separated by a barrier: a direct
  // sdata[tid] += sdata[tid - batch/2] does not work when the block size
  // exceeds the wave size, because one wave may complete before another.
  T carried;
  if (participates) {
    carried = sdata[tid - kStride];
  }
  __syncthreads();

  if (participates) {
    sdata[tid] += carried;
  }
  __syncthreads();
}
// Kernel exercising dynamically sized shared memory: the first groupElements
// threads of a block seed the buffer with their thread index, the sum<> steps
// accumulate it in place, and each thread writes
// A_d[gid] + B_d[gid] + buffer[tid % groupElements] to C_d[gid].
template <typename T>
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
                                       size_t numElements, size_t groupElements) {
  // Dynamic shared memory, declared as double[] and viewed as T[].
  extern __shared__ double sdata0[];
  T* const scratch = reinterpret_cast<T*>(sdata0);

  const size_t localId = threadIdx.x;
  const size_t globalId = (blockIdx.x * blockDim.x + threadIdx.x);

  // Seed the shared buffer.
  if (localId < groupElements) {
    scratch[localId] = static_cast<T>(localId);
  }
  __syncthreads();

  // Accumulation steps, from the widest stride down to the narrowest.
  sum<512>(scratch, groupElements, localId);
  sum<256>(scratch, groupElements, localId);
  sum<128>(scratch, groupElements, localId);
  sum<64>(scratch, groupElements, localId);
  sum<32>(scratch, groupElements, localId);
  sum<16>(scratch, groupElements, localId);
  sum<8>(scratch, groupElements, localId);
  sum<4>(scratch, groupElements, localId);
  sum<2>(scratch, groupElements, localId);

  const T accumulated = scratch[localId % groupElements];
  C_d[globalId] = A_d[globalId] + B_d[globalId] + accumulated;
}
/**
 * Runs testExternSharedKernel<T> over N elements with groupElements * sizeof(T)
 * bytes of dynamic shared memory and verifies every output element.
 *
 * @param N              number of elements; must be a multiple of
 *                       threadsPerBlock
 * @param groupElements  number of shared-memory elements used by the kernel
 */
template <typename T>
void testExternShared(size_t N, unsigned groupElements) {
  // The grid consists of whole blocks only, so N has to divide evenly. Checked
  // with REQUIRE (not assert) so the precondition also holds in NDEBUG builds,
  // and before any allocation so a failure does not leak buffers.
  unsigned blocks = N/threadsPerBlock;
  REQUIRE(N == blocks * threadsPerBlock);

  size_t Nbytes = N * sizeof(T);
  T *A_d, *B_d, *C_d;
  T *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  // calculate the amount of dynamic shared memory required
  size_t groupMemBytes = groupElements * sizeof(T);

  // launch kernel with dynamic shared memory
  hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
                     dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
                     N, groupElements);
  // Report a rejected launch configuration right away.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));

  // verify: the shared-memory contribution of thread tid is 0 + 1 + ... + tid
  for (size_t i = 0; i < N; ++i) {
    size_t tid = (i % min(threadsPerBlock, groupElements));
    T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
    T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
    REQUIRE(C_h[i] == expected);
  }

  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - launch kernel with dynamic shared memory for float and double
* datatypes and verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipDynamicShared.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipDynamicShared") {
  // Shared-memory element counts exercised by the fixed-size sections.
  const unsigned groupSizes[] = {4, 8, 16, 32, 64};

  SECTION("test case with float for least size") {
    for (unsigned groupSize : groupSizes) {
      testExternShared<float>(1024, groupSize);
    }
  }
  SECTION("test case with float for max size") {
    for (unsigned groupSize : groupSizes) {
      testExternShared<float>(65536, groupSize);
    }
  }
  SECTION("test case with double for least size") {
    for (unsigned groupSize : groupSizes) {
      testExternShared<double>(1024, groupSize);
    }
  }
  SECTION("test case with double for max size") {
    for (unsigned groupSize : groupSizes) {
      testExternShared<double>(65536, groupSize);
    }
  }
  SECTION("test case with float for max LDS size") {
    // Size the shared buffer from the per-block limit reported for device 0.
    int ldsBytes = 0;
    HIP_CHECK(hipDeviceGetAttribute(&ldsBytes,
                                    hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
    testExternShared<float>(1024, ldsBytes/sizeof(float));
  }
}
/**
* End doxygen group KernelTest.
* @}
*/
+94 -94
Переглянути файл
@@ -1,94 +1,94 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Number of float elements in each buffer used by this test.
#define LEN (16 * 1024)
// Size in bytes of one buffer; also passed as the dynamic shared-memory size.
#define SIZE (LEN * sizeof(float))
// Writes Ad[k] + 1.0f to Bd[k], staging every value in dynamic shared memory.
// Each thread starts at its own thread index and advances in steps of 64
// (the block size used by the launch in this file) for LEN / 64 elements.
__global__ void vectorAdd(float* Ad, float* Bd) {
  extern __shared__ float sBd[];
  const int first = threadIdx.x;
  const int last = first + LEN;
  for (int idx = first; idx < last; idx += 64) {
    const float staged = Ad[idx] + 1.0f;
    sBd[idx] = staged;
    Bd[idx] = sBd[idx];
  }
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Assign max dynamic shared memory to kernel function and
* verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipDynamicShared2.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipDynamicShared2") {
  float *A, *B, *Ad, *Bd;
  A = new float[LEN];
  B = new float[LEN];
  for (int i = 0; i < LEN; i++) {
    A[i] = 1.0f;
    B[i] = 1.0f;
  }

  HIP_CHECK(hipMalloc(&Ad, SIZE));
  HIP_CHECK(hipMalloc(&Bd, SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  // Raise the kernel's dynamic shared-memory limit to the full buffer size
  // before launching with SIZE bytes of dynamic shared memory.
  hipError_t ret = hipFuncSetAttribute(
      reinterpret_cast<const void*>(&vectorAdd),
      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);
  REQUIRE(ret == hipSuccess);

  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  // Find the first element outside (1.0, 3.0). A plain scan is used instead of
  // assert() so the check is not compiled out under NDEBUG, and the verdict is
  // reported through Catch2 only after all buffers have been released.
  int firstBad = -1;
  for (int i = 0; i < LEN; i++) {
    if (!(B[i] > 1.0f && B[i] < 3.0f)) {
      firstBad = i;
      break;
    }
  }

  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  delete[] A;
  delete[] B;

  // On failure Catch2 prints the offending index as the left-hand value.
  REQUIRE(firstBad == -1);
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Number of float elements in each buffer used by this test.
#define LEN (16 * 1024)
// Size in bytes of one buffer; also passed as the dynamic shared-memory size.
#define SIZE (LEN * sizeof(float))
// Writes Ad[k] + 1.0f to Bd[k], staging every value in dynamic shared memory.
// Each thread starts at its own thread index and advances in steps of 64
// (the block size used by the launch in this file) for LEN / 64 elements.
__global__ void vectorAdd(float* Ad, float* Bd) {
  extern __shared__ float sBd[];
  const int first = threadIdx.x;
  const int last = first + LEN;
  for (int idx = first; idx < last; idx += 64) {
    const float staged = Ad[idx] + 1.0f;
    sBd[idx] = staged;
    Bd[idx] = sBd[idx];
  }
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Assign max dynamic shared memory to kernel function and
* verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipDynamicShared2.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipDynamicShared2") {
  float *A, *B, *Ad, *Bd;
  A = new float[LEN];
  B = new float[LEN];
  for (int i = 0; i < LEN; i++) {
    A[i] = 1.0f;
    B[i] = 1.0f;
  }

  HIP_CHECK(hipMalloc(&Ad, SIZE));
  HIP_CHECK(hipMalloc(&Bd, SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  // Raise the kernel's dynamic shared-memory limit to the full buffer size
  // before launching with SIZE bytes of dynamic shared memory.
  hipError_t ret = hipFuncSetAttribute(
      reinterpret_cast<const void*>(&vectorAdd),
      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);
  REQUIRE(ret == hipSuccess);

  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));

  // Find the first element outside (1.0, 3.0). A plain scan is used instead of
  // assert() so the check is not compiled out under NDEBUG, and the verdict is
  // reported through Catch2 only after all buffers have been released.
  int firstBad = -1;
  for (int i = 0; i < LEN; i++) {
    if (!(B[i] > 1.0f && B[i] < 3.0f)) {
      firstBad = i;
      break;
    }
  }

  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));
  delete[] A;
  delete[] B;

  // On failure Catch2 prints the offending index as the left-hand value.
  REQUIRE(firstBad == -1);
}
/**
* End doxygen group KernelTest.
* @}
*/
+59 -59
Переглянути файл
@@ -1,59 +1,59 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma clang diagnostic ignored "-Wunused-parameter"
// Kernel with an empty body; `param` is accepted but never used.
__global__ void Empty(int param) {}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - pass empty Kernel function.
* Test source
* ------------------------
* - catch/unit/kernel/hipEmptyKernel.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipEmptyKernel") {
  // Launch the empty kernel with a single thread in a single block.
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
  // The launch itself returns nothing, so query its status explicitly before
  // waiting for the device to finish.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#pragma clang diagnostic ignored "-Wunused-parameter"
// Kernel with an empty body; `param` is accepted but never used.
__global__ void Empty(int param) {}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - pass empty Kernel function.
* Test source
* ------------------------
* - catch/unit/kernel/hipEmptyKernel.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipEmptyKernel") {
  // Launch the empty kernel with a single thread in a single block.
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
  // The launch itself returns nothing, so query its status explicitly before
  // waiting for the device to finish.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
/**
* End doxygen group KernelTest.
* @}
*/
+138 -138
Переглянути файл
@@ -1,138 +1,138 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test the Grid_Launch syntax.
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include "hip/hip_ext.h"
// Launch parameters used by test(): threadsPerBlock is the block size, and both
// values are handed to HipTest::setNumBlocks() to obtain the grid size.
static unsigned threadsPerBlock = 256;
static unsigned blocksPerCU = 6;
// Aggregate of ten doubles that is passed by value to sKernel.
struct _t {
  double _a;
  double _b;
  double _c;
  double _d;
  double _e;
  double _f;
  double _g;
  double _h;
  double _i;
  double _j;
};
// Alias under which the rest of this file refers to the aggregate.
using _T = _t;
// Stores the sum of the ten members of `s` in *a.
__global__ void sKernel(_T s, double *a) {
  const double total =
      s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j;
  *a = total;
}
// Stores the sum of the six scalar arguments (of mixed integer and
// floating-point types) in *res.
__global__ void mKernel(char f, int16_t a, int b, double c,
                        int16_t d, int e, double* res) {
  const double total = a + b + c + d + e + f;
  *res = total;
}
void testMixData() {
double m = 0;
double *d_m;
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
int a = 1, e = 10;
int16_t b = 2, d = 4;
double c = 3.0;
char ff = 10;
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
b, a, c, d, e, d_m);
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(m == 30.0);
HIP_CHECK(hipFree(d_m));
}
void testStruct() {
double m = 0;
double *d_m;
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(m == 55.0);
HIP_CHECK(hipFree(d_m));
}
/**
 * Adds two int vectors of N elements on the device with HipTest::vectorADD,
 * launched through hipExtLaunchKernelGGL, and verifies the result on the host.
 *
 * @param N  number of elements in each vector
 */
void test(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
                        static_cast<const int*>(A_d),
                        static_cast<const int*>(B_d), C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream,
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
Args... args)` -
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
*/
/**
* Test Description
* ------------------------
* - Test case to verify sample array with hipExtLaunchKernelGGL()
* and verify the results.
* - Test case to verify struct data with hipExtLaunchKernelGGL()
* and verify the results.
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
* and verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipExtLaunchKernelGGL") {
  // Vector addition over 4 Mi int elements.
  SECTION("test run") {
    const size_t elementCount = 4 * 1024 * 1024;
    test(elementCount);
  }
  // Struct passed by value as a kernel argument.
  SECTION("testStruct run") { testStruct(); }
  // Kernel arguments of mixed scalar types.
  SECTION("testMixData run") { testMixData(); }
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test the Grid_Launch syntax.
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include "hip/hip_ext.h"
// Launch parameters used by test(): threadsPerBlock is the block size, and both
// values are handed to HipTest::setNumBlocks() to obtain the grid size.
static unsigned threadsPerBlock = 256;
static unsigned blocksPerCU = 6;
// Aggregate of ten doubles that is passed by value to sKernel.
struct _t {
  double _a;
  double _b;
  double _c;
  double _d;
  double _e;
  double _f;
  double _g;
  double _h;
  double _i;
  double _j;
};
// Alias under which the rest of this file refers to the aggregate.
using _T = _t;
// Stores the sum of the ten members of `s` in *a.
__global__ void sKernel(_T s, double *a) {
  const double total =
      s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j;
  *a = total;
}
// Stores the sum of the six scalar arguments (of mixed integer and
// floating-point types) in *res.
__global__ void mKernel(char f, int16_t a, int b, double c,
                        int16_t d, int e, double* res) {
  const double total = a + b + c + d + e + f;
  *res = total;
}
void testMixData() {
double m = 0;
double *d_m;
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
int a = 1, e = 10;
int16_t b = 2, d = 4;
double c = 3.0;
char ff = 10;
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
b, a, c, d, e, d_m);
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(m == 30.0);
HIP_CHECK(hipFree(d_m));
}
void testStruct() {
double m = 0;
double *d_m;
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(m == 55.0);
HIP_CHECK(hipFree(d_m));
}
/**
 * Adds two int vectors of N elements on the device with HipTest::vectorADD,
 * launched through hipExtLaunchKernelGGL, and verifies the result on the host.
 *
 * @param N  number of elements in each vector
 */
void test(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
                        static_cast<const int*>(A_d),
                        static_cast<const int*>(B_d), C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
/**
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream,
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
Args... args)` -
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
*/
/**
* Test Description
* ------------------------
* - Test case to verify sample array with hipExtLaunchKernelGGL()
* and verify the results.
* - Test case to verify struct data with hipExtLaunchKernelGGL()
* and verify the results.
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
* and verify the results.
* Test source
* ------------------------
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipExtLaunchKernelGGL") {
  // Vector addition over 4 Mi int elements.
  SECTION("test run") {
    const size_t elementCount = 4 * 1024 * 1024;
    test(elementCount);
  }
  // Struct passed by value as a kernel argument.
  SECTION("testStruct run") { testStruct(); }
  // Kernel arguments of mixed scalar types.
  SECTION("testMixData run") { testMixData(); }
}
/**
* End doxygen group KernelTest.
* @}
*/
+122 -122
Переглянути файл
@@ -1,122 +1,122 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test the Grid_Launch syntax.
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Launch parameters shared by the tests below: threadsPerBlock is the block
// size, and both values are handed to HipTest::setNumBlocks() to obtain the
// grid size.
static unsigned threadsPerBlock = 256;
static unsigned blocksPerCU = 6;
// __device__ maps to __attribute__((hc))
// Device-only helper returning i + 1. It is not called in the visible part of
// this file; the local variable named `foo` inside vectorADD2 shadows it there.
__device__ int foo(int i) { return i + 1; }
/**
 * Element-wise C_d[i] = A_d[i] + B_d[i], computed through the double packing
 * intrinsics: both operands are packed into one double with __hiloint2double
 * and the two 32-bit halves are extracted again and added.
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads until N is reached.
 */
template <typename T>
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
  const size_t first = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  size_t idx = first;
  while (idx < N) {
    const double packed = __hiloint2double(A_d[idx], B_d[idx]);
    C_d[idx] = __double2loint(packed) + __double2hiint(packed);
    idx += step;
  }
}
/**
 * Runs vectorADD2 over N int elements using the hipLaunchKernelGGL launch
 * syntax and verifies the result on the host.
 *
 * @param N  number of elements in each vector
 * @return   always 0
 */
int test_gl2(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, A_d, B_d, C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
#if __HIP__
/**
 * Runs vectorADD2 over N int elements using the <<<...>>> launch syntax and
 * verifies the result on the host. Only compiled when __HIP__ is defined.
 *
 * @param N  number of elements in each vector
 * @return   always 0
 */
int test_triple_chevron(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
#endif
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Test case to verify the Grid_Launch syntax.
* Test source
* ------------------------
* - catch/unit/kernel/hipGridLaunch.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipGridLaunch") {
  // Both sections operate on vectors of 4 Mi int elements.
  const size_t elementCount = 4 * 1024 * 1024;

  // hipLaunchKernelGGL launch syntax.
  SECTION("Test test_gl2") { test_gl2(elementCount); }

#if __HIP__
  // <<<...>>> launch syntax.
  SECTION("Test triple_chevron") { test_triple_chevron(elementCount); }
#endif
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Test the Grid_Launch syntax.
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Launch parameters shared by the tests below: threadsPerBlock is the block
// size, and both values are handed to HipTest::setNumBlocks() to obtain the
// grid size.
static unsigned threadsPerBlock = 256;
static unsigned blocksPerCU = 6;
// __device__ maps to __attribute__((hc))
// Device-only helper returning i + 1. It is not called in the visible part of
// this file; the local variable named `foo` inside vectorADD2 shadows it there.
__device__ int foo(int i) { return i + 1; }
/**
 * Element-wise C_d[i] = A_d[i] + B_d[i], computed through the double packing
 * intrinsics: both operands are packed into one double with __hiloint2double
 * and the two 32-bit halves are extracted again and added.
 *
 * Each thread starts at its global index and advances by the total number of
 * launched threads until N is reached.
 */
template <typename T>
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
  const size_t first = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;

  size_t idx = first;
  while (idx < N) {
    const double packed = __hiloint2double(A_d[idx], B_d[idx]);
    C_d[idx] = __double2loint(packed) + __double2hiint(packed);
    idx += step;
  }
}
/**
 * Runs vectorADD2 over N int elements using the hipLaunchKernelGGL launch
 * syntax and verifies the result on the host.
 *
 * @param N  number of elements in each vector
 * @return   always 0
 */
int test_gl2(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, A_d, B_d, C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
#if __HIP__
/**
 * Runs vectorADD2 over N int elements using the <<<...>>> launch syntax and
 * verifies the result on the host. Only compiled when __HIP__ is defined.
 *
 * @param N  number of elements in each vector
 * @return   always 0
 */
int test_triple_chevron(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
  // Report a rejected launch right away instead of relying on later calls.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());

  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the buffers obtained from initArrays(); previously they leaked.
  // NOTE(review): `false` assumes initArrays() defaults to non-pinned host
  // memory when the flag is omitted - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
#endif
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Test case to verify the Grid_Launch syntax.
* Test source
* ------------------------
* - catch/unit/kernel/hipGridLaunch.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_hipGridLaunch") {
  // Both sections operate on vectors of 4 Mi int elements.
  const size_t elementCount = 4 * 1024 * 1024;

  // hipLaunchKernelGGL launch syntax.
  SECTION("Test test_gl2") { test_gl2(elementCount); }

#if __HIP__
  // <<<...>>> launch syntax.
  SECTION("Test triple_chevron") { test_triple_chevron(elementCount); }
#endif
}
/**
* End doxygen group KernelTest.
* @}
*/
+111 -111
Переглянути файл
@@ -1,111 +1,111 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/math_functions.h>
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wuninitialized"
// Simple tests for variable type qualifiers:
// The declarations below are not referenced in the visible part of this file;
// they are present so that each qualifier combination gets compiled.
__device__ int deviceVar;
// TODO-HCC __constant__ not working yet.
__constant__ int constantVar1;
__constant__ __device__ int constantVar2;
// Test HOST space:
// Host-only function that prints "foo!" followed by a newline.
__host__ void foo() { printf("foo!\n"); }
// Device function declared with __noinline__; returns a + 1.
__device__ __noinline__ int sum1_noinline(int a) { return a + 1; }
// Device function declared with __forceinline__; returns a + 1.
__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; }
// Callable from both host and device code; returns x + 1. The addition uses
// the double literal 1.0 and the result is converted back to float on return.
__device__ __host__ float PlusOne(float x) { return x + 1.0; }
// For every thread index below N, stores a[idx] + PlusOne(b[idx]) in c[idx].
// Only threadIdx.x is used as the index, so each block covers the same range.
__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
  const unsigned idx = threadIdx.x;
  if (idx >= N) {
    return;
  }
  const float incremented = PlusOne(b[idx]);
  c[idx] = a[idx] + incremented;
}
// Host-side launch of MyKernel with one block of 256 threads. The function is
// not called from the visible part of this file; it exists so that the launch
// expression gets compiled.
void callMyKernel() {
  // No buffers are allocated here. The pointers are null-initialised so that
  // the launch does not read indeterminate pointer values.
  float* a = nullptr;
  float* b = nullptr;
  float* c = nullptr;
  const unsigned blockSize = 256;
  unsigned N = blockSize;
  hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
                     0, 0, a, b, c, N);
}
// Element-wise C_d[i] = A_d[i] + B_d[i]. Each thread starts at its global index
// and advances by the total number of launched threads until N is reached.
// The template is not instantiated in the visible part of this file. The
// regions guarded by NOT_YET are compiled only when that macro is defined, and
// the local `x` is otherwise unused.
template <typename T>
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
#ifdef NOT_YET
  int a = __shfl_up(x, 1);
#endif
  float x = 1.0;
#ifdef NOT_YET
  float fastZ = __sin(x);
#endif
  __syncthreads();
  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
  size_t stride = blockDim.x * gridDim.x;
  for (size_t i = offset; i < N; i += stride) {
    C_d[i] = A_d[i] + B_d[i];
  }
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Collection of code to make sure that various features
* in the hip kernel language compile.
* Test source
* ------------------------
* - catch/unit/kernel/hipLanguageExtensions.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// The body holds a single always-true assertion, so this case has no runtime
// checks of its own; the definitions above it in this file are what get
// exercised, at build time.
TEST_CASE("Unit_hipLanguageExtensions") {
REQUIRE(true);
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip/math_functions.h>
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wuninitialized"
// Simple tests for variable type qualifiers. These file-scope variables only
// exercise the memory-space qualifiers; nothing in the visible code reads or
// writes them.
// Qualified with __device__ only.
__device__ int deviceVar;
// TODO-HCC __constant__ not working yet.
// Qualified with __constant__ only.
__constant__ int constantVar1;
// Qualified with both __constant__ and __device__.
__constant__ __device__ int constantVar2;
// Test HOST space: a host-only function that prints a fixed marker line.
__host__ void foo() {
  printf("foo!\n");
}
// Device-only helper marked __noinline__; yields its argument plus one.
__device__ __noinline__ int sum1_noinline(int a) {
  const int incremented = a + 1;
  return incremented;
}
// Device-only helper marked __forceinline__; yields its argument plus one.
__device__ __forceinline__ int sum1_forceinline(int a) {
  const int incremented = a + 1;
  return incremented;
}
// Callable from both host and device. The literal 1.0 is a double, so the sum
// is formed in double precision and converted back to float on return.
__device__ __host__ float PlusOne(float x) {
  const double widened = x + 1.0;
  return static_cast<float>(widened);
}
// Kernel computing c[i] = a[i] + PlusOne(b[i]) for i = threadIdx.x.
// Only threadIdx.x is used as the index (blockIdx is not consulted), and
// threads whose index is not below N do nothing.
__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
  const unsigned tid = threadIdx.x;
  if (tid >= N) {
    return;
  }
  c[tid] = a[tid] + PlusOne(b[tid]);
}
void callMyKernel() {
float *a, *b, *c;
const unsigned blockSize = 256;
unsigned N = blockSize;
hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
0, 0, a, b, c, N);
}
// Element-wise C_d[i] = A_d[i] + B_d[i] for i in [0, N), written as a
// grid-stride loop: each thread starts at its global index and advances by
// the total number of launched threads.
// The NOT_YET sections and the otherwise unused local `x` are kept verbatim;
// they are only compiled when NOT_YET is defined.
template <typename T>
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
#ifdef NOT_YET
  int a = __shfl_up(x, 1);
#endif
  float x = 1.0;
#ifdef NOT_YET
  float fastZ = __sin(x);
#endif
  __syncthreads();
  const size_t first = (blockIdx.x * blockDim.x + threadIdx.x);
  const size_t step = blockDim.x * gridDim.x;
  size_t idx = first;
  while (idx < N) {
    C_d[idx] = A_d[idx] + B_d[idx];
    idx += step;
  }
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Collection of code to make sure that various features
* in the hip kernel language compile.
* Test source
* ------------------------
* - catch/unit/kernel/hipLanguageExtensions.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// The body holds a single always-true assertion, so this case has no runtime
// checks of its own; the definitions above it in this file are what get
// exercised, at build time.
TEST_CASE("Unit_hipLanguageExtensions") {
REQUIRE(true);
}
/**
* End doxygen group KernelTest.
* @}
*/
Різницю між файлами не показано, бо вона завелика Завантажити різницю
+464 -464
Переглянути файл
@@ -1,464 +1,464 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Collects the scenarios in which a functor is handed to a kernel through
// hipLaunchKernelGGL and invoked in device code. The class has no data
// members; it only declares the scenario entry points.
class HipFunctorTests {
 public:
  // ---- class-based functors ----
  // A class functor passed by value.
  void TestForSimpleClassFunctor();
  // A class functor whose call operator is a template.
  void TestForClassTemplateFunctor();
  // A class functor passed through a pointer.
  void TestForClassObjPtrFunctor();
  // A class object that holds a functor as a data member.
  void TestForFunctorContainInClassObj();

  // ---- struct-based functors ----
  // A struct functor passed by value.
  void TestForSimpleStructFunctor();
  // A struct functor passed through a pointer.
  void TestForStructObjPtrFunctor();
  // A struct functor whose call operator is a template.
  void TestForStructTemplateFunctor();
  // A struct object that holds a functor as a data member.
  void TestForFunctorContainInStructObj();
};
// Launch geometry shared by every test in this file. BLOCK_DIM_SIZE is passed
// as the grid dimension (number of blocks) and also sizes the result buffers;
// THREADS_PER_BLOCK is passed as the block dimension.
static constexpr int BLOCK_DIM_SIZE = 1024;
static constexpr int THREADS_PER_BLOCK = 1;
// ---- class functor tests ----
// Stateless device-side functor: calling it with x yields twice x.
class DoublerFunctor {
 public:
  __device__ int operator()(int x) {
    const int doubled = 2 * x;
    return doubled;
  }
};
// Kernel taking the class functor by value. Each thread applies it to 5 and
// stores, at its global thread index, whether the answer was 10.
__global__ void DoublerFunctorKernel(DoublerFunctor doubler_,
                                     bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int doubled = doubler_(5);
  const bool matches = (doubled == 10);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForSimpleClassFunctor(void) {
DoublerFunctor doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
// Validation part of TestForSimpleClassFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Kernel taking the class functor through a pointer. Each thread invokes the
// pointed-to functor with 5 and stores, at its global thread index, whether
// the answer was 10.
__global__ void PtrDoublerFunctorKernel(DoublerFunctor* doubler_,
                                        bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  DoublerFunctor& functor = *doubler_;
  const int doubled = functor(5);
  deviceResult[slot] = (doubled == 10);
}
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
// Validation part of TestForClassObjPtrFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
delete[] ptrdoubler;
}
// Device-side functor with a templated call operator: reports whether the
// first argument is greater than the second, for any pair of types that
// supports operator>.
class compare {
 public:
  template <class T1, class T2>
  __device__ bool operator()(const T1& v1, const T2& v2) {
    const bool firstIsGreater = v1 > v2;
    return firstIsGreater;
  }
};
// Kernel taking the templated class functor by value. Each thread calls it
// with double, int and char arguments and stores, at its global thread index,
// whether all three comparisons held. The three results are combined so that
// none of them is overwritten before the host can check it.
__global__ void TemplateFunctorKernel(
    compare compare_,
    bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
void HipFunctorTests::TestForClassTemplateFunctor(void) {
compare comparefunctor;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
// Validation part of TestForClassTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Doubler calculator: plain class bundling an input, the value the caller
// expects back, and the functor that is applied to the input.
class DoublerCalculator {
 public:
  int a;       // input handed to the functor
  int result;  // value the functor's output is compared against
  // functor contained in class object
  DoublerFunctor doubler;
};
// Kernel taking a class object that contains the doubler functor. Each thread
// applies the contained functor to the object's `a` and stores, at its global
// thread index, whether that equals the object's `result`.
__global__ void DoublerCalculatorFunctorKernel(DoublerCalculator doubler_,
                                               bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int computed = doubler_.doubler(doubler_.a);
  const bool matches = (doubler_.result == computed);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
DoublerCalculator Doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
Doubler.a = 5;
Doubler.result = 10;
// pass comparefunctor to hipLaunchParm
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
// Validation part of TestForStructTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// ---- struct functor tests ----
// Stateless device-side functor declared as a struct: calling it with x
// yields twice x.
struct sDoublerFunctor {
  __device__ int operator()(int x) {
    const int doubled = 2 * x;
    return doubled;
  }
};
// Kernel taking the struct functor by value. Each thread applies it to 5 and
// stores, at its global thread index, whether the answer was 10.
__global__ void structDoublerFunctorKernel(sDoublerFunctor doubler_,
                                           bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int doubled = doubler_(5);
  const bool matches = (doubled == 10);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForSimpleStructFunctor(void) {
sDoublerFunctor doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
// Validation part of TestForSimpleStructFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Kernel taking the struct functor through a pointer. Each thread invokes the
// pointed-to functor with 5 and stores, at its global thread index, whether
// the answer was 10.
__global__ void structPtrDoublerFunctorKernel(sDoublerFunctor* doubler_,
                                              bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  sDoublerFunctor& functor = *doubler_;
  const int doubled = functor(5);
  deviceResult[slot] = (doubled == 10);
}
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
// Validation part of TestForStructObjPtrFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
delete[] ptrdoubler;
}
// Struct counterpart of the comparison functor: the templated call operator
// reports whether the first argument is greater than the second.
struct sCompare {
  template <class T1, class T2>
  __device__ bool operator()(const T1& v1, const T2& v2) {
    const bool firstIsGreater = v1 > v2;
    return firstIsGreater;
  }
};
// Kernel taking the templated struct functor by value. Each thread calls it
// with double, int and char arguments and stores, at its global thread index,
// whether all three comparisons held. The three results are combined so that
// none of them is overwritten before the host can check it.
__global__ void structTemplateFunctorKernel(
    sCompare compare_,
    bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
void HipFunctorTests::TestForStructTemplateFunctor(void) {
sCompare comparefunctor;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
// pass comparefunctor to hipLaunchKernelGGL
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
// Validation part of TestForStructTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Doubler calculator struct: bundles an input, the value the caller expects
// back, and the functor that is applied to the input. The contained functor
// is the class-based DoublerFunctor.
struct sDoublerCalculator {
  int a;       // input handed to the functor
  int result;  // value the functor's output is compared against
  // functor contained in struct object
  DoublerFunctor doubler;
};
// Overload taking a struct object that contains the doubler functor. Each
// thread applies the contained functor to the object's `a` and stores, at its
// global thread index, whether that equals the object's `result`.
__global__ void DoublerCalculatorFunctorKernel(sDoublerCalculator doubler_,
                                               bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int computed = doubler_.doubler(doubler_.a);
  const bool matches = (doubler_.result == computed);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
sDoublerCalculator Doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
Doubler.a = 5;
Doubler.result = 10;
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
// pass comparefunctor to hipLaunchKernelGGL
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
// Validation part of TestForStructTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Test that a class functor can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a templated class functor can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a class functor object ptr can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a class object containing functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct functor object ptr can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a templated struct functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct object containing functor can be passed to hiplaunchparam
* and can be used in kernel
* Test source
* ------------------------
* - catch/unit/kernel/hipLaunchParmFunctor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// One SECTION per functor scenario, each delegating to the matching
// HipFunctorTests member. Section names are kept exactly as they were.
TEST_CASE("Unit_hipLaunchParmFunctor") {
  HipFunctorTests functorSuite;

  // class-based and struct-based functors
  SECTION("test for simple class functor") { functorSuite.TestForSimpleClassFunctor(); }
  SECTION("test for class objptr functor") { functorSuite.TestForClassObjPtrFunctor(); }
  SECTION("test for class templete functor") { functorSuite.TestForClassTemplateFunctor(); }
  SECTION("test for simple struct functor") { functorSuite.TestForSimpleStructFunctor(); }
  SECTION("test for struct objptr functor") { functorSuite.TestForStructObjPtrFunctor(); }
  SECTION("test for struct templete functor") { functorSuite.TestForStructTemplateFunctor(); }

  // functors held as members of another object
  SECTION("test for functor contain in classobj") { functorSuite.TestForFunctorContainInClassObj(); }
  SECTION("test for functor contain in structobj") { functorSuite.TestForFunctorContainInStructObj(); }
}
/**
* End doxygen group KernelTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
// Collects the scenarios in which a functor is handed to a kernel through
// hipLaunchKernelGGL and invoked in device code. The class has no data
// members; it only declares the scenario entry points.
class HipFunctorTests {
 public:
  // ---- class-based functors ----
  // A class functor passed by value.
  void TestForSimpleClassFunctor();
  // A class functor whose call operator is a template.
  void TestForClassTemplateFunctor();
  // A class functor passed through a pointer.
  void TestForClassObjPtrFunctor();
  // A class object that holds a functor as a data member.
  void TestForFunctorContainInClassObj();

  // ---- struct-based functors ----
  // A struct functor passed by value.
  void TestForSimpleStructFunctor();
  // A struct functor passed through a pointer.
  void TestForStructObjPtrFunctor();
  // A struct functor whose call operator is a template.
  void TestForStructTemplateFunctor();
  // A struct object that holds a functor as a data member.
  void TestForFunctorContainInStructObj();
};
// Launch geometry shared by every test in this file. BLOCK_DIM_SIZE is passed
// as the grid dimension (number of blocks) and also sizes the result buffers;
// THREADS_PER_BLOCK is passed as the block dimension.
static constexpr int BLOCK_DIM_SIZE = 1024;
static constexpr int THREADS_PER_BLOCK = 1;
// ---- class functor tests ----
// Stateless device-side functor: calling it with x yields twice x.
class DoublerFunctor {
 public:
  __device__ int operator()(int x) {
    const int doubled = 2 * x;
    return doubled;
  }
};
// Kernel taking the class functor by value. Each thread applies it to 5 and
// stores, at its global thread index, whether the answer was 10.
__global__ void DoublerFunctorKernel(DoublerFunctor doubler_,
                                     bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int doubled = doubler_(5);
  const bool matches = (doubled == 10);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForSimpleClassFunctor(void) {
DoublerFunctor doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
// Validation part of TestForSimpleClassFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Kernel taking the class functor through a pointer. Each thread invokes the
// pointed-to functor with 5 and stores, at its global thread index, whether
// the answer was 10.
__global__ void PtrDoublerFunctorKernel(DoublerFunctor* doubler_,
                                        bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  DoublerFunctor& functor = *doubler_;
  const int doubled = functor(5);
  deviceResult[slot] = (doubled == 10);
}
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
// Validation part of TestForClassObjPtrFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
delete[] ptrdoubler;
}
// Device-side functor with a templated call operator: reports whether the
// first argument is greater than the second, for any pair of types that
// supports operator>.
class compare {
 public:
  template <class T1, class T2>
  __device__ bool operator()(const T1& v1, const T2& v2) {
    const bool firstIsGreater = v1 > v2;
    return firstIsGreater;
  }
};
// Kernel taking the templated class functor by value. Each thread calls it
// with double, int and char arguments and stores, at its global thread index,
// whether all three comparisons held. The three results are combined so that
// none of them is overwritten before the host can check it.
__global__ void TemplateFunctorKernel(
    compare compare_,
    bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
void HipFunctorTests::TestForClassTemplateFunctor(void) {
compare comparefunctor;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
// Validation part of TestForClassTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Doubler calculator: plain class bundling an input, the value the caller
// expects back, and the functor that is applied to the input.
class DoublerCalculator {
 public:
  int a;       // input handed to the functor
  int result;  // value the functor's output is compared against
  // functor contained in class object
  DoublerFunctor doubler;
};
// Kernel taking a class object that contains the doubler functor. Each thread
// applies the contained functor to the object's `a` and stores, at its global
// thread index, whether that equals the object's `result`.
__global__ void DoublerCalculatorFunctorKernel(DoublerCalculator doubler_,
                                               bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int computed = doubler_.doubler(doubler_.a);
  const bool matches = (doubler_.result == computed);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
DoublerCalculator Doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
Doubler.a = 5;
Doubler.result = 10;
// pass comparefunctor to hipLaunchParm
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
// Validation part of TestForStructTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// ---- struct functor tests ----
// Stateless device-side functor declared as a struct: calling it with x
// yields twice x.
struct sDoublerFunctor {
  __device__ int operator()(int x) {
    const int doubled = 2 * x;
    return doubled;
  }
};
// Kernel taking the struct functor by value. Each thread applies it to 5 and
// stores, at its global thread index, whether the answer was 10.
__global__ void structDoublerFunctorKernel(sDoublerFunctor doubler_,
                                           bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int doubled = doubler_(5);
  const bool matches = (doubled == 10);
  deviceResult[slot] = matches;
}
void HipFunctorTests::TestForSimpleStructFunctor(void) {
sDoublerFunctor doubler;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
// Validation part of TestForSimpleStructFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Kernel taking the struct functor through a pointer. Each thread invokes the
// pointed-to functor with 5 and stores, at its global thread index, whether
// the answer was 10.
__global__ void structPtrDoublerFunctorKernel(sDoublerFunctor* doubler_,
                                              bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  sDoublerFunctor& functor = *doubler_;
  const int doubled = functor(5);
  deviceResult[slot] = (doubled == 10);
}
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
// Validation part of TestForStructObjPtrFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
delete[] ptrdoubler;
}
// Struct counterpart of the comparison functor: the templated call operator
// reports whether the first argument is greater than the second.
struct sCompare {
  template <class T1, class T2>
  __device__ bool operator()(const T1& v1, const T2& v2) {
    const bool firstIsGreater = v1 > v2;
    return firstIsGreater;
  }
};
// Kernel taking the templated struct functor by value. Each thread calls it
// with double, int and char arguments and stores, at its global thread index,
// whether all three comparisons held. The three results are combined so that
// none of them is overwritten before the host can check it.
__global__ void structTemplateFunctorKernel(
    sCompare compare_,
    bool* deviceResult) {
  int x = blockIdx.x * blockDim.x + threadIdx.x;
  const bool doubleOk = compare_(2.2, 2.1);
  const bool intOk = compare_(2, 1);
  const bool charOk = compare_('b', 'a');
  deviceResult[x] = doubleOk && intOk && charOk;
}
void HipFunctorTests::TestForStructTemplateFunctor(void) {
sCompare comparefunctor;
bool *deviceResults, *hostResults;
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
// initialize to false, will be set to
// true if the functor is called in device code
hostResults[k] = false;
}
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyHostToDevice));
// pass comparefunctor to hipLaunchKernelGGL
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
// Validation part of TestForStructTemplateFunctor
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
hipMemcpyDeviceToHost));
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
REQUIRE(hostResults[k] == true);
HIP_CHECK(hipHostFree(hostResults));
HIP_CHECK(hipFree(deviceResults));
}
// Doubler calculator struct: bundles an input, the value the caller expects
// back, and the functor that is applied to the input. The contained functor
// is the class-based DoublerFunctor.
struct sDoublerCalculator {
  int a;       // input handed to the functor
  int result;  // value the functor's output is compared against
  // functor contained in struct object
  DoublerFunctor doubler;
};
// Overload taking a struct object that contains the doubler functor. Each
// thread applies the contained functor to the object's `a` and stores, at its
// global thread index, whether that equals the object's `result`.
__global__ void DoublerCalculatorFunctorKernel(sDoublerCalculator doubler_,
                                               bool* deviceResult) {
  const int slot = blockIdx.x * blockDim.x + threadIdx.x;
  const int computed = doubler_.doubler(doubler_.a);
  const bool matches = (doubler_.result == computed);
  deviceResult[slot] = matches;
}
// Launches DoublerCalculatorFunctorKernel with a struct that contains a
// functor (input 5, expected result 10) and requires every result slot
// written by the kernel to be true.
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
  sDoublerCalculator Doubler;
  // The kernel writes one slot per launched thread (grid size * block size),
  // so every buffer, copy and check is sized by the total thread count
  // instead of the block count alone.
  const int totalThreads = BLOCK_DIM_SIZE * THREADS_PER_BLOCK;
  bool* deviceResults = nullptr;
  bool* hostResults = nullptr;
  HIP_CHECK(hipMalloc(&deviceResults, totalThreads * sizeof(bool)));
  HIP_CHECK(hipHostMalloc(&hostResults, totalThreads * sizeof(bool)));
  for (int k = 0; k < totalThreads; ++k) {
    // initialize to false, will be set to
    // true if the functor is called in device code
    hostResults[k] = false;
  }
  Doubler.a = 5;
  Doubler.result = 10;
  HIP_CHECK(hipMemcpy(deviceResults, hostResults, totalThreads * sizeof(bool),
                      hipMemcpyHostToDevice));
  // pass the struct containing the doubler functor to hipLaunchKernelGGL
  hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
                     dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
  // Validation part of TestForFunctorContainInStructObj
  HIP_CHECK(hipMemcpy(hostResults, deviceResults, totalThreads * sizeof(bool),
                      hipMemcpyDeviceToHost));
  for (int k = 0; k < totalThreads; ++k) {
    REQUIRE(hostResults[k] == true);
  }
  HIP_CHECK(hipHostFree(hostResults));
  HIP_CHECK(hipFree(deviceResults));
}
/**
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
* @{
* @ingroup KernelTest
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
* Method to invoke kernel functions
*/
/**
* Test Description
* ------------------------
* - Test that a class functor can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a templated class functor can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a class functor object ptr can be passed to hiplaunchparam
* and can be used in kernel.
* - Test that a class object containing functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct functor object ptr can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a templated struct functor can be passed to hiplaunchparam
* and can be used in kernel
* - Test that a struct object containing functor can be passed to hiplaunchparam
* and can be used in kernel
* Test source
* ------------------------
* - catch/unit/kernel/hipLaunchParmFunctor.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// Entry point for the functor launch tests: each SECTION below calls exactly
// one scenario implemented as a member function of HipFunctorTests.
TEST_CASE("Unit_hipLaunchParmFunctor") {
// Helper object providing the individual test scenarios.
HipFunctorTests FunctorTests;
// Class-based functors.
SECTION("test for simple class functor") {
FunctorTests.TestForSimpleClassFunctor();
}
SECTION("test for class objptr functor") {
FunctorTests.TestForClassObjPtrFunctor();
}
SECTION("test for class templete functor") {
FunctorTests.TestForClassTemplateFunctor();
}
// Struct-based functors.
SECTION("test for simple struct functor") {
FunctorTests.TestForSimpleStructFunctor();
}
SECTION("test for struct objptr functor") {
FunctorTests.TestForStructObjPtrFunctor();
}
SECTION("test for struct templete functor") {
FunctorTests.TestForStructTemplateFunctor();
}
// Functors embedded as members of another object.
SECTION("test for functor contain in classobj") {
FunctorTests.TestForFunctorContainInClassObj();
}
SECTION("test for functor contain in structobj") {
FunctorTests.TestForFunctorContainInStructObj();
}
}
/**
* End doxygen group KernelTest.
* @}
*/
+1 -1
Переглянути файл
@@ -119,7 +119,7 @@ void verify_linked_lists_on_device(hipStream_t stream, Node* pNodes,
unsigned int* pNumCorrect, unsigned int numLists,
unsigned int ListLength) {
*pNumCorrect = 0; // reset numCorrect to zero
verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect,
ListLength);
+24 -24
Переглянути файл
@@ -1,24 +1,24 @@
# Common Tests - Test independent of all platforms
# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
# /catch/unit/p2p folder and its dependent files.
set(TEST_SRC
hipDeviceGetP2PAttribute.cc
)
# only for AMD
# (the AMD-only sources are appended to the common list)
if(HIP_PLATFORM MATCHES "amd")
set(AMD_SRC
hipP2pLinkTypeAndHopFunc.cc
)
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
endif()
# hipDeviceGetP2PAttribute.cc is compiled with -std=c++17.
set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17)
# Stand-alone helper executable; EXCLUDE_FROM_ALL keeps it out of the default
# build, it is built through the build_tests dependency added below.
add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc)
# Register the p2p test sources under the p2pTests name on the build_tests target.
hip_add_exe_to_target(NAME p2pTests
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
# Building build_tests also builds the helper executable.
add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
# Common Tests - Test independent of all platforms
# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
# /catch/unit/p2p folder and its dependent files.
set(TEST_SRC
hipDeviceGetP2PAttribute.cc
)
# only for AMD
# (the AMD-only sources are appended to the common list)
if(HIP_PLATFORM MATCHES "amd")
set(AMD_SRC
hipP2pLinkTypeAndHopFunc.cc
)
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
endif()
# hipDeviceGetP2PAttribute.cc is compiled with -std=c++17.
set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17)
# Stand-alone helper executable; EXCLUDE_FROM_ALL keeps it out of the default
# build, it is built through the build_tests dependency added below.
add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc)
# Register the p2p test sources under the p2pTests name on the build_tests target.
hip_add_exe_to_target(NAME p2pTests
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests)
# Building build_tests also builds the helper executable.
add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
+356 -356
Переглянути файл
@@ -1,356 +1,356 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hipP2pLinkTypeAndHopFunc.h"
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#include <dlfcn.h>
#endif
#include <vector>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
* Fetches Gpu device count
*/
#ifdef __linux__
// Determines the GPU count in a forked child after removing
// ROCR_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES from the environment, and hands
// the value back to the parent through a pipe.
// @param pdevCnt receives the count reported by the child, or 1 when the
//                count cannot be obtained (pipe/fork failure, or the child
//                ended without delivering a complete value).
void getDeviceCount(int *pdevCnt) {
  // Reported whenever the real count is unavailable.
  constexpr int kFallbackCount = 1;
  *pdevCnt = kFallbackCount;

  // create pipe descriptors
  int fd[2];
  if (pipe(fd) != 0) {
    return;
  }
  // disable visible_devices env from shell
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");

  const pid_t childpid = fork();
  if (childpid < 0) {  // failure: no child exists, release both pipe ends
    close(fd[0]);
    close(fd[1]);
    return;
  }
  if (childpid == 0) {  // Child
    int devCnt = kFallbackCount;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }

  // Parent: reading only, no need for write-descriptor
  close(fd[1]);
  int val = 0;
  // blocks until the child has written the count or closed its end
  const ssize_t got = read(fd[0], &val, sizeof(val));
  close(fd[0]);
  // reap exactly the child created above
  waitpid(childpid, nullptr, 0);
  // Accept the value only if it arrived completely.
  if (got == static_cast<ssize_t>(sizeof(val))) {
    *pdevCnt = val;
  }
}
bool testMaskedDevice(int actualNumGPUs) {
bool testResult = true;
int fd[2];
pipe(fd);
pid_t cPid;
cPid = fork();
if (cPid == 0) { // child
hipError_t err;
char visibleDeviceString[MAX_SIZE] = {};
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
// disable visible_devices env from shell
unsetenv("ROCR_VISIBLE_DEVICES");
unsetenv("HIP_VISIBLE_DEVICES");
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
uint32_t linktype;
uint32_t hopcount;
for (int count = 1;
count < actualNumGPUs; count++) {
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
VISIBLE_DEVICE+count, &linktype, &hopcount);
REQUIRE(err == hipSuccess);
}
close(fd[0]);
write(fd[1], &testResult, sizeof(testResult));
close(fd[1]);
exit(0);
} else if (cPid > 0) { // parent
close(fd[1]);
read(fd[0], &testResult, sizeof(testResult));
close(fd[0]);
wait(NULL);
} else {
printf("Info:fork() failed\n");
testResult = false;
}
return testResult;
}
#endif
// Negative tests for out-of-range device indices: -1 and numDevices are used
// for the first, the second, or both device arguments, and each call must
// return something other than hipSuccess. Every combination is its own
// Catch2 SECTION. Always returns true; failures are reported via REQUIRE.
bool testhipInvalidDevice(int numDevices) {
  uint32_t linkTypeOut;
  uint32_t hopCountOut;
  SECTION("Invalid device number case 1") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(-1, 0, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 2") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(numDevices, 0, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 3") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(0, -1, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 4") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(0, numDevices, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 5") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(-1, numDevices, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  return true;
}
#ifdef __linux__
// Negative test: passing a null linktype output pointer for the device pair
// (0, 1) must not return hipSuccess.
// Always returns true; a failure is reported through REQUIRE.
bool testhipInvalidLinkType() {
uint32_t hopcount;
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
&hopcount));
return true;
}
// Negative test: passing a null hopcount output pointer for the device pair
// (0, 1) must not return hipSuccess.
// Always returns true; a failure is reported through REQUIRE.
bool testhipInvalidHopcount() {
uint32_t linktype;
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
return true;
}
bool testhipSameDevice(int numGPUs) {
hipError_t ret;
uint32_t linktype = 0;
uint32_t hopcount = 0;
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
REQUIRE(ret != hipSuccess);
}
return true;
}
// Checks that the hop count between two devices does not depend on the
// argument order: for every unique pair (x, y) with x < y < numDevices the
// query is issued as (x, y) and as (y, x).
// @return false as soon as one pair reports different hop counts for the two
//         directions, true when all pairs agree.
// NOTE(review): the link types of both directions are fetched but not
// compared - confirm whether they are expected to be symmetric as well.
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
  // Get the unique pair of devices
  for (int x = 0; x < numDevices; x++) {
    for (int y = x + 1; y < numDevices; y++) {
      uint32_t linktype1 = 0, linktype2 = 0;
      uint32_t hopcount1 = 0, hopcount2 = 0;
      HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
                                             &linktype1, &hopcount1));
      HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
                                             &linktype2, &hopcount2));
      if (hopcount1 != hopcount2) {
        // Leave both loops at once; a plain break would only end the inner
        // loop and keep querying the remaining pairs.
        return false;
      }
    }
  }
  return true;
}
/**
 * Internal Function
 * Tells whether a link type reported through HIP and a link type reported by
 * rocm-smi denote the same kind of link. Only the PCIe and the XGMI pairings
 * are accepted; for any other combination both values are printed and false
 * is returned.
 */
bool validateLinkType(uint32_t linktype_Hip,
                      RSMI_IO_LINK_TYPE linktype_RocmSmi) {
  const bool bothPcie = (linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
                        (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS);
  const bool bothXgmi = (linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
                        (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI);
  if (bothPcie || bothXgmi) {
    return true;
  }
  printf("linktype Hip = %u, linktype RocmSmi = %u\n",
         linktype_Hip, linktype_RocmSmi);
  return false;
}
bool testhipLinkTypeHopcountDevice(int numDevices) {
bool TestPassed = true;
// Opening and initializing rocm-smi library
void *lib_rocm_smi_hdl;
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
RSMI_IO_LINK_TYPE*);
rsmi_status_t (*fntopo_init)(uint64_t);
rsmi_status_t (*fntopo_shut_down)();
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
RTLD_LAZY);
REQUIRE(lib_rocm_smi_hdl);
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
REQUIRE(fnsym);
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
REQUIRE(fnsym);
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
REQUIRE(fnsym);
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
uint64_t init_flags = 0;
rsmi_status_t retsmi_init;
retsmi_init = fntopo_init(init_flags);
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
// Use rocm-smi API rsmi_topo_get_link_type() to validate
struct devicePair {
int device1;
int device2;
};
std::vector<struct devicePair> devicePairList;
// Get the unique pair of devices
for (int x = 0; x < numDevices; x++) {
for (int y = x+1; y < numDevices; y++) {
devicePairList.push_back({x, y});
}
}
for (auto pos=devicePairList.begin();
pos != devicePairList.end(); pos++) {
uint32_t linktype1 = 0;
uint32_t hopcount1 = 0;
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
uint64_t hopcount2 = 0;
rsmi_status_t retsmi;
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
(*pos).device2, &linktype1, &hopcount1));
retsmi = fntopo_get_link_type((*pos).device1,
(*pos).device2, &hopcount2, &linktype2);
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
// Validate linktype
TestPassed = validateLinkType(linktype1, linktype2);
}
fntopo_shut_down();
dlclose(lib_rocm_smi_hdl);
return TestPassed;
}
#endif
/**
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
* @{
* @ingroup p2pTest
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
* Returns the link type and hop count between two devices
*/
/**
* Test Description
* ------------------------
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
* 1)Test Scenario to verify when device1 is visible and device2 is masked
* 2)Test Scenario to verify Invalid Device Number(s)
* 3)Test Scenario to verify when linktype = NULL
* 4)Test Scenario to verify when hopcount = NULL
* 5)Test Scenario to verify when device1 = device2
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
* GPUs with the output of rocm_smi tool.
* Test source
* ------------------------
* - catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// Entry point for the hipExtGetLinkTypeAndHopCount tests. The invalid-device
// scenario runs on every platform; all remaining scenarios are compiled for
// Linux only. The test is skipped when fewer than two devices are reported.
TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
int numDevices = 0;
bool TestPassed = true;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return;
}
SECTION("Test running for testhipInvalidDevice") {
TestPassed = testhipInvalidDevice(numDevices);
REQUIRE(TestPassed == true);
}
#ifdef __linux__
// Re-read the device count in a child process with the *_VISIBLE_DEVICES
// variables removed; from here on numDevices holds that value.
getDeviceCount(&numDevices);
if (numDevices < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return;
}
SECTION("Test running for testMaskedDevice") {
TestPassed = testMaskedDevice(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipInvalidLinkType") {
TestPassed = testhipInvalidLinkType();
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipInvalidHopcount") {
TestPassed = testhipInvalidHopcount();
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipSameDevice") {
TestPassed = testhipSameDevice(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipLinkTypeHopcountDevice") {
TestPassed = testhipLinkTypeHopcountDevice(numDevices);
REQUIRE(TestPassed == true);
}
#else
// Only a message is printed on other platforms; none of the scenarios above
// are compiled in.
printf("This test is skipped due to non linux environment.\n");
#endif
}
/**
* End doxygen group p2pTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hipP2pLinkTypeAndHopFunc.h"
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#ifdef __linux__
#include <unistd.h>
#include <sys/wait.h>
#include <dlfcn.h>
#endif
#include <vector>
#define MAX_SIZE 30
#define VISIBLE_DEVICE 0
/**
* Fetches Gpu device count
*/
#ifdef __linux__
// Determines the GPU count in a forked child after removing
// ROCR_VISIBLE_DEVICES / HIP_VISIBLE_DEVICES from the environment, and hands
// the value back to the parent through a pipe.
// @param pdevCnt receives the count reported by the child, or 1 when the
//                count cannot be obtained (pipe/fork failure, or the child
//                ended without delivering a complete value).
void getDeviceCount(int *pdevCnt) {
  // Reported whenever the real count is unavailable.
  constexpr int kFallbackCount = 1;
  *pdevCnt = kFallbackCount;

  // create pipe descriptors
  int fd[2];
  if (pipe(fd) != 0) {
    return;
  }
  // disable visible_devices env from shell
  unsetenv("ROCR_VISIBLE_DEVICES");
  unsetenv("HIP_VISIBLE_DEVICES");

  const pid_t childpid = fork();
  if (childpid < 0) {  // failure: no child exists, release both pipe ends
    close(fd[0]);
    close(fd[1]);
    return;
  }
  if (childpid == 0) {  // Child
    int devCnt = kFallbackCount;
    // writing only, no need for read-descriptor
    close(fd[0]);
    HIP_CHECK(hipGetDeviceCount(&devCnt));
    // send the value on the write-descriptor:
    write(fd[1], &devCnt, sizeof(devCnt));
    // close the write descriptor:
    close(fd[1]);
    exit(0);
  }

  // Parent: reading only, no need for write-descriptor
  close(fd[1]);
  int val = 0;
  // blocks until the child has written the count or closed its end
  const ssize_t got = read(fd[0], &val, sizeof(val));
  close(fd[0]);
  // reap exactly the child created above
  waitpid(childpid, nullptr, 0);
  // Accept the value only if it arrived completely.
  if (got == static_cast<ssize_t>(sizeof(val))) {
    *pdevCnt = val;
  }
}
bool testMaskedDevice(int actualNumGPUs) {
bool testResult = true;
int fd[2];
pipe(fd);
pid_t cPid;
cPid = fork();
if (cPid == 0) { // child
hipError_t err;
char visibleDeviceString[MAX_SIZE] = {};
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
// disable visible_devices env from shell
unsetenv("ROCR_VISIBLE_DEVICES");
unsetenv("HIP_VISIBLE_DEVICES");
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
uint32_t linktype;
uint32_t hopcount;
for (int count = 1;
count < actualNumGPUs; count++) {
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
VISIBLE_DEVICE+count, &linktype, &hopcount);
REQUIRE(err == hipSuccess);
}
close(fd[0]);
write(fd[1], &testResult, sizeof(testResult));
close(fd[1]);
exit(0);
} else if (cPid > 0) { // parent
close(fd[1]);
read(fd[0], &testResult, sizeof(testResult));
close(fd[0]);
wait(NULL);
} else {
printf("Info:fork() failed\n");
testResult = false;
}
return testResult;
}
#endif
// Negative tests for out-of-range device indices: -1 and numDevices are used
// for the first, the second, or both device arguments, and each call must
// return something other than hipSuccess. Every combination is its own
// Catch2 SECTION. Always returns true; failures are reported via REQUIRE.
bool testhipInvalidDevice(int numDevices) {
  uint32_t linkTypeOut;
  uint32_t hopCountOut;
  SECTION("Invalid device number case 1") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(-1, 0, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 2") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(numDevices, 0, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 3") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(0, -1, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 4") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(0, numDevices, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  SECTION("Invalid device number case 5") {
    const hipError_t ret =
        hipExtGetLinkTypeAndHopCount(-1, numDevices, &linkTypeOut, &hopCountOut);
    REQUIRE(ret != hipSuccess);
  }
  return true;
}
#ifdef __linux__
// Negative test: passing a null linktype output pointer for the device pair
// (0, 1) must not return hipSuccess.
// Always returns true; a failure is reported through REQUIRE.
bool testhipInvalidLinkType() {
uint32_t hopcount;
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
&hopcount));
return true;
}
// Negative test: passing a null hopcount output pointer for the device pair
// (0, 1) must not return hipSuccess.
// Always returns true; a failure is reported through REQUIRE.
bool testhipInvalidHopcount() {
uint32_t linktype;
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
return true;
}
bool testhipSameDevice(int numGPUs) {
hipError_t ret;
uint32_t linktype = 0;
uint32_t hopcount = 0;
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
REQUIRE(ret != hipSuccess);
}
return true;
}
// Checks that the hop count between two devices does not depend on the
// argument order: for every unique pair (x, y) with x < y < numDevices the
// query is issued as (x, y) and as (y, x).
// @return false as soon as one pair reports different hop counts for the two
//         directions, true when all pairs agree.
// NOTE(review): the link types of both directions are fetched but not
// compared - confirm whether they are expected to be symmetric as well.
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
  // Get the unique pair of devices
  for (int x = 0; x < numDevices; x++) {
    for (int y = x + 1; y < numDevices; y++) {
      uint32_t linktype1 = 0, linktype2 = 0;
      uint32_t hopcount1 = 0, hopcount2 = 0;
      HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
                                             &linktype1, &hopcount1));
      HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
                                             &linktype2, &hopcount2));
      if (hopcount1 != hopcount2) {
        // Leave both loops at once; a plain break would only end the inner
        // loop and keep querying the remaining pairs.
        return false;
      }
    }
  }
  return true;
}
/**
 * Internal Function
 * Tells whether a link type reported through HIP and a link type reported by
 * rocm-smi denote the same kind of link. Only the PCIe and the XGMI pairings
 * are accepted; for any other combination both values are printed and false
 * is returned.
 */
bool validateLinkType(uint32_t linktype_Hip,
                      RSMI_IO_LINK_TYPE linktype_RocmSmi) {
  const bool bothPcie = (linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
                        (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS);
  const bool bothXgmi = (linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
                        (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI);
  if (bothPcie || bothXgmi) {
    return true;
  }
  printf("linktype Hip = %u, linktype RocmSmi = %u\n",
         linktype_Hip, linktype_RocmSmi);
  return false;
}
bool testhipLinkTypeHopcountDevice(int numDevices) {
bool TestPassed = true;
// Opening and initializing rocm-smi library
void *lib_rocm_smi_hdl;
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
RSMI_IO_LINK_TYPE*);
rsmi_status_t (*fntopo_init)(uint64_t);
rsmi_status_t (*fntopo_shut_down)();
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
RTLD_LAZY);
REQUIRE(lib_rocm_smi_hdl);
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
REQUIRE(fnsym);
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
REQUIRE(fnsym);
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
REQUIRE(fnsym);
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
uint64_t init_flags = 0;
rsmi_status_t retsmi_init;
retsmi_init = fntopo_init(init_flags);
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
// Use rocm-smi API rsmi_topo_get_link_type() to validate
struct devicePair {
int device1;
int device2;
};
std::vector<struct devicePair> devicePairList;
// Get the unique pair of devices
for (int x = 0; x < numDevices; x++) {
for (int y = x+1; y < numDevices; y++) {
devicePairList.push_back({x, y});
}
}
for (auto pos=devicePairList.begin();
pos != devicePairList.end(); pos++) {
uint32_t linktype1 = 0;
uint32_t hopcount1 = 0;
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
uint64_t hopcount2 = 0;
rsmi_status_t retsmi;
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
(*pos).device2, &linktype1, &hopcount1));
retsmi = fntopo_get_link_type((*pos).device1,
(*pos).device2, &hopcount2, &linktype2);
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
// Validate linktype
TestPassed = validateLinkType(linktype1, linktype2);
}
fntopo_shut_down();
dlclose(lib_rocm_smi_hdl);
return TestPassed;
}
#endif
/**
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
* @{
* @ingroup p2pTest
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
* Returns the link type and hop count between two devices
*/
/**
* Test Description
* ------------------------
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
* 1)Test Scenario to verify when device1 is visible and device2 is masked
* 2)Test Scenario to verify Invalid Device Number(s)
* 3)Test Scenario to verify when linktype = NULL
* 4)Test Scenario to verify when hopcount = NULL
* 5)Test Scenario to verify when device1 = device2
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
* GPUs with the output of rocm_smi tool.
* Test source
* ------------------------
* - catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
// Entry point for the hipExtGetLinkTypeAndHopCount tests. The invalid-device
// scenario runs on every platform; all remaining scenarios are compiled for
// Linux only. The test is skipped when fewer than two devices are reported.
TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
int numDevices = 0;
bool TestPassed = true;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return;
}
SECTION("Test running for testhipInvalidDevice") {
TestPassed = testhipInvalidDevice(numDevices);
REQUIRE(TestPassed == true);
}
#ifdef __linux__
// Re-read the device count in a child process with the *_VISIBLE_DEVICES
// variables removed; from here on numDevices holds that value.
getDeviceCount(&numDevices);
if (numDevices < 2) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return;
}
SECTION("Test running for testMaskedDevice") {
TestPassed = testMaskedDevice(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipInvalidLinkType") {
TestPassed = testhipInvalidLinkType();
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipInvalidHopcount") {
TestPassed = testhipInvalidHopcount();
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipSameDevice") {
TestPassed = testhipSameDevice(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
REQUIRE(TestPassed == true);
}
SECTION("Test running for testhipLinkTypeHopcountDevice") {
TestPassed = testhipLinkTypeHopcountDevice(numDevices);
REQUIRE(TestPassed == true);
}
#else
// Only a message is printed on other platforms; none of the scenarios above
// are compiled in.
printf("This test is skipped due to non linux environment.\n");
#endif
}
/**
* End doxygen group p2pTest.
* @}
*/
+110 -110
Переглянути файл
@@ -1,110 +1,110 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
/**
* rocm_smi.h enums
*
* Status codes declared locally under the rocm_smi.h names.
* NOTE(review): presumably these values must stay in sync with the rocm_smi.h
* of the installed library - confirm when updating ROCm.
* Enumerators without an explicit value continue counting from the previous
* one; RSMI_INITIALIZATION_ERROR is an alias of RSMI_STATUS_INIT_ERROR.
*/
typedef enum {
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
//!< action is not available for the
//!< given input, on the given system
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
//!< may because the operation is not
//!< supported by the Linux kernel
//!< version running on the executing
//!< machine
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file
//!< error. Many functions require
//!< root access to run.
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
//!< resource
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
//!< allowable or safe range
RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi
//!< initializing internal data
//!< structures
RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
//!< yet been implemented in the
//!< current system for the current
//!< devices
RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
//!< found
RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
//!< available for the operation
RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
//!< execution of function
RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
//!< was read
RSMI_STATUS_NO_DATA, //!< No data was found for a given
//!< input
RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to
//!< function is not what was expected
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
//!< acquired because it is already
//!< being used
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
//!< exceeded INT32_MAX
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
/**
* Types for IO Link returned from rocm_smi
*
* RSMI_IOLINK_TYPE_NUMIOLINKTYPES has no explicit value and therefore follows
* RSMI_IOLINK_TYPE_XGMI (i.e. 3).
* NOTE(review): presumably mirrors the definition in rocm_smi.h - keep in sync.
*/
typedef enum _RSMI_IO_LINK_TYPE {
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
} RSMI_IO_LINK_TYPE;
/**
* Types for IO Link returned from rocm runtime
*
* Note that the numeric values differ from RSMI_IO_LINK_TYPE (PCIe is 2 here
* but 1 there, XGMI is 4 here but 2 there), so values of the two enums cannot
* be compared directly.
* NOTE(review): presumably mirrors the ROCm runtime's own definition - keep
* in sync.
*/
typedef enum {
/**
* Hyper-transport bus type.
*/
HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
/**
* QPI bus type.
*/
HSA_AMD_LINK_INFO_TYPE_QPI = 1,
/**
* PCIe bus type.
*/
HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
/**
* Infiniband bus type.
*/
HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
/**
* xGMI link type.
*/
HSA_AMD_LINK_INFO_TYPE_XGMI = 4
} hsa_amd_link_info_type_t;
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
/**
* rocm_smi.h enums
*
* Status codes declared locally under the rocm_smi.h names.
* NOTE(review): presumably these values must stay in sync with the rocm_smi.h
* of the installed library - confirm when updating ROCm.
* Enumerators without an explicit value continue counting from the previous
* one; RSMI_INITIALIZATION_ERROR is an alias of RSMI_STATUS_INIT_ERROR.
*/
typedef enum {
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
//!< action is not available for the
//!< given input, on the given system
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
//!< may because the operation is not
//!< supported by the Linux kernel
//!< version running on the executing
//!< machine
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file
//!< error. Many functions require
//!< root access to run.
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
//!< resource
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
//!< allowable or safe range
RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi
//!< initializing internal data
//!< structures
RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
//!< yet been implemented in the
//!< current system for the current
//!< devices
RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
//!< found
RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
//!< available for the operation
RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
//!< execution of function
RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
//!< was read
RSMI_STATUS_NO_DATA, //!< No data was found for a given
//!< input
RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to
//!< function is not what was expected
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
//!< acquired because it is already
//!< being used
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
//!< exceeded INT32_MAX
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
/**
 * IO link types as returned from rocm_smi.
 */
typedef enum _RSMI_IO_LINK_TYPE {
  //! Unknown type.
  RSMI_IOLINK_TYPE_UNDEFINED = 0,
  //! PCI Express.
  RSMI_IOLINK_TYPE_PCIEXPRESS = 1,
  //! XGMI.
  RSMI_IOLINK_TYPE_XGMI = 2,
  //! Number of IO link types (takes the next value after XGMI).
  RSMI_IOLINK_TYPE_NUMIOLINKTYPES,
  //! Max of IO link types.
  RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF
} RSMI_IO_LINK_TYPE;
/**
 * IO link types as returned from the rocm runtime.
 */
typedef enum {
  //! Hyper-transport bus type.
  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
  //! QPI bus type.
  HSA_AMD_LINK_INFO_TYPE_QPI = 1,
  //! PCIe bus type.
  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
  //! Infiniband bus type.
  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
  //! xGMI link type.
  HSA_AMD_LINK_INFO_TYPE_XGMI = 4
} hsa_amd_link_info_type_t;
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
Різницю між файлами не показано, бо вона завелика Завантажити різницю
Різницю між файлами не показано, бо вона завелика Завантажити різницю
+178 -178
Переглянути файл
@@ -1,178 +1,178 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
#include <string>
// Every check_* function below shares one signature:
//
//   Combination_CO       array of C strings
//                        (presumably the compiler options under test;
//                        confirm against RtcFunctions.cpp)
//   Combination_CO_size  element count that accompanies Combination_CO
//   max_thread_pos       NOTE(review): meaning not visible in this header;
//                        see RtcFunctions.cpp
//   fast_math_present    NOTE(review): meaning not visible in this header;
//                        see RtcFunctions.cpp
//
// and returns bool. The definitions live in RtcFunctions.cpp.

bool check_architecture(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_rdc(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_denormals_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_ffp_contract_off(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_on(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_fast(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_fast_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_slp_vectorize_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_undef_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_header_dir(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_warning(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_Rpass_inline(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_conversionerror_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_max_thread(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_unsafe_atomic_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_infinite_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_associative_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_associative_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

// Takes a kernel name plus two (string array, element count) pairs and
// returns a std::string.
// NOTE(review): what the returned string contains is not visible in this
// header; see RtcFunctions.cpp.
std::string checking_IR(
    const char* kername,
    const char** extra_CO_IRadded, int extra_CO_IRadded_size,
    const char** Combination_CO, int Combination_CO_size);
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
#include <string>
// Every check_* function below shares one signature:
//
//   Combination_CO       array of C strings
//                        (presumably the compiler options under test;
//                        confirm against RtcFunctions.cpp)
//   Combination_CO_size  element count that accompanies Combination_CO
//   max_thread_pos       NOTE(review): meaning not visible in this header;
//                        see RtcFunctions.cpp
//   fast_math_present    NOTE(review): meaning not visible in this header;
//                        see RtcFunctions.cpp
//
// and returns bool. The definitions live in RtcFunctions.cpp.

bool check_architecture(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_rdc(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_denormals_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_denormals_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_ffp_contract_off(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_on(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_ffp_contract_fast(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_fast_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_fast_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_slp_vectorize_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_slp_vectorize_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_undef_macro(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_header_dir(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_warning(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_Rpass_inline(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_conversionerror_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionerror_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_conversionwarning_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_max_thread(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_unsafe_atomic_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_unsafe_atomic_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_infinite_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_infinite_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_NAN_num_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_finite_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

bool check_associative_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_associative_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_signed_zeros_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_enabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);
bool check_trapping_math_disabled(
    const char** Combination_CO, int Combination_CO_size,
    int max_thread_pos, int fast_math_present);

// Takes a kernel name plus two (string array, element count) pairs and
// returns a std::string.
// NOTE(review): what the returned string contains is not visible in this
// header; see RtcFunctions.cpp.
std::string checking_IR(
    const char* kername,
    const char** extra_CO_IRadded, int extra_CO_IRadded_size,
    const char** Combination_CO, int Combination_CO_size);
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
+163 -163
Переглянути файл
@@ -1,163 +1,163 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
RtcKernels.h contains the strings that hold the kernel source code.
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
#include <hip/hiprtc.h>
#include <hip/hip_runtime.h>
#include <math.h>
// Each constant below is a raw string literal holding the source text of one
// extern "C" __global__ kernel. Everything between R"( and )" is string data,
// so nothing inside the literals may be reformatted or commented.

// Kernel max_thread: stores blockDim.x into *a.
static constexpr auto max_thread_string {
R"(
extern "C"
__global__ void max_thread(int* a) {
int BD = blockDim.x;
*a = BD;
}
)"};
// Kernel denormals: evaluates powf(*base, *power). When *result is 0 or 1 on
// entry it is replaced by 0 if that value is zero and 1 otherwise; for any
// other entry value it is replaced by the powf value itself.
static constexpr auto denormals_string {
R"(
extern "C"
__global__ void denormals(double* base, double* power, double* result) {
float denorm = powf(*base, *power);
if (*result == 0 || *result ==1 )
*result = (denorm==0) ? 0 : 1;
else
*result = powf(*base, *power);
}
)"};
// Kernel warning: the body consists solely of a #warning directive.
static constexpr auto warning_string {
R"(
extern "C"
__global__ void warning() {
#warning "Just printing a WARNING message onto the terminal";
}
)"};
// Kernel fp32_div_sqrt: stores sqrt(109.6209) into *result.
static constexpr auto fp32_div_sqrt_string {
R"(
extern "C"
__global__ void fp32_div_sqrt(float* result) {
float input = 109.6209;
*result = sqrt(input);
}
)"};
// Kernel error: only declares unsigned and signed ints initialised from -1
// and +1; it writes nothing out.
// NOTE(review): presumably meant to provoke sign-conversion diagnostics -
// confirm against RtcFunctions.cpp.
static constexpr auto error_string {
R"(
extern "C"
__global__ void error() {
unsigned int a = -1;
unsigned int b = +1;
signed int c = -1;
signed int d = +1;
}
)"};
// Kernel macro: stores PI into *result. PI is not defined inside this string,
// so it has to be supplied from outside when the source is compiled.
static constexpr auto macro_string {
R"(
extern "C"
__global__ void macro(int *result) {
*result = PI;
}
)"};
// Kernel undef_macro: reads Z, which is not defined inside this string.
static constexpr auto undef_macro_string {
R"(
extern "C"
__global__ void undef_macro() {
int a = Z;
}
)"};
// Kernel header_dir: includes "RtcFact.h" and stores fact(*val) into *a.
static constexpr auto header_dir_string {
R"(
#include "RtcFact.h"
extern "C"
__global__ void header_dir(int* a, int* val) {
*a = fact(*val);
}
)"};
// Kernel rdc: stores the product *a * *b into *c.
static constexpr auto rdc_string {
R"(
extern "C"
__global__ void rdc(float* a, float* b, float* c) {
*c = *a * *b;
}
)"};
// Kernel ffp_contract: multiply-then-add, *c = *a * *b + *c.
static constexpr auto ffp_contract_string {
R"(
extern "C"
__global__ void ffp_contract(float* a, float* b, float* c) {
*c = *a * *b + *c;
}
)"};
// Kernel slp_vectorize: adds the x and y components of two __half2 values
// separately and stores both sums into *y.
static constexpr auto slp_vectorize_string {
R"(
extern "C"
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
(*y).data.x = x.data.x + a.data.x;
(*y).data.y = x.data.y + a.data.y;
}
)"};
// Kernel unsafe_atomic: every thread whose global index is below 1000 calls
// unsafeAtomicAdd(&a[id], 0.2f).
static constexpr auto unsafe_atomic_string {
R"(
extern "C"
__global__ void unsafe_atomic(float* a) {
int id = threadIdx.x + blockIdx.x * blockDim.x;
if (id < 1000) {
unsafeAtomicAdd(&a[id], 0.2f);
}
}
)"};
// Kernel amdgpu_ieee: stores sqrt(*a / *b) into *c and prints it.
// NOTE(review): the printed label reads "a * b" while the code divides; left
// untouched because the text is runtime data.
static constexpr auto amdgpu_ieee_string {
R"(
extern "C"
__global__ void amdgpu_ieee(float* a, float* b, float* c) {
*c = sqrt(*a / *b);
printf("sqrt(a * b) = %f\n", *c);
}
)"};
// Kernel associative_math: with x, y, z initialised from 0.1f, 0.2f, 0.3f,
// sets *check to 1 when (x*y)*z differs from x*(y*z) and to 0 otherwise.
static constexpr auto associative_math_string {
R"(
extern "C"
__global__ void associative_math(int* check) {
double x = 0.1f;
double y = 0.2f;
double z = 0.3f;
if((x*y)*z != x*(y*z))
*check = 1;
else *check = 0;
}
)"};
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
RtcKernels.h contains the strings that hold the kernel source code.
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
#include <hip/hiprtc.h>
#include <hip/hip_runtime.h>
#include <math.h>
// Each constant below is a raw string literal holding the source text of one
// extern "C" __global__ kernel. Everything between R"( and )" is string data,
// so nothing inside the literals may be reformatted or commented.

// Kernel max_thread: stores blockDim.x into *a.
static constexpr auto max_thread_string {
R"(
extern "C"
__global__ void max_thread(int* a) {
int BD = blockDim.x;
*a = BD;
}
)"};
// Kernel denormals: evaluates powf(*base, *power). When *result is 0 or 1 on
// entry it is replaced by 0 if that value is zero and 1 otherwise; for any
// other entry value it is replaced by the powf value itself.
static constexpr auto denormals_string {
R"(
extern "C"
__global__ void denormals(double* base, double* power, double* result) {
float denorm = powf(*base, *power);
if (*result == 0 || *result ==1 )
*result = (denorm==0) ? 0 : 1;
else
*result = powf(*base, *power);
}
)"};
// Kernel warning: the body consists solely of a #warning directive.
static constexpr auto warning_string {
R"(
extern "C"
__global__ void warning() {
#warning "Just printing a WARNING message onto the terminal";
}
)"};
// Kernel fp32_div_sqrt: stores sqrt(109.6209) into *result.
static constexpr auto fp32_div_sqrt_string {
R"(
extern "C"
__global__ void fp32_div_sqrt(float* result) {
float input = 109.6209;
*result = sqrt(input);
}
)"};
// Kernel error: only declares unsigned and signed ints initialised from -1
// and +1; it writes nothing out.
// NOTE(review): presumably meant to provoke sign-conversion diagnostics -
// confirm against RtcFunctions.cpp.
static constexpr auto error_string {
R"(
extern "C"
__global__ void error() {
unsigned int a = -1;
unsigned int b = +1;
signed int c = -1;
signed int d = +1;
}
)"};
// Kernel macro: stores PI into *result. PI is not defined inside this string,
// so it has to be supplied from outside when the source is compiled.
static constexpr auto macro_string {
R"(
extern "C"
__global__ void macro(int *result) {
*result = PI;
}
)"};
// Kernel undef_macro: reads Z, which is not defined inside this string.
static constexpr auto undef_macro_string {
R"(
extern "C"
__global__ void undef_macro() {
int a = Z;
}
)"};
// Kernel header_dir: includes "RtcFact.h" and stores fact(*val) into *a.
static constexpr auto header_dir_string {
R"(
#include "RtcFact.h"
extern "C"
__global__ void header_dir(int* a, int* val) {
*a = fact(*val);
}
)"};
// Kernel rdc: stores the product *a * *b into *c.
static constexpr auto rdc_string {
R"(
extern "C"
__global__ void rdc(float* a, float* b, float* c) {
*c = *a * *b;
}
)"};
// Kernel ffp_contract: multiply-then-add, *c = *a * *b + *c.
static constexpr auto ffp_contract_string {
R"(
extern "C"
__global__ void ffp_contract(float* a, float* b, float* c) {
*c = *a * *b + *c;
}
)"};
// Kernel slp_vectorize: adds the x and y components of two __half2 values
// separately and stores both sums into *y.
static constexpr auto slp_vectorize_string {
R"(
extern "C"
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
(*y).data.x = x.data.x + a.data.x;
(*y).data.y = x.data.y + a.data.y;
}
)"};
// Kernel unsafe_atomic: every thread whose global index is below 1000 calls
// unsafeAtomicAdd(&a[id], 0.2f).
static constexpr auto unsafe_atomic_string {
R"(
extern "C"
__global__ void unsafe_atomic(float* a) {
int id = threadIdx.x + blockIdx.x * blockDim.x;
if (id < 1000) {
unsafeAtomicAdd(&a[id], 0.2f);
}
}
)"};
// Kernel amdgpu_ieee: stores sqrt(*a / *b) into *c and prints it.
// NOTE(review): the printed label reads "a * b" while the code divides; left
// untouched because the text is runtime data.
static constexpr auto amdgpu_ieee_string {
R"(
extern "C"
__global__ void amdgpu_ieee(float* a, float* b, float* c) {
*c = sqrt(*a / *b);
printf("sqrt(a * b) = %f\n", *c);
}
)"};
// Kernel associative_math: with x, y, z initialised from 0.1f, 0.2f, 0.3f,
// sets *check to 1 when (x*y)*z differs from x*(y*z) and to 0 otherwise.
static constexpr auto associative_math_string {
R"(
extern "C"
__global__ void associative_math(int* check) {
double x = 0.1f;
double y = 0.2f;
double z = 0.3f;
if((x*y)*z != x*(y*z))
*check = 1;
else *check = 0;
}
)"};
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
+53 -53
Переглянути файл
@@ -1,53 +1,53 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
#include <picojson.h>
#include <vector>
#include <string>
// Helper routines implemented in RtcUtility.cpp. Only the signatures are
// visible here; see the .cpp for the behaviour of each function.

// Takes no arguments and returns a vector of strings.
std::vector<std::string> get_combi_string_vec();

// Takes one option string and returns an int.
int split_comb_string(std::string option);

// Takes a list of strings and returns an int.
int calling_combination_function(std::vector<std::string> combi_vec_list);

// Each takes a string and returns an int.
int check_positive_CO_present(std::string find_string);
int check_negative_CO_present(std::string find_string);

// Takes a block name followed by the same four arguments the check_*
// functions of RtcFunctions.h take, and returns bool.
// NOTE(review): presumably dispatches to the check_* function that matches
// block_name - confirm in RtcUtility.cpp.
bool calling_resp_function(
    const std::string block_name, const char** Combination_CO,
    int Combination_CO_size, int max_thread_position, int fast_math_present);

// Takes no arguments and returns a picojson array.
picojson::array getblock_fromconfig();

// Look-ups keyed by a parameter name and a block name; one returns a string,
// the other a picojson array.
std::string get_string_parameters(
    std::string para_name_to_retrieve, std::string block_name);
picojson::array get_array_parameters(
    std::string para_name_to_retrieve, std::string block_name);
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
*/
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
#include <picojson.h>
#include <vector>
#include <string>
// Helper routines implemented in RtcUtility.cpp. Only the signatures are
// visible here; see the .cpp for the behaviour of each function.

// Takes no arguments and returns a vector of strings.
std::vector<std::string> get_combi_string_vec();

// Takes one option string and returns an int.
int split_comb_string(std::string option);

// Takes a list of strings and returns an int.
int calling_combination_function(std::vector<std::string> combi_vec_list);

// Each takes a string and returns an int.
int check_positive_CO_present(std::string find_string);
int check_negative_CO_present(std::string find_string);

// Takes a block name followed by the same four arguments the check_*
// functions of RtcFunctions.h take, and returns bool.
// NOTE(review): presumably dispatches to the check_* function that matches
// block_name - confirm in RtcUtility.cpp.
bool calling_resp_function(
    const std::string block_name, const char** Combination_CO,
    int Combination_CO_size, int max_thread_position, int fast_math_present);

// Takes no arguments and returns a picojson array.
picojson::array getblock_fromconfig();

// Look-ups keyed by a parameter name and a block name; one returns a string,
// the other a picojson array.
std::string get_string_parameters(
    std::string para_name_to_retrieve, std::string block_name);
picojson::array get_array_parameters(
    std::string para_name_to_retrieve, std::string block_name);
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
+25 -25
Переглянути файл
@@ -1,25 +1,25 @@
# Common Tests - Test independent of all platforms
set(TEST_SRC
copy_coherency.cc
)
# Custom target that invokes the C++ compiler with --genco on
# memcpyIntDevice.cpp and writes memcpyInt.hsaco under the binary tree's
# ../synchronization directory. The order of the flags and their values below
# forms the command line, so keep each flag directly before its argument.
add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
${HIP_PATH}/include -I
${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
# only for AMD
# The two cache-coherency sources are appended to TEST_SRC only when
# HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
set(AMD_SRC
cache_coherency_cpu_gpu.cc
cache_coherency_gpu_gpu.cc
)
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
endif()
# Register the synchronizationTests executable under the build_tests target,
# compiled as C++14.
hip_add_exe_to_target(NAME synchronizationTests
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++14)
# The code object target must be built before the test executable.
add_dependencies(synchronizationTests memcpyInt.hsaco)
# Common Tests - Test independent of all platforms
set(TEST_SRC
copy_coherency.cc
)
# Custom target that invokes the C++ compiler with --genco on
# memcpyIntDevice.cpp and writes memcpyInt.hsaco under the binary tree's
# ../synchronization directory. The order of the flags and their values below
# forms the command line, so keep each flag directly before its argument.
add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
${HIP_PATH}/include -I
${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
# only for AMD
# The two cache-coherency sources are appended to TEST_SRC only when
# HIP_PLATFORM matches "amd".
if(HIP_PLATFORM MATCHES "amd")
set(AMD_SRC
cache_coherency_cpu_gpu.cc
cache_coherency_gpu_gpu.cc
)
set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
endif()
# Register the synchronizationTests executable under the build_tests target,
# compiled as C++14.
hip_add_exe_to_target(NAME synchronizationTests
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++14)
# The code object target must be built before the test executable.
add_dependencies(synchronizationTests memcpyInt.hsaco)
+282 -282
Переглянути файл
@@ -1,282 +1,282 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test for Fine Grained CPU-GPU coherency.
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
typedef _Atomic(unsigned int) atomic_uint;

// Device-side busy-wait on a shared flag.
//
// Repeatedly compare-exchanges `value` with `value` on *address using acquire
// ordering at all-SVM-devices scope. Expected and desired are equal, so the
// flag is never modified; the operation only reports whether the flag
// currently equals `value` and, if it does not, what it holds instead.
//
// Returns 0 once the flag equals `value`, or -1 as soon as the flag is seen
// holding -1 (the value the other side writes when it has failed).
__device__ int
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  atomic_uint* const flag = reinterpret_cast<atomic_uint*>(address);
  for (;;) {
    // Reset on every attempt: a failed exchange overwrites it with the
    // flag's current content.
    unsigned int observed = value;
    const bool matched = __opencl_atomic_compare_exchange_strong(
        flag, /*expected=*/ &observed, /*desired=*/ value,
        __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
        /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // The abort marker is checked before the success flag.
    if (observed == -1)
      return -1;
    if (matched)
      return 0;
  }
}
// This kernel requires a single block, single thread dispatch.
//
// GPU half of the handshake whose host half is cpu_thread(). For each index i
// the kernel:
//   1. copies X[i] into A[i] and bumps AA1 to announce it,
//   2. waits until BA1 reaches i+1 (the peer announces its write to B there),
//   3. verifies B[i] == Y[i],
//   4. bumps AA2 to report its check passed, and
//   5. waits until BA2 reaches i+1 (the peer reports its own check there).
//
// *dresult receives 0 when all N rounds pass and -1 (converted to unsigned)
// when a mismatch is found locally or the peer signals failure by storing -1
// into one of its flags.
__global__ void
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
  const unsigned int kFailed = static_cast<unsigned int>(-1);
  // Tracked locally and written out once at the end, so that a failure
  // recorded inside the loop is not overwritten by the success value.
  unsigned int status = 0;

  for (size_t i = 0; i < N; i++) {
    // Store data into A, then atomically bump the flag with release ordering
    // at all-SVM-devices scope so the store is published before the flag
    // changes.
    A[i] = X[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);

    // Wait on the peer's write to B.
    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
      status = kFailed;
      break;
    }

    // Check the peer properly stored Y into B.
    bool stored_data_matches = (B[i] == Y[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert the other thread and abort. The
      // loop must be left here: falling through to the fetch_add below would
      // turn the -1 marker into 0 and erase the abort signal.
      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
             i, B[i], Y[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = kFailed;
      break;
    }

    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);

    // Wait for the peer to finish checking that X is stored in A.
    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
      status = kFailed;
      break;
    }
  }

  *dresult = status;
}
// Host-side busy-wait on a shared flag.
//
// Repeatedly runs a strong compare-exchange of `value` against `value` on
// *address with acquire ordering. Expected and desired are equal, so the flag
// is never modified; the operation only reports whether the flag currently
// equals `value` and, if it does not, what it holds instead.
//
// Returns 0 once the flag equals `value`, or -1 as soon as the flag is seen
// holding -1 (the value the other side writes when it has failed).
__host__ int
cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  for (;;) {
    // Reset on every attempt: a failed exchange overwrites it with the
    // flag's current content.
    unsigned int observed = value;
    const bool matched = __atomic_compare_exchange_n(
        address, /*expected=*/ &observed, /*desired=*/ value,
        /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
    // The abort marker is checked before the success flag.
    if (observed == -1)
      return -1;
    if (matched)
      return 0;
  }
}
// This host thread runs only on a single CPU thread.
//
// Host half of the handshake whose device half is gpu_kernel(). For each
// index i the function:
//   1. copies Y[i] into B[i] and bumps BA1 (release) to announce it,
//   2. waits until AA1 reaches i+1 (the peer announces its write to A there),
//   3. verifies A[i] == X[i],
//   4. bumps BA2 to report its check passed, and
//   5. waits until AA2 reaches i+1 (the peer reports its own check there).
//
// *hresult receives 0 when all N rounds pass and -1 (converted to unsigned)
// when a mismatch is found locally or the peer signals failure by storing -1
// into one of its flags.
__host__ void
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
  const unsigned int kFailed = static_cast<unsigned int>(-1);
  // Tracked locally and written out once at the end, so that a failure
  // recorded inside the loop is not overwritten by the success value.
  unsigned int status = 0;

  for (size_t i = 0; i < N; i++) {
    // Publish B[i]: the release increment orders the store before the flag.
    B[i] = Y[i];
    __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);

    // Wait for the peer's write to A.
    if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
      status = kFailed;
      break;
    }

    // Check the peer properly stored X into A.
    bool stored_data_matches = (A[i] == X[i]);
    if (!stored_data_matches) {
      // Mismatch: store the -1 abort marker for the peer and stop.
      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
             i, A[i], X[i]);
      __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
      status = kFailed;
      break;
    }

    // Check passed: let the peer continue.
    __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);

    // Wait for the peer to finish its own check of B.
    if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
      status = kFailed;
      break;
    }
  }

  *hresult = status;
}
static bool cpu_to_gpu_coherency() {
int *A_d, *B_d, *X_d, *Y_d;
int *A_res, *A_h, *B_h, *X_h, *Y_h;
unsigned int hresult, dresult;
size_t N = 1024;
size_t Nbytes = N * sizeof(int);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 1) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
return 0;
}
// Skip this test if feature is not supported.
static int device0 = 0;
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, device0));
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
return true;
}
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
(hipHostMallocCoherent | hipHostMallocMapped)));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
X_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Initialize the arrays and atomic variables.
for (size_t i = 0; i < N; i++) {
X_h[i] = 100000000 + i;
Y_h[i] = 300000000 + i;
}
// Initialize shared atomic flags between CPU and GPU.
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
AA1_h, 0));
*AA1_h = 0;
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
AA2_h, 0));
*AA2_h = 0;
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
BA1_h, 0));
*BA1_h = 0;
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
BA2_h, 0));
*BA2_h = 0;
// Skip the first stream, ensure stream is non-blocking.
hipStream_t stream[2];
HIP_CHECK(hipStreamCreate(&stream[0]));
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
// SVM memory - host pointer is the same as device pointer to array A.
A_h = A_d;
HIP_CHECK(hipMalloc(&X_d, Nbytes));
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
// Launch the GPU kernel.
const unsigned blocks = 1;
const unsigned threadsPerBlock = 1;
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
0, stream[1],
A_d, B_d, X_d, Y_d, N,
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
// Check if launch failed.
HIP_CHECK(hipGetLastError());
REQUIRE(dresult == 0);
// Do not sync the launched stream, instead run the cpu_thread.
std::thread host_thread(cpu_thread,
A_h, B_h, X_h, Y_h, N,
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
host_thread.detach();
REQUIRE(hresult == 0);
// Wait for Device side to finish.
HIP_CHECK(hipStreamSynchronize(stream[1]));
// Evaluate the resultant arrays A and B.
A_res = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(A_res[i] == (100000000 + i));
REQUIRE(B_h[i] == (300000000 + i));
}
// Free all the device and host memory allocated.
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(X_d));
HIP_CHECK(hipFree(Y_d));
HIP_CHECK(hipHostFree(AA1_h));
HIP_CHECK(hipHostFree(AA2_h));
HIP_CHECK(hipHostFree(BA1_h));
HIP_CHECK(hipHostFree(BA2_h));
HIP_CHECK(hipHostFree(B_h));
free(X_h);
free(Y_h);
free(A_res);
return true;
}
/**
 * Test Description
 * ------------------------
 *  - Message passing test for fine-grained coherency between the CPU and
 *    a GPU (the helper only runs on gfx90a / gfx940 devices).
 *    Array A is allocated on Device 0, and accessed remotely by the host.
 *    Device 0 also increments atomic ints AA1 and AA2.
 *    Array B is allocated on host, and accessed remotely by Device 0.
 *    Host also increments atomic ints BA1 and BA2.
 *    Kernel will launch on Device 0, and store array X into array A.
 *    Host Thread will store array Y into array B.
 *    Kernel will validate that the correct values of array Y are stored in B.
 *    Host Thread will validate that the correct values of array X are
 *    stored in A.
 * Test source
 * ------------------------
 *  - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.5
 *  - Test to be run only on AMD.
 */
TEST_CASE("Unit_cache_coherency_cpu_gpu") {
  // Coherency between CPU and GPU sharing host and device memory.
  const bool expected = true;
  REQUIRE(expected == cpu_to_gpu_coherency());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test for Fine Grained CPU-GPU coherency.
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
// Atomic view of `unsigned int`; the flag pointers are reinterpret_cast to
// `atomic_uint*` before being handed to the __opencl_atomic_* builtins below.
typedef _Atomic(unsigned int) atomic_uint;
// Device-side busy-wait on a shared flag.
// Repeatedly issues an acquire compare-exchange (expected == desired ==
// value, so the flag is never changed by it) until the flag equals value.
// Returns 0 once the flag matched, or -1 as soon as the flag is observed to
// hold -1, which the other side stores to signal that it failed.
__device__ int
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  const unsigned int abort_marker = static_cast<unsigned int>(-1);
  atomic_uint* flag = reinterpret_cast<atomic_uint*>(address);
  for (;;) {
    // On failure the builtin stores the value it actually saw in `observed`.
    unsigned int observed = value;
    const bool matched = __opencl_atomic_compare_exchange_strong(
        flag, /*expected=*/ &observed, /*desired=*/ value,
        __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
        /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    if (observed == abort_marker) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
// Device-side half of the CPU<->GPU message-passing handshake.
// This kernel requires a single block, single thread dispatch.
//
// For every index i the kernel:
//   1. stores X[i] into A[i] and release-increments AA1 to publish it,
//   2. waits until BA1 equals i+1, then checks that B[i] == Y[i],
//   3. release-increments AA2 (or stores -1 into AA2 on a mismatch),
//   4. waits until BA2 equals i+1 before moving on to the next index.
//
// dresult receives 0 when all N elements were exchanged and verified, and
// -1 (converted to unsigned) when a mismatch was found or one of the waits
// observed the -1 abort marker written by the peer.
__global__ void
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Store data into A, then atomically mark the flag with release
    // ordering so the write to A is ordered before the flag update.
    A[i] = X[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on the peer's write to B.
    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    // Check the peer properly stored Y into B.
    bool stored_data_matches = (B[i] == Y[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert other thread and abort.
      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
             i, B[i], Y[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = static_cast<unsigned int>(-1);
      // Stop here: the increment below would turn the -1 marker into 0 and
      // the peer would never observe the abort.
      break;
    }
    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on the peer to finish checking X is stored in A.
    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *dresult = status;
}
// Host-side busy-wait on a shared flag.
// Repeatedly issues an acquire compare-exchange (expected == desired ==
// value, so the flag is never changed by it) until the flag equals value.
// Returns 0 once the flag matched, or -1 as soon as the flag is observed to
// hold -1.
__host__ int
cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  const unsigned int abort_marker = static_cast<unsigned int>(-1);
  for (;;) {
    // On failure the builtin stores the value it actually saw in `observed`.
    unsigned int observed = value;
    const bool matched = __atomic_compare_exchange_n(
        address, /*expected=*/ &observed, /*desired=*/ value,
        /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
    if (observed == abort_marker) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
// Host-side half of the CPU<->GPU message-passing handshake.
// This host thread runs only on a single CPU thread.
//
// For every index i the thread:
//   1. stores Y[i] into B[i] and release-increments BA1 to publish it,
//   2. waits until AA1 equals i+1, then checks that A[i] == X[i],
//   3. release-increments BA2 (or stores -1 into BA2 on a mismatch),
//   4. waits until AA2 equals i+1 before moving on to the next index.
//
// hresult receives 0 when all N elements were exchanged and verified, and
// -1 (converted to unsigned) when a mismatch was found or one of the waits
// observed the -1 abort marker written by the peer.
__host__ void
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Publish B[i]; the release increment orders the store before the flag.
    B[i] = Y[i];
    __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
    // Wait for the peer to publish A[i].
    if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    bool stored_data_matches = (A[i] == X[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert the peer and abort.
      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
             i, A[i], X[i]);
      __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
      status = static_cast<unsigned int>(-1);
      break;
    }
    // Otherwise tell the peer to continue.
    __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
    // Wait for the peer to finish checking B[i].
    if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *hresult = status;
}
static bool cpu_to_gpu_coherency() {
int *A_d, *B_d, *X_d, *Y_d;
int *A_res, *A_h, *B_h, *X_h, *Y_h;
unsigned int hresult, dresult;
size_t N = 1024;
size_t Nbytes = N * sizeof(int);
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 1) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
return 0;
}
// Skip this test if feature is not supported.
static int device0 = 0;
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, device0));
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
return true;
}
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
(hipHostMallocCoherent | hipHostMallocMapped)));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
X_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Initialize the arrays and atomic variables.
for (size_t i = 0; i < N; i++) {
X_h[i] = 100000000 + i;
Y_h[i] = 300000000 + i;
}
// Initialize shared atomic flags between CPU and GPU.
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
AA1_h, 0));
*AA1_h = 0;
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
AA2_h, 0));
*AA2_h = 0;
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
BA1_h, 0));
*BA1_h = 0;
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
BA2_h, 0));
*BA2_h = 0;
// Skip the first stream, ensure stream is non-blocking.
hipStream_t stream[2];
HIP_CHECK(hipStreamCreate(&stream[0]));
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
// SVM memory - host pointer is the same as device pointer to array A.
A_h = A_d;
HIP_CHECK(hipMalloc(&X_d, Nbytes));
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
// Launch the GPU kernel.
const unsigned blocks = 1;
const unsigned threadsPerBlock = 1;
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
0, stream[1],
A_d, B_d, X_d, Y_d, N,
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
// Check if launch failed.
HIP_CHECK(hipGetLastError());
REQUIRE(dresult == 0);
// Do not sync the launched stream, instead run the cpu_thread.
std::thread host_thread(cpu_thread,
A_h, B_h, X_h, Y_h, N,
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
host_thread.detach();
REQUIRE(hresult == 0);
// Wait for Device side to finish.
HIP_CHECK(hipStreamSynchronize(stream[1]));
// Evaluate the resultant arrays A and B.
A_res = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(A_res[i] == (100000000 + i));
REQUIRE(B_h[i] == (300000000 + i));
}
// Free all the device and host memory allocated.
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(X_d));
HIP_CHECK(hipFree(Y_d));
HIP_CHECK(hipHostFree(AA1_h));
HIP_CHECK(hipHostFree(AA2_h));
HIP_CHECK(hipHostFree(BA1_h));
HIP_CHECK(hipHostFree(BA2_h));
HIP_CHECK(hipHostFree(B_h));
free(X_h);
free(Y_h);
free(A_res);
return true;
}
/**
 * Test Description
 * ------------------------
 *  - Message passing test for fine-grained coherency between the CPU and
 *    a GPU (the helper only runs on gfx90a / gfx940 devices).
 *    Array A is allocated on Device 0, and accessed remotely by the host.
 *    Device 0 also increments atomic ints AA1 and AA2.
 *    Array B is allocated on host, and accessed remotely by Device 0.
 *    Host also increments atomic ints BA1 and BA2.
 *    Kernel will launch on Device 0, and store array X into array A.
 *    Host Thread will store array Y into array B.
 *    Kernel will validate that the correct values of array Y are stored in B.
 *    Host Thread will validate that the correct values of array X are
 *    stored in A.
 * Test source
 * ------------------------
 *  - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.5
 *  - Test to be run only on AMD.
 */
TEST_CASE("Unit_cache_coherency_cpu_gpu") {
  // Coherency between CPU and GPU sharing host and device memory.
  const bool expected = true;
  REQUIRE(expected == cpu_to_gpu_coherency());
}
+294 -294
Переглянути файл
@@ -1,294 +1,294 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test for Fine Grained GPU-GPU coherency.
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
// Atomic view of `unsigned int`; the flag pointers are reinterpret_cast to
// `atomic_uint*` before being handed to the __opencl_atomic_* builtins below.
typedef _Atomic(unsigned int) atomic_uint;
// Device-side busy-wait on a shared flag.
// Repeatedly issues an acquire compare-exchange (expected == desired ==
// value, so the flag is never changed by it) until the flag equals value.
// Returns 0 once the flag matched, or -1 as soon as the flag is observed to
// hold -1, which the other side stores to signal that it failed.
__device__ int
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  const unsigned int abort_marker = static_cast<unsigned int>(-1);
  atomic_uint* flag = reinterpret_cast<atomic_uint*>(address);
  for (;;) {
    // On failure the builtin stores the value it actually saw in `observed`.
    unsigned int observed = value;
    const bool matched = __opencl_atomic_compare_exchange_strong(
        flag, /*expected=*/ &observed, /*desired=*/ value,
        __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
        /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    if (observed == abort_marker) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
// Device 0 half of the GPU<->GPU message-passing handshake.
// This kernel requires a single block, single thread dispatch.
//
// For every index i the kernel:
//   1. stores X[i] into A[i] and release-increments AA1 to publish it,
//   2. waits until BA1 equals i+1, then checks that B[i] == Y[i],
//   3. release-increments AA2 (or stores -1 into AA2 on a mismatch),
//   4. waits until BA2 equals i+1 before moving on to the next index.
//
// cache0_result receives 0 when all N elements were exchanged and verified,
// and -1 (converted to unsigned) when a mismatch was found or one of the
// waits observed the -1 abort marker written by gpu_cache1.
__global__ void
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Store data into A, then atomically mark the flag with release
    // ordering so the write to A is ordered before the flag update.
    A[i] = X[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on device 1's global write to B.
    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    // Check device 1 properly stored Y into B.
    bool stored_data_matches = (B[i] == Y[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert other thread and abort.
      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
             i, B[i], Y[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = static_cast<unsigned int>(-1);
      // Stop here: the increment below would turn the -1 marker into 0 and
      // gpu_cache1 would never observe the abort.
      break;
    }
    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *cache0_result = status;
}
// Device 1 half of the GPU<->GPU message-passing handshake.
// This kernel requires a single block, single thread dispatch.
//
// For every index i the kernel:
//   1. stores Y[i] into B[i] and release-increments BA1 to publish it,
//   2. waits until AA1 equals i+1, then checks that A[i] == X[i],
//   3. release-increments BA2 (or stores -1 into BA2 on a mismatch),
//   4. waits until AA2 equals i+1 before moving on to the next index.
//
// cache1_result receives 0 when all N elements were exchanged and verified,
// and -1 (converted to unsigned) when a mismatch was found or one of the
// waits observed the -1 abort marker written by gpu_cache0.
__global__ void
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Publish B[i]; the release increment orders the store before the flag.
    B[i] = Y[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on gpu_cache0's write to A.
    if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    bool stored_data_matches = (A[i] == X[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert other thread and abort.
      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
             i, A[i], X[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = static_cast<unsigned int>(-1);
      // Stop here: the increment below would turn the -1 marker into 0 and
      // gpu_cache0 would never observe the abort.
      break;
    }
    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on kernel gpu_cache0 to finish checking Y is stored in B.
    if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *cache1_result = status;
}
static bool gpu_to_gpu_coherency() {
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
int *A_h, *B_h, *X_h, *Y_h;
unsigned int cache0_result, cache1_result;
size_t N = 1024;
size_t Nbytes = N * sizeof(int);
int numDevices = 0;
int numTestDevices = 2;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < numTestDevices) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return 0;
}
// Skip this test if either device does not support this feature.
hipDeviceProp_t props0, props1;
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
return true;
}
// Allocate Host Side Memory.
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
B_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
X_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Initialize the arrays and atomic variables.
for (size_t i = 0; i < N; i++) {
X_h[i] = 100000000 + i;
Y_h[i] = 300000000 + i;
}
// Initialize shared atomic flags on host coherent memory.
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
AA1_h, 0));
*AA1_h = 0;
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
AA2_h, 0));
*AA2_h = 0;
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
BA1_h, 0));
*BA1_h = 0;
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
BA2_h, 0));
*BA2_h = 0;
// Skip the first stream.
hipStream_t stream[3];
HIP_CHECK(hipStreamCreate(&stream[0]));
// Set-up Device 0.
HIP_CHECK(hipSetDevice(0));
// Enable P2P access to Device 1.
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
// Allocating Coherent Memory for Array A_d on Device 0.
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
// Set-up Device 1.
HIP_CHECK(hipSetDevice(1));
// Enable P2P access to Device 0.
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
// Allocating Coherent Memory for Array B_d on Device 1.
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
// Transfer initialized data onto the device arrays.
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
// Prepare and launch the device kernels.
const unsigned blocks = 1;
const unsigned threadsPerBlock = 1;
HIP_CHECK(hipSetDevice(0));
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
0, stream[1],
A_d, B_d, X_d0, Y_d0, N,
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
// Check if launch failed.
HIP_CHECK(hipGetLastError());
REQUIRE(cache0_result == 0);
HIP_CHECK(hipSetDevice(1));
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
0, stream[2],
A_d, B_d, X_d1, Y_d1, N,
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
HIP_CHECK(hipGetLastError());
REQUIRE(cache1_result == 0);
// Wait for kernels on both devices.
HIP_CHECK(hipStreamSynchronize(stream[1]));
HIP_CHECK(hipStreamSynchronize(stream[2]));
// Evaluate the resultant arrays A and B.
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(A_h[i] == (100000000 + i));
REQUIRE(B_h[i] == (300000000 + i));
}
// Free all the device and host memory allocated.
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(X_d0));
HIP_CHECK(hipFree(Y_d0));
HIP_CHECK(hipFree(X_d1));
HIP_CHECK(hipFree(Y_d1));
HIP_CHECK(hipHostFree(AA1_h));
HIP_CHECK(hipHostFree(AA2_h));
HIP_CHECK(hipHostFree(BA1_h));
HIP_CHECK(hipHostFree(BA2_h));
free(A_h);
free(B_h);
free(X_h);
free(Y_h);
return true;
}
/**
 * Test Description
 * ------------------------
 *  - Message passing test for fine-grained coherency between two GPUs
 *    (per the original description, devices where XGMI enables fine-grained
 *    communication between GPUs).
 *    Array A is allocated on Device 0, and accessed remotely by Device 1.
 *    Device 0 also increments atomic ints AA1 and AA2.
 *    Array B is allocated on Device 1, and accessed remotely by Device 0.
 *    Device 1 also increments atomic ints BA1 and BA2.
 *    Kernel 0 will launch on Device 0, and store array X into array A.
 *    Kernel 1 will launch on Device 1, and store array Y into array B.
 *    Kernel 0 will validate that the correct values of array Y are
 *    stored in B.
 *    Kernel 1 will validate that the correct values of array X are
 *    stored in A.
 * Test source
 * ------------------------
 *  - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
 * Test requirements
 * ------------------------
 *  - HIP_VERSION >= 5.5
 *  - Test to be run only on AMD.
 */
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
  // Coherency between GPUs accessing local or remote FB.
  const bool expected = true;
  REQUIRE(expected == gpu_to_gpu_coherency());
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Simple test for Fine Grained GPU-GPU coherency.
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
// Atomic view of `unsigned int`; the flag pointers are reinterpret_cast to
// `atomic_uint*` before being handed to the __opencl_atomic_* builtins below.
typedef _Atomic(unsigned int) atomic_uint;
// Device-side busy-wait on a shared flag.
// Repeatedly issues an acquire compare-exchange (expected == desired ==
// value, so the flag is never changed by it) until the flag equals value.
// Returns 0 once the flag matched, or -1 as soon as the flag is observed to
// hold -1, which the other side stores to signal that it failed.
__device__ int
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  const unsigned int abort_marker = static_cast<unsigned int>(-1);
  atomic_uint* flag = reinterpret_cast<atomic_uint*>(address);
  for (;;) {
    // On failure the builtin stores the value it actually saw in `observed`.
    unsigned int observed = value;
    const bool matched = __opencl_atomic_compare_exchange_strong(
        flag, /*expected=*/ &observed, /*desired=*/ value,
        __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
        /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    if (observed == abort_marker) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
// Device 0 half of the GPU<->GPU message-passing handshake.
// This kernel requires a single block, single thread dispatch.
//
// For every index i the kernel:
//   1. stores X[i] into A[i] and release-increments AA1 to publish it,
//   2. waits until BA1 equals i+1, then checks that B[i] == Y[i],
//   3. release-increments AA2 (or stores -1 into AA2 on a mismatch),
//   4. waits until BA2 equals i+1 before moving on to the next index.
//
// cache0_result receives 0 when all N elements were exchanged and verified,
// and -1 (converted to unsigned) when a mismatch was found or one of the
// waits observed the -1 abort marker written by gpu_cache1.
__global__ void
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Store data into A, then atomically mark the flag with release
    // ordering so the write to A is ordered before the flag update.
    A[i] = X[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on device 1's global write to B.
    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    // Check device 1 properly stored Y into B.
    bool stored_data_matches = (B[i] == Y[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert other thread and abort.
      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
             i, B[i], Y[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = static_cast<unsigned int>(-1);
      // Stop here: the increment below would turn the -1 marker into 0 and
      // gpu_cache1 would never observe the abort.
      break;
    }
    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *cache0_result = status;
}
// Device 1 half of the GPU<->GPU message-passing handshake.
// This kernel requires a single block, single thread dispatch.
//
// For every index i the kernel:
//   1. stores Y[i] into B[i] and release-increments BA1 to publish it,
//   2. waits until AA1 equals i+1, then checks that A[i] == X[i],
//   3. release-increments BA2 (or stores -1 into BA2 on a mismatch),
//   4. waits until AA2 equals i+1 before moving on to the next index.
//
// cache1_result receives 0 when all N elements were exchanged and verified,
// and -1 (converted to unsigned) when a mismatch was found or one of the
// waits observed the -1 abort marker written by gpu_cache0.
__global__ void
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
           unsigned int *AA1, unsigned int *AA2,
           unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
  // The outcome is tracked locally and published exactly once at the end so
  // that a failure recorded inside the loop is not overwritten with success.
  unsigned int status = 0;
  for (size_t i = 0; i < N; i++) {
    // Publish B[i]; the release increment orders the store before the flag.
    B[i] = Y[i];
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on gpu_cache0's write to A.
    if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
    bool stored_data_matches = (A[i] == X[i]);
    if (!stored_data_matches) {
      // If the data does not match, alert other thread and abort.
      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
             i, A[i], X[i]);
      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
          __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
      status = static_cast<unsigned int>(-1);
      // Stop here: the increment below would turn the -1 marker into 0 and
      // gpu_cache0 would never observe the abort.
      break;
    }
    // Otherwise tell the other thread to continue.
    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
        __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
    // Wait on kernel gpu_cache0 to finish checking Y is stored in B.
    if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
      status = static_cast<unsigned int>(-1);
      break;
    }
  }
  *cache1_result = status;
}
static bool gpu_to_gpu_coherency() {
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
int *A_h, *B_h, *X_h, *Y_h;
unsigned int cache0_result, cache1_result;
size_t N = 1024;
size_t Nbytes = N * sizeof(int);
int numDevices = 0;
int numTestDevices = 2;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < numTestDevices) {
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
return 0;
}
// Skip this test if either device does not support this feature.
hipDeviceProp_t props0, props1;
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
return true;
}
// Allocate Host Side Memory.
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
A_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
B_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
X_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
// Initialize the arrays and atomic variables.
for (size_t i = 0; i < N; i++) {
X_h[i] = 100000000 + i;
Y_h[i] = 300000000 + i;
}
// Initialize shared atomic flags on host coherent memory.
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
AA1_h, 0));
*AA1_h = 0;
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
AA2_h, 0));
*AA2_h = 0;
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
BA1_h, 0));
*BA1_h = 0;
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
BA2_h, 0));
*BA2_h = 0;
// Skip the first stream.
hipStream_t stream[3];
HIP_CHECK(hipStreamCreate(&stream[0]));
// Set-up Device 0.
HIP_CHECK(hipSetDevice(0));
// Enable P2P access to Device 1.
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
// Allocating Coherent Memory for Array A_d on Device 0.
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
// Set-up Device 1.
HIP_CHECK(hipSetDevice(1));
// Enable P2P access to Device 0.
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
// Allocating Coherent Memory for Array B_d on Device 1.
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
Nbytes, hipDeviceMallocFinegrained);
REQUIRE(status == hipSuccess);
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
// Transfer initialized data onto the device arrays.
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
// Prepare and launch the device kernels.
const unsigned blocks = 1;
const unsigned threadsPerBlock = 1;
HIP_CHECK(hipSetDevice(0));
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
0, stream[1],
A_d, B_d, X_d0, Y_d0, N,
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
// Check if launch failed.
HIP_CHECK(hipGetLastError());
REQUIRE(cache0_result == 0);
HIP_CHECK(hipSetDevice(1));
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
0, stream[2],
A_d, B_d, X_d1, Y_d1, N,
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
HIP_CHECK(hipGetLastError());
REQUIRE(cache1_result == 0);
// Wait for kernels on both devices.
HIP_CHECK(hipStreamSynchronize(stream[1]));
HIP_CHECK(hipStreamSynchronize(stream[2]));
// Evaluate the resultant arrays A and B.
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(A_h[i] == (100000000 + i));
REQUIRE(B_h[i] == (300000000 + i));
}
// Free all the device and host memory allocated.
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(B_d));
HIP_CHECK(hipFree(X_d0));
HIP_CHECK(hipFree(Y_d0));
HIP_CHECK(hipFree(X_d1));
HIP_CHECK(hipFree(Y_d1));
HIP_CHECK(hipHostFree(AA1_h));
HIP_CHECK(hipHostFree(AA2_h));
HIP_CHECK(hipHostFree(BA1_h));
HIP_CHECK(hipHostFree(BA2_h));
free(A_h);
free(B_h);
free(X_h);
free(Y_h);
return true;
}
/**
* Test Description
* ------------------------
* - This test runs on devices where XGMI enables fine-grained communication
* between GPUs. This performs a message passing test.
* Array A is allocated on Device 0, and remotely on Device 1.
* Device 0 also increments atomic ints AA1 and AA2.
* Array B is allocated on Device 1, and remotely on Device 0.
* Device 1 also increments atomic ints BA1 and BA2.
* Kernel 0 will launch on Device 0, and store array X into array A.
* Kernel 1 will launch on Device 1, and store array Y into array B.
* Kernel 0 will validate that the correct values of array Y are stored in B.
* Kernel 1 will validate that the correct values of array X are stored in A.
* Test source
* ------------------------
* - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
* - Test to be run only on AMD.
*/
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
  // Coherency between GPUs accessing local or remote FB: run the scenario
  // and require that it reports success.
  const bool coherent = gpu_to_gpu_coherency();
  REQUIRE(coherent == true);
}
+340 -340
Переглянути файл
@@ -1,340 +1,340 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
// Launch geometry shared by the kernel launches in this file: the block size
// used for dim3(threadsPerBlock), and the blocksPerCU value handed to
// HipTest::setNumBlocks() when the grid size is computed.
unsigned threadsPerBlock = 256;
unsigned blocksPerCU = 6;
// Wrapper around a kernel obtained through the HIP module API.
// Construction loads the module file and resolves the function handle;
// launch() then starts that function with (dst, src, numElements) arguments.
// NOTE(review): nothing visible in this class calls hipModuleUnload - confirm
// whether the module is intentionally left loaded.
class MemcpyFunction {
public:
// Loads `fileName` and resolves `functionName` by delegating to load().
MemcpyFunction(const char* fileName, const char* functionName) {
load(fileName, functionName);
}
// Loads the module and stores the module and function handles.
void load(const char* fileName, const char* functionName);
// Launches the resolved function on stream `s`.
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
private:
// Handle filled in by hipModuleGetFunction() in load().
hipFunction_t _function;
// Handle filled in by hipModuleLoad() in load().
hipModule_t _module;
};
// Loads the module stored in `fileName` into _module, then looks up
// `functionName` inside it and stores the handle in _function.
// Both HIP calls are wrapped in HIP_CHECK.
void MemcpyFunction::load(const char* fileName, const char* functionName) {
HIP_CHECK(hipModuleLoad(&_module, fileName));
// The lookup needs the module handle produced by the call above.
HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
}
// Launches the module kernel held in _function on stream `s`.
// The three kernel arguments are packed into one buffer and handed to
// hipModuleLaunchKernel through its `extra` parameter (pointer + size pairs
// terminated by HIP_LAUNCH_PARAM_END); the kernelParams argument is null.
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) {  // NOLINT
  // Argument buffer: members are laid out in the order (dst, src, count).
  struct KernelArgs {
    int* dstPtr;
    const int* srcPtr;
    size_t count;
  };
  KernelArgs packed{dst, src, numElements};
  size_t packedBytes = sizeof(packed);

  void* launchConfig[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &packed,
                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &packedBytes,
                          HIP_LAUNCH_PARAM_END};

  // 1-D grid sized by the shared helper; 1-D block of threadsPerBlock threads.
  const unsigned gridBlocks =
      HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
  HIP_CHECK(hipModuleLaunchKernel(_function, gridBlocks, 1, 1,
                                  threadsPerBlock, 1, 1, 0, s, nullptr,
                                  reinterpret_cast<void**>(&launchConfig)));
}
// While true, checkReverse() does not execute its REQUIRE on the data.
bool g_warnOnFail = true;
// Element counts (number of ints) iterated by the Unit_Copy_Coherency test.
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
// Store the 32-bit integer `val` into ptr[0 .. numElements).
// Each thread starts at its global index and advances by the total number of
// launched threads (blockDim.x * gridDim.x) until it passes numElements.
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
  const int firstIdx = (blockIdx.x * blockDim.x + threadIdx.x);
  const int step = blockDim.x * gridDim.x;
  size_t idx = firstIdx;
  while (idx < numElements) {
    ptr[idx] = val;
    idx += step;
  }
}
// Copy src[0 .. numElements) into dst[0 .. numElements).
// Each thread starts at its global index and advances by the total number of
// launched threads (blockDim.x * gridDim.x) until it passes numElements.
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
  const int firstIdx = (blockIdx.x * blockDim.x + threadIdx.x);
  const int step = blockDim.x * gridDim.x;
  size_t idx = firstIdx;
  while (idx < numElements) {
    dst[idx] = src[idx];
    idx += step;
  }
}
// Check the array in reverse order, to more easily detect cases where
// the copy is "partially" done.
//
// ptr         - host-readable array to verify
// numElements - number of ints in ptr
// expected    - value every element must hold
//
// Every mismatching element is reported on stderr. When g_warnOnFail is
// false the mismatch additionally fails the test through REQUIRE. Scanning
// stops after 10 mismatches so a completely wrong buffer does not flood the
// log.
void checkReverse(const int* ptr, int numElements, int expected) {
  constexpr int kMaxReported = 10;
  int mismatchCnt = 0;
  for (int i = numElements - 1; i >= 0; i--) {
    if (ptr[i] == expected) {
      continue;
    }
    fprintf(stderr,
            "warn: checkReverse mismatch at i=%d: got 0x%x, expected 0x%x\n",
            i, static_cast<unsigned>(ptr[i]), static_cast<unsigned>(expected));
    if (!g_warnOnFail) {
      REQUIRE(ptr[i] == expected);
    }
    // Only real mismatches count towards the reporting limit.
    if (++mismatchCnt >= kMaxReported) {
      break;
    }
  }
}
// Expands to a switch case that returns the enumerator's own spelling.
// Kept as a file-level helper for switch-based stringifiers.
#define ENUM_CASE_STR(x) \
  case x:                \
    return #x

// Kinds of command a test step can issue; MAX_CmdType is the enumerator count.
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };

// Returns the enumerator name of `c`, or "UNKNOWN" for any other value
// (including MAX_CmdType).
const char* CmdTypeStr(CmdType c) {
  // Indexed by enumerator value; order must match the enum declaration above.
  static const char* const kCmdNames[MAX_CmdType] = {"COPY", "KERNEL",
                                                     "MODULE_KERNEL"};
  const int index = static_cast<int>(c);
  if (index < 0 || index >= MAX_CmdType) {
    return "UNKNOWN";
  }
  return kCmdNames[index];
}

// Kinds of synchronization applied between the two commands of a test step;
// MAX_SyncType is the enumerator count.
enum SyncType {
  NONE,
  EVENT_QUERY,
  EVENT_SYNC,
  STREAM_WAIT_EVENT,
  STREAM_QUERY,
  STREAM_SYNC,
  DEVICE_SYNC,
  MAX_SyncType
};

// Returns the enumerator name of `s`, or "UNKNOWN" for any other value
// (including MAX_SyncType).
const char* SyncTypeStr(SyncType s) {
  // Indexed by enumerator value; order must match the enum declaration above.
  static const char* const kSyncNames[MAX_SyncType] = {
      "NONE",         "EVENT_QUERY", "EVENT_SYNC", "STREAM_WAIT_EVENT",
      "STREAM_QUERY", "STREAM_SYNC", "DEVICE_SYNC"};
  const int index = static_cast<int>(s);
  if (index < 0 || index >= MAX_SyncType) {
    return "UNKNOWN";
  }
  return kSyncNames[index];
}
// Issues one copy of numElements ints from src to dst on stream `s`, using
// the mechanism selected by `cmd`:
//   COPY          - hipMemcpyAsync (device to device)
//   KERNEL        - memcpyIntKernel launched with hipLaunchKernelGGL
//   MODULE_KERNEL - "memcpyIntKernel" loaded from memcpyInt.hsaco
// Any other value only prints an informational message.
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
            size_t numElements) {
  if (cmd == COPY) {
    const size_t numBytes = numElements * sizeof(int);
    HIP_CHECK(hipMemcpyAsync(dst, src, numBytes, hipMemcpyDeviceToDevice, s));
  } else if (cmd == KERNEL) {
    const unsigned gridBlocks =
        HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
    hipLaunchKernelGGL(memcpyIntKernel, dim3(gridBlocks),
                       dim3(threadsPerBlock), 0, s, dst, src, numElements);
  } else if (cmd == MODULE_KERNEL) {
    // The module is loaded afresh for every call.
    MemcpyFunction moduleCopy("memcpyInt.hsaco", "memcpyIntKernel");
    moduleCopy.launch(dst, src, numElements, s);
  } else {
    printf("Info:unknown cmd=%d type", cmd);
  }
}
// Puts the test buffers into a known state before a run:
//   Ad - every element set to `expected`
//   Bd - filled with a poison value so a missing copy is detectable
//   Ch - every byte set to 13 with memset
// All kernels are launched on the null stream and the function returns only
// after hipDeviceSynchronize().
// NOTE(review): Bd is filled twice (0xDEADBEEF, then 0xF000BA55) and no Cd
// buffer is poisoned here - the second launch may have been meant for Cd;
// confirm with the test owner.
void resetInputs(int* Ad, int* Bd, int* Ch,
size_t numElements, int expected) {
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
threadsPerBlock, numElements);
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Ad, expected, numElements);
// poison with bad value to ensure is overwritten correctly
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
// This second fill overwrites the value written to Bd just above.
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
// Ch is written directly from the host.
memset(Ch, 13, numElements * sizeof(int));
HIP_CHECK(hipDeviceSynchronize());
}
// Intended to test proper synchronization and cache flushing between two
// commands, cmdA and cmdB, both of type CmdType. Every command copies memory,
// using either hipMemcpyAsync or a kernel implementation.
//
// Sequence:
//   1. resetInputs() fills Ad with `expected` and poisons Bd and Ch.
//   2. cmdA copies Ad to Bd on stream1.
//   3. The synchronization selected by syncType is applied.
//   4. cmdB copies Bd to Cd on stream2.
//   5. Cd is copied to host Ch with an async copy on stream2, stream2 is
//      synchronized, and Ch is checked.
// Correct result at the end is that Ch contains the contents originally
// written to Ad (`expected`).
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
                 hipStream_t stream1, hipStream_t stream2, int numElements,
                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
  hipEvent_t e;
  HIP_CHECK(hipEventCreateWithFlags(&e, 0));
  resetInputs(Ad, Bd, Ch, numElements, expected);
  const size_t sizeElements = numElements * sizeof(int);
  // The size is reported in MB (bytes / 1024 / 1024) to match the label.
  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n",  // NOLINT
          sizeElements,
          static_cast<double>(sizeElements) / (1024.0 * 1024.0),
          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
  /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
  fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
  return;
  }*/
  // Step A:
  runCmd(cmdAType, Bd, Ad, stream1, numElements);
  // Sync in-between?
  switch (syncType) {
    case NONE:
      break;
    case EVENT_QUERY: {
      // Poll the event recorded after cmdA until it is no longer pending.
      hipError_t st = hipErrorNotReady;
      HIP_CHECK(hipEventRecord(e, stream1));
      do {
        st = hipEventQuery(e);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case EVENT_SYNC:
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipEventSynchronize(e));
      break;
    case STREAM_WAIT_EVENT:
      // The host does not wait here; stream2 is made to wait on the event.
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
      break;
    case STREAM_QUERY: {
      // Poll stream1 until it is no longer pending.
      hipError_t st = hipErrorNotReady;
      do {
        st = hipStreamQuery(stream1);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case STREAM_SYNC:
      HIP_CHECK(hipStreamSynchronize(stream1));
      break;
    case DEVICE_SYNC:
      HIP_CHECK(hipDeviceSynchronize());
      break;
    default:
      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
      // Release the event before bailing out so this path does not leak it.
      HIP_CHECK(hipEventDestroy(e));
      return;
  }
  // Step B:
  runCmd(cmdBType, Cd, Bd, stream2, numElements);
  // Copy back to host, use async copy to avoid any extra synchronization
  // that might mask issues.
  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
                           stream2));
  HIP_CHECK(hipStreamSynchronize(stream2));
  checkReverse(Ch, numElements, expected);
  HIP_CHECK(hipEventDestroy(e));
}
// Allocates the buffers and streams for one data size and runs the test
// matrix on them.
// numElements - number of ints in each of the device buffers Ad, Bd, Cd and
//               in the host buffer Ch (allocated with hipHostMalloc).
// One fixed combination is run first; then every cmdA x cmdB pair is run with
// each sync mode listed in the switch below. All resources are released
// before returning.
void testWrapper(size_t numElements) {
const size_t sizeElements = numElements * sizeof(int);
// Value written to Ad by resetInputs() and expected in Ch after each run.
const int expected = 0x42;
int *Ad, *Bd, *Cd, *Ch;
HIP_CHECK(hipMalloc(&Ad, sizeElements));
HIP_CHECK(hipMalloc(&Bd, sizeElements));
HIP_CHECK(hipMalloc(&Cd, sizeElements));
HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
hipStream_t stream1, stream2;
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipDeviceSynchronize());
// Single fixed combination run ahead of the exhaustive sweep.
runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
// Only the sync modes with an active case label are exercised; NONE and
// STREAM_QUERY are commented out and therefore fall into `default`.
switch (syncMode) {
// case NONE::
case EVENT_QUERY:
case EVENT_SYNC:
case STREAM_WAIT_EVENT:
// case STREAM_QUERY:
case STREAM_SYNC:
case DEVICE_SYNC:
runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
break;
default:
break;
}
}
}
}
// Disabled hand-picked combinations, kept for reference.
#if 0
runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
numElements, Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
numElements, Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
#endif
HIP_CHECK(hipFree(Ad));
HIP_CHECK(hipFree(Bd));
HIP_CHECK(hipFree(Cd));
HIP_CHECK(hipHostFree(Ch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
}
/**
* Test Description
* ------------------------
* - Test cache management (fences) and synchronization between
* kernel and copy commands. Exhaustively tests 3 command types
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
* another command, across a sweep of data sizes designed to stress
* various levels of the memory hierarchy.
* Test source
* ------------------------
* - catch/unit/synchronization/copy_coherency.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_Copy_Coherency") {
  // Run the whole command/sync matrix once for every configured data size,
  // in the order the sizes appear in g_elementSizes.
  for (const int elementCount : g_elementSizes) {
    const size_t numElements = elementCount;
    testWrapper(numElements);
  }
}
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
// Launch geometry shared by the kernel launches in this file: the block size
// used for dim3(threadsPerBlock), and the blocksPerCU value handed to
// HipTest::setNumBlocks() when the grid size is computed.
unsigned threadsPerBlock = 256;
unsigned blocksPerCU = 6;
// Wrapper around a kernel obtained through the HIP module API.
// Construction loads the module file and resolves the function handle;
// launch() then starts that function with (dst, src, numElements) arguments.
// NOTE(review): nothing visible in this class calls hipModuleUnload - confirm
// whether the module is intentionally left loaded.
class MemcpyFunction {
public:
// Loads `fileName` and resolves `functionName` by delegating to load().
MemcpyFunction(const char* fileName, const char* functionName) {
load(fileName, functionName);
}
// Loads the module and stores the module and function handles.
void load(const char* fileName, const char* functionName);
// Launches the resolved function on stream `s`.
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
private:
// Handle filled in by hipModuleGetFunction() in load().
hipFunction_t _function;
// Handle filled in by hipModuleLoad() in load().
hipModule_t _module;
};
// Loads the module stored in `fileName` into _module, then looks up
// `functionName` inside it and stores the handle in _function.
// Both HIP calls are wrapped in HIP_CHECK.
void MemcpyFunction::load(const char* fileName, const char* functionName) {
HIP_CHECK(hipModuleLoad(&_module, fileName));
// The lookup needs the module handle produced by the call above.
HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
}
// Launches the module kernel held in _function on stream `s`.
// The three kernel arguments are packed into one buffer and handed to
// hipModuleLaunchKernel through its `extra` parameter (pointer + size pairs
// terminated by HIP_LAUNCH_PARAM_END); the kernelParams argument is null.
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) {  // NOLINT
  // Argument buffer: members are laid out in the order (dst, src, count).
  struct KernelArgs {
    int* dstPtr;
    const int* srcPtr;
    size_t count;
  };
  KernelArgs packed{dst, src, numElements};
  size_t packedBytes = sizeof(packed);

  void* launchConfig[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &packed,
                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &packedBytes,
                          HIP_LAUNCH_PARAM_END};

  // 1-D grid sized by the shared helper; 1-D block of threadsPerBlock threads.
  const unsigned gridBlocks =
      HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
  HIP_CHECK(hipModuleLaunchKernel(_function, gridBlocks, 1, 1,
                                  threadsPerBlock, 1, 1, 0, s, nullptr,
                                  reinterpret_cast<void**>(&launchConfig)));
}
// While true, checkReverse() does not execute its REQUIRE on the data.
bool g_warnOnFail = true;
// Element counts (number of ints) iterated by the Unit_Copy_Coherency test.
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
// Store the 32-bit integer `val` into ptr[0 .. numElements).
// Each thread starts at its global index and advances by the total number of
// launched threads (blockDim.x * gridDim.x) until it passes numElements.
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
  const int firstIdx = (blockIdx.x * blockDim.x + threadIdx.x);
  const int step = blockDim.x * gridDim.x;
  size_t idx = firstIdx;
  while (idx < numElements) {
    ptr[idx] = val;
    idx += step;
  }
}
// Copy src[0 .. numElements) into dst[0 .. numElements).
// Each thread starts at its global index and advances by the total number of
// launched threads (blockDim.x * gridDim.x) until it passes numElements.
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
  const int firstIdx = (blockIdx.x * blockDim.x + threadIdx.x);
  const int step = blockDim.x * gridDim.x;
  size_t idx = firstIdx;
  while (idx < numElements) {
    dst[idx] = src[idx];
    idx += step;
  }
}
// Check the array in reverse order, to more easily detect cases where
// the copy is "partially" done.
//
// ptr         - host-readable array to verify
// numElements - number of ints in ptr
// expected    - value every element must hold
//
// Every mismatching element is reported on stderr. When g_warnOnFail is
// false the mismatch additionally fails the test through REQUIRE. Scanning
// stops after 10 mismatches so a completely wrong buffer does not flood the
// log.
void checkReverse(const int* ptr, int numElements, int expected) {
  constexpr int kMaxReported = 10;
  int mismatchCnt = 0;
  for (int i = numElements - 1; i >= 0; i--) {
    if (ptr[i] == expected) {
      continue;
    }
    fprintf(stderr,
            "warn: checkReverse mismatch at i=%d: got 0x%x, expected 0x%x\n",
            i, static_cast<unsigned>(ptr[i]), static_cast<unsigned>(expected));
    if (!g_warnOnFail) {
      REQUIRE(ptr[i] == expected);
    }
    // Only real mismatches count towards the reporting limit.
    if (++mismatchCnt >= kMaxReported) {
      break;
    }
  }
}
// Expands to a switch case that returns the enumerator's own spelling.
// Kept as a file-level helper for switch-based stringifiers.
#define ENUM_CASE_STR(x) \
  case x:                \
    return #x

// Kinds of command a test step can issue; MAX_CmdType is the enumerator count.
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };

// Returns the enumerator name of `c`, or "UNKNOWN" for any other value
// (including MAX_CmdType).
const char* CmdTypeStr(CmdType c) {
  // Indexed by enumerator value; order must match the enum declaration above.
  static const char* const kCmdNames[MAX_CmdType] = {"COPY", "KERNEL",
                                                     "MODULE_KERNEL"};
  const int index = static_cast<int>(c);
  if (index < 0 || index >= MAX_CmdType) {
    return "UNKNOWN";
  }
  return kCmdNames[index];
}

// Kinds of synchronization applied between the two commands of a test step;
// MAX_SyncType is the enumerator count.
enum SyncType {
  NONE,
  EVENT_QUERY,
  EVENT_SYNC,
  STREAM_WAIT_EVENT,
  STREAM_QUERY,
  STREAM_SYNC,
  DEVICE_SYNC,
  MAX_SyncType
};

// Returns the enumerator name of `s`, or "UNKNOWN" for any other value
// (including MAX_SyncType).
const char* SyncTypeStr(SyncType s) {
  // Indexed by enumerator value; order must match the enum declaration above.
  static const char* const kSyncNames[MAX_SyncType] = {
      "NONE",         "EVENT_QUERY", "EVENT_SYNC", "STREAM_WAIT_EVENT",
      "STREAM_QUERY", "STREAM_SYNC", "DEVICE_SYNC"};
  const int index = static_cast<int>(s);
  if (index < 0 || index >= MAX_SyncType) {
    return "UNKNOWN";
  }
  return kSyncNames[index];
}
// Issues one copy of numElements ints from src to dst on stream `s`, using
// the mechanism selected by `cmd`:
//   COPY          - hipMemcpyAsync (device to device)
//   KERNEL        - memcpyIntKernel launched with hipLaunchKernelGGL
//   MODULE_KERNEL - "memcpyIntKernel" loaded from memcpyInt.hsaco
// Any other value only prints an informational message.
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
            size_t numElements) {
  if (cmd == COPY) {
    const size_t numBytes = numElements * sizeof(int);
    HIP_CHECK(hipMemcpyAsync(dst, src, numBytes, hipMemcpyDeviceToDevice, s));
  } else if (cmd == KERNEL) {
    const unsigned gridBlocks =
        HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
    hipLaunchKernelGGL(memcpyIntKernel, dim3(gridBlocks),
                       dim3(threadsPerBlock), 0, s, dst, src, numElements);
  } else if (cmd == MODULE_KERNEL) {
    // The module is loaded afresh for every call.
    MemcpyFunction moduleCopy("memcpyInt.hsaco", "memcpyIntKernel");
    moduleCopy.launch(dst, src, numElements, s);
  } else {
    printf("Info:unknown cmd=%d type", cmd);
  }
}
// Puts the test buffers into a known state before a run:
//   Ad - every element set to `expected`
//   Bd - filled with a poison value so a missing copy is detectable
//   Ch - every byte set to 13 with memset
// All kernels are launched on the null stream and the function returns only
// after hipDeviceSynchronize().
// NOTE(review): Bd is filled twice (0xDEADBEEF, then 0xF000BA55) and no Cd
// buffer is poisoned here - the second launch may have been meant for Cd;
// confirm with the test owner.
void resetInputs(int* Ad, int* Bd, int* Ch,
size_t numElements, int expected) {
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
threadsPerBlock, numElements);
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Ad, expected, numElements);
// poison with bad value to ensure is overwritten correctly
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
// This second fill overwrites the value written to Bd just above.
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
// Ch is written directly from the host.
memset(Ch, 13, numElements * sizeof(int));
HIP_CHECK(hipDeviceSynchronize());
}
// Intended to test proper synchronization and cache flushing between two
// commands, cmdA and cmdB, both of type CmdType. Every command copies memory,
// using either hipMemcpyAsync or a kernel implementation.
//
// Sequence:
//   1. resetInputs() fills Ad with `expected` and poisons Bd and Ch.
//   2. cmdA copies Ad to Bd on stream1.
//   3. The synchronization selected by syncType is applied.
//   4. cmdB copies Bd to Cd on stream2.
//   5. Cd is copied to host Ch with an async copy on stream2, stream2 is
//      synchronized, and Ch is checked.
// Correct result at the end is that Ch contains the contents originally
// written to Ad (`expected`).
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
                 hipStream_t stream1, hipStream_t stream2, int numElements,
                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
  hipEvent_t e;
  HIP_CHECK(hipEventCreateWithFlags(&e, 0));
  resetInputs(Ad, Bd, Ch, numElements, expected);
  const size_t sizeElements = numElements * sizeof(int);
  // The size is reported in MB (bytes / 1024 / 1024) to match the label.
  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n",  // NOLINT
          sizeElements,
          static_cast<double>(sizeElements) / (1024.0 * 1024.0),
          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
  /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
  fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
  return;
  }*/
  // Step A:
  runCmd(cmdAType, Bd, Ad, stream1, numElements);
  // Sync in-between?
  switch (syncType) {
    case NONE:
      break;
    case EVENT_QUERY: {
      // Poll the event recorded after cmdA until it is no longer pending.
      hipError_t st = hipErrorNotReady;
      HIP_CHECK(hipEventRecord(e, stream1));
      do {
        st = hipEventQuery(e);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case EVENT_SYNC:
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipEventSynchronize(e));
      break;
    case STREAM_WAIT_EVENT:
      // The host does not wait here; stream2 is made to wait on the event.
      HIP_CHECK(hipEventRecord(e, stream1));
      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
      break;
    case STREAM_QUERY: {
      // Poll stream1 until it is no longer pending.
      hipError_t st = hipErrorNotReady;
      do {
        st = hipStreamQuery(stream1);
      } while (st == hipErrorNotReady);
      HIP_CHECK(st);
    } break;
    case STREAM_SYNC:
      HIP_CHECK(hipStreamSynchronize(stream1));
      break;
    case DEVICE_SYNC:
      HIP_CHECK(hipDeviceSynchronize());
      break;
    default:
      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
      // Release the event before bailing out so this path does not leak it.
      HIP_CHECK(hipEventDestroy(e));
      return;
  }
  // Step B:
  runCmd(cmdBType, Cd, Bd, stream2, numElements);
  // Copy back to host, use async copy to avoid any extra synchronization
  // that might mask issues.
  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
                           stream2));
  HIP_CHECK(hipStreamSynchronize(stream2));
  checkReverse(Ch, numElements, expected);
  HIP_CHECK(hipEventDestroy(e));
}
// Allocates the buffers and streams for one data size and runs the test
// matrix on them.
// numElements - number of ints in each of the device buffers Ad, Bd, Cd and
//               in the host buffer Ch (allocated with hipHostMalloc).
// One fixed combination is run first; then every cmdA x cmdB pair is run with
// each sync mode listed in the switch below. All resources are released
// before returning.
void testWrapper(size_t numElements) {
const size_t sizeElements = numElements * sizeof(int);
// Value written to Ad by resetInputs() and expected in Ch after each run.
const int expected = 0x42;
int *Ad, *Bd, *Cd, *Ch;
HIP_CHECK(hipMalloc(&Ad, sizeElements));
HIP_CHECK(hipMalloc(&Bd, sizeElements));
HIP_CHECK(hipMalloc(&Cd, sizeElements));
HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
hipStream_t stream1, stream2;
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
HIP_CHECK(hipDeviceSynchronize());
// Single fixed combination run ahead of the exhaustive sweep.
runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
// Only the sync modes with an active case label are exercised; NONE and
// STREAM_QUERY are commented out and therefore fall into `default`.
switch (syncMode) {
// case NONE::
case EVENT_QUERY:
case EVENT_SYNC:
case STREAM_WAIT_EVENT:
// case STREAM_QUERY:
case STREAM_SYNC:
case DEVICE_SYNC:
runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
break;
default:
break;
}
}
}
}
// Disabled hand-picked combinations, kept for reference.
#if 0
runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
numElements, Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
numElements, Ad, Bd, Cd, Ch, expected);
runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
Ad, Bd, Cd, Ch, expected);
#endif
HIP_CHECK(hipFree(Ad));
HIP_CHECK(hipFree(Bd));
HIP_CHECK(hipFree(Cd));
HIP_CHECK(hipHostFree(Ch));
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
}
/**
* Test Description
* ------------------------
* - Test cache management (fences) and synchronization between
* kernel and copy commands. Exhaustively tests 3 command types
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
* another command, across a sweep of data sizes designed to stress
* various levels of the memory hierarchy.
* Test source
* ------------------------
* - catch/unit/synchronization/copy_coherency.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.5
*/
TEST_CASE("Unit_Copy_Coherency") {
  // Run the whole command/sync matrix once for every configured data size,
  // in the order the sizes appear in g_elementSizes.
  for (const int elementCount : g_elementSizes) {
    const size_t numElements = elementCount;
    testWrapper(numElements);
  }
}
+182 -182
Переглянути файл
@@ -1,182 +1,182 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
// Edge length of the square matrix that is transposed.
#define WIDTH 4
// Total number of matrix elements (WIDTH * WIDTH).
#define NUM (WIDTH * WIDTH)
// Block-shape constants: the kernel launch in runTest() uses a single block
// of THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y threads.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
// Not referenced by the code visible in this file.
#define THREADS_PER_BLOCK_Z 1
// Device (Kernel) function, it must be void
// Transposes a width x width matrix using __shfl.
// Each thread loads one input element (in[x], x being its global index) and
// then, for every output position (i, j), reads the value held by lane
// j * width + i through __shfl and stores it to out[i * width + j].
// Every launched thread runs the full double loop, so all of them write the
// complete output matrix.
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
// The value this thread contributes to the shuffles below.
T val = in[x];
for (int i = 0; i < width; i++) {
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
}
}
// Host-side reference transpose.
// Treats input and output as width x width arrays indexed [r * width + c]
// and writes output[c][r] = input[r][c] for every element.
template <typename T>
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const T* srcRow = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = srcRow[col];
    }
  }
}
// Per-type offset that init() adds to every element index when it fills the
// input matrix. One overload per tested element type; the result is written
// through the pointer argument.
static void getFactor(int* fact) { *fact = 101; }
// INT32_MAX + 1, computed in unsigned arithmetic.
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
// __LONG_LONG_MAX__ + 1, computed in uint64_t arithmetic.
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
// Returns the number of positions (out of NUM) at which the GPU result
// differs from the CPU reference.
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const bool differs = TransposeMatrix[idx] != cpuTransposeMatrix[idx];
    mismatches += differs ? 1 : 0;
  }
  return mismatches;
}

// __half variant: both values go through __half2float() before comparing.
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const float gpuValue = __half2float(TransposeMatrix[idx]);
    const float cpuValue = __half2float(cpuTransposeMatrix[idx]);
    if (gpuValue != cpuValue) {  // NOLINT
      ++mismatches;
    }
  }
  return mismatches;
}
// Fills Matrix[0 .. NUM) with (index + offset), where the offset is the
// per-type value supplied by getFactor().
template <typename T> void init(T* Matrix) {
  T offset;
  getFactor(&offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = static_cast<T>(idx) + offset;
  }
}

// __half variant: the sum is formed from the index and the offset converted
// with __half2float(), then assigned to the __half element.
template <> void init(__half* Matrix) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = idx + offsetAsFloat;
  }
}
template <typename T> static void runTest() {
T* Matrix;
T* TransposeMatrix;
T* cpuTransposeMatrix;
T* gpuMatrix;
T* gpuTransposeMatrix;
hipDeviceProp_t devProp;
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
int errors = 0;
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
init(Matrix);
// allocate the memory on the device side
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
// Lauching kernel from host
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
// free the resources on device side
HIP_CHECK(hipFree(gpuMatrix));
HIP_CHECK(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
}
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl(T var, int srcLane, int width=warpSize)` -
* Contains warp __shfl functions.
* @}
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflTests.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipShflTests") {
  // One section per element type; each runs the same transpose check.
  SECTION("run test for int") {
    runTest<int>();
  }
  SECTION("run test for float") {
    runTest<float>();
  }
  SECTION("run test for double") {
    runTest<double>();
  }
  // Test added to support half datatype.
  SECTION("run test for __half") {
    runTest<__half>();
  }
  SECTION("run test for int64_t") {
    runTest<int64_t>();
  }
  SECTION("run test for unsigned int") {
    runTest<unsigned int>();
  }
  SECTION("run test for uint64_t") {
    runTest<uint64_t>();
  }
}
/**
* End doxygen group ShflTest.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
// Edge length of the square matrix that is transposed.
#define WIDTH 4
// Total number of matrix elements (WIDTH * WIDTH).
#define NUM (WIDTH * WIDTH)
// Block-shape constants: the kernel launch in runTest() uses a single block
// of THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y threads.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
// Not referenced by the code visible in this file.
#define THREADS_PER_BLOCK_Z 1
// Device (Kernel) function, it must be void
// Transposes a width x width matrix using __shfl.
// Each thread loads one input element (in[x], x being its global index) and
// then, for every output position (i, j), reads the value held by lane
// j * width + i through __shfl and stores it to out[i * width + j].
// Every launched thread runs the full double loop, so all of them write the
// complete output matrix.
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
// The value this thread contributes to the shuffles below.
T val = in[x];
for (int i = 0; i < width; i++) {
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
}
}
// Host-side reference transpose.
// Treats input and output as width x width arrays indexed [r * width + c]
// and writes output[c][r] = input[r][c] for every element.
template <typename T>
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const T* srcRow = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = srcRow[col];
    }
  }
}
// Per-type offset that init() adds to every element index when it fills the
// input matrix. One overload per tested element type; the result is written
// through the pointer argument.
static void getFactor(int* fact) { *fact = 101; }
// INT32_MAX + 1, computed in unsigned arithmetic.
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
// __LONG_LONG_MAX__ + 1, computed in uint64_t arithmetic.
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
// Returns the number of positions (out of NUM) at which the GPU result
// differs from the CPU reference.
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const bool differs = TransposeMatrix[idx] != cpuTransposeMatrix[idx];
    mismatches += differs ? 1 : 0;
  }
  return mismatches;
}

// __half variant: both values go through __half2float() before comparing.
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const float gpuValue = __half2float(TransposeMatrix[idx]);
    const float cpuValue = __half2float(cpuTransposeMatrix[idx]);
    if (gpuValue != cpuValue) {  // NOLINT
      ++mismatches;
    }
  }
  return mismatches;
}
// Fills Matrix[0 .. NUM) with (index + offset), where the offset is the
// per-type value supplied by getFactor().
template <typename T> void init(T* Matrix) {
  T offset;
  getFactor(&offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = static_cast<T>(idx) + offset;
  }
}

// __half variant: the sum is formed from the index and the offset converted
// with __half2float(), then assigned to the __half element.
template <> void init(__half* Matrix) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = idx + offsetAsFloat;
  }
}
template <typename T> static void runTest() {
T* Matrix;
T* TransposeMatrix;
T* cpuTransposeMatrix;
T* gpuMatrix;
T* gpuTransposeMatrix;
hipDeviceProp_t devProp;
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
int errors = 0;
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
init(Matrix);
// allocate the memory on the device side
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
// Lauching kernel from host
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
// free the resources on device side
HIP_CHECK(hipFree(gpuMatrix));
HIP_CHECK(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
}
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl(T var, int srcLane, int width=warpSize)` -
* Contains warp __shfl functions.
* @}
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflTests.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
*/
TEST_CASE("Unit_hipShflTests") {
// One SECTION per element type; each instantiates runTest<T>.
SECTION("run test for int") { runTest<int>(); }
SECTION("run test for float") { runTest<float>(); }
SECTION("run test for double") { runTest<double>(); }
// Test added to support half datatype.
SECTION("run test for __half") { runTest<__half>(); }
SECTION("run test for int64_t") { runTest<int64_t>(); }
SECTION("run test for unsigned int") { runTest<unsigned int>(); }
SECTION("run test for uint64_t") { runTest<uint64_t>(); }
}
/**
* End doxygen group ShflTest.
* @}
*/
+241 -241
Переглянути файл
@@ -1,241 +1,241 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
// Element count of every test array; also passed as the thread count and as
// the shuffle width of each kernel launch in this file.
const int size = 32;
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_down for a halving offset (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflDownSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    acc += __shfl_down(acc, offset, size);
    offset /= 2;
  }
  a[lane] = acc;
}
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_up for a halving offset (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflUpSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    acc += __shfl_up(acc, offset, size);
    offset /= 2;
  }
  a[lane] = acc;
}
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_xor for a halving lane mask (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflXorSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int laneMask = size / 2;
  while (laneMask > 0) {
    acc += __shfl_xor(acc, laneMask, size);
    laneMask /= 2;
  }
  a[lane] = acc;
}
// getFactor overload set: writes the per-type offset that sum() adds to each
// element index when it fills the test array.
static void getFactor(int* fact) { *fact = 101; }
// One past INT32_MAX, so the generated values lie above the signed 32-bit range.
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
// One past __LONG_LONG_MAX__ (presumably 2^63, assuming a 64-bit long long).
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
// Fills a[0..size) with index + per-type offset (from getFactor) and returns
// the running total of the stored values, accumulated in T.
template <typename T> T sum(T* a) {
  T offset;
  getFactor(&offset);
  T total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offset;
    total += a[idx];
  }
  return total;
}
// __half specialization: element values and the running total are formed in
// float and stored back into __half after every step.
template <> __half sum(__half* a) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  __half total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offsetAsFloat;
    total = __half2float(total) + __half2float(a[idx]);
  }
  return total;
}
// Returns true when the two sums DIFFER (i.e. true signals a mismatch).
template <typename T> bool compare(T gpuSum, T cpuSum) {
  const bool mismatch = (gpuSum != cpuSum);
  return mismatch;
}
// __half specialization: converts both sums to float and returns true when
// they DIFFER (i.e. true signals a mismatch).
template <> bool compare(__half gpuSum, __half cpuSum) {
  const float gpuValue = __half2float(gpuSum);
  const float cpuValue = __half2float(cpuSum);
  return gpuValue != cpuValue;
}
template <typename T> static void runTestShflUp() {
const int size = 32;
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
template <typename T> static void runTestShflDown() {
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[0], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
template <typename T> static void runTestShflXor() {
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[0], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_up function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_up warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_up") {
// One SECTION per element type; each instantiates runTestShflUp<T>.
SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_down function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_down warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_Down") {
// One SECTION per element type; each instantiates runTestShflDown<T>.
SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
* Contains warp __shfl_xor function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_xor warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_Xor") {
// One SECTION per element type; each instantiates runTestShflXor<T>.
SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
// Element count of every test array; also passed as the thread count and as
// the shuffle width of each kernel launch in this file.
const int size = 32;
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_down for a halving offset (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflDownSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    acc += __shfl_down(acc, offset, size);
    offset /= 2;
  }
  a[lane] = acc;
}
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_up for a halving offset (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflUpSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    acc += __shfl_up(acc, offset, size);
    offset /= 2;
  }
  a[lane] = acc;
}
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned
// by __shfl_xor for a halving lane mask (size/2, size/4, ..., 1) and finally
// writes its accumulated value back to the same slot.
template <typename T> __global__ void shflXorSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T acc = a[lane];
  int laneMask = size / 2;
  while (laneMask > 0) {
    acc += __shfl_xor(acc, laneMask, size);
    laneMask /= 2;
  }
  a[lane] = acc;
}
// getFactor overload set: writes the per-type offset that sum() adds to each
// element index when it fills the test array.
static void getFactor(int* fact) { *fact = 101; }
// One past INT32_MAX, so the generated values lie above the signed 32-bit range.
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
// One past __LONG_LONG_MAX__ (presumably 2^63, assuming a 64-bit long long).
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
// Fills a[0..size) with index + per-type offset (from getFactor) and returns
// the running total of the stored values, accumulated in T.
template <typename T> T sum(T* a) {
  T offset;
  getFactor(&offset);
  T total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offset;
    total += a[idx];
  }
  return total;
}
// __half specialization: element values and the running total are formed in
// float and stored back into __half after every step.
template <> __half sum(__half* a) {
  __half offset;
  getFactor(&offset);
  const float offsetAsFloat = __half2float(offset);
  __half total = 0;
  for (int idx = 0; idx < size; ++idx) {
    a[idx] = idx + offsetAsFloat;
    total = __half2float(total) + __half2float(a[idx]);
  }
  return total;
}
// Returns true when the two sums DIFFER (i.e. true signals a mismatch).
template <typename T> bool compare(T gpuSum, T cpuSum) {
  const bool mismatch = (gpuSum != cpuSum);
  return mismatch;
}
// __half specialization: converts both sums to float and returns true when
// they DIFFER (i.e. true signals a mismatch).
template <> bool compare(__half gpuSum, __half cpuSum) {
  const float gpuValue = __half2float(gpuSum);
  const float cpuValue = __half2float(cpuSum);
  return gpuValue != cpuValue;
}
template <typename T> static void runTestShflUp() {
const int size = 32;
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
template <typename T> static void runTestShflDown() {
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[0], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
template <typename T> static void runTestShflXor() {
T a[size];
T cpuSum = sum(a);
T* d_a;
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
REQUIRE((compare(a[0], cpuSum)) == 0);
HIP_CHECK(hipFree(d_a));
}
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_up function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_up warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_up") {
// One SECTION per element type; each instantiates runTestShflUp<T>.
SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_down function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_down warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_Down") {
// One SECTION per element type; each instantiates runTestShflDown<T>.
SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
* Contains warp __shfl_xor function
*/
/**
* Test Description
* ------------------------
* - Test case to verify __shfl_xor warp functions for different datatypes.
* Test source
* ------------------------
* - catch/unit/kernel/hipShflUpDownTest.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.6
* - Guarding this test against CUDA with reference to the mentioned
* ticket SWDEV-379177
*/
TEST_CASE("Unit_runTestShfl_Xor") {
// One SECTION per element type; each instantiates runTestShflXor<T>.
SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
}
/**
* End doxygen group __shfl.
* @}
*/
+437 -437
Переглянути файл
@@ -1,437 +1,437 @@
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include <iostream>
#include <chrono>
// Edge lengths for the 2D and 3D memset tests; run2D/run3D use the selected
// value for both the width and the height of the buffer.
static unsigned int sizeList[] = {
256, 512, 1024, 2048, 4096, 8192,
};
// Element counts for the 1D tests; run1D multiplies the selected count by
// sizeof(uint32_t) to obtain the buffer size in bytes.
static unsigned int eleNumList[] = {
0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000,
0x200000, 0x400000, 0x800000, 0x1000000
};
// Fill patterns handed to the memset tests, one member per element width.
struct _dataType {
  char memsetval = 0x42;          // pattern for the plain hipMemset variants
  char memsetD8val = 0xDE;        // 8-bit pattern
  int16_t memsetD16val = 0xDEAD;  // 16-bit pattern
  int memsetD32val = 0xDEADBEEF;  // 32-bit pattern
};
using dataType = _dataType;
// Number of memset calls issued inside each timed loop.
#define NUM_ITER 1000
// Selects which memset API family a test exercises. hipMemsetTypeMax is not
// an API: it is the number of real entries and is used to size the 1D test
// list.
enum MemsetType {
  hipMemsetTypeDefault = 0,
  hipMemsetTypeD8 = 1,
  hipMemsetTypeD16 = 2,
  hipMemsetTypeD32 = 3,
  hipMemsetTypeMax = 4
};
using namespace std;
// Driver for the memset bandwidth measurements. The constructor derives the
// number of 1D/2D/3D sub-tests from the size tables; run1D/run2D/run3D each
// time one configuration and print the achieved throughput.
class hipPerfMemset {
 public:
  hipPerfMemset() {
    const unsigned int elementCases = sizeof(eleNumList) / sizeof(unsigned int);
    const unsigned int edgeCases = sizeof(sizeList) / sizeof(unsigned int);

    num_elements_ = elementCases;
    // Every element count is exercised once per MemsetType entry.
    _numSubTests = elementCases * hipMemsetTypeMax;

    num_sizes_ = edgeCases;
    _numSubTests2D = edgeCases;
    _numSubTests3D = edgeCases;
  }

  ~hipPerfMemset() {}

  // Checks that a GPU is present, selects `deviceID` and prints device info.
  void open(int deviceID);

  template<typename T>
  void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  template<typename T>
  void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  template<typename T>
  void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  uint getNumTests() { return _numSubTests; }
  uint getNumTests2D() { return _numSubTests2D; }
  uint getNumTests3D() { return _numSubTests3D; }

 private:
  uint64_t bufSize_;           // size value of the test currently running
  unsigned int num_elements_;  // entries in eleNumList
  unsigned int testNumEle_;    // element count selected by run1D
  unsigned int _numSubTests = 0;
  unsigned int _numSubTests2D = 0;
  unsigned int _numSubTests3D = 0;
  unsigned int num_sizes_ = 0;  // entries in sizeList
};
// Verifies that at least one GPU is visible, makes `deviceId` the current
// device and prints its PCI bus id, name and compute-unit count.
void hipPerfMemset::open(int deviceId) {
  int nGpu = 0;
  HIPCHECK(hipGetDeviceCount(&nGpu));
  if (nGpu < 1) {
    failed("No GPU!");
  }

  HIPCHECK(hipSetDevice(deviceId));
  hipDeviceProp_t props = {0};
  HIPCHECK(hipGetDeviceProperties(&props, deviceId));
  // The bus id follows a literal "0x", so it has to be written in hex; the
  // stream is switched back to decimal for the remaining numbers.
  std::cout << "info: running on bus " << "0x" << std::hex << props.pciBusID << std::dec
            << " " << props.name
            << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
            << std::endl;
}
template<typename T>
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
T * A_h;
T * A_d;
testNumEle_ = eleNumList[test % num_elements_];
bufSize_ = testNumEle_ * sizeof(uint32_t);
HIPCHECK(hipMalloc(&A_d, bufSize_));
A_h = reinterpret_cast<T*> (malloc(bufSize_));
hipStream_t stream;
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
// Warm-up
if (async) {
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
HIPCHECK(hipDeviceSynchronize());
}
auto start = chrono::high_resolution_clock::now();
for (uint i = 0; i < NUM_ITER; i++) {
if (type == hipMemsetTypeDefault && !async) {
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
}
else if (type == hipMemsetTypeDefault && async) {
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
}
else if (type == hipMemsetTypeD8 && !async){
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
}
else if (type == hipMemsetTypeD8 && async) {
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
}
else if (type == hipMemsetTypeD16 && !async) {
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
}
else if (type == hipMemsetTypeD16 && async) {
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
}
else if (type == hipMemsetTypeD32 && !async) {
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
}
else if (type == hipMemsetTypeD32 && async) {
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
}
}
if (async) {
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipDeviceSynchronize());
}
auto end = chrono::high_resolution_clock::now();
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
if (A_h[i] != memsetval) {
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
break;
}
}
HIPCHECK(hipFree(A_d));
free(A_h);
auto diff = std::chrono::duration<double>(end - start);
auto sec = diff.count();
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
}
// Times NUM_ITER calls of hipMemset2D / hipMemset2DAsync on a pitched square
// buffer, checks the content against `memsetval` (a mismatch is only
// reported) and prints the achieved bandwidth.
//   test      - index into sizeList (wrapped with modulo); the selected value
//               is used as both width and height
//   memsetval - fill pattern
//   type      - only hipMemsetTypeDefault issues work in the timed loop
//   async     - use the Async variant on a dedicated non-blocking stream
template<typename T>
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
  // All sizes below are computed in bytes, so the host buffer is only large
  // enough when T occupies a single byte.
  static_assert(sizeof(T) == sizeof(char), "run2D supports only 1-byte element types");

  bufSize_ = sizeList[test % num_sizes_];
  size_t numH = bufSize_;
  size_t numW = bufSize_;
  size_t pitch_A;
  size_t width = numW * sizeof(char);
  size_t sizeElements = width * numH;
  size_t elements = numW* numH;

  T * A_h;
  T * A_d;
  HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
                          numH));
  A_h = reinterpret_cast<T*>(malloc(sizeElements));
  // The host buffer is written right below; do not continue without it.
  HIPASSERT(A_h != NULL);
  for (size_t i=0; i < elements; i++) {
    A_h[i] = 1;
  }

  hipStream_t stream;
  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));

  // Warm-up
  if (async) {
    HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
    HIPCHECK(hipStreamSynchronize(stream));
  } else {
    HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
    HIPCHECK(hipDeviceSynchronize());
  }

  auto start = chrono::steady_clock::now();
  for (uint i = 0; i < NUM_ITER; i++) {
    if (type == hipMemsetTypeDefault && !async) {
      HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
    }
    else if (type == hipMemsetTypeDefault && async) {
      HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
    }
  }
  // Wait for all queued work so the timing covers the whole batch.
  if (async) {
    HIPCHECK(hipStreamSynchronize(stream));
  } else {
    HIPCHECK(hipDeviceSynchronize());
  }
  auto end = chrono::steady_clock::now();

  HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
                       hipMemcpyDeviceToHost));

  // Unsigned index: the bound is an unsigned element count.
  for (size_t i=0; i < elements; i++) {
    if (A_h[i] != memsetval) {
      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
           << ", memsetval: " << static_cast<int> (memsetval) << endl;
      break;
    }
  }

  chrono::duration<double> diff = end - start;
  auto sec = diff.count();
  auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
  cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
       << " " << "(GB/s) for " << setw(5) << bufSize_
       << " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;

  HIPCHECK(hipStreamDestroy(stream));
  HIPCHECK(hipFree(A_d));
  free(A_h);
}
// Times NUM_ITER calls of hipMemset3D / hipMemset3DAsync on a pitched
// size x size x 10 buffer, checks the content against `memsetval`
// (a mismatch is only reported) and prints the achieved bandwidth.
//   test      - index into sizeList (wrapped with modulo); the selected value
//               is used as both width and height
//   memsetval - fill pattern
//   type      - only hipMemsetTypeDefault issues work in the timed loop
//   async     - use the Async variant on a dedicated non-blocking stream
template<typename T>
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
  // All sizes below are computed in bytes, so the host buffer is only large
  // enough when T occupies a single byte.
  static_assert(sizeof(T) == sizeof(char), "run3D supports only 1-byte element types");

  bufSize_ = sizeList[test % num_sizes_];
  size_t numH = bufSize_;
  size_t numW = bufSize_;
  size_t depth = 10;
  size_t width = numW * sizeof(char);
  size_t sizeElements = width * numH * depth;
  size_t elements = numW* numH* depth;

  hipStream_t stream;
  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));

  T *A_h;
  hipExtent extent = make_hipExtent(width, numH, depth);
  hipPitchedPtr devPitchedPtr;
  HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
  A_h = reinterpret_cast<T*>(malloc(sizeElements));
  HIPASSERT(A_h != NULL);
  for (size_t i=0; i<elements; i++) {
    A_h[i] = 1;
  }

  // Warm-up
  if (async) {
    HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
    HIPCHECK(hipStreamSynchronize(stream));
  } else {
    HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
    HIPCHECK(hipDeviceSynchronize());
  }

  auto start = chrono::steady_clock::now();
  for (uint i = 0; i < NUM_ITER; i++) {
    if (type == hipMemsetTypeDefault && !async) {
      HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
    }
    else if (type == hipMemsetTypeDefault && async) {
      HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
    }
  }
  // Wait for all queued work so the timing covers the whole batch.
  if (async) {
    HIPCHECK(hipStreamSynchronize(stream));
  } else {
    HIPCHECK(hipDeviceSynchronize());
  }
  auto end = chrono::steady_clock::now();

  // Copy the pitched device volume back into the tightly packed host buffer.
  hipMemcpy3DParms myparms = {0};
  myparms.srcPos = make_hipPos(0,0,0);
  myparms.dstPos = make_hipPos(0,0,0);
  myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
  myparms.srcPtr = devPitchedPtr;
  myparms.extent = extent;
  myparms.kind = hipMemcpyDeviceToHost;
  HIPCHECK(hipMemcpy3D(&myparms));

  // Unsigned index: the bound is an unsigned element count.
  for (size_t i=0; i<elements; i++) {
    if (A_h[i] != memsetval) {
      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
           << ", memsetval: " << static_cast<int> (memsetval) << endl;
      break;
    }
  }

  chrono::duration<double> diff = end - start;
  auto sec = diff.count();
  auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
  cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
       << "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
       << bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;

  // Release the stream as well; it was previously leaked on every call.
  HIPCHECK(hipStreamDestroy(stream));
  HIPCHECK(hipFree(devPitchedPtr.ptr));
  free(A_h);
}
int main() {
hipPerfMemset hipPerfMemset;
dataType pattern;
int deviceId = 0;
hipPerfMemset.open(deviceId);
MemsetType type;
int numTests = hipPerfMemset.getNumTests();
int numTests2D = hipPerfMemset.getNumTests2D();
int numTests3D = hipPerfMemset.getNumTests3D();
cout << "--------------------- 1D buffer -------------------" << endl;
bool async= false;
for (uint i = 0; i < 2 ; i++) {
cout << endl;
for (auto testCase = 0; testCase < numTests; testCase++) {
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
}
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
}
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
}
else {
cout << "API: hipMemset" << (async ? "Async " : " ");
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
}
}
async = true;
}
cout << endl;
cout << "------------------ 2D buffer arrays ---------------" << endl;
async = false;
for (uint i = 0; i < 2; i++) {
cout << endl;
for (uint test = 0; test < numTests2D; test++) {
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
}
async = true;
}
cout << endl;
cout << "------------------ 3D buffer arrays ---------------" << endl;
async = false;
for (uint i = 0; i < 2; i++) {
cout << endl;
for (uint test =0; test < numTests3D; test++) {
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
}
async = true;
}
passed();
}
/*
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../../src/test_common.cpp
* TEST: %t
* HIT_END
*/
#include "test_common.h"
#include <iostream>
#include <chrono>
// Edge lengths for the 2D and 3D memset tests; run2D/run3D use the selected
// value for both the width and the height of the buffer.
static unsigned int sizeList[] = {
256, 512, 1024, 2048, 4096, 8192,
};
// Element counts for the 1D tests; run1D multiplies the selected count by
// sizeof(uint32_t) to obtain the buffer size in bytes.
static unsigned int eleNumList[] = {
0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000,
0x200000, 0x400000, 0x800000, 0x1000000
};
// Fill patterns handed to the memset tests, one member per element width.
struct _dataType {
  char memsetval = 0x42;          // pattern for the plain hipMemset variants
  char memsetD8val = 0xDE;        // 8-bit pattern
  int16_t memsetD16val = 0xDEAD;  // 16-bit pattern
  int memsetD32val = 0xDEADBEEF;  // 32-bit pattern
};
using dataType = _dataType;
// Number of memset calls issued inside each timed loop.
#define NUM_ITER 1000
// Selects which memset API family a test exercises. hipMemsetTypeMax is not
// an API: it is the number of real entries and is used to size the 1D test
// list.
enum MemsetType {
  hipMemsetTypeDefault = 0,
  hipMemsetTypeD8 = 1,
  hipMemsetTypeD16 = 2,
  hipMemsetTypeD32 = 3,
  hipMemsetTypeMax = 4
};
using namespace std;
// Driver for the memset bandwidth measurements. The constructor derives the
// number of 1D/2D/3D sub-tests from the size tables; run1D/run2D/run3D each
// time one configuration and print the achieved throughput.
class hipPerfMemset {
 public:
  hipPerfMemset() {
    const unsigned int elementCases = sizeof(eleNumList) / sizeof(unsigned int);
    const unsigned int edgeCases = sizeof(sizeList) / sizeof(unsigned int);

    num_elements_ = elementCases;
    // Every element count is exercised once per MemsetType entry.
    _numSubTests = elementCases * hipMemsetTypeMax;

    num_sizes_ = edgeCases;
    _numSubTests2D = edgeCases;
    _numSubTests3D = edgeCases;
  }

  ~hipPerfMemset() {}

  // Checks that a GPU is present, selects `deviceID` and prints device info.
  void open(int deviceID);

  template<typename T>
  void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  template<typename T>
  void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  template<typename T>
  void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);

  uint getNumTests() { return _numSubTests; }
  uint getNumTests2D() { return _numSubTests2D; }
  uint getNumTests3D() { return _numSubTests3D; }

 private:
  uint64_t bufSize_;           // size value of the test currently running
  unsigned int num_elements_;  // entries in eleNumList
  unsigned int testNumEle_;    // element count selected by run1D
  unsigned int _numSubTests = 0;
  unsigned int _numSubTests2D = 0;
  unsigned int _numSubTests3D = 0;
  unsigned int num_sizes_ = 0;  // entries in sizeList
};
// Verifies that at least one GPU is visible, makes `deviceId` the current
// device and prints its PCI bus id, name and compute-unit count.
void hipPerfMemset::open(int deviceId) {
  int nGpu = 0;
  HIPCHECK(hipGetDeviceCount(&nGpu));
  if (nGpu < 1) {
    failed("No GPU!");
  }

  HIPCHECK(hipSetDevice(deviceId));
  hipDeviceProp_t props = {0};
  HIPCHECK(hipGetDeviceProperties(&props, deviceId));
  // The bus id follows a literal "0x", so it has to be written in hex; the
  // stream is switched back to decimal for the remaining numbers.
  std::cout << "info: running on bus " << "0x" << std::hex << props.pciBusID << std::dec
            << " " << props.name
            << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
            << std::endl;
}
template<typename T>
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
T * A_h;
T * A_d;
testNumEle_ = eleNumList[test % num_elements_];
bufSize_ = testNumEle_ * sizeof(uint32_t);
HIPCHECK(hipMalloc(&A_d, bufSize_));
A_h = reinterpret_cast<T*> (malloc(bufSize_));
hipStream_t stream;
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
// Warm-up
if (async) {
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
HIPCHECK(hipDeviceSynchronize());
}
auto start = chrono::high_resolution_clock::now();
for (uint i = 0; i < NUM_ITER; i++) {
if (type == hipMemsetTypeDefault && !async) {
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
}
else if (type == hipMemsetTypeDefault && async) {
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
}
else if (type == hipMemsetTypeD8 && !async){
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
}
else if (type == hipMemsetTypeD8 && async) {
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
}
else if (type == hipMemsetTypeD16 && !async) {
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
}
else if (type == hipMemsetTypeD16 && async) {
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
}
else if (type == hipMemsetTypeD32 && !async) {
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
}
else if (type == hipMemsetTypeD32 && async) {
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
}
}
if (async) {
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipDeviceSynchronize());
}
auto end = chrono::high_resolution_clock::now();
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
if (A_h[i] != memsetval) {
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
break;
}
}
HIPCHECK(hipFree(A_d));
free(A_h);
auto diff = std::chrono::duration<double>(end - start);
auto sec = diff.count();
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
}
template<typename T>
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
bufSize_ = sizeList[test % num_sizes_];
size_t numH = bufSize_;
size_t numW = bufSize_;
size_t pitch_A;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH;
size_t elements = numW* numH;
T * A_h;
T * A_d;
HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
numH));
A_h = reinterpret_cast<char*>(malloc(sizeElements));
for (size_t i=0; i < elements; i++) {
A_h[i] = 1;
}
hipStream_t stream;
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
// Warm-up
if (async) {
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
HIPCHECK(hipDeviceSynchronize());
}
auto start = chrono::steady_clock::now();
for (uint i = 0; i < NUM_ITER; i++) {
if (type == hipMemsetTypeDefault && !async) {
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
}
else if (type == hipMemsetTypeDefault && async) {
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
}
}
if (async) {
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipDeviceSynchronize());
}
auto end = chrono::steady_clock::now();
HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
hipMemcpyDeviceToHost));
for (int i=0; i < elements; i++) {
if (A_h[i] != memsetval) {
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
break;
}
}
chrono::duration<double> diff = end - start;
auto sec = diff.count();
auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
<< " " << "(GB/s) for " << setw(5) << bufSize_
<< " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;
HIPCHECK(hipStreamDestroy(stream));
HIPCHECK(hipFree(A_d));
free(A_h);
}
template<typename T>
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
bufSize_ = sizeList[test % num_sizes_];
size_t numH = bufSize_;
size_t numW = bufSize_;
size_t depth = 10;
size_t width = numW * sizeof(char);
size_t sizeElements = width * numH * depth;
size_t elements = numW* numH* depth;
hipStream_t stream;
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
T *A_h;
hipExtent extent = make_hipExtent(width, numH, depth);
hipPitchedPtr devPitchedPtr;
HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
A_h = (char*)malloc(sizeElements);
HIPASSERT(A_h != NULL);
for (size_t i=0; i<elements; i++) {
A_h[i] = 1;
}
// Warm-up
if (async) {
HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
HIPCHECK(hipDeviceSynchronize());
}
auto start = chrono::steady_clock::now();
for (uint i = 0; i < NUM_ITER; i++) {
if (type == hipMemsetTypeDefault && !async) {
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
}
else if (type == hipMemsetTypeDefault && async) {
HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
}
}
if (async) {
HIPCHECK(hipStreamSynchronize(stream));
} else {
HIPCHECK(hipDeviceSynchronize());
}
auto end = chrono::steady_clock::now();
hipMemcpy3DParms myparms = {0};
myparms.srcPos = make_hipPos(0,0,0);
myparms.dstPos = make_hipPos(0,0,0);
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
myparms.srcPtr = devPitchedPtr;
myparms.extent = extent;
myparms.kind = hipMemcpyDeviceToHost;
HIPCHECK(hipMemcpy3D(&myparms));
for (int i=0; i<elements; i++) {
if (A_h[i] != memsetval) {
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
break;
}
}
chrono::duration<double> diff = end - start;
auto sec = diff.count();
auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
<< "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
<< bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;
HIPCHECK(hipFree(devPitchedPtr.ptr));
free(A_h);
}
int main() {
hipPerfMemset hipPerfMemset;
dataType pattern;
int deviceId = 0;
hipPerfMemset.open(deviceId);
MemsetType type;
int numTests = hipPerfMemset.getNumTests();
int numTests2D = hipPerfMemset.getNumTests2D();
int numTests3D = hipPerfMemset.getNumTests3D();
cout << "--------------------- 1D buffer -------------------" << endl;
bool async= false;
for (uint i = 0; i < 2 ; i++) {
cout << endl;
for (auto testCase = 0; testCase < numTests; testCase++) {
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
}
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
}
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
}
else {
cout << "API: hipMemset" << (async ? "Async " : " ");
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
}
}
async = true;
}
cout << endl;
cout << "------------------ 2D buffer arrays ---------------" << endl;
async = false;
for (uint i = 0; i < 2; i++) {
cout << endl;
for (uint test = 0; test < numTests2D; test++) {
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
}
async = true;
}
cout << endl;
cout << "------------------ 3D buffer arrays ---------------" << endl;
async = false;
for (uint i = 0; i < 2; i++) {
cout << endl;
for (uint test =0; test < numTests3D; test++) {
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
}
async = true;
}
passed();
}
+1 -1
Переглянути файл
@@ -41,4 +41,4 @@ cmake ../samples
make package_samples
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment line 43 in samples/2_Cookbook/CMakeLists.txt.
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment line 43 in samples/2_Cookbook/CMakeLists.txt.