SWDEV-472723 - Correct file format and remove trailing spaces
Change-Id: Ie40c763e9391fa36d6c890cd0a171659a1502a83
[ROCm/hip-tests commit: 5d042c80fa]
This commit is contained in:
@@ -0,0 +1,20 @@
|
|||||||
|
# Set the default behavior, in case people don't have core.autocrlf set.
|
||||||
|
* text=auto
|
||||||
|
|
||||||
|
# Explicitly declare text files you want to always be normalized and converted
|
||||||
|
# to have LF line endings on checkout.
|
||||||
|
*.c text eol=lf
|
||||||
|
*.cpp text eol=lf
|
||||||
|
*.cc text eol=lf
|
||||||
|
*.h text eol=lf
|
||||||
|
*.hpp text eol=lf
|
||||||
|
*.txt text eol=lf
|
||||||
|
|
||||||
|
# Define the files from which trailing whitespace is removed automatically.
|
||||||
|
# Run the command below before adding modified file(s) to the staging area:
|
||||||
|
# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
|
||||||
|
*.cpp filter=trimspace
|
||||||
|
*.c filter=trimspace
|
||||||
|
*.h filter=trimspace
|
||||||
|
*.hpp filter=trimspace
|
||||||
|
*.md filter=trimspace
|
||||||
@@ -180,7 +180,7 @@ hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests
|
|||||||
## Debugging support
|
## Debugging support
|
||||||
Catch2 allows multiple ways in which you can debug the test case.
|
Catch2 allows multiple ways in which you can debug the test case.
|
||||||
- The `-b` option breaks into a debugger as soon as a failure is encountered; see the [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger).
|
- The `-b` option breaks into a debugger as soon as a failure is encountered; see the [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger).
|
||||||
- Catch2 provides [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure.
|
- Catch2 provides [logging macros](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure.
|
||||||
- Users can also call the [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
|
- Users can also call the [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
|
||||||
- Users can also set a gdb breakpoint at filename.cc:__LineNumber__ to break into a test case.
|
- Users can also set a gdb breakpoint at filename.cc:__LineNumber__ to break into a test case.
|
||||||
|
|
||||||
|
|||||||
@@ -1,119 +1,119 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
|
|
||||||
// Test case to validate atomicInc and atomicDec functions.
|
// Test case to validate atomicInc and atomicDec functions.
|
||||||
// if TestToRun=1, then atomicInc function will be tested and validated
|
// if TestToRun=1, then atomicInc function will be tested and validated
|
||||||
// if TestToRun=2, then atomicDec function will be tested and validated.
|
// if TestToRun=2, then atomicDec function will be tested and validated.
|
||||||
|
|
||||||
|
|
||||||
// kernel function for atomicInc
|
// kernel function for atomicInc
|
||||||
static __global__ void AtomicCheckInc(int* g_ptr) {
|
static __global__ void AtomicCheckInc(int* g_ptr) {
|
||||||
atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
|
atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
|
||||||
}
|
}
|
||||||
|
|
||||||
// kernel function for atomicDec
|
// kernel function for atomicDec
|
||||||
static __global__ void AtomicCheckDec(int* g_ptr) {
|
static __global__ void AtomicCheckDec(int* g_ptr) {
|
||||||
atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
|
atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
|
||||||
}
|
}
|
||||||
|
|
||||||
// verify results for atomicInc
|
// verify results for atomicInc
|
||||||
static int verifyResultInc(int value) {
|
static int verifyResultInc(int value) {
|
||||||
int limit = 17;
|
int limit = 17;
|
||||||
value = (value >= limit) ? 0 : value + 1;
|
value = (value >= limit) ? 0 : value + 1;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// verify results for atomicDec
|
// verify results for atomicDec
|
||||||
static int verifyResultDec(int value) {
|
static int verifyResultDec(int value) {
|
||||||
int limit = 25;
|
int limit = 25;
|
||||||
value = ((value == 0) || (value > limit)) ? limit : value - 1;
|
value = ((value == 0) || (value > limit)) ? limit : value - 1;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Common function to launch the atomic function kernels and verify the results.
|
// Common function to launch the atomic function kernels and verify the results.
|
||||||
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
|
static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
|
||||||
unsigned int memSize = sizeof(int) * 1;
|
unsigned int memSize = sizeof(int) * 1;
|
||||||
int *dptr{nullptr};
|
int *dptr{nullptr};
|
||||||
// allocate device memory
|
// allocate device memory
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
|
||||||
// copy host memory to device
|
// copy host memory to device
|
||||||
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
|
||||||
// launch kernel function
|
// launch kernel function
|
||||||
if (TestToRun == 1) {
|
if (TestToRun == 1) {
|
||||||
AtomicCheckInc<<<1, 1>>>(dptr);
|
AtomicCheckInc<<<1, 1>>>(dptr);
|
||||||
} else if (TestToRun == 2) {
|
} else if (TestToRun == 2) {
|
||||||
AtomicCheckDec<<<1, 1>>>(dptr);
|
AtomicCheckDec<<<1, 1>>>(dptr);
|
||||||
}
|
}
|
||||||
// copy back from device to host
|
// copy back from device to host
|
||||||
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
|
||||||
// verify the results.
|
// verify the results.
|
||||||
if (TestToRun == 1) {
|
if (TestToRun == 1) {
|
||||||
int result = verifyResultInc(val);
|
int result = verifyResultInc(val);
|
||||||
REQUIRE(result == Hptr[0]);
|
REQUIRE(result == Hptr[0]);
|
||||||
} else if (TestToRun == 2) {
|
} else if (TestToRun == 2) {
|
||||||
int result = verifyResultDec(val);
|
int result = verifyResultDec(val);
|
||||||
REQUIRE(result == Hptr[0]);
|
REQUIRE(result == Hptr[0]);
|
||||||
}
|
}
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
HIP_CHECK(hipFree(dptr));
|
HIP_CHECK(hipFree(dptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_AtomicFunctions_Inc") {
|
TEST_CASE("Unit_AtomicFunctions_Inc") {
|
||||||
int *Hptr{nullptr};
|
int *Hptr{nullptr};
|
||||||
int val;
|
int val;
|
||||||
// Allocate Host memory
|
// Allocate Host memory
|
||||||
Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
|
Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
|
||||||
SECTION("Test case when value is lesser than limit") {
|
SECTION("Test case when value is lesser than limit") {
|
||||||
val = Hptr[0] = 10;
|
val = Hptr[0] = 10;
|
||||||
launchAtomicFunction(Hptr, val, 1);
|
launchAtomicFunction(Hptr, val, 1);
|
||||||
}
|
}
|
||||||
SECTION("Test case when value is greater than limit") {
|
SECTION("Test case when value is greater than limit") {
|
||||||
val = Hptr[0] = 20;
|
val = Hptr[0] = 20;
|
||||||
launchAtomicFunction(Hptr, val, 1);
|
launchAtomicFunction(Hptr, val, 1);
|
||||||
}
|
}
|
||||||
SECTION("Test case when value is equal to the limit") {
|
SECTION("Test case when value is equal to the limit") {
|
||||||
val = Hptr[0] = 17;
|
val = Hptr[0] = 17;
|
||||||
launchAtomicFunction(Hptr, val, 1);
|
launchAtomicFunction(Hptr, val, 1);
|
||||||
}
|
}
|
||||||
free(Hptr);
|
free(Hptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_AtomicFunctions_Dec") {
|
TEST_CASE("Unit_AtomicFunctions_Dec") {
|
||||||
int *Hptr{nullptr};
|
int *Hptr{nullptr};
|
||||||
int val;
|
int val;
|
||||||
// Allocate Host memory
|
// Allocate Host memory
|
||||||
Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
|
Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
|
||||||
SECTION("Test case when value is less than limit") {
|
SECTION("Test case when value is less than limit") {
|
||||||
val = Hptr[0] = 4;
|
val = Hptr[0] = 4;
|
||||||
launchAtomicFunction(Hptr, val, 2);
|
launchAtomicFunction(Hptr, val, 2);
|
||||||
}
|
}
|
||||||
SECTION("Test case when value is greater than limit") {
|
SECTION("Test case when value is greater than limit") {
|
||||||
val = Hptr[0] = 31;
|
val = Hptr[0] = 31;
|
||||||
launchAtomicFunction(Hptr, val, 2);
|
launchAtomicFunction(Hptr, val, 2);
|
||||||
}
|
}
|
||||||
SECTION("Test case when value is equal to the limit") {
|
SECTION("Test case when value is equal to the limit") {
|
||||||
val = Hptr[0] = 25;
|
val = Hptr[0] = 25;
|
||||||
launchAtomicFunction(Hptr, val, 2);
|
launchAtomicFunction(Hptr, val, 2);
|
||||||
}
|
}
|
||||||
free(Hptr);
|
free(Hptr);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,81 +1,81 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__device__ void double_precision_intrinsics() {
|
__device__ void double_precision_intrinsics() {
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__dadd_rd(0.0, 1.0);
|
__dadd_rd(0.0, 1.0);
|
||||||
#endif
|
#endif
|
||||||
__dadd_rn(0.0, 1.0);
|
__dadd_rn(0.0, 1.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__dadd_ru(0.0, 1.0);
|
__dadd_ru(0.0, 1.0);
|
||||||
__dadd_rz(0.0, 1.0);
|
__dadd_rz(0.0, 1.0);
|
||||||
__ddiv_rd(0.0, 1.0);
|
__ddiv_rd(0.0, 1.0);
|
||||||
#endif
|
#endif
|
||||||
__ddiv_rn(0.0, 1.0);
|
__ddiv_rn(0.0, 1.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__ddiv_ru(0.0, 1.0);
|
__ddiv_ru(0.0, 1.0);
|
||||||
__ddiv_rz(0.0, 1.0);
|
__ddiv_rz(0.0, 1.0);
|
||||||
__dmul_rd(1.0, 2.0);
|
__dmul_rd(1.0, 2.0);
|
||||||
#endif
|
#endif
|
||||||
__dmul_rn(1.0, 2.0);
|
__dmul_rn(1.0, 2.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__dmul_ru(1.0, 2.0);
|
__dmul_ru(1.0, 2.0);
|
||||||
__dmul_rz(1.0, 2.0);
|
__dmul_rz(1.0, 2.0);
|
||||||
__drcp_rd(2.0);
|
__drcp_rd(2.0);
|
||||||
#endif
|
#endif
|
||||||
__drcp_rn(2.0);
|
__drcp_rn(2.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__drcp_ru(2.0);
|
__drcp_ru(2.0);
|
||||||
__drcp_rz(2.0);
|
__drcp_rz(2.0);
|
||||||
__dsqrt_rd(4.0);
|
__dsqrt_rd(4.0);
|
||||||
#endif
|
#endif
|
||||||
__dsqrt_rn(4.0);
|
__dsqrt_rn(4.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__dsqrt_ru(4.0);
|
__dsqrt_ru(4.0);
|
||||||
__dsqrt_rz(4.0);
|
__dsqrt_rz(4.0);
|
||||||
__dsub_rd(2.0, 1.0);
|
__dsub_rd(2.0, 1.0);
|
||||||
#endif
|
#endif
|
||||||
__dsub_rn(2.0, 1.0);
|
__dsub_rn(2.0, 1.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__dsub_ru(2.0, 1.0);
|
__dsub_ru(2.0, 1.0);
|
||||||
__dsub_rz(2.0, 1.0);
|
__dsub_rz(2.0, 1.0);
|
||||||
__fma_rd(1.0, 2.0, 3.0);
|
__fma_rd(1.0, 2.0, 3.0);
|
||||||
#endif
|
#endif
|
||||||
__fma_rn(1.0, 2.0, 3.0);
|
__fma_rn(1.0, 2.0, 3.0);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fma_ru(1.0, 2.0, 3.0);
|
__fma_ru(1.0, 2.0, 3.0);
|
||||||
__fma_rz(1.0, 2.0, 3.0);
|
__fma_rz(1.0, 2.0, 3.0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compileDoublePrecisionIntrinsics(int) {
|
__global__ void compileDoublePrecisionIntrinsics(int) {
|
||||||
double_precision_intrinsics();
|
double_precision_intrinsics();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
|
TEST_CASE("Unit_DoublePrecisionIntrinsics") {
|
||||||
hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
|
hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
|
||||||
dim3(1, 1, 1), 0, 0, 1);
|
dim3(1, 1, 1), 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,133 +1,133 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__device__ void double_precision_math_functions() {
|
__device__ void double_precision_math_functions() {
|
||||||
int iX;
|
int iX;
|
||||||
double fX, fY;
|
double fX, fY;
|
||||||
|
|
||||||
acos(1.0);
|
acos(1.0);
|
||||||
acosh(1.0);
|
acosh(1.0);
|
||||||
asin(0.0);
|
asin(0.0);
|
||||||
asinh(0.0);
|
asinh(0.0);
|
||||||
atan(0.0);
|
atan(0.0);
|
||||||
atan2(0.0, 1.0);
|
atan2(0.0, 1.0);
|
||||||
atanh(0.0);
|
atanh(0.0);
|
||||||
cbrt(0.0);
|
cbrt(0.0);
|
||||||
ceil(0.0);
|
ceil(0.0);
|
||||||
copysign(1.0, -2.0);
|
copysign(1.0, -2.0);
|
||||||
cos(0.0);
|
cos(0.0);
|
||||||
cosh(0.0);
|
cosh(0.0);
|
||||||
cospi(0.0);
|
cospi(0.0);
|
||||||
cyl_bessel_i0(0.0);
|
cyl_bessel_i0(0.0);
|
||||||
cyl_bessel_i1(0.0);
|
cyl_bessel_i1(0.0);
|
||||||
erf(0.0);
|
erf(0.0);
|
||||||
erfc(0.0);
|
erfc(0.0);
|
||||||
erfcinv(2.0);
|
erfcinv(2.0);
|
||||||
erfcx(0.0);
|
erfcx(0.0);
|
||||||
erfinv(1.0);
|
erfinv(1.0);
|
||||||
exp(0.0);
|
exp(0.0);
|
||||||
exp10(0.0);
|
exp10(0.0);
|
||||||
exp2(0.0);
|
exp2(0.0);
|
||||||
expm1(0.0);
|
expm1(0.0);
|
||||||
fabs(1.0);
|
fabs(1.0);
|
||||||
fdim(1.0, 0.0);
|
fdim(1.0, 0.0);
|
||||||
floor(0.0);
|
floor(0.0);
|
||||||
fma(1.0, 2.0, 3.0);
|
fma(1.0, 2.0, 3.0);
|
||||||
fmax(0.0, 0.0);
|
fmax(0.0, 0.0);
|
||||||
fmin(0.0, 0.0);
|
fmin(0.0, 0.0);
|
||||||
fmod(0.0, 1.0);
|
fmod(0.0, 1.0);
|
||||||
frexp(0.0, &iX);
|
frexp(0.0, &iX);
|
||||||
hypot(1.0, 0.0);
|
hypot(1.0, 0.0);
|
||||||
ilogb(1.0);
|
ilogb(1.0);
|
||||||
isfinite(0.0);
|
isfinite(0.0);
|
||||||
isinf(0.0);
|
isinf(0.0);
|
||||||
isnan(0.0);
|
isnan(0.0);
|
||||||
j0(0.0);
|
j0(0.0);
|
||||||
j1(0.0);
|
j1(0.0);
|
||||||
jn(-1.0, 1.0);
|
jn(-1.0, 1.0);
|
||||||
ldexp(0.0, 0);
|
ldexp(0.0, 0);
|
||||||
lgamma(1.0);
|
lgamma(1.0);
|
||||||
llrint(0.0);
|
llrint(0.0);
|
||||||
llround(0.0);
|
llround(0.0);
|
||||||
log(1.0);
|
log(1.0);
|
||||||
log10(1.0);
|
log10(1.0);
|
||||||
log1p(-1.0);
|
log1p(-1.0);
|
||||||
log2(1.0);
|
log2(1.0);
|
||||||
logb(1.0);
|
logb(1.0);
|
||||||
lrint(0.0);
|
lrint(0.0);
|
||||||
lround(0.0);
|
lround(0.0);
|
||||||
modf(0.0, &fX);
|
modf(0.0, &fX);
|
||||||
nan("1");
|
nan("1");
|
||||||
nearbyint(0.0);
|
nearbyint(0.0);
|
||||||
nextafter(0.0, 0.0);
|
nextafter(0.0, 0.0);
|
||||||
fX = 1.0;
|
fX = 1.0;
|
||||||
norm(1, &fX);
|
norm(1, &fX);
|
||||||
norm3d(1.0, 0.0, 0.0);
|
norm3d(1.0, 0.0, 0.0);
|
||||||
norm4d(1.0, 0.0, 0.0, 0.0);
|
norm4d(1.0, 0.0, 0.0, 0.0);
|
||||||
normcdf(0.0);
|
normcdf(0.0);
|
||||||
normcdfinv(1.0);
|
normcdfinv(1.0);
|
||||||
pow(1.0, 0.0);
|
pow(1.0, 0.0);
|
||||||
rcbrt(1.0);
|
rcbrt(1.0);
|
||||||
remainder(2.0, 1.0);
|
remainder(2.0, 1.0);
|
||||||
remquo(1.0, 2.0, &iX);
|
remquo(1.0, 2.0, &iX);
|
||||||
rhypot(0.0, 1.0);
|
rhypot(0.0, 1.0);
|
||||||
rint(1.0);
|
rint(1.0);
|
||||||
fX = 1.0;
|
fX = 1.0;
|
||||||
rnorm(1, &fX);
|
rnorm(1, &fX);
|
||||||
rnorm3d(0.0, 0.0, 1.0);
|
rnorm3d(0.0, 0.0, 1.0);
|
||||||
rnorm4d(0.0, 0.0, 0.0, 1.0);
|
rnorm4d(0.0, 0.0, 0.0, 1.0);
|
||||||
round(0.0);
|
round(0.0);
|
||||||
rsqrt(1.0);
|
rsqrt(1.0);
|
||||||
scalbln(0.0, 1);
|
scalbln(0.0, 1);
|
||||||
scalbn(0.0, 1);
|
scalbn(0.0, 1);
|
||||||
signbit(1.0);
|
signbit(1.0);
|
||||||
sin(0.0);
|
sin(0.0);
|
||||||
#if HT_AMD
|
#if HT_AMD
|
||||||
// NV A100 has a bug in sincos(), so temporarily disable it
|
// NV A100 has a bug in sincos(), so temporarily disable it
|
||||||
sincos(0.0, &fX, &fY);
|
sincos(0.0, &fX, &fY);
|
||||||
#endif
|
#endif
|
||||||
sincospi(0.0, &fX, &fY);
|
sincospi(0.0, &fX, &fY);
|
||||||
sinh(0.0);
|
sinh(0.0);
|
||||||
sinpi(0.0);
|
sinpi(0.0);
|
||||||
sqrt(0.0);
|
sqrt(0.0);
|
||||||
tan(0.0);
|
tan(0.0);
|
||||||
tanh(0.0);
|
tanh(0.0);
|
||||||
tgamma(2.0);
|
tgamma(2.0);
|
||||||
trunc(0.0);
|
trunc(0.0);
|
||||||
y0(1.0);
|
y0(1.0);
|
||||||
y1(1.0);
|
y1(1.0);
|
||||||
yn(1, 1.0);
|
yn(1, 1.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compileDoublePrecisionMathOnDevice(int) {
|
__global__ void compileDoublePrecisionMathOnDevice(int) {
|
||||||
double_precision_math_functions();
|
double_precision_math_functions();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_DoublePrecisionMathDevice") {
|
TEST_CASE("Unit_DoublePrecisionMathDevice") {
|
||||||
hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
|
hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
|
||||||
dim3(1, 1, 1), 0, 0, 1);
|
dim3(1, 1, 1), 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,117 +1,117 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__host__ static void double_precision_math_functions() {
|
__host__ static void double_precision_math_functions() {
|
||||||
int iX;
|
int iX;
|
||||||
double fX, fY;
|
double fX, fY;
|
||||||
|
|
||||||
acos(1.0);
|
acos(1.0);
|
||||||
acosh(1.0);
|
acosh(1.0);
|
||||||
asin(0.0);
|
asin(0.0);
|
||||||
asinh(0.0);
|
asinh(0.0);
|
||||||
atan(0.0);
|
atan(0.0);
|
||||||
atan2(0.0, 1.0);
|
atan2(0.0, 1.0);
|
||||||
atanh(0.0);
|
atanh(0.0);
|
||||||
cbrt(0.0);
|
cbrt(0.0);
|
||||||
ceil(0.0);
|
ceil(0.0);
|
||||||
copysign(1.0, -2.0);
|
copysign(1.0, -2.0);
|
||||||
cos(0.0);
|
cos(0.0);
|
||||||
cosh(0.0);
|
cosh(0.0);
|
||||||
erf(0.0);
|
erf(0.0);
|
||||||
erfc(0.0);
|
erfc(0.0);
|
||||||
exp(0.0);
|
exp(0.0);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
exp10(0.0);
|
exp10(0.0);
|
||||||
#endif
|
#endif
|
||||||
exp2(0.0);
|
exp2(0.0);
|
||||||
expm1(0.0);
|
expm1(0.0);
|
||||||
fabs(1.0);
|
fabs(1.0);
|
||||||
fdim(1.0, 0.0);
|
fdim(1.0, 0.0);
|
||||||
floor(0.0);
|
floor(0.0);
|
||||||
fma(1.0, 2.0, 3.0);
|
fma(1.0, 2.0, 3.0);
|
||||||
fmax(0.0, 0.0);
|
fmax(0.0, 0.0);
|
||||||
fmin(0.0, 0.0);
|
fmin(0.0, 0.0);
|
||||||
fmod(0.0, 1.0);
|
fmod(0.0, 1.0);
|
||||||
frexp(0.0, &iX);
|
frexp(0.0, &iX);
|
||||||
hypot(1.0, 0.0);
|
hypot(1.0, 0.0);
|
||||||
ilogb(1.0);
|
ilogb(1.0);
|
||||||
std::isfinite(0.0);
|
std::isfinite(0.0);
|
||||||
std::isinf(0.0);
|
std::isinf(0.0);
|
||||||
std::isnan(0.0);
|
std::isnan(0.0);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
j0(0.0);
|
j0(0.0);
|
||||||
j1(0.0);
|
j1(0.0);
|
||||||
jn(-1.0, 1.0);
|
jn(-1.0, 1.0);
|
||||||
#elif _WIN64
|
#elif _WIN64
|
||||||
_j0(0.0);
|
_j0(0.0);
|
||||||
_j1(0.0);
|
_j1(0.0);
|
||||||
_jn(-1.0, 1.0);
|
_jn(-1.0, 1.0);
|
||||||
#endif
|
#endif
|
||||||
ldexp(0.0, 0);
|
ldexp(0.0, 0);
|
||||||
llrint(0.0);
|
llrint(0.0);
|
||||||
llround(0.0);
|
llround(0.0);
|
||||||
log(1.0);
|
log(1.0);
|
||||||
log10(1.0);
|
log10(1.0);
|
||||||
log1p(-1.0);
|
log1p(-1.0);
|
||||||
log2(1.0);
|
log2(1.0);
|
||||||
logb(1.0);
|
logb(1.0);
|
||||||
lrint(0.0);
|
lrint(0.0);
|
||||||
lround(0.0);
|
lround(0.0);
|
||||||
modf(0.0, &fX);
|
modf(0.0, &fX);
|
||||||
nan("1");
|
nan("1");
|
||||||
nearbyint(0.0);
|
nearbyint(0.0);
|
||||||
fX = 1.0;
|
fX = 1.0;
|
||||||
pow(1.0, 0.0);
|
pow(1.0, 0.0);
|
||||||
remainder(2.0, 1.0);
|
remainder(2.0, 1.0);
|
||||||
remquo(1.0, 2.0, &iX);
|
remquo(1.0, 2.0, &iX);
|
||||||
rint(1.0);
|
rint(1.0);
|
||||||
round(0.0);
|
round(0.0);
|
||||||
scalbln(0.0, 1);
|
scalbln(0.0, 1);
|
||||||
scalbn(0.0, 1);
|
scalbn(0.0, 1);
|
||||||
std::signbit(1.0);
|
std::signbit(1.0);
|
||||||
sin(0.0);
|
sin(0.0);
|
||||||
#ifdef _unix__
|
#ifdef _unix__
|
||||||
sincos(0.0, &fX, &fY);
|
sincos(0.0, &fX, &fY);
|
||||||
#endif
|
#endif
|
||||||
sinh(0.0);
|
sinh(0.0);
|
||||||
sqrt(0.0);
|
sqrt(0.0);
|
||||||
tan(0.0);
|
tan(0.0);
|
||||||
tanh(0.0);
|
tanh(0.0);
|
||||||
tgamma(2.0);
|
tgamma(2.0);
|
||||||
trunc(0.0);
|
trunc(0.0);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
y0(1.0);
|
y0(1.0);
|
||||||
y1(1.0);
|
y1(1.0);
|
||||||
yn(1, 1.0);
|
yn(1, 1.0);
|
||||||
#elif _WIN64
|
#elif _WIN64
|
||||||
_y0(1.0);
|
_y0(1.0);
|
||||||
_y1(1.0);
|
_y1(1.0);
|
||||||
_yn(1, 1.0);
|
_yn(1, 1.0);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_DoublePrecisionMathHost") {
|
TEST_CASE("Unit_DoublePrecisionMathHost") {
|
||||||
double_precision_math_functions();
|
double_precision_math_functions();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,128 +1,128 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/math_functions.h>
|
#include <hip/math_functions.h>
|
||||||
|
|
||||||
// Calls each single-precision device math function once with constant
// arguments. Most return values are discarded; a few are stored in scratch
// locals that are never read back.
__device__ void FloatMathPrecise() {
  int iOut;     // scratch for int out-parameters (frexpf, remquof)
  float fOutA;  // scratch for float results / out-parameters
  float fOutB;  // second float scratch (e.g. sincosf)

  // Inverse trigonometric / hyperbolic
  acosf(1.0f);
  acoshf(1.0f);
  asinf(0.0f);
  asinhf(0.0f);
  atan2f(0.0f, 1.0f);
  atanf(0.0f);
  atanhf(0.0f);

  cbrtf(0.0f);
  fOutA = ceilf(0.0f);
  fOutA = copysignf(1.0f, -2.0f);
  cosf(0.0f);
  coshf(0.0f);
  cospif(0.0f);
  cyl_bessel_i0f(0.0f);
  cyl_bessel_i1f(0.0f);

  // Error-function family
  erfcf(0.0f);
  erfcinvf(2.0f);
  erfcxf(0.0f);
  erff(0.0f);
  erfinvf(1.0f);

  // Exponentials
  exp10f(0.0f);
  exp2f(0.0f);
  expf(0.0f);
  expm1f(0.0f);

  fOutA = fabsf(1.0f);
  fdimf(1.0f, 0.0f);
  fdividef(0.0f, 1.0f);
  fOutA = floorf(0.0f);
  fmaf(1.0f, 2.0f, 3.0f);
  fOutA = fmaxf(0.0f, 0.0f);
  fOutA = fminf(0.0f, 0.0f);
  fmodf(0.0f, 1.0f);
  frexpf(0.0f, &iOut);
  hypotf(1.0f, 0.0f);
  ilogbf(1.0f);

  // Classification
  isfinite(0.0f);
  fOutA = isinf(0.0f);
  fOutA = isnan(0.0f);

  // Bessel functions of the first kind
  j0f(0.0f);
  j1f(0.0f);
  jnf(-1.0f, 1.0f);

  ldexpf(0.0f, 0);
  lgammaf(1.0f);
  llrintf(0.0f);
  llroundf(0.0f);

  // Logarithms
  log10f(1.0f);
  log1pf(-1.0f);
  log2f(1.0f);
  logbf(1.0f);
  logf(1.0f);

  lrintf(0.0f);
  lroundf(0.0f);
  modff(0.0f, &fOutA);
  fOutA = nanf("1");
  fOutA = nearbyintf(0.0f);
  nextafterf(0.0f, 0.0f);

  // Norms and normal distribution
  norm3df(1.0f, 0.0f, 0.0f);
  norm4df(1.0f, 0.0f, 0.0f, 0.0f);
  normcdff(0.0f);
  normcdfinvf(1.0f);
  fOutA = 1.0f;  // normf reads its input through the pointer
  normf(1, &fOutA);

  powf(1.0f, 0.0f);
  rcbrtf(1.0f);
  remainderf(2.0f, 1.0f);
  remquof(1.0f, 2.0f, &iOut);
  rhypotf(0.0f, 1.0f);
  fOutB = rintf(1.0f);
  rnorm3df(0.0f, 0.0f, 1.0f);
  rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
  fOutA = 1.0f;  // rnormf reads its input through the pointer
  rnormf(1, &fOutA);
  fOutB = roundf(0.0f);
  rsqrtf(1.0f);
  scalblnf(0.0f, 1);
  scalbnf(0.0f, 1);
  signbit(1.0f);

  // Trigonometric / hyperbolic
  sincosf(0.0f, &fOutA, &fOutB);
  sincospif(0.0f, &fOutA, &fOutB);
  sinf(0.0f);
  sinhf(0.0f);
  sinpif(0.0f);
  sqrtf(0.0f);
  tanf(0.0f);
  tanhf(0.0f);
  tgammaf(2.0f);
  fOutB = truncf(0.0f);

  // Bessel functions of the second kind
  y0f(1.0f);
  y1f(1.0f);
  ynf(1, 1.0f);
}
|
||||||
|
|
||||||
// Kernel entry point that runs FloatMathPrecise() on the device.
// The int parameter is unnamed and unused.
__global__ void CompileFloatMathPrecise(int) {
  FloatMathPrecise();
}
|
||||||
|
|
||||||
// Launches CompileFloatMathPrecise with a single thread in a single block.
// No result is read back and no assertion is made after the launch.
TEST_CASE("Unit_FloatMathPrecise") {
  const dim3 gridDims(1, 1, 1);
  const dim3 blockDims(1, 1, 1);
  hipLaunchKernelGGL(CompileFloatMathPrecise, gridDims, blockDims, 0, 0, 1);
}
|
||||||
|
|||||||
@@ -1,68 +1,68 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/device_functions.h>
|
#include <hip/device_functions.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
// Calls each integer device intrinsic once with constant arguments.
// Results are discarded, apart from two locals that are never read.
__device__ void integer_intrinsics() {
  // Bit reversal / permutation
  __brev(static_cast<unsigned int>(10));
  __brevll(static_cast<uint64_t>(10));
  __byte_perm(static_cast<unsigned int>(0), static_cast<unsigned int>(0), 0);

  // Leading-zero count and find-first-set
  __clz(static_cast<int>(10));
  __clzll(static_cast<int64_t>(10));
  __ffs(static_cast<int>(10));
  __ffsll((long long)(10));  // NOLINT

  // Funnel shifts
  __funnelshift_l(static_cast<unsigned int>(0xfacefeed),
                  static_cast<unsigned int>(0xdeadbeef), 0);
  __funnelshift_lc(static_cast<unsigned int>(0xfacefeed),
                   static_cast<unsigned int>(0xdeadbeef), 0);
  __funnelshift_r(static_cast<unsigned int>(0xfacefeed),
                  static_cast<unsigned int>(0xdeadbeef), 0);
  __funnelshift_rc(static_cast<unsigned int>(0xfacefeed),
                   static_cast<unsigned int>(0xdeadbeef), 0);

  // Signed arithmetic helpers
  __hadd(static_cast<int>(1), static_cast<int>(3));
  __mul24(static_cast<int>(1), static_cast<int>(2));
  __mul64hi(static_cast<int64_t>(1), static_cast<int64_t>(2));
  __mulhi(static_cast<int>(1), static_cast<int>(2));

  // Population count
  __popc(static_cast<unsigned int>(4));
  __popcll(static_cast<uint64_t>(4));

  // min/max results are stored but intentionally never used.
  int smaller = min(static_cast<int>(4), static_cast<int>(5));
  int larger = max(static_cast<int>(4), static_cast<int>(5));

  __rhadd(static_cast<int>(1), static_cast<int>(2));
  __sad(static_cast<int>(1), static_cast<int>(2), 0);

  // Unsigned arithmetic helpers
  __uhadd(static_cast<unsigned int>(1), static_cast<unsigned int>(3));
  __umul24(static_cast<unsigned int>(1), static_cast<unsigned int>(2));
  __umul64hi(static_cast<uint64_t>(1), static_cast<uint64_t>(2));
  __umulhi(static_cast<unsigned int>(1), static_cast<unsigned int>(2));
  __urhadd(static_cast<unsigned int>(1), static_cast<unsigned int>(2));
  __usad(static_cast<unsigned int>(1), static_cast<unsigned int>(2), 0);

  assert(1);
}
|
||||||
|
|
||||||
// Kernel entry point that runs integer_intrinsics() on the device.
// The int parameter is unnamed and unused.
__global__ void compileIntegerIntrinsics(int) {
  integer_intrinsics();
}
|
||||||
|
|
||||||
// Launches compileIntegerIntrinsics with a single thread in a single block.
// No result is read back and no assertion is made after the launch.
TEST_CASE("Unit_IntegerIntrinsics") {
  const dim3 gridDims(1, 1, 1);
  const dim3 blockDims(1, 1, 1);
  hipLaunchKernelGGL(compileIntegerIntrinsics, gridDims, blockDims, 0, 0, 1);
}
|
||||||
|
|||||||
@@ -1,298 +1,298 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Auto-Verification Code
|
// Auto-Verification Code
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
// Variadic fallback overload: accepts any arguments, checks nothing and
// always reports success.
bool verifyBitwise(...) {
  return true;
}
|
||||||
|
|
||||||
// Host reference check for the bitwise slots of the result buffer, enabled
// for integral T only. Recomputes the expected AND / OR / XOR values over
// indices [0, len) and compares them with gpuData[8], gpuData[9] and
// gpuData[10]. Returns true when every REQUIRE passes.
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
bool verifyBitwise(T* gpuData, int len) {
  int idx;

  // AND: start from 0xff and fold in (2 * idx + 7) for every index.
  T val = 0xff;
  idx = 0;
  while (idx < len) {
    val &= (2 * idx + 7);
    ++idx;
  }
  REQUIRE(val == gpuData[8]);

  // OR: start from 0 and fold in (1 << idx) for every index.
  val = 0;
  idx = 0;
  while (idx < len) {
    val |= (1 << idx);
    ++idx;
  }
  REQUIRE(val == gpuData[9]);

  // XOR: start from 0xff and fold in idx for every index.
  val = 0xff;
  idx = 0;
  while (idx < len) {
    val ^= idx;
    ++idx;
  }
  REQUIRE(val == gpuData[10]);

  return true;
}
|
||||||
|
|
||||||
// Variadic fallback overload: accepts any arguments, checks nothing and
// always reports success.
bool verifySub(...) {
  return true;
}
|
||||||
|
|
||||||
template<
|
template<
|
||||||
typename T,
|
typename T,
|
||||||
typename enable_if<
|
typename enable_if<
|
||||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||||
bool verifySub(T* gpuData, int len) {
|
bool verifySub(T* gpuData, int len) {
|
||||||
T val = 0;
|
T val = 0;
|
||||||
|
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
val -= 10;
|
val -= 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
REQUIRE(val == gpuData[1]);
|
REQUIRE(val == gpuData[1]);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Variadic overload: accepts any arguments, checks nothing and always
// reports success.
bool verifyExch(...) {
  return true;
}
|
||||||
|
|
||||||
// Checks the exchange slot of the result buffer (gpuData[2]); enabled for
// every T except double.
//
// Returns true if gpuData[2] equals some value i in [0, len), false
// otherwise. Previously control fell off the end of this non-void function
// when no match was found, which is undefined behaviour.
//
// NOTE(review): verifyData() calls verifyExch(), not this function, so this
// check does not appear to be reached from the code in this file. It looks
// like this was meant to be the templated verifyExch overload - confirm
// before renaming.
template<typename T, typename enable_if<!is_same<T, double> {}>::type* = nullptr>  // NOLINT
bool computeExchExch(T* gpuData, int len) {
  for (T i = 0; i < len; ++i) {
    if (i == gpuData[2]) {
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
// Variadic fallback overload: accepts any arguments, checks nothing and
// always reports success.
bool VerifyIntegral(...) {
  return true;
}
|
||||||
|
|
||||||
// Host reference check for the integer-only slots of the result buffer,
// enabled for integral T. Recomputes the expected values for max
// (gpuData[3]), min (gpuData[4]), wrapping increment (gpuData[5]), wrapping
// decrement (gpuData[6]) and compare-and-swap (gpuData[7]) over [0, len),
// then chains into verifyBitwise() and verifySub().
//
// Returns true when every check passes; failures are reported via REQUIRE.
//
// Fix: the CAS loop used to `return true` as soon as a match was found,
// which skipped verifyBitwise()/verifySub() whenever CAS succeeded and never
// reported a failure when it did not. The match is now recorded, asserted,
// and the remaining checks always run.
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
bool VerifyIntegral(T* gpuData, int len) {
  // atomic Max: the largest index seen, i.e. len - 1 for len > 0.
  T val = 0;
  for (int i = 0; i < len; ++i) {
    val = max(val, static_cast<T>(i));
  }

  REQUIRE(val == gpuData[3]);

  // atomic Min: start above zero so the minimum index (0) wins.
  val = 1 << 8;

  for (int i = 0; i < len; ++i) {
    val = min(val, static_cast<T>(i));
  }

  REQUIRE(val == gpuData[4]);

  // atomic Inc: counts up and wraps to 0 once the value reaches the limit.
  T limit = 17;
  val = 0;

  for (int i = 0; i < len; ++i) {
    val = (val >= limit) ? 0 : val + 1;
  }

  REQUIRE(val == gpuData[5]);

  // atomic Dec: counts down and wraps to the limit at 0 (or when above it).
  limit = 137;
  val = 0;

  for (int i = 0; i < len; ++i) {
    val = ((val == 0) || (val > limit)) ? limit : val - 1;
  }

  REQUIRE(val == gpuData[6]);

  // atomic CAS: the stored value must be a member of [0, len).
  bool casFound = false;
  for (int i = 0; i < len; ++i) {
    if (static_cast<T>(i) == gpuData[7]) {
      casFound = true;
      break;
    }
  }

  REQUIRE(casFound);

  return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
}
|
||||||
|
|
||||||
// Top-level host verification. Adds 10 to zero len times, compares the sum
// with gpuData[0], then delegates to VerifyIntegral() and verifyExch().
// Returns the conjunction of the two delegated checks.
template<typename T>
bool verifyData(T* gpuData, int len) {
  T val = 0;
  int remaining = len;
  while (remaining > 0) {
    val += 10;
    --remaining;
  }

  REQUIRE(val == gpuData[0]);

  if (!VerifyIntegral(gpuData, len)) {
    return false;
  }
  return verifyExch(gpuData, len);
}
|
||||||
|
|
||||||
// Variadic fallback overload: does nothing.
__device__
void testKernelExch(...) {
}
|
||||||
|
|
||||||
// Device helper enabled for every T except double: atomically exchanges the
// calling thread's global index into g_odata[2].
template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
__device__
void testKernelExch(T* g_odata) {
  // Global thread index, converted to T.
  const T globalIdx = blockDim.x * blockIdx.x + threadIdx.x;

  atomicExch(&g_odata[2], globalIdx);
}
|
||||||
|
|
||||||
// Variadic fallback overload: does nothing.
__device__
void testKernelSub(...) {
}
|
||||||
|
|
||||||
template<
|
template<
|
||||||
typename T,
|
typename T,
|
||||||
typename enable_if<
|
typename enable_if<
|
||||||
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
|
||||||
__device__
|
__device__
|
||||||
void testKernelSub(T* g_odata) {
|
void testKernelSub(T* g_odata) {
|
||||||
// Atomic subtraction (final should be 0)
|
// Atomic subtraction (final should be 0)
|
||||||
atomicSub(&g_odata[1], 10);
|
atomicSub(&g_odata[1], 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Variadic fallback overload: does nothing.
__device__
void testKernelIntegral(...) {
}
|
||||||
|
|
||||||
// Device helper enabled for integral T: performs one atomic operation per
// result slot (indices 3..10 of g_odata) using the calling thread's global
// index, then calls testKernelSub().
template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
__device__
void testKernelIntegral(T* g_odata) {
  // Global thread index, converted to T.
  const T globalIdx = blockDim.x * blockIdx.x + threadIdx.x;

  // Slot 3: maximum of all thread indices.
  atomicMax(&g_odata[3], globalIdx);

  // Slot 4: minimum of all thread indices.
  atomicMin(&g_odata[4], globalIdx);

  // Slot 5: wrapping increment with limit 17.
  atomicInc((unsigned int*)&g_odata[5], 17);

  // Slot 6: wrapping decrement with limit 137.
  atomicDec((unsigned int*)&g_odata[6], 137);

  // Slot 7: compare-and-swap from (index - 1) to index.
  atomicCAS(&g_odata[7], globalIdx - 1, globalIdx);

  // Slots 8..10: bitwise AND, OR and XOR.
  atomicAnd(&g_odata[8], 2 * globalIdx + 7);
  atomicOr(&g_odata[9], 1 << globalIdx);
  atomicXor(&g_odata[10], globalIdx);

  testKernelSub(g_odata);
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
__global__ void testKernel(T* g_odata) {
|
__global__ void testKernel(T* g_odata) {
|
||||||
// Atomic addition
|
// Atomic addition
|
||||||
atomicAdd(&g_odata[0], 10);
|
atomicAdd(&g_odata[0], 10);
|
||||||
testKernelIntegral(g_odata);
|
testKernelIntegral(g_odata);
|
||||||
testKernelExch(g_odata);
|
testKernelExch(g_odata);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static void runTest() {
|
static void runTest() {
|
||||||
bool testResult = true;
|
bool testResult = true;
|
||||||
unsigned int numThreads = 256;
|
unsigned int numThreads = 256;
|
||||||
unsigned int numBlocks = 64;
|
unsigned int numBlocks = 64;
|
||||||
unsigned int numData = 11;
|
unsigned int numData = 11;
|
||||||
unsigned int memSize = sizeof(T) * numData;
|
unsigned int memSize = sizeof(T) * numData;
|
||||||
|
|
||||||
// allocate mem for the result on host side
|
// allocate mem for the result on host side
|
||||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||||
|
|
||||||
// initialize the memory
|
// initialize the memory
|
||||||
for (unsigned int i = 0; i < numData; i++) {
|
for (unsigned int i = 0; i < numData; i++) {
|
||||||
hOData[i] = 0;
|
hOData[i] = 0;
|
||||||
}
|
}
|
||||||
// To make the AND and XOR tests generate something other than 0...
|
// To make the AND and XOR tests generate something other than 0...
|
||||||
hOData[8] = hOData[10] = 0xff;
|
hOData[8] = hOData[10] = 0xff;
|
||||||
|
|
||||||
// allocate device memory for result
|
// allocate device memory for result
|
||||||
T* dOData;
|
T* dOData;
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
|
||||||
// copy host memory to device to initialize to zero
|
// copy host memory to device to initialize to zero
|
||||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// execute the kernel
|
// execute the kernel
|
||||||
hipLaunchKernelGGL(
|
hipLaunchKernelGGL(
|
||||||
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
|
||||||
|
|
||||||
// Copy result from device to host
|
// Copy result from device to host
|
||||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
// Compute reference solution
|
// Compute reference solution
|
||||||
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
|
REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
|
||||||
|
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
free(hOData);
|
free(hOData);
|
||||||
HIP_CHECK(hipFree(dOData));
|
HIP_CHECK(hipFree(dOData));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs runTest<T>() once per element type, each in its own Catch2 section.
// The 64-bit integer and double sections are compiled only when HT_AMD is
// non-zero.
TEST_CASE("Unit_SimpleAtomicsTest") {
  // 32-bit integer types
  SECTION("test for int") {
    runTest<int>();
  }

  SECTION("test for unsigned int") {
    runTest<unsigned int>();
  }

  // Single-precision floating point
  SECTION("test for float") {
    runTest<float>();
  }

#if HT_AMD
  SECTION("test for unsigned long long") {
    runTest<uint64_t>();
  }

  SECTION("test for double") {
    runTest<double>();
  }
#endif
}
|
||||||
|
|||||||
@@ -1,101 +1,101 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/device_functions.h>
|
#include <hip/device_functions.h>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__device__ void single_precision_intrinsics() {
|
__device__ void single_precision_intrinsics() {
|
||||||
float fX, fY;
|
float fX, fY;
|
||||||
|
|
||||||
__cosf(0.0f);
|
__cosf(0.0f);
|
||||||
__exp10f(0.0f);
|
__exp10f(0.0f);
|
||||||
__expf(0.0f);
|
__expf(0.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fadd_rd(0.0f, 1.0f);
|
__fadd_rd(0.0f, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
__fadd_rn(0.0f, 1.0f);
|
__fadd_rn(0.0f, 1.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fadd_ru(0.0f, 1.0f);
|
__fadd_ru(0.0f, 1.0f);
|
||||||
__fadd_rz(0.0f, 1.0f);
|
__fadd_rz(0.0f, 1.0f);
|
||||||
__fdiv_rd(4.0f, 2.0f);
|
__fdiv_rd(4.0f, 2.0f);
|
||||||
#endif
|
#endif
|
||||||
__fdiv_rn(4.0f, 2.0f);
|
__fdiv_rn(4.0f, 2.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fdiv_ru(4.0f, 2.0f);
|
__fdiv_ru(4.0f, 2.0f);
|
||||||
__fdiv_rz(4.0f, 2.0f);
|
__fdiv_rz(4.0f, 2.0f);
|
||||||
#endif
|
#endif
|
||||||
__fdividef(4.0f, 2.0f);
|
__fdividef(4.0f, 2.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||||
#endif
|
#endif
|
||||||
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||||
__fmaf_rz(1.0f, 2.0f, 3.0f);
|
__fmaf_rz(1.0f, 2.0f, 3.0f);
|
||||||
__fmul_rd(1.0f, 2.0f);
|
__fmul_rd(1.0f, 2.0f);
|
||||||
#endif
|
#endif
|
||||||
__fmul_rn(1.0f, 2.0f);
|
__fmul_rn(1.0f, 2.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fmul_ru(1.0f, 2.0f);
|
__fmul_ru(1.0f, 2.0f);
|
||||||
__fmul_rz(1.0f, 2.0f);
|
__fmul_rz(1.0f, 2.0f);
|
||||||
__frcp_rd(2.0f);
|
__frcp_rd(2.0f);
|
||||||
#endif
|
#endif
|
||||||
__frcp_rn(2.0f);
|
__frcp_rn(2.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__frcp_ru(2.0f);
|
__frcp_ru(2.0f);
|
||||||
__frcp_rz(2.0f);
|
__frcp_rz(2.0f);
|
||||||
#endif
|
#endif
|
||||||
__frsqrt_rn(4.0f);
|
__frsqrt_rn(4.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fsqrt_rd(4.0f);
|
__fsqrt_rd(4.0f);
|
||||||
#endif
|
#endif
|
||||||
__fsqrt_rn(4.0f);
|
__fsqrt_rn(4.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fsqrt_ru(4.0f);
|
__fsqrt_ru(4.0f);
|
||||||
__fsqrt_rz(4.0f);
|
__fsqrt_rz(4.0f);
|
||||||
__fsub_rd(2.0f, 1.0f);
|
__fsub_rd(2.0f, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
__fsub_rn(2.0f, 1.0f);
|
__fsub_rn(2.0f, 1.0f);
|
||||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||||
__fsub_ru(2.0f, 1.0f);
|
__fsub_ru(2.0f, 1.0f);
|
||||||
__fsub_rz(2.0f, 1.0f);
|
__fsub_rz(2.0f, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
__log10f(1.0f);
|
__log10f(1.0f);
|
||||||
__log2f(1.0f);
|
__log2f(1.0f);
|
||||||
__logf(1.0f);
|
__logf(1.0f);
|
||||||
__powf(1.0f, 0.0f);
|
__powf(1.0f, 0.0f);
|
||||||
__saturatef(0.1f);
|
__saturatef(0.1f);
|
||||||
__sincosf(0.0f, &fX, &fY);
|
__sincosf(0.0f, &fX, &fY);
|
||||||
__sinf(0.0f);
|
__sinf(0.0f);
|
||||||
__tanf(0.0f);
|
__tanf(0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compileSinglePrecisionIntrinsics(int) {
|
__global__ void compileSinglePrecisionIntrinsics(int) {
|
||||||
single_precision_intrinsics();
|
single_precision_intrinsics();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
|
TEST_CASE("Unit_SinglePrecisionIntrinsics") {
|
||||||
hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
|
hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
|
||||||
dim3(1, 1, 1), 0, 0, 1);
|
dim3(1, 1, 1), 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,123 +1,123 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/math_functions.h>
|
#include <hip/math_functions.h>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__device__ void single_precision_math_functions() {
|
__device__ void single_precision_math_functions() {
|
||||||
int iX;
|
int iX;
|
||||||
float fX, fY;
|
float fX, fY;
|
||||||
|
|
||||||
acosf(1.0f);
|
acosf(1.0f);
|
||||||
acoshf(1.0f);
|
acoshf(1.0f);
|
||||||
asinf(0.0f);
|
asinf(0.0f);
|
||||||
asinhf(0.0f);
|
asinhf(0.0f);
|
||||||
atan2f(0.0f, 1.0f);
|
atan2f(0.0f, 1.0f);
|
||||||
atanf(0.0f);
|
atanf(0.0f);
|
||||||
atanhf(0.0f);
|
atanhf(0.0f);
|
||||||
cbrtf(0.0f);
|
cbrtf(0.0f);
|
||||||
ceilf(0.0f);
|
ceilf(0.0f);
|
||||||
copysignf(1.0f, -2.0f);
|
copysignf(1.0f, -2.0f);
|
||||||
cosf(0.0f);
|
cosf(0.0f);
|
||||||
coshf(0.0f);
|
coshf(0.0f);
|
||||||
cospif(0.0f);
|
cospif(0.0f);
|
||||||
erfcf(0.0f);
|
erfcf(0.0f);
|
||||||
erfcinvf(2.0f);
|
erfcinvf(2.0f);
|
||||||
erfcxf(0.0f);
|
erfcxf(0.0f);
|
||||||
erff(0.0f);
|
erff(0.0f);
|
||||||
erfinvf(1.0f);
|
erfinvf(1.0f);
|
||||||
exp10f(0.0f);
|
exp10f(0.0f);
|
||||||
exp2f(0.0f);
|
exp2f(0.0f);
|
||||||
expf(0.0f);
|
expf(0.0f);
|
||||||
expm1f(0.0f);
|
expm1f(0.0f);
|
||||||
fabsf(1.0f);
|
fabsf(1.0f);
|
||||||
fdimf(1.0f, 0.0f);
|
fdimf(1.0f, 0.0f);
|
||||||
fdividef(0.0f, 1.0f);
|
fdividef(0.0f, 1.0f);
|
||||||
floorf(0.0f);
|
floorf(0.0f);
|
||||||
fmaf(1.0f, 2.0f, 3.0f);
|
fmaf(1.0f, 2.0f, 3.0f);
|
||||||
fmaxf(0.0f, 0.0f);
|
fmaxf(0.0f, 0.0f);
|
||||||
fminf(0.0f, 0.0f);
|
fminf(0.0f, 0.0f);
|
||||||
fmodf(0.0f, 1.0f);
|
fmodf(0.0f, 1.0f);
|
||||||
frexpf(0.0f, &iX);
|
frexpf(0.0f, &iX);
|
||||||
hypotf(1.0f, 0.0f);
|
hypotf(1.0f, 0.0f);
|
||||||
ilogbf(1.0f);
|
ilogbf(1.0f);
|
||||||
isfinite(0.0f);
|
isfinite(0.0f);
|
||||||
isinf(0.0f);
|
isinf(0.0f);
|
||||||
isnan(0.0f);
|
isnan(0.0f);
|
||||||
j0f(0.0f);
|
j0f(0.0f);
|
||||||
j1f(0.0f);
|
j1f(0.0f);
|
||||||
jnf(-1.0f, 1.0f);
|
jnf(-1.0f, 1.0f);
|
||||||
ldexpf(0.0f, 0);
|
ldexpf(0.0f, 0);
|
||||||
llrintf(0.0f);
|
llrintf(0.0f);
|
||||||
llroundf(0.0f);
|
llroundf(0.0f);
|
||||||
log10f(1.0f);
|
log10f(1.0f);
|
||||||
log1pf(-1.0f);
|
log1pf(-1.0f);
|
||||||
log2f(1.0f);
|
log2f(1.0f);
|
||||||
logbf(1.0f);
|
logbf(1.0f);
|
||||||
logf(1.0f);
|
logf(1.0f);
|
||||||
lrintf(0.0f);
|
lrintf(0.0f);
|
||||||
lroundf(0.0f);
|
lroundf(0.0f);
|
||||||
nanf("1");
|
nanf("1");
|
||||||
nearbyintf(0.0f);
|
nearbyintf(0.0f);
|
||||||
norm3df(1.0f, 0.0f, 0.0f);
|
norm3df(1.0f, 0.0f, 0.0f);
|
||||||
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
|
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
|
||||||
normcdff(0.0f);
|
normcdff(0.0f);
|
||||||
normcdfinvf(1.0f);
|
normcdfinvf(1.0f);
|
||||||
fX = 1.0f;
|
fX = 1.0f;
|
||||||
normf(1, &fX);
|
normf(1, &fX);
|
||||||
powf(1.0f, 0.0f);
|
powf(1.0f, 0.0f);
|
||||||
remainderf(2.0f, 1.0f);
|
remainderf(2.0f, 1.0f);
|
||||||
rhypotf(0.0f, 1.0f);
|
rhypotf(0.0f, 1.0f);
|
||||||
rintf(1.0f);
|
rintf(1.0f);
|
||||||
rnorm3df(0.0f, 0.0f, 1.0f);
|
rnorm3df(0.0f, 0.0f, 1.0f);
|
||||||
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
|
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
|
||||||
fX = 1.0f;
|
fX = 1.0f;
|
||||||
rnormf(1, &fX);
|
rnormf(1, &fX);
|
||||||
roundf(0.0f);
|
roundf(0.0f);
|
||||||
rsqrtf(1.0f);
|
rsqrtf(1.0f);
|
||||||
signbit(1.0f);
|
signbit(1.0f);
|
||||||
sincosf(0.0f, &fX, &fY);
|
sincosf(0.0f, &fX, &fY);
|
||||||
sincospif(0.0f, &fX, &fY);
|
sincospif(0.0f, &fX, &fY);
|
||||||
sinf(0.0f);
|
sinf(0.0f);
|
||||||
sinhf(0.0f);
|
sinhf(0.0f);
|
||||||
sinpif(0.0f);
|
sinpif(0.0f);
|
||||||
sqrtf(0.0f);
|
sqrtf(0.0f);
|
||||||
tanf(0.0f);
|
tanf(0.0f);
|
||||||
tanhf(0.0f);
|
tanhf(0.0f);
|
||||||
tgammaf(2.0f);
|
tgammaf(2.0f);
|
||||||
truncf(0.0f);
|
truncf(0.0f);
|
||||||
y0f(1.0f);
|
y0f(1.0f);
|
||||||
y1f(1.0f);
|
y1f(1.0f);
|
||||||
ynf(1, 1.0f);
|
ynf(1, 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void compileSinglePrecisionMathOnDevice(int) {
|
__global__ void compileSinglePrecisionMathOnDevice(int) {
|
||||||
single_precision_math_functions();
|
single_precision_math_functions();
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_SinglePrecisionMathDevice") {
|
TEST_CASE("Unit_SinglePrecisionMathDevice") {
|
||||||
hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
|
hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
|
||||||
dim3(1, 1, 1), 0, 0, 1);
|
dim3(1, 1, 1), 0, 0, 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,113 +1,113 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#pragma GCC diagnostic ignored "-Wall"
|
#pragma GCC diagnostic ignored "-Wall"
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
|
|
||||||
__host__ static void single_precision_math_functions() {
|
__host__ static void single_precision_math_functions() {
|
||||||
int iX;
|
int iX;
|
||||||
float fX, fY;
|
float fX, fY;
|
||||||
|
|
||||||
acosf(1.0f);
|
acosf(1.0f);
|
||||||
acoshf(1.0f);
|
acoshf(1.0f);
|
||||||
asinf(0.0f);
|
asinf(0.0f);
|
||||||
asinhf(0.0f);
|
asinhf(0.0f);
|
||||||
atan2f(0.0f, 1.0f);
|
atan2f(0.0f, 1.0f);
|
||||||
atanf(0.0f);
|
atanf(0.0f);
|
||||||
atanhf(0.0f);
|
atanhf(0.0f);
|
||||||
cbrtf(0.0f);
|
cbrtf(0.0f);
|
||||||
ceilf(0.0f);
|
ceilf(0.0f);
|
||||||
copysignf(1.0f, -2.0f);
|
copysignf(1.0f, -2.0f);
|
||||||
cosf(0.0f);
|
cosf(0.0f);
|
||||||
coshf(0.0f);
|
coshf(0.0f);
|
||||||
erfcf(0.0f);
|
erfcf(0.0f);
|
||||||
erff(0.0f);
|
erff(0.0f);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
exp10f(0.0f);
|
exp10f(0.0f);
|
||||||
#endif
|
#endif
|
||||||
exp2f(0.0f);
|
exp2f(0.0f);
|
||||||
expf(0.0f);
|
expf(0.0f);
|
||||||
expm1f(0.0f);
|
expm1f(0.0f);
|
||||||
fabsf(1.0f);
|
fabsf(1.0f);
|
||||||
fdimf(1.0f, 0.0f);
|
fdimf(1.0f, 0.0f);
|
||||||
floorf(0.0f);
|
floorf(0.0f);
|
||||||
fmaf(1.0f, 2.0f, 3.0f);
|
fmaf(1.0f, 2.0f, 3.0f);
|
||||||
fmaxf(0.0f, 0.0f);
|
fmaxf(0.0f, 0.0f);
|
||||||
fminf(0.0f, 0.0f);
|
fminf(0.0f, 0.0f);
|
||||||
fmodf(0.0f, 1.0f);
|
fmodf(0.0f, 1.0f);
|
||||||
frexpf(0.0f, &iX);
|
frexpf(0.0f, &iX);
|
||||||
hypotf(1.0f, 0.0f);
|
hypotf(1.0f, 0.0f);
|
||||||
ilogbf(1.0f);
|
ilogbf(1.0f);
|
||||||
std::isfinite(0.0f);
|
std::isfinite(0.0f);
|
||||||
std::isinf(0.0f);
|
std::isinf(0.0f);
|
||||||
std::isnan(0.0f);
|
std::isnan(0.0f);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
j0f(0.0f);
|
j0f(0.0f);
|
||||||
j1f(0.0f);
|
j1f(0.0f);
|
||||||
jnf(-1.0f, 1.0f);
|
jnf(-1.0f, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
ldexpf(0.0f, 0);
|
ldexpf(0.0f, 0);
|
||||||
lgammaf(1.0f);
|
lgammaf(1.0f);
|
||||||
llrintf(0.0f);
|
llrintf(0.0f);
|
||||||
llroundf(0.0f);
|
llroundf(0.0f);
|
||||||
log10f(1.0f);
|
log10f(1.0f);
|
||||||
log1pf(-1.0f);
|
log1pf(-1.0f);
|
||||||
log2f(1.0f);
|
log2f(1.0f);
|
||||||
logbf(1.0f);
|
logbf(1.0f);
|
||||||
logf(1.0f);
|
logf(1.0f);
|
||||||
lrintf(0.0f);
|
lrintf(0.0f);
|
||||||
lroundf(0.0f);
|
lroundf(0.0f);
|
||||||
modff(0.0f, &fX);
|
modff(0.0f, &fX);
|
||||||
nanf("1");
|
nanf("1");
|
||||||
nearbyintf(0.0f);
|
nearbyintf(0.0f);
|
||||||
powf(1.0f, 0.0f);
|
powf(1.0f, 0.0f);
|
||||||
remainderf(2.0f, 1.0f);
|
remainderf(2.0f, 1.0f);
|
||||||
remquof(1.0f, 2.0f, &iX);
|
remquof(1.0f, 2.0f, &iX);
|
||||||
rintf(1.0f);
|
rintf(1.0f);
|
||||||
#if HT_AMD
|
#if HT_AMD
|
||||||
fX = 1.0f;
|
fX = 1.0f;
|
||||||
#endif
|
#endif
|
||||||
roundf(0.0f);
|
roundf(0.0f);
|
||||||
/// rsqrtf(1.0f);
|
/// rsqrtf(1.0f);
|
||||||
scalblnf(0.0f, 1);
|
scalblnf(0.0f, 1);
|
||||||
scalbnf(0.0f, 1);
|
scalbnf(0.0f, 1);
|
||||||
std::signbit(1.0f);
|
std::signbit(1.0f);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
sincosf(0.0f, &fX, &fY);
|
sincosf(0.0f, &fX, &fY);
|
||||||
#endif
|
#endif
|
||||||
sinf(0.0f);
|
sinf(0.0f);
|
||||||
sinhf(0.0f);
|
sinhf(0.0f);
|
||||||
sqrtf(0.0f);
|
sqrtf(0.0f);
|
||||||
tanf(0.0f);
|
tanf(0.0f);
|
||||||
tanhf(0.0f);
|
tanhf(0.0f);
|
||||||
tgammaf(2.0f);
|
tgammaf(2.0f);
|
||||||
truncf(0.0f);
|
truncf(0.0f);
|
||||||
#ifdef __unix__
|
#ifdef __unix__
|
||||||
y0f(1.0f);
|
y0f(1.0f);
|
||||||
y1f(1.0f);
|
y1f(1.0f);
|
||||||
ynf(1, 1.0f);
|
ynf(1, 1.0f);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_SinglePrecisionMathHost") {
|
TEST_CASE("Unit_SinglePrecisionMathHost") {
|
||||||
single_precision_math_functions();
|
single_precision_math_functions();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,153 +1,153 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
|
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
// Tolerance for error
|
// Tolerance for error
|
||||||
const double tolerance = 1e-6;
|
const double tolerance = 1e-6;
|
||||||
|
|
||||||
#define LEN 64
|
#define LEN 64
|
||||||
|
|
||||||
#define ALL_FUN \
|
#define ALL_FUN \
|
||||||
OP(add) \
|
OP(add) \
|
||||||
OP(sub) \
|
OP(sub) \
|
||||||
OP(mul) \
|
OP(mul) \
|
||||||
OP(div) \
|
OP(div) \
|
||||||
OP(abs) \
|
OP(abs) \
|
||||||
OP(arg) \
|
OP(arg) \
|
||||||
OP(sin) \
|
OP(sin) \
|
||||||
OP(cos)
|
OP(cos)
|
||||||
|
|
||||||
#define OP(x) CK_##x,
|
#define OP(x) CK_##x,
|
||||||
enum CalcKind {
|
enum CalcKind {
|
||||||
ALL_FUN
|
ALL_FUN
|
||||||
};
|
};
|
||||||
#undef OP
|
#undef OP
|
||||||
|
|
||||||
#define OP(x) case CK_##x: return #x;
|
#define OP(x) case CK_##x: return #x;
|
||||||
std::string getName(enum CalcKind CK) {
|
std::string getName(enum CalcKind CK) {
|
||||||
switch (CK) {
|
switch (CK) {
|
||||||
ALL_FUN
|
ALL_FUN
|
||||||
}
|
}
|
||||||
return ""; // To prevent compile warning
|
return ""; // To prevent compile warning
|
||||||
}
|
}
|
||||||
#undef OP
|
#undef OP
|
||||||
|
|
||||||
// Calculates function.
|
// Calculates function.
|
||||||
// If the function has one argument, B is ignored.
|
// If the function has one argument, B is ignored.
|
||||||
// If the function returns real number, converts it to a complex number.
|
// If the function returns real number, converts it to a complex number.
|
||||||
#define ONE_ARG(func) \
|
#define ONE_ARG(func) \
|
||||||
case CK_##func: \
|
case CK_##func: \
|
||||||
return std::complex<FloatT>(func(A));
|
return std::complex<FloatT>(func(A));
|
||||||
|
|
||||||
template<typename FloatT>
|
template<typename FloatT>
|
||||||
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
|
__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
|
||||||
std::complex<FloatT> B,
|
std::complex<FloatT> B,
|
||||||
enum CalcKind CK) {
|
enum CalcKind CK) {
|
||||||
switch (CK) {
|
switch (CK) {
|
||||||
case CK_add:
|
case CK_add:
|
||||||
return A + B;
|
return A + B;
|
||||||
case CK_sub:
|
case CK_sub:
|
||||||
return A - B;
|
return A - B;
|
||||||
case CK_mul:
|
case CK_mul:
|
||||||
return A * B;
|
return A * B;
|
||||||
case CK_div:
|
case CK_div:
|
||||||
return A / B;
|
return A / B;
|
||||||
|
|
||||||
ONE_ARG(abs)
|
ONE_ARG(abs)
|
||||||
ONE_ARG(arg)
|
ONE_ARG(arg)
|
||||||
ONE_ARG(sin)
|
ONE_ARG(sin)
|
||||||
ONE_ARG(cos)
|
ONE_ARG(cos)
|
||||||
}
|
}
|
||||||
return A; // To prevent compile warning
|
return A; // To prevent compile warning
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FloatT>
|
template<typename FloatT>
|
||||||
__global__ void kernel(std::complex<FloatT>* A,
|
__global__ void kernel(std::complex<FloatT>* A,
|
||||||
std::complex<FloatT>* B, std::complex<FloatT>* C,
|
std::complex<FloatT>* B, std::complex<FloatT>* C,
|
||||||
enum CalcKind CK) {
|
enum CalcKind CK) {
|
||||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
C[tx] = calc<FloatT>(A[tx], B[tx], CK);
|
C[tx] = calc<FloatT>(A[tx], B[tx], CK);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename FloatT>
|
template<typename FloatT>
|
||||||
void test() {
|
void test() {
|
||||||
typedef std::complex<FloatT> ComplexT;
|
typedef std::complex<FloatT> ComplexT;
|
||||||
|
|
||||||
ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
|
ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
|
||||||
A = new ComplexT[LEN];
|
A = new ComplexT[LEN];
|
||||||
B = new ComplexT[LEN];
|
B = new ComplexT[LEN];
|
||||||
C = new ComplexT[LEN];
|
C = new ComplexT[LEN];
|
||||||
D = new ComplexT[LEN];
|
D = new ComplexT[LEN];
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
|
||||||
|
|
||||||
for (uint32_t i = 0; i < LEN; i++) {
|
for (uint32_t i = 0; i < LEN; i++) {
|
||||||
A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
|
A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
|
||||||
B[i] = A[i];
|
B[i] = A[i];
|
||||||
C[i] = A[i];
|
C[i] = A[i];
|
||||||
}
|
}
|
||||||
HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// Run kernel for a calculation kind and verify by comparing with host
|
// Run kernel for a calculation kind and verify by comparing with host
|
||||||
// calculation result. Returns false if fails.
|
// calculation result. Returns false if fails.
|
||||||
auto test_fun = [&](enum CalcKind CK) {
|
auto test_fun = [&](enum CalcKind CK) {
|
||||||
hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
|
hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
|
||||||
Ad, Bd, Cd, CK);
|
Ad, Bd, Cd, CK);
|
||||||
HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
|
||||||
bool pass = true;
|
bool pass = true;
|
||||||
for (int i = 0; i < LEN; i++) {
|
for (int i = 0; i < LEN; i++) {
|
||||||
ComplexT Expected = calc(A[i], B[i], CK);
|
ComplexT Expected = calc(A[i], B[i], CK);
|
||||||
FloatT error = abs(C[i] - Expected);
|
FloatT error = abs(C[i] - Expected);
|
||||||
if (abs(Expected) > tolerance)
|
if (abs(Expected) > tolerance)
|
||||||
error /= abs(Expected);
|
error /= abs(Expected);
|
||||||
pass &= error < tolerance;
|
pass &= error < tolerance;
|
||||||
}
|
}
|
||||||
return pass;
|
return pass;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define OP(x) assert(test_fun(CK_##x));
|
#define OP(x) assert(test_fun(CK_##x));
|
||||||
ALL_FUN
|
ALL_FUN
|
||||||
#undef OP
|
#undef OP
|
||||||
|
|
||||||
HIP_CHECK(hipFree(Ad));
|
HIP_CHECK(hipFree(Ad));
|
||||||
HIP_CHECK(hipFree(Bd));
|
HIP_CHECK(hipFree(Bd));
|
||||||
HIP_CHECK(hipFree(Cd));
|
HIP_CHECK(hipFree(Cd));
|
||||||
delete[] A;
|
delete[] A;
|
||||||
delete[] B;
|
delete[] B;
|
||||||
delete[] C;
|
delete[] C;
|
||||||
delete[] D;
|
delete[] D;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HT_AMD
|
#if HT_AMD
|
||||||
TEST_CASE("Unit_StdComplex") {
|
TEST_CASE("Unit_StdComplex") {
|
||||||
SECTION("Test run with float") {
|
SECTION("Test run with float") {
|
||||||
test<float>();
|
test<float>();
|
||||||
}
|
}
|
||||||
SECTION("Test run with double") {
|
SECTION("Test run with double") {
|
||||||
test<double>();
|
test<double>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,220 +1,220 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
Testcase Scenarios :
|
Testcase Scenarios :
|
||||||
(TestCase 1)::
|
(TestCase 1)::
|
||||||
1) Execute atomicAdd in multi threaded scenario by diverging the data across
|
1) Execute atomicAdd in multi threaded scenario by diverging the data across
|
||||||
multiple threads and validate the output at the end of all operations.
|
multiple threads and validate the output at the end of all operations.
|
||||||
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
|
2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
|
||||||
across multiple threads and validate the output at the end of all operations.
|
across multiple threads and validate the output at the end of all operations.
|
||||||
(TestCase 2)::
|
(TestCase 2)::
|
||||||
3) Execute atomicAdd API and validate the result.
|
3) Execute atomicAdd API and validate the result.
|
||||||
4) Execute atomicAddNoRet API and validate the result.
|
4) Execute atomicAddNoRet API and validate the result.
|
||||||
(TestCase 3)::
|
(TestCase 3)::
|
||||||
5) atomicadd/NoRet negative scenarios (TBD).
|
5) atomicadd/NoRet negative scenarios (TBD).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
/*
|
/*
|
||||||
* Defines initial and increment values
|
* Defines initial and increment values
|
||||||
*/
|
*/
|
||||||
#define INCREMENT_VALUE 10
|
#define INCREMENT_VALUE 10
|
||||||
#define INT_INITIAL_VALUE 10
|
#define INT_INITIAL_VALUE 10
|
||||||
#define FLOAT_INITIAL_VALUE 10.50
|
#define FLOAT_INITIAL_VALUE 10.50
|
||||||
#define DOUBLE_INITIAL_VALUE 200.12
|
#define DOUBLE_INITIAL_VALUE 200.12
|
||||||
#define LONG_INITIAL_VALUE 10000
|
#define LONG_INITIAL_VALUE 10000
|
||||||
#define UNSIGNED_INITIAL_VALUE 20
|
#define UNSIGNED_INITIAL_VALUE 20
|
||||||
|
|
||||||
#if HT_NVIDIA
|
#if HT_NVIDIA
|
||||||
// atomicAddNoRet is unavailable in cuda
|
// atomicAddNoRet is unavailable in cuda
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ void atomicAddNoRet(T* x, int y) {
|
__device__ void atomicAddNoRet(T* x, int y) {
|
||||||
atomicAdd(x, static_cast<T>(y));
|
atomicAdd(x, static_cast<T>(y));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool p_atomicNoRet = false;
|
bool p_atomicNoRet = false;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void atomicnoret_manywaves(T* C_d) {
|
__global__ void atomicnoret_manywaves(T* C_d) {
|
||||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void atomic_manywaves(T* C_d) {
|
__global__ void atomic_manywaves(T* C_d) {
|
||||||
atomicAdd(C_d, INCREMENT_VALUE);
|
atomicAdd(C_d, INCREMENT_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void atomicnoret_simple(T* C_d) {
|
__global__ void atomicnoret_simple(T* C_d) {
|
||||||
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
atomicAddNoRet(C_d, INCREMENT_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void atomic_simple(T* C_d) {
|
__global__ void atomic_simple(T* C_d) {
|
||||||
atomicAdd(C_d, INCREMENT_VALUE);
|
atomicAdd(C_d, INCREMENT_VALUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool atomictest_manywaves(const T& initial_val) {
|
bool atomictest_manywaves(const T& initial_val) {
|
||||||
unsigned int ThreadsperBlock = 10;
|
unsigned int ThreadsperBlock = 10;
|
||||||
unsigned int numBlocks = 1;
|
unsigned int numBlocks = 1;
|
||||||
T memSize = sizeof(T);
|
T memSize = sizeof(T);
|
||||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||||
*hOData = initial_val;
|
*hOData = initial_val;
|
||||||
T* dOData;
|
T* dOData;
|
||||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||||
// copy host memory to device to initialize to zero
|
// copy host memory to device to initialize to zero
|
||||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// execute the kernel
|
// execute the kernel
|
||||||
hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
|
hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
|
||||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||||
|
|
||||||
// Copy result from device to host
|
// Copy result from device to host
|
||||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||||
REQUIRE(hOData[0] == initial_val+
|
REQUIRE(hOData[0] == initial_val+
|
||||||
static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||||
|
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
free(hOData);
|
free(hOData);
|
||||||
HIP_CHECK(hipFree(dOData));
|
HIP_CHECK(hipFree(dOData));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool atomictestnoret_manywaves(const T& initial_val) {
|
bool atomictestnoret_manywaves(const T& initial_val) {
|
||||||
unsigned int ThreadsperBlock = 10;
|
unsigned int ThreadsperBlock = 10;
|
||||||
unsigned int numBlocks = 1;
|
unsigned int numBlocks = 1;
|
||||||
T memSize = sizeof(T);
|
T memSize = sizeof(T);
|
||||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||||
*hOData = initial_val;
|
*hOData = initial_val;
|
||||||
T* dOData;
|
T* dOData;
|
||||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||||
// copy host memory to device to initialize to zero
|
// copy host memory to device to initialize to zero
|
||||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// execute the kernel
|
// execute the kernel
|
||||||
hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
|
hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
|
||||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||||
|
|
||||||
// Copy result from device to host
|
// Copy result from device to host
|
||||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||||
REQUIRE(hOData[0] == initial_val+
|
REQUIRE(hOData[0] == initial_val+
|
||||||
(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
|
||||||
|
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
free(hOData);
|
free(hOData);
|
||||||
HIP_CHECK(hipFree(dOData));
|
HIP_CHECK(hipFree(dOData));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool atomictest_simple(const T& initial_val) {
|
bool atomictest_simple(const T& initial_val) {
|
||||||
unsigned int ThreadsperBlock = 1;
|
unsigned int ThreadsperBlock = 1;
|
||||||
unsigned int numBlocks = 1;
|
unsigned int numBlocks = 1;
|
||||||
T memSize = sizeof(T);
|
T memSize = sizeof(T);
|
||||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||||
*hOData = initial_val;
|
*hOData = initial_val;
|
||||||
T* dOData;
|
T* dOData;
|
||||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||||
// copy host memory to device to initialize to zero
|
// copy host memory to device to initialize to zero
|
||||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// execute the kernel
|
// execute the kernel
|
||||||
hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
|
hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
|
||||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||||
|
|
||||||
// Copy result from device to host
|
// Copy result from device to host
|
||||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||||
|
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
free(hOData);
|
free(hOData);
|
||||||
HIP_CHECK(hipFree(dOData));
|
HIP_CHECK(hipFree(dOData));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool atomictestnoret_simple(const T& initial_val) {
|
bool atomictestnoret_simple(const T& initial_val) {
|
||||||
unsigned int ThreadsperBlock = 1;
|
unsigned int ThreadsperBlock = 1;
|
||||||
unsigned int numBlocks = 1;
|
unsigned int numBlocks = 1;
|
||||||
T memSize = sizeof(T);
|
T memSize = sizeof(T);
|
||||||
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
T* hOData = reinterpret_cast<T*>(malloc(memSize));
|
||||||
*hOData = initial_val;
|
*hOData = initial_val;
|
||||||
T* dOData;
|
T* dOData;
|
||||||
HIP_CHECK(hipMalloc(&dOData, memSize));
|
HIP_CHECK(hipMalloc(&dOData, memSize));
|
||||||
// copy host memory to device to initialize to zero
|
// copy host memory to device to initialize to zero
|
||||||
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// execute the kernel
|
// execute the kernel
|
||||||
hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
|
hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
|
||||||
dim3(ThreadsperBlock), 0, 0, dOData);
|
dim3(ThreadsperBlock), 0, 0, dOData);
|
||||||
|
|
||||||
// Copy result from device to host
|
// Copy result from device to host
|
||||||
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
|
||||||
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
|
||||||
|
|
||||||
// Cleanup memory
|
// Cleanup memory
|
||||||
free(hOData);
|
free(hOData);
|
||||||
HIP_CHECK(hipFree(dOData));
|
HIP_CHECK(hipFree(dOData));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_hipTestAtomicAdd") {
|
TEST_CASE("Unit_hipTestAtomicAdd") {
|
||||||
bool TestPassed = true;
|
bool TestPassed = true;
|
||||||
|
|
||||||
SECTION("atomic tests with many waves") {
|
SECTION("atomic tests with many waves") {
|
||||||
REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
|
atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
|
||||||
#if HT_AMD
|
#if HT_AMD
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
|
atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
|
atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
SECTION("atomic tests with many waves and no return") {
|
SECTION("atomic tests with many waves and no return") {
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
|
atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
|
||||||
}
|
}
|
||||||
SECTION("simple atomic tests") {
|
SECTION("simple atomic tests") {
|
||||||
REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
|
atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
|
||||||
#if HT_AMD
|
#if HT_AMD
|
||||||
REQUIRE(TestPassed ==
|
REQUIRE(TestPassed ==
|
||||||
atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
|
atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
|
||||||
REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
SECTION("Simple atomic test with no return") {
|
SECTION("Simple atomic test with no return") {
|
||||||
REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
|
REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,51 +1,51 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
|
|
||||||
#define LEN 512
|
#define LEN 512
|
||||||
#define SIZE (LEN * sizeof(int64_t))
|
#define SIZE (LEN * sizeof(int64_t))
|
||||||
|
|
||||||
static __global__ void kernel1(int64_t* Ad) {
|
static __global__ void kernel1(int64_t* Ad) {
|
||||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
Ad[tid] = clock() + clock64() + __clock() + __clock64();
|
Ad[tid] = clock() + clock64() + __clock() + __clock64();
|
||||||
}
|
}
|
||||||
|
|
||||||
static __global__ void kernel2(int64_t* Ad) {
|
static __global__ void kernel2(int64_t* Ad) {
|
||||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid];
|
Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid];
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_CASE("Unit_hipTestClock") {
|
TEST_CASE("Unit_hipTestClock") {
|
||||||
int64_t *A, *Ad;
|
int64_t *A, *Ad;
|
||||||
A = new int64_t[LEN];
|
A = new int64_t[LEN];
|
||||||
for (unsigned i = 0; i < LEN; i++) {
|
for (unsigned i = 0; i < LEN; i++) {
|
||||||
A[i] = 0;
|
A[i] = 0;
|
||||||
}
|
}
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
|
||||||
HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
|
hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
|
||||||
hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
|
hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
|
||||||
HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||||
for (unsigned i = 0; i < LEN; i++) {
|
for (unsigned i = 0; i < LEN; i++) {
|
||||||
assert(0 != A[i]);
|
assert(0 != A[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,88 +1,88 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#include "error_handling_common.hh"
|
#include "error_handling_common.hh"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
|
* @addtogroup hipDrvGetErrorName hipDrvGetErrorName
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ErrorTest
|
* @ingroup ErrorTest
|
||||||
* `hipDrvGetErrorName(hipError_t hip_error)` -
|
* `hipDrvGetErrorName(hipError_t hip_error)` -
|
||||||
* Return hip error as text string form.
|
* Return hip error as text string form.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Validate that the correct string is returned for each supported
|
* - Validate that the correct string is returned for each supported
|
||||||
* device error enumeration.
|
* device error enumeration.
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.4
|
* - HIP_VERSION >= 5.4
|
||||||
*/
|
*/
|
||||||
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
|
TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
|
||||||
const char* error_string = nullptr;
|
const char* error_string = nullptr;
|
||||||
const auto enumerator =
|
const auto enumerator =
|
||||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||||
INFO("Error: " << enumerator);
|
INFO("Error: " << enumerator);
|
||||||
|
|
||||||
HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
|
HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
|
||||||
|
|
||||||
REQUIRE(error_string != nullptr);
|
REQUIRE(error_string != nullptr);
|
||||||
REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
|
REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Validate handling of invalid arguments:
|
* - Validate handling of invalid arguments:
|
||||||
* -# When error enumerator is invalid (-1)
|
* -# When error enumerator is invalid (-1)
|
||||||
* - AMD expected output: return "hipErrorUnknown"
|
* - AMD expected output: return "hipErrorUnknown"
|
||||||
* - NVIDIA expected output: return "cudaErrorUnknown"
|
* - NVIDIA expected output: return "cudaErrorUnknown"
|
||||||
* -# When nullptr is passed as store location
|
* -# When nullptr is passed as store location
|
||||||
* - Expected output: return "hipErrorInvalidValue"
|
* - Expected output: return "hipErrorInvalidValue"
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - unit/errorHandling/hipDrvGetErrorName.cc
|
* - unit/errorHandling/hipDrvGetErrorName.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.4
|
* - HIP_VERSION >= 5.4
|
||||||
*/
|
*/
|
||||||
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
|
TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
|
||||||
const char* error_string = nullptr;
|
const char* error_string = nullptr;
|
||||||
SECTION("pass unknown value to hipError") {
|
SECTION("pass unknown value to hipError") {
|
||||||
HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
|
HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
|
||||||
hipErrorInvalidValue);
|
hipErrorInvalidValue);
|
||||||
}
|
}
|
||||||
#if HT_AMD // segfaults on NVIDIA
|
#if HT_AMD // segfaults on NVIDIA
|
||||||
SECTION("pass nullptr to error string") {
|
SECTION("pass nullptr to error string") {
|
||||||
HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
|
HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group ErrorTest.
|
* End doxygen group ErrorTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,88 +1,88 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#include "error_handling_common.hh"
|
#include "error_handling_common.hh"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
|
* @addtogroup hipDrvGetErrorString hipDrvGetErrorString
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ErrorTest
|
* @ingroup ErrorTest
|
||||||
* `hipDrvGetErrorString(hipError_t hipError)` -
|
* `hipDrvGetErrorString(hipError_t hipError)` -
|
||||||
* Return handy text string message to explain the error which occurred.
|
* Return handy text string message to explain the error which occurred.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Validate that the correct string is returned for each supported
|
* - Validate that the correct string is returned for each supported
|
||||||
* device error enumeration.
|
* device error enumeration.
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.4
|
* - HIP_VERSION >= 5.4
|
||||||
*/
|
*/
|
||||||
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
|
TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
|
||||||
const char* error_string = nullptr;
|
const char* error_string = nullptr;
|
||||||
const auto enumerator =
|
const auto enumerator =
|
||||||
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
|
||||||
INFO("Error: " << enumerator);
|
INFO("Error: " << enumerator);
|
||||||
|
|
||||||
HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
|
HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
|
||||||
|
|
||||||
REQUIRE(error_string != nullptr);
|
REQUIRE(error_string != nullptr);
|
||||||
REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
|
REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Validate handling of invalid arguments:
|
* - Validate handling of invalid arguments:
|
||||||
* -# When error enumerator is invalid (-1)
|
* -# When error enumerator is invalid (-1)
|
||||||
* - Expected output: return "hipErrorInvalidValue"
|
* - Expected output: return "hipErrorInvalidValue"
|
||||||
* -# When nullptr is passed as store location
|
* -# When nullptr is passed as store location
|
||||||
* - Expected output: return "hipErrorInvalidValue"
|
* - Expected output: return "hipErrorInvalidValue"
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - unit/errorHandling/hipDrvGetErrorString.cc
|
* - unit/errorHandling/hipDrvGetErrorString.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.4
|
* - HIP_VERSION >= 5.4
|
||||||
*/
|
*/
|
||||||
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
|
TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
|
||||||
const char* error_string = nullptr;
|
const char* error_string = nullptr;
|
||||||
SECTION("pass unknown value to hipError") {
|
SECTION("pass unknown value to hipError") {
|
||||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
|
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
|
||||||
hipErrorInvalidValue);
|
hipErrorInvalidValue);
|
||||||
}
|
}
|
||||||
#if HT_AMD // segfaults on NVIDIA
|
#if HT_AMD // segfaults on NVIDIA
|
||||||
SECTION("pass nullptr to error string") {
|
SECTION("pass nullptr to error string") {
|
||||||
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
|
HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
|
||||||
hipErrorInvalidValue);
|
hipErrorInvalidValue);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group ErrorTest.
|
* End doxygen group ErrorTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,19 +1,19 @@
|
|||||||
# AMD specific test
|
# AMD specific test
|
||||||
if(HIP_PLATFORM MATCHES "amd")
|
if(HIP_PLATFORM MATCHES "amd")
|
||||||
if(UNIX)
|
if(UNIX)
|
||||||
set(TEST_SRC
|
set(TEST_SRC
|
||||||
hipMalloc.cc
|
hipMalloc.cc
|
||||||
)
|
)
|
||||||
# Creating Custom object file
|
# Creating Custom object file
|
||||||
add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
|
add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
|
||||||
add_library(malloc_gpp OBJECT IMPORTED)
|
add_library(malloc_gpp OBJECT IMPORTED)
|
||||||
set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
|
set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
|
||||||
|
|
||||||
hip_add_exe_to_target(NAME gppTests
|
hip_add_exe_to_target(NAME gppTests
|
||||||
TEST_SRC ${TEST_SRC}
|
TEST_SRC ${TEST_SRC}
|
||||||
TEST_TARGET_NAME build_tests
|
TEST_TARGET_NAME build_tests
|
||||||
LINKER_LIBS malloc_gpp)
|
LINKER_LIBS malloc_gpp)
|
||||||
|
|
||||||
add_dependencies(gppTests malloc_custom)
|
add_dependencies(gppTests malloc_custom)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -1,54 +1,54 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
* in the Software without restriction, including without limitation the rights
|
* in the Software without restriction, including without limitation the rights
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
* furnished to do so, subject to the following conditions:
|
* furnished to do so, subject to the following conditions:
|
||||||
* The above copyright notice and this permission notice shall be included in
|
* The above copyright notice and this permission notice shall be included in
|
||||||
* all copies or substantial portions of the Software.
|
* all copies or substantial portions of the Software.
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
* THE SOFTWARE.
|
* THE SOFTWARE.
|
||||||
* */
|
* */
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#include "hipMalloc.h"
|
#include "hipMalloc.h"
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipMalloc hipMalloc
|
* @addtogroup hipMalloc hipMalloc
|
||||||
* @{
|
* @{
|
||||||
* @ingroup MemoryTest
|
* @ingroup MemoryTest
|
||||||
* `hipError_t hipMalloc(void** ptr, size_t size)` -
|
* `hipError_t hipMalloc(void** ptr, size_t size)` -
|
||||||
* Allocate memory on the default accelerator.
|
* Allocate memory on the default accelerator.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
|
* - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/g++/hipMalloc.cc
|
* - catch/unit/g++/hipMalloc.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Calls the externally defined MallocFunc() and passes when it returns 1.
TEST_CASE("Unit_hipMalloc_gpptest") {
  printf("calling cpp function from here\n");

  const auto result = MallocFunc();
  REQUIRE(result == 1);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group MemoryTest.
|
* End doxygen group MemoryTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,22 +1,22 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
* in the Software without restriction, including without limitation the rights
|
* in the Software without restriction, including without limitation the rights
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
* furnished to do so, subject to the following conditions:
|
* furnished to do so, subject to the following conditions:
|
||||||
* The above copyright notice and this permission notice shall be included in
|
* The above copyright notice and this permission notice shall be included in
|
||||||
* all copies or substantial portions of the Software.
|
* all copies or substantial portions of the Software.
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
* THE SOFTWARE.
|
* THE SOFTWARE.
|
||||||
* */
|
* */
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
/**
 * Declaration only; the definition is not in this file. Returns an int status
 * code (the Catch2 g++ test in this change treats 1 as success; confirm
 * against the implementation).
 */
int MallocFunc();
|
||||||
@@ -1,28 +1,28 @@
|
|||||||
# gcc interoperability tests, built only for the AMD platform on UNIX hosts.
# The C sources are compiled with the host gcc and the resulting objects are
# linked into the gccTests executable.
if(HIP_PLATFORM MATCHES "amd")
  if(UNIX)
    set(TEST_SRC
        gccTest.cc
        gpu.cpp
    )

    # Build the C objects with gcc. DEPENDS ties each object to its source so
    # the object is rebuilt whenever the C file changes instead of going stale.
    add_custom_command(OUTPUT LaunchKernel.o
      COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o
      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c)
    add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)

    add_custom_command(OUTPUT hipMalloc.o
      COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o
      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c)
    add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)

    # Wrap the prebuilt objects in imported OBJECT libraries so they can be
    # passed to the test executable as link inputs.
    add_library(LaunchKernel_lib OBJECT IMPORTED)
    add_library(hipMalloc_lib OBJECT IMPORTED)

    set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
    set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")

    hip_add_exe_to_target(NAME gccTests
                          TEST_SRC ${TEST_SRC}
                          TEST_TARGET_NAME build_tests
                          LINKER_LIBS LaunchKernel_lib hipMalloc_lib)

    # Both objects have to be produced before gccTests is linked.
    add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
  endif()
endif()
|
||||||
|
|||||||
@@ -1,64 +1,64 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
* in the Software without restriction, including without limitation the rights
|
* in the Software without restriction, including without limitation the rights
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
* furnished to do so, subject to the following conditions:
|
* furnished to do so, subject to the following conditions:
|
||||||
* The above copyright notice and this permission notice shall be included in
|
* The above copyright notice and this permission notice shall be included in
|
||||||
* all copies or substantial portions of the Software.
|
* all copies or substantial portions of the Software.
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
* THE SOFTWARE.
|
* THE SOFTWARE.
|
||||||
* */
|
* */
|
||||||
|
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include "LaunchKernel.h"
|
#include "LaunchKernel.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - calling launchKernel which is c function from catch2
|
* - calling launchKernel which is c function from catch2
|
||||||
* and compile with gcc compiler and verify the results.
|
* and compile with gcc compiler and verify the results.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/gcc/gccTest.cc
|
* - catch/unit/gcc/gccTest.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Calls the C function launchKernel() and passes when it returns 1.
TEST_CASE("Unit_LaunchKernelgccTests") {
  printf("Calling launchKernel files from here\n");

  const auto result = launchKernel();
  REQUIRE(result == 1);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Calling hipMalloc which is c file from catch2 and compile
|
* - Calling hipMalloc which is c file from catch2 and compile
|
||||||
* with gcc compiler and verify the results.
|
* with gcc compiler and verify the results.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/gcc/gccTest.cc
|
* - catch/unit/gcc/gccTest.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Calls the C function hipMallocfunc() and passes when it returns 1.
TEST_CASE("Unit_hipMallocgccTests") {
  printf("Calling hipMalloc files from here\n");

  const auto result = hipMallocfunc();
  REQUIRE(result == 1);
}
|
||||||
|
|||||||
@@ -1,176 +1,176 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||||
|
|
||||||
// Block size used for every kernel launch in this file. Kept file-local and
// constant so it cannot collide with a same-named global in another test
// source linked into the same executable.
static constexpr unsigned threadsPerBlock = 256;
|
||||||
|
|
||||||
template <unsigned batch, typename T>
|
template <unsigned batch, typename T>
|
||||||
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
|
__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
|
||||||
T tmp;
|
T tmp;
|
||||||
if (groupElements < batch)
|
if (groupElements < batch)
|
||||||
return;
|
return;
|
||||||
// sdata[tid] += sdata[tid - batch/2] does not work when block size is
|
// sdata[tid] += sdata[tid - batch/2] does not work when block size is
|
||||||
// greater than wave size because one wave may complete before another
|
// greater than wave size because one wave may complete before another
|
||||||
// wave.
|
// wave.
|
||||||
if (tid >= batch/2 && tid < groupElements)
|
if (tid >= batch/2 && tid < groupElements)
|
||||||
tmp = sdata[tid - batch/2];
|
tmp = sdata[tid - batch/2];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if (tid >= batch/2 && tid < groupElements)
|
if (tid >= batch/2 && tid < groupElements)
|
||||||
sdata[tid] += tmp;
|
sdata[tid] += tmp;
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
|
__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
|
||||||
size_t numElements, size_t groupElements) {
|
size_t numElements, size_t groupElements) {
|
||||||
// declare dynamic shared memory
|
// declare dynamic shared memory
|
||||||
extern __shared__ double sdata0[];
|
extern __shared__ double sdata0[];
|
||||||
T* sdata = reinterpret_cast<T *>(sdata0);
|
T* sdata = reinterpret_cast<T *>(sdata0);
|
||||||
|
|
||||||
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||||
size_t tid = threadIdx.x;
|
size_t tid = threadIdx.x;
|
||||||
|
|
||||||
// initialize dynamic shared memory
|
// initialize dynamic shared memory
|
||||||
if (tid < groupElements) {
|
if (tid < groupElements) {
|
||||||
sdata[tid] = static_cast<T>(tid);
|
sdata[tid] = static_cast<T>(tid);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
// prefix sum inside dynamic shared memory
|
// prefix sum inside dynamic shared memory
|
||||||
sum<512>(sdata, groupElements, tid);
|
sum<512>(sdata, groupElements, tid);
|
||||||
sum<256>(sdata, groupElements, tid);
|
sum<256>(sdata, groupElements, tid);
|
||||||
sum<128>(sdata, groupElements, tid);
|
sum<128>(sdata, groupElements, tid);
|
||||||
sum<64>(sdata, groupElements, tid);
|
sum<64>(sdata, groupElements, tid);
|
||||||
sum<32>(sdata, groupElements, tid);
|
sum<32>(sdata, groupElements, tid);
|
||||||
sum<16>(sdata, groupElements, tid);
|
sum<16>(sdata, groupElements, tid);
|
||||||
sum<8>(sdata, groupElements, tid);
|
sum<8>(sdata, groupElements, tid);
|
||||||
sum<4>(sdata, groupElements, tid);
|
sum<4>(sdata, groupElements, tid);
|
||||||
sum<2>(sdata, groupElements, tid);
|
sum<2>(sdata, groupElements, tid);
|
||||||
C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
|
C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void testExternShared(size_t N, unsigned groupElements) {
|
void testExternShared(size_t N, unsigned groupElements) {
|
||||||
size_t Nbytes = N * sizeof(T);
|
size_t Nbytes = N * sizeof(T);
|
||||||
|
|
||||||
T *A_d, *B_d, *C_d;
|
T *A_d, *B_d, *C_d;
|
||||||
T *A_h, *B_h, *C_h;
|
T *A_h, *B_h, *C_h;
|
||||||
|
|
||||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||||
unsigned blocks = N/threadsPerBlock;
|
unsigned blocks = N/threadsPerBlock;
|
||||||
assert(N == blocks * threadsPerBlock);
|
assert(N == blocks * threadsPerBlock);
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// calculate the amount of dynamic shared memory required
|
// calculate the amount of dynamic shared memory required
|
||||||
size_t groupMemBytes = groupElements * sizeof(T);
|
size_t groupMemBytes = groupElements * sizeof(T);
|
||||||
|
|
||||||
// launch kernel with dynamic shared memory
|
// launch kernel with dynamic shared memory
|
||||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
|
hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
|
||||||
dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
|
dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
|
||||||
N, groupElements);
|
N, groupElements);
|
||||||
|
|
||||||
HIP_CHECK(hipDeviceSynchronize());
|
HIP_CHECK(hipDeviceSynchronize());
|
||||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
// verify
|
// verify
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
size_t tid = (i % min(threadsPerBlock, groupElements));
|
size_t tid = (i % min(threadsPerBlock, groupElements));
|
||||||
T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
|
T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
|
||||||
T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
|
T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
|
||||||
REQUIRE(C_h[i] == expected);
|
REQUIRE(C_h[i] == expected);
|
||||||
}
|
}
|
||||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invoke kernel functions
|
* Method to invoke kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - launch kernel with dynamic shared memory for float and double
|
* - launch kernel with dynamic shared memory for float and double
|
||||||
* datatypes and verify the results.
|
* datatypes and verify the results.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipDynamicShared.cc
|
* - catch/unit/kernel/hipDynamicShared.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Runs testExternShared for float and double with 1024 and 65536 elements,
// using 4, 8, 16, 32 and 64 shared-memory elements, and once more for float
// with the maximum shared memory per block reported by device 0.
TEST_CASE("Unit_hipDynamicShared") {
  SECTION("test case with float for least size") {
    for (unsigned groupElements = 4; groupElements <= 64; groupElements *= 2) {
      testExternShared<float>(1024, groupElements);
    }
  }

  SECTION("test case with float for max size") {
    for (unsigned groupElements = 4; groupElements <= 64; groupElements *= 2) {
      testExternShared<float>(65536, groupElements);
    }
  }

  SECTION("test case with double for least size") {
    for (unsigned groupElements = 4; groupElements <= 64; groupElements *= 2) {
      testExternShared<double>(1024, groupElements);
    }
  }

  SECTION("test case with double for max size") {
    for (unsigned groupElements = 4; groupElements <= 64; groupElements *= 2) {
      testExternShared<double>(65536, groupElements);
    }
  }

  SECTION("test case with float for max LDS size") {
    int maxLDS = 0;
    HIP_CHECK(hipDeviceGetAttribute(
        &maxLDS, hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
    testExternShared<float>(1024, maxLDS/sizeof(float));
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,94 +1,94 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
#define LEN (16 * 1024)
|
#define LEN (16 * 1024)
|
||||||
#define SIZE (LEN * sizeof(float))
|
#define SIZE (LEN * sizeof(float))
|
||||||
|
|
||||||
__global__ void vectorAdd(float* Ad, float* Bd) {
|
__global__ void vectorAdd(float* Ad, float* Bd) {
|
||||||
extern __shared__ float sBd[];
|
extern __shared__ float sBd[];
|
||||||
int tx = threadIdx.x;
|
int tx = threadIdx.x;
|
||||||
for (int i = 0; i < LEN / 64; i++) {
|
for (int i = 0; i < LEN / 64; i++) {
|
||||||
sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
|
sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
|
||||||
Bd[tx + i * 64] = sBd[tx + i * 64];
|
Bd[tx + i * 64] = sBd[tx + i * 64];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invoke kernel functions
|
* Method to invoke kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Assign max dynamic shared memory to kernel function and
|
* - Assign max dynamic shared memory to kernel function and
|
||||||
* verify the results.
|
* verify the results.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipDynamicShared2.cc
|
* - catch/unit/kernel/hipDynamicShared2.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Raises the kernel's dynamic shared memory limit to SIZE bytes, launches
// vectorAdd with that much dynamic shared memory, and checks every output
// element on the host.
TEST_CASE("Unit_hipDynamicShared2") {
  float *A, *B, *Ad, *Bd;
  A = new float[LEN];
  B = new float[LEN];
  for (int i = 0; i < LEN; i++) {
    A[i] = 1.0f;
    B[i] = 1.0f;
  }
  HIP_CHECK(hipMalloc(&Ad, SIZE));
  HIP_CHECK(hipMalloc(&Bd, SIZE));
  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));

  hipError_t ret = hipFuncSetAttribute(
      reinterpret_cast<const void*>(&vectorAdd),
      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);

  REQUIRE(ret == hipSuccess);
  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
  for (int i = 0; i < LEN; i++) {
    // REQUIRE instead of assert: assert compiles away under NDEBUG, which
    // would leave the kernel output unverified. The range is split into two
    // checks because Catch2 assertions do not accept an unparenthesized &&.
    REQUIRE(B[i] > 1.0f);
    REQUIRE(B[i] < 3.0f);
  }
  HIP_CHECK(hipFree(Ad));
  HIP_CHECK(hipFree(Bd));

  delete[] A;
  delete[] B;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,59 +1,59 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
#pragma clang diagnostic ignored "-Wunused-parameter"
|
#pragma clang diagnostic ignored "-Wunused-parameter"
|
||||||
|
|
||||||
// Kernel with an intentionally empty body; `param` is accepted and ignored.
__global__ void Empty(int param) {
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invoke kernel functions
|
* Method to invoke kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - pass empty Kernel function.
|
* - pass empty Kernel function.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipEmptyKernel.cc
|
* - catch/unit/kernel/hipEmptyKernel.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_hipEmptyKernel") {
  hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
  // The launch macro returns nothing, so query the launch status explicitly
  // before waiting for the kernel to finish.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipDeviceSynchronize());
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,138 +1,138 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
// Test the Grid_Launch syntax.
|
// Test the Grid_Launch syntax.
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#include "hip/hip_ext.h"
|
#include "hip/hip_ext.h"
|
||||||
|
|
||||||
// Launch configuration inputs handed to HipTest::setNumBlocks() in test().
static unsigned int threadsPerBlock = 256;
static unsigned int blocksPerCU = 6;
|
||||||
|
|
||||||
// Aggregate of ten doubles that is passed by value to sKernel.
typedef struct _t {
  double _a;
  double _b;
  double _c;
  double _d;
  double _e;
  double _f;
  double _g;
  double _h;
  double _i;
  double _j;
} _T;
|
||||||
|
|
||||||
// Kernel: writes the sum of the ten fields of `s` (added in declaration order)
// through `a`.
__global__ void sKernel(_T s, double *a) {
  double total = s._a + s._b;
  total = total + s._c;
  total = total + s._d;
  total = total + s._e;
  total = total + s._f;
  total = total + s._g;
  total = total + s._h;
  total = total + s._i;
  total = total + s._j;
  *a = total;
}
|
||||||
|
|
||||||
// Kernel: adds its six scalar arguments, which have mixed integer and
// floating-point types, and stores the result through `res`.
__global__ void mKernel(char f, int16_t a, int b, double c,
                        int16_t d, int e, double* res) {
  // a + b is evaluated in int before the double operand joins, as in a single
  // left-to-right expression.
  const int intPart = a + b;
  double total = intPart + c;
  total = total + d;
  total = total + e;
  total = total + f;
  *res = total;
}
|
||||||
|
|
||||||
void testMixData() {
|
void testMixData() {
|
||||||
double m = 0;
|
double m = 0;
|
||||||
double *d_m;
|
double *d_m;
|
||||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||||
int a = 1, e = 10;
|
int a = 1, e = 10;
|
||||||
int16_t b = 2, d = 4;
|
int16_t b = 2, d = 4;
|
||||||
double c = 3.0;
|
double c = 3.0;
|
||||||
char ff = 10;
|
char ff = 10;
|
||||||
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
|
hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
|
||||||
b, a, c, d, e, d_m);
|
b, a, c, d, e, d_m);
|
||||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||||
REQUIRE(m == 30.0);
|
REQUIRE(m == 30.0);
|
||||||
HIP_CHECK(hipFree(d_m));
|
HIP_CHECK(hipFree(d_m));
|
||||||
}
|
}
|
||||||
|
|
||||||
void testStruct() {
|
void testStruct() {
|
||||||
double m = 0;
|
double m = 0;
|
||||||
double *d_m;
|
double *d_m;
|
||||||
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
|
||||||
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
_T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
|
||||||
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
|
hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
|
||||||
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
|
||||||
REQUIRE(m == 55.0);
|
REQUIRE(m == 55.0);
|
||||||
HIP_CHECK(hipFree(d_m));
|
HIP_CHECK(hipFree(d_m));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs HipTest::vectorADD over N ints via hipExtLaunchKernelGGL and verifies
// the result with HipTest::checkVectorADD.
void test(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;

  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));

  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
                        static_cast<const int*>(A_d),
                        static_cast<const int*>(B_d), C_d, N);
  // Surface launch failures instead of validating stale output.
  HIP_CHECK(hipGetLastError());

  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the six buffers created by initArrays.
  // NOTE(review): assumes HipTest::freeArrays(..., usePinnedHost=false) is the
  // counterpart of the initArrays call above - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
|
* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream,
|
std::uint32_t sharedMemBytes, hipStream_t stream,
|
||||||
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
|
||||||
Args... args)` -
|
Args... args)` -
|
||||||
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
|
* Launches kernel with dimension parameters and shared memory on stream with templated kernel and arguments
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify sample array with hipExtLaunchKernelGGL()
|
* - Test case to verify sample array with hipExtLaunchKernelGGL()
|
||||||
* and verify the results.
|
* and verify the results.
|
||||||
* - Test case to verify struct data with hipExtLaunchKernelGGL()
|
* - Test case to verify struct data with hipExtLaunchKernelGGL()
|
||||||
* and verify the results.
|
* and verify the results.
|
||||||
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
|
* - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
|
||||||
* and verify the results.
|
* and verify the results.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
|
* - catch/unit/kernel/hipExtLaunchKernelGGL.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_hipExtLaunchKernelGGL") {
  // Vector-add scenario over 4 * 1024 * 1024 elements.
  SECTION("test run") {
    const size_t elementCount = 4 * 1024 * 1024;
    test(elementCount);
  }

  // By-value struct argument scenario.
  SECTION("testStruct run") { testStruct(); }

  // Mixed scalar argument types scenario.
  SECTION("testMixData run") { testMixData(); }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,122 +1,122 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
// Test the Grid_Launch syntax.
|
// Test the Grid_Launch syntax.
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
// Launch configuration inputs handed to HipTest::setNumBlocks() and used as the
// block dimension by the launches in this file.
static unsigned int threadsPerBlock = 256;
static unsigned int blocksPerCU = 6;
|
||||||
|
|
||||||
// __device__ maps to __attribute__((hc))
|
// __device__ maps to __attribute__((hc))
|
||||||
// Device-side helper: returns its argument plus one.
__device__ int foo(int i) {
  const int next = i + 1;
  return next;
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
|
__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
|
||||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||||
size_t stride = blockDim.x * gridDim.x;
|
size_t stride = blockDim.x * gridDim.x;
|
||||||
|
|
||||||
for (size_t i = offset; i < N; i += stride) {
|
for (size_t i = offset; i < N; i += stride) {
|
||||||
double foo = __hiloint2double(A_d[i], B_d[i]);
|
double foo = __hiloint2double(A_d[i], B_d[i]);
|
||||||
C_d[i] = __double2loint(foo) + __double2hiint(foo);
|
C_d[i] = __double2loint(foo) + __double2hiint(foo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Runs vectorADD2 over N ints with hipLaunchKernelGGL and verifies the result
// with HipTest::checkVectorADD. Always returns 0; failures are reported through
// the HIP_CHECK / checker macros.
int test_gl2(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);

  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
                     0, 0, A_d, B_d, C_d, N);
  // Surface launch failures instead of validating stale output.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the six buffers created by initArrays.
  // NOTE(review): assumes HipTest::freeArrays(..., usePinnedHost=false) is the
  // counterpart of the initArrays call above - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
|
||||||
|
|
||||||
#if __HIP__
|
#if __HIP__
|
||||||
// Runs vectorADD2 over N ints using the <<<...>>> launch syntax and verifies the
// result with HipTest::checkVectorADD. Always returns 0; failures are reported
// through the HIP_CHECK / checker macros.
int test_triple_chevron(size_t N) {
  size_t Nbytes = N * sizeof(int);
  int *A_d, *B_d, *C_d;
  int *A_h, *B_h, *C_h;
  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);

  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
  // Full vadd in one large chunk, to get things started:
  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
  // Surface launch failures instead of validating stale output.
  HIP_CHECK(hipGetLastError());
  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
  HIP_CHECK(hipDeviceSynchronize());
  // verify
  HipTest::checkVectorADD(A_h, B_h, C_h, N);

  // Release the six buffers created by initArrays.
  // NOTE(review): assumes HipTest::freeArrays(..., usePinnedHost=false) is the
  // counterpart of the initArrays call above - confirm against hip_test_checkers.hh.
  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
  return 0;
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invoke kernel functions
|
* Method to invoke kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify the Grid_Launch syntax.
|
* - Test case to verify the Grid_Launch syntax.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipGridLaunch.cc
|
* - catch/unit/kernel/hipGridLaunch.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_hipGridLaunch") {
  // Element count shared by every section below.
  const size_t N = 4 * 1024 * 1024;

  // hipLaunchKernelGGL launch path.
  SECTION("Test test_gl2") { test_gl2(N); }

#if __HIP__
  // <<<...>>> launch path, only built when __HIP__ is set.
  SECTION("Test triple_chevron") { test_triple_chevron(N); }
#endif
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,111 +1,111 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
|
|
||||||
#include <hip/math_functions.h>
|
#include <hip/math_functions.h>
|
||||||
|
|
||||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||||
#pragma clang diagnostic ignored "-Wuninitialized"
|
#pragma clang diagnostic ignored "-Wuninitialized"
|
||||||
|
|
||||||
// Simple tests for variable type qualifiers:
|
// Simple tests for variable type qualifiers:
|
||||||
__device__ int deviceVar;
|
__device__ int deviceVar;
|
||||||
|
|
||||||
// TODO-HCC __constant__ not working yet.
|
// TODO-HCC __constant__ not working yet.
|
||||||
__constant__ int constantVar1;
|
__constant__ int constantVar1;
|
||||||
|
|
||||||
__constant__ __device__ int constantVar2;
|
__constant__ __device__ int constantVar2;
|
||||||
|
|
||||||
// Test HOST space:
|
// Test HOST space:
|
||||||
// Host-only function: prints a fixed marker line to stdout.
__host__ void foo() {
  printf("foo!\n");
}
|
||||||
|
|
||||||
// Device function marked __noinline__: returns a + 1.
__device__ __noinline__ int sum1_noinline(int a) {
  const int incremented = a + 1;
  return incremented;
}
|
||||||
// Device function marked __forceinline__: returns a + 1.
__device__ __forceinline__ int sum1_forceinline(int a) {
  const int incremented = a + 1;
  return incremented;
}
|
||||||
|
|
||||||
|
|
||||||
// Callable from both host and device: returns x + 1. The addition is done in
// double (the literal is 1.0) and narrowed back to float on return.
__device__ __host__ float PlusOne(float x) {
  const double widened = x + 1.0;
  return widened;
}
|
||||||
|
|
||||||
// Kernel: thread `threadIdx.x` writes a[gid] + PlusOne(b[gid]) into c[gid];
// threads whose index is not below N do nothing.
__global__ void MyKernel(const float* a, const float* b, float* c,
                         unsigned N) {
  const unsigned gid = threadIdx.x;
  if (gid >= N) {
    return;
  }
  const float shifted = PlusOne(b[gid]);
  c[gid] = a[gid] + shifted;
}
|
||||||
|
|
||||||
void callMyKernel() {
|
void callMyKernel() {
|
||||||
float *a, *b, *c;
|
float *a, *b, *c;
|
||||||
const unsigned blockSize = 256;
|
const unsigned blockSize = 256;
|
||||||
unsigned N = blockSize;
|
unsigned N = blockSize;
|
||||||
hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
|
hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
|
||||||
0, 0, a, b, c, N);
|
0, 0, a, b, c, N);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
|
__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
|
||||||
#ifdef NOT_YET
|
#ifdef NOT_YET
|
||||||
int a = __shfl_up(x, 1);
|
int a = __shfl_up(x, 1);
|
||||||
#endif
|
#endif
|
||||||
float x = 1.0;
|
float x = 1.0;
|
||||||
#ifdef NOT_YET
|
#ifdef NOT_YET
|
||||||
float fastZ = __sin(x);
|
float fastZ = __sin(x);
|
||||||
#endif
|
#endif
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||||
size_t stride = blockDim.x * gridDim.x;
|
size_t stride = blockDim.x * gridDim.x;
|
||||||
|
|
||||||
for (size_t i = offset; i < N; i += stride) {
|
for (size_t i = offset; i < N; i += stride) {
|
||||||
C_d[i] = A_d[i] + B_d[i];
|
C_d[i] = A_d[i] + B_d[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invocate kernel functions
|
* Method to invocate kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Collection of code to make sure that various features
|
* - Collection of code to make sure that various features
|
||||||
* in the hip kernel language compile.
|
* in the hip kernel language compile.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipLanguageExtensions.cc
|
* - catch/unit/kernel/hipLanguageExtensions.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_hipLanguageExtensions") {
|
TEST_CASE("Unit_hipLanguageExtensions") {
|
||||||
REQUIRE(true);
|
REQUIRE(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -1,464 +1,464 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
|
|
||||||
class HipFunctorTests {
|
class HipFunctorTests {
|
||||||
public:
|
public:
|
||||||
// Test that a class functor can be passed to hiplaunchparam
|
// Test that a class functor can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForSimpleClassFunctor(void);
|
void TestForSimpleClassFunctor(void);
|
||||||
// Test that a templated class functor can be passed to hiplaunchparam
|
// Test that a templated class functor can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForClassTemplateFunctor(void);
|
void TestForClassTemplateFunctor(void);
|
||||||
// Test that a class functor object ptr can be passed to hiplaunchparam
|
// Test that a class functor object ptr can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForClassObjPtrFunctor(void);
|
void TestForClassObjPtrFunctor(void);
|
||||||
// Test that a class object containing functor can be passed
|
// Test that a class object containing functor can be passed
|
||||||
// to hiplaunchparam and can be used in kernel
|
// to hiplaunchparam and can be used in kernel
|
||||||
void TestForFunctorContainInClassObj(void);
|
void TestForFunctorContainInClassObj(void);
|
||||||
// Test that a stuct functor can be passed to hiplaunchparam
|
// Test that a stuct functor can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForSimpleStructFunctor(void);
|
void TestForSimpleStructFunctor(void);
|
||||||
// Test that a stuct functor object ptr can be passed to hiplaunchparam
|
// Test that a stuct functor object ptr can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForStructObjPtrFunctor(void);
|
void TestForStructObjPtrFunctor(void);
|
||||||
// Test that a templated struct functor can be passed to hiplaunchparam
|
// Test that a templated struct functor can be passed to hiplaunchparam
|
||||||
// and can be used in kernel
|
// and can be used in kernel
|
||||||
void TestForStructTemplateFunctor(void);
|
void TestForStructTemplateFunctor(void);
|
||||||
// Test that a struct object containing functor can be
|
// Test that a struct object containing functor can be
|
||||||
// passed to hiplaunchparam and can be used in kernel
|
// passed to hiplaunchparam and can be used in kernel
|
||||||
void TestForFunctorContainInStructObj(void);
|
void TestForFunctorContainInStructObj(void);
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int BLOCK_DIM_SIZE = 1024;
|
static const int BLOCK_DIM_SIZE = 1024;
|
||||||
static const int THREADS_PER_BLOCK = 1;
|
static const int THREADS_PER_BLOCK = 1;
|
||||||
|
|
||||||
// class functor tests
|
// class functor tests
|
||||||
|
|
||||||
// Simple doubler Functor
|
// Simple doubler Functor
|
||||||
class DoublerFunctor{
|
class DoublerFunctor{
|
||||||
public:
|
public:
|
||||||
__device__ int operator()(int x) { return x * 2;}
|
__device__ int operator()(int x) { return x * 2;}
|
||||||
};
|
};
|
||||||
|
|
||||||
// simple doubler functor passed to kernel
|
// simple doubler functor passed to kernel
|
||||||
__global__ void DoublerFunctorKernel(
|
__global__ void DoublerFunctorKernel(
|
||||||
DoublerFunctor doubler_,
|
DoublerFunctor doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = doubler_(5);
|
int result = doubler_(5);
|
||||||
deviceResult[x] = (result == 10);
|
deviceResult[x] = (result == 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForSimpleClassFunctor(void) {
|
void HipFunctorTests::TestForSimpleClassFunctor(void) {
|
||||||
DoublerFunctor doubler;
|
DoublerFunctor doubler;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForSimpleClassFunctor
|
// Validation part of TestForSimpleClassFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
// pointer functor passed to kernel
|
// pointer functor passed to kernel
|
||||||
__global__ void PtrDoublerFunctorKernel(
|
__global__ void PtrDoublerFunctorKernel(
|
||||||
DoublerFunctor *doubler_,
|
DoublerFunctor *doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = (*doubler_)(5);
|
int result = (*doubler_)(5);
|
||||||
deviceResult[x] = (result == 10);
|
deviceResult[x] = (result == 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
|
void HipFunctorTests::TestForClassObjPtrFunctor(void) {
|
||||||
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
|
DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForClassObjPtrFunctor
|
// Validation part of TestForClassObjPtrFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
delete[] ptrdoubler;
|
delete[] ptrdoubler;
|
||||||
}
|
}
|
||||||
|
|
||||||
class compare {
|
class compare {
|
||||||
public:
|
public:
|
||||||
template<typename T1, typename T2>
|
template<typename T1, typename T2>
|
||||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||||
return v1 > v2;
|
return v1 > v2;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// template functor passed to kernel
|
// template functor passed to kernel
|
||||||
__global__ void TemplateFunctorKernel(
|
__global__ void TemplateFunctorKernel(
|
||||||
compare compare_,
|
compare compare_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
deviceResult[x] = compare_(2.2, 2.1);
|
deviceResult[x] = compare_(2.2, 2.1);
|
||||||
deviceResult[x] = compare_(2, 1);
|
deviceResult[x] = compare_(2, 1);
|
||||||
deviceResult[x] = compare_('b', 'a');
|
deviceResult[x] = compare_('b', 'a');
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForClassTemplateFunctor(void) {
|
void HipFunctorTests::TestForClassTemplateFunctor(void) {
|
||||||
compare comparefunctor;
|
compare comparefunctor;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForClassTemplateFunctor
|
// Validation part of TestForClassTemplateFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Doubler calculator
|
// Doubler calculator
|
||||||
class DoublerCalculator {
|
class DoublerCalculator {
|
||||||
public:
|
public:
|
||||||
int a, result;
|
int a, result;
|
||||||
// fucntor contained in class object
|
// fucntor contained in class object
|
||||||
DoublerFunctor doubler;
|
DoublerFunctor doubler;
|
||||||
};
|
};
|
||||||
|
|
||||||
// doubler functor conatined in class obj passed to kernel
|
// doubler functor conatined in class obj passed to kernel
|
||||||
__global__ void DoublerCalculatorFunctorKernel(
|
__global__ void DoublerCalculatorFunctorKernel(
|
||||||
DoublerCalculator doubler_,
|
DoublerCalculator doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = doubler_.doubler(doubler_.a);
|
int result = doubler_.doubler(doubler_.a);
|
||||||
deviceResult[x] = (doubler_.result == result);
|
deviceResult[x] = (doubler_.result == result);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
|
void HipFunctorTests::TestForFunctorContainInClassObj(void) {
|
||||||
DoublerCalculator Doubler;
|
DoublerCalculator Doubler;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Doubler.a = 5;
|
Doubler.a = 5;
|
||||||
Doubler.result = 10;
|
Doubler.result = 10;
|
||||||
// pass comparefunctor to hipLaunchParm
|
// pass comparefunctor to hipLaunchParm
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForStructTemplateFunctor
|
// Validation part of TestForStructTemplateFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Struct functor tests
|
// Struct functor tests
|
||||||
|
|
||||||
// Simple doubler Functor
|
// Simple doubler Functor
|
||||||
struct sDoublerFunctor {
|
struct sDoublerFunctor {
|
||||||
public:
|
public:
|
||||||
__device__ int operator()(int x) { return x * 2;}
|
__device__ int operator()(int x) { return x * 2;}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// simple sturct doubler functor passed to kernel
|
// simple sturct doubler functor passed to kernel
|
||||||
__global__ void structDoublerFunctorKernel(
|
__global__ void structDoublerFunctorKernel(
|
||||||
sDoublerFunctor doubler_,
|
sDoublerFunctor doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = doubler_(5);
|
int result = doubler_(5);
|
||||||
deviceResult[x] = (result == 10);
|
deviceResult[x] = (result == 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForSimpleStructFunctor(void) {
|
void HipFunctorTests::TestForSimpleStructFunctor(void) {
|
||||||
sDoublerFunctor doubler;
|
sDoublerFunctor doubler;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForSimpleStructFunctor
|
// Validation part of TestForSimpleStructFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
// ptr functor passed to kernel
|
// ptr functor passed to kernel
|
||||||
__global__ void structPtrDoublerFunctorKernel(
|
__global__ void structPtrDoublerFunctorKernel(
|
||||||
sDoublerFunctor *doubler_,
|
sDoublerFunctor *doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = (*doubler_)(5);
|
int result = (*doubler_)(5);
|
||||||
deviceResult[x] = (result == 10);
|
deviceResult[x] = (result == 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
|
void HipFunctorTests::TestForStructObjPtrFunctor(void) {
|
||||||
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
|
sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForStructObjPtrFunctor
|
// Validation part of TestForStructObjPtrFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
delete[] ptrdoubler;
|
delete[] ptrdoubler;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct sCompare {
|
struct sCompare {
|
||||||
public:
|
public:
|
||||||
template< typename T1, typename T2 >
|
template< typename T1, typename T2 >
|
||||||
__device__ bool operator()(const T1& v1, const T2& v2) {
|
__device__ bool operator()(const T1& v1, const T2& v2) {
|
||||||
return v1 > v2;
|
return v1 > v2;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// template functor passed to kernel
|
// template functor passed to kernel
|
||||||
__global__ void structTemplateFunctorKernel(
|
__global__ void structTemplateFunctorKernel(
|
||||||
sCompare compare_,
|
sCompare compare_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
deviceResult[x] = compare_(2.2, 2.1);
|
deviceResult[x] = compare_(2.2, 2.1);
|
||||||
deviceResult[x] = compare_(2, 1);
|
deviceResult[x] = compare_(2, 1);
|
||||||
deviceResult[x] = compare_('b', 'a');
|
deviceResult[x] = compare_('b', 'a');
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForStructTemplateFunctor(void) {
|
void HipFunctorTests::TestForStructTemplateFunctor(void) {
|
||||||
sCompare comparefunctor;
|
sCompare comparefunctor;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// pass comparefunctor to hipLaunchKernelGGL
|
// pass comparefunctor to hipLaunchKernelGGL
|
||||||
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForStructTemplateFunctor
|
// Validation part of TestForStructTemplateFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Doubler calculator struct
|
// Doubler calculator struct
|
||||||
struct sDoublerCalculator {
|
struct sDoublerCalculator {
|
||||||
public:
|
public:
|
||||||
int a, result;
|
int a, result;
|
||||||
// fucntor contained in class object
|
// fucntor contained in class object
|
||||||
DoublerFunctor doubler;
|
DoublerFunctor doubler;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// doubler functor contained in struct passed to kernel
|
// doubler functor contained in struct passed to kernel
|
||||||
__global__ void DoublerCalculatorFunctorKernel(
|
__global__ void DoublerCalculatorFunctorKernel(
|
||||||
sDoublerCalculator doubler_,
|
sDoublerCalculator doubler_,
|
||||||
bool* deviceResult) {
|
bool* deviceResult) {
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int result = doubler_.doubler(doubler_.a);
|
int result = doubler_.doubler(doubler_.a);
|
||||||
deviceResult[x] = (doubler_.result == result);
|
deviceResult[x] = (doubler_.result == result);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
|
void HipFunctorTests::TestForFunctorContainInStructObj(void) {
|
||||||
sDoublerCalculator Doubler;
|
sDoublerCalculator Doubler;
|
||||||
bool *deviceResults, *hostResults;
|
bool *deviceResults, *hostResults;
|
||||||
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
|
||||||
// initialize to false, will be set to
|
// initialize to false, will be set to
|
||||||
// true if the functor is called in device code
|
// true if the functor is called in device code
|
||||||
hostResults[k] = false;
|
hostResults[k] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Doubler.a = 5;
|
Doubler.a = 5;
|
||||||
Doubler.result = 10;
|
Doubler.result = 10;
|
||||||
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyHostToDevice));
|
hipMemcpyHostToDevice));
|
||||||
|
|
||||||
|
|
||||||
// pass comparefunctor to hipLaunchKernelGGL
|
// pass comparefunctor to hipLaunchKernelGGL
|
||||||
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
|
||||||
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
|
||||||
|
|
||||||
// Validation part of TestForStructTemplateFunctor
|
// Validation part of TestForStructTemplateFunctor
|
||||||
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
|
||||||
REQUIRE(hostResults[k] == true);
|
REQUIRE(hostResults[k] == true);
|
||||||
HIP_CHECK(hipHostFree(hostResults));
|
HIP_CHECK(hipHostFree(hostResults));
|
||||||
HIP_CHECK(hipFree(deviceResults));
|
HIP_CHECK(hipFree(deviceResults));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
|
||||||
* @{
|
* @{
|
||||||
* @ingroup KernelTest
|
* @ingroup KernelTest
|
||||||
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
|
||||||
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
|
||||||
* Method to invoke kernel functions
|
* Method to invoke kernel functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test that a class functor can be passed to hiplaunchparam
|
* - Test that a class functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel.
|
* and can be used in kernel.
|
||||||
* - Test that a templated class functor can be passed to hiplaunchparam
|
* - Test that a templated class functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel.
|
* and can be used in kernel.
|
||||||
* - Test that a class functor object ptr can be passed to hiplaunchparam
|
* - Test that a class functor object ptr can be passed to hiplaunchparam
|
||||||
* and can be used in kernel.
|
* and can be used in kernel.
|
||||||
* - Test that a class object containing functor can be passed to hiplaunchparam
|
* - Test that a class object containing functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel
|
* and can be used in kernel
|
||||||
* - Test that a struct functor can be passed to hiplaunchparam
|
* - Test that a struct functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel
|
* and can be used in kernel
|
||||||
* - Test that a struct functor object ptr can be passed to hiplaunchparam
|
* - Test that a struct functor object ptr can be passed to hiplaunchparam
|
||||||
* and can be used in kernel
|
* and can be used in kernel
|
||||||
* - Test that a templated struct functor can be passed to hiplaunchparam
|
* - Test that a templated struct functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel
|
* and can be used in kernel
|
||||||
* - Test that a struct object containing functor can be passed to hiplaunchparam
|
* - Test that a struct object containing functor can be passed to hiplaunchparam
|
||||||
* and can be used in kernel
|
* and can be used in kernel
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipLaunchParmFunctor.cc
|
* - catch/unit/kernel/hipLaunchParmFunctor.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Runs every functor-passing scenario implemented by HipFunctorTests,
// one Catch2 section per scenario.
TEST_CASE("Unit_hipLaunchParmFunctor") {
  HipFunctorTests FunctorTests;

  // Class based functors.
  SECTION("test for simple class functor") { FunctorTests.TestForSimpleClassFunctor(); }
  SECTION("test for class objptr functor") { FunctorTests.TestForClassObjPtrFunctor(); }
  SECTION("test for class templete functor") { FunctorTests.TestForClassTemplateFunctor(); }

  // Struct based functors.
  SECTION("test for simple struct functor") { FunctorTests.TestForSimpleStructFunctor(); }
  SECTION("test for struct objptr functor") { FunctorTests.TestForStructObjPtrFunctor(); }
  SECTION("test for struct templete functor") { FunctorTests.TestForStructTemplateFunctor(); }

  // Functor stored inside another object that is handed to the kernel.
  SECTION("test for functor contain in classobj") { FunctorTests.TestForFunctorContainInClassObj(); }
  SECTION("test for functor contain in structobj") { FunctorTests.TestForFunctorContainInStructObj(); }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group KernelTest.
|
* End doxygen group KernelTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ void verify_linked_lists_on_device(hipStream_t stream, Node* pNodes,
|
|||||||
unsigned int* pNumCorrect, unsigned int numLists,
|
unsigned int* pNumCorrect, unsigned int numLists,
|
||||||
unsigned int ListLength) {
|
unsigned int ListLength) {
|
||||||
*pNumCorrect = 0; // reset numCorrect to zero
|
*pNumCorrect = 0; // reset numCorrect to zero
|
||||||
|
|
||||||
verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect,
|
verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect,
|
||||||
ListLength);
|
ListLength);
|
||||||
|
|
||||||
|
|||||||
@@ -1,24 +1,24 @@
|
|||||||
# P2P unit tests.
# hipDeviceGetP2PAttribute.cc and the files it depends on were moved from
# /catch/unit/device to the /catch/unit/p2p folder.

# Common tests - independent of the platform.
set(TEST_SRC
    hipDeviceGetP2PAttribute.cc
)

# Sources that are built for AMD only.
if(HIP_PLATFORM MATCHES "amd")
  set(AMD_SRC
      hipP2pLinkTypeAndHopFunc.cc
  )
  list(APPEND TEST_SRC ${AMD_SRC})
endif()

set_source_files_properties(hipDeviceGetP2PAttribute.cc
                            PROPERTIES COMPILE_FLAGS -std=c++17)

# Stand-alone helper executable; excluded from the default "all" target and
# built through the build_tests dependency declared at the end of this file.
add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL
               hipDeviceGetP2PAttribute_exe.cc)

hip_add_exe_to_target(NAME p2pTests
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests)

add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
|
||||||
|
|||||||
@@ -1,356 +1,356 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "hipP2pLinkTypeAndHopFunc.h"
|
#include "hipP2pLinkTypeAndHopFunc.h"
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/wait.h>
|
#include <sys/wait.h>
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#endif
|
#endif
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#define MAX_SIZE 30
|
#define MAX_SIZE 30
|
||||||
#define VISIBLE_DEVICE 0
|
#define VISIBLE_DEVICE 0
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetches Gpu device count
|
* Fetches Gpu device count
|
||||||
*/
|
*/
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
void getDeviceCount(int *pdevCnt) {
|
void getDeviceCount(int *pdevCnt) {
|
||||||
int fd[2], val = 0;
|
int fd[2], val = 0;
|
||||||
pid_t childpid;
|
pid_t childpid;
|
||||||
// create pipe descriptors
|
// create pipe descriptors
|
||||||
pipe(fd);
|
pipe(fd);
|
||||||
// disable visible_devices env from shell
|
// disable visible_devices env from shell
|
||||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||||
unsetenv("HIP_VISIBLE_DEVICES");
|
unsetenv("HIP_VISIBLE_DEVICES");
|
||||||
|
|
||||||
childpid = fork();
|
childpid = fork();
|
||||||
if (childpid > 0) { // Parent
|
if (childpid > 0) { // Parent
|
||||||
close(fd[1]);
|
close(fd[1]);
|
||||||
// parent will wait to read the device cnt
|
// parent will wait to read the device cnt
|
||||||
read(fd[0], &val, sizeof(val));
|
read(fd[0], &val, sizeof(val));
|
||||||
// close the read-descriptor
|
// close the read-descriptor
|
||||||
close(fd[0]);
|
close(fd[0]);
|
||||||
// wait for child exit
|
// wait for child exit
|
||||||
wait(NULL);
|
wait(NULL);
|
||||||
*pdevCnt = val;
|
*pdevCnt = val;
|
||||||
} else if (!childpid) { // Child
|
} else if (!childpid) { // Child
|
||||||
int devCnt = 1;
|
int devCnt = 1;
|
||||||
// writing only, no need for read-descriptor
|
// writing only, no need for read-descriptor
|
||||||
close(fd[0]);
|
close(fd[0]);
|
||||||
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
HIP_CHECK(hipGetDeviceCount(&devCnt));
|
||||||
// send the value on the write-descriptor:
|
// send the value on the write-descriptor:
|
||||||
write(fd[1], &devCnt, sizeof(devCnt));
|
write(fd[1], &devCnt, sizeof(devCnt));
|
||||||
// close the write descriptor:
|
// close the write descriptor:
|
||||||
close(fd[1]);
|
close(fd[1]);
|
||||||
exit(0);
|
exit(0);
|
||||||
} else { // failure
|
} else { // failure
|
||||||
*pdevCnt = 1;
|
*pdevCnt = 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool testMaskedDevice(int actualNumGPUs) {
|
bool testMaskedDevice(int actualNumGPUs) {
|
||||||
bool testResult = true;
|
bool testResult = true;
|
||||||
int fd[2];
|
int fd[2];
|
||||||
pipe(fd);
|
pipe(fd);
|
||||||
|
|
||||||
pid_t cPid;
|
pid_t cPid;
|
||||||
cPid = fork();
|
cPid = fork();
|
||||||
if (cPid == 0) { // child
|
if (cPid == 0) { // child
|
||||||
hipError_t err;
|
hipError_t err;
|
||||||
char visibleDeviceString[MAX_SIZE] = {};
|
char visibleDeviceString[MAX_SIZE] = {};
|
||||||
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
|
||||||
// disable visible_devices env from shell
|
// disable visible_devices env from shell
|
||||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||||
unsetenv("HIP_VISIBLE_DEVICES");
|
unsetenv("HIP_VISIBLE_DEVICES");
|
||||||
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||||
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
|
||||||
uint32_t linktype;
|
uint32_t linktype;
|
||||||
uint32_t hopcount;
|
uint32_t hopcount;
|
||||||
for (int count = 1;
|
for (int count = 1;
|
||||||
count < actualNumGPUs; count++) {
|
count < actualNumGPUs; count++) {
|
||||||
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
|
err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
|
||||||
VISIBLE_DEVICE+count, &linktype, &hopcount);
|
VISIBLE_DEVICE+count, &linktype, &hopcount);
|
||||||
REQUIRE(err == hipSuccess);
|
REQUIRE(err == hipSuccess);
|
||||||
}
|
}
|
||||||
close(fd[0]);
|
close(fd[0]);
|
||||||
write(fd[1], &testResult, sizeof(testResult));
|
write(fd[1], &testResult, sizeof(testResult));
|
||||||
close(fd[1]);
|
close(fd[1]);
|
||||||
exit(0);
|
exit(0);
|
||||||
|
|
||||||
} else if (cPid > 0) { // parent
|
} else if (cPid > 0) { // parent
|
||||||
close(fd[1]);
|
close(fd[1]);
|
||||||
read(fd[0], &testResult, sizeof(testResult));
|
read(fd[0], &testResult, sizeof(testResult));
|
||||||
close(fd[0]);
|
close(fd[0]);
|
||||||
wait(NULL);
|
wait(NULL);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
printf("Info:fork() failed\n");
|
printf("Info:fork() failed\n");
|
||||||
testResult = false;
|
testResult = false;
|
||||||
}
|
}
|
||||||
return testResult;
|
return testResult;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool testhipInvalidDevice(int numDevices) {
|
bool testhipInvalidDevice(int numDevices) {
|
||||||
hipError_t ret;
|
hipError_t ret;
|
||||||
uint32_t linktype;
|
uint32_t linktype;
|
||||||
uint32_t hopcount;
|
uint32_t hopcount;
|
||||||
SECTION("Invalid device number case 1") {
|
SECTION("Invalid device number case 1") {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
SECTION("Invalid device number case 2") {
|
SECTION("Invalid device number case 2") {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
SECTION("Invalid device number case 3") {
|
SECTION("Invalid device number case 3") {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
SECTION("Invalid device number case 4") {
|
SECTION("Invalid device number case 4") {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
SECTION("Invalid device number case 5") {
|
SECTION("Invalid device number case 5") {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
bool testhipInvalidLinkType() {
|
bool testhipInvalidLinkType() {
|
||||||
uint32_t hopcount;
|
uint32_t hopcount;
|
||||||
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
|
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
|
||||||
&hopcount));
|
&hopcount));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool testhipInvalidHopcount() {
|
bool testhipInvalidHopcount() {
|
||||||
uint32_t linktype;
|
uint32_t linktype;
|
||||||
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
|
REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool testhipSameDevice(int numGPUs) {
|
bool testhipSameDevice(int numGPUs) {
|
||||||
hipError_t ret;
|
hipError_t ret;
|
||||||
uint32_t linktype = 0;
|
uint32_t linktype = 0;
|
||||||
uint32_t hopcount = 0;
|
uint32_t hopcount = 0;
|
||||||
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
|
for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
|
||||||
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
|
ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
|
||||||
REQUIRE(ret != hipSuccess);
|
REQUIRE(ret != hipSuccess);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
|
bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
|
||||||
bool TestPassed = true;
|
bool TestPassed = true;
|
||||||
// Get the unique pair of devices
|
// Get the unique pair of devices
|
||||||
for (int x = 0; x < numDevices; x++) {
|
for (int x = 0; x < numDevices; x++) {
|
||||||
for (int y = x+1; y < numDevices; y++) {
|
for (int y = x+1; y < numDevices; y++) {
|
||||||
uint32_t linktype1 = 0, linktype2 = 0;
|
uint32_t linktype1 = 0, linktype2 = 0;
|
||||||
uint32_t hopcount1 = 0, hopcount2 = 0;
|
uint32_t hopcount1 = 0, hopcount2 = 0;
|
||||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
|
HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
|
||||||
&linktype1, &hopcount1));
|
&linktype1, &hopcount1));
|
||||||
HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
|
HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
|
||||||
&linktype2, &hopcount2));
|
&linktype2, &hopcount2));
|
||||||
if (hopcount1 != hopcount2) {
|
if (hopcount1 != hopcount2) {
|
||||||
TestPassed = false;
|
TestPassed = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return TestPassed;
|
return TestPassed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal Function
|
* Internal Function
|
||||||
*/
|
*/
|
||||||
bool validateLinkType(uint32_t linktype_Hip,
|
bool validateLinkType(uint32_t linktype_Hip,
|
||||||
RSMI_IO_LINK_TYPE linktype_RocmSmi) {
|
RSMI_IO_LINK_TYPE linktype_RocmSmi) {
|
||||||
bool TestPassed = false;
|
bool TestPassed = false;
|
||||||
|
|
||||||
if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
|
if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
|
||||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
|
(linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
|
||||||
TestPassed = true;
|
TestPassed = true;
|
||||||
} else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
|
} else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
|
||||||
(linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
|
(linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
|
||||||
TestPassed = true;
|
TestPassed = true;
|
||||||
} else {
|
} else {
|
||||||
printf("linktype Hip = %u, linktype RocmSmi = %u\n",
|
printf("linktype Hip = %u, linktype RocmSmi = %u\n",
|
||||||
linktype_Hip, linktype_RocmSmi);
|
linktype_Hip, linktype_RocmSmi);
|
||||||
TestPassed = false;
|
TestPassed = false;
|
||||||
}
|
}
|
||||||
return TestPassed;
|
return TestPassed;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool testhipLinkTypeHopcountDevice(int numDevices) {
|
bool testhipLinkTypeHopcountDevice(int numDevices) {
|
||||||
bool TestPassed = true;
|
bool TestPassed = true;
|
||||||
// Opening and initializing rocm-smi library
|
// Opening and initializing rocm-smi library
|
||||||
void *lib_rocm_smi_hdl;
|
void *lib_rocm_smi_hdl;
|
||||||
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
|
rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
|
||||||
RSMI_IO_LINK_TYPE*);
|
RSMI_IO_LINK_TYPE*);
|
||||||
rsmi_status_t (*fntopo_init)(uint64_t);
|
rsmi_status_t (*fntopo_init)(uint64_t);
|
||||||
rsmi_status_t (*fntopo_shut_down)();
|
rsmi_status_t (*fntopo_shut_down)();
|
||||||
|
|
||||||
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
|
lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
|
||||||
RTLD_LAZY);
|
RTLD_LAZY);
|
||||||
REQUIRE(lib_rocm_smi_hdl);
|
REQUIRE(lib_rocm_smi_hdl);
|
||||||
|
|
||||||
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
|
void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
|
||||||
REQUIRE(fnsym);
|
REQUIRE(fnsym);
|
||||||
|
|
||||||
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
|
fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
|
||||||
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
|
uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
|
||||||
|
|
||||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
|
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
|
||||||
REQUIRE(fnsym);
|
REQUIRE(fnsym);
|
||||||
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
|
fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
|
||||||
|
|
||||||
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
|
fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
|
||||||
REQUIRE(fnsym);
|
REQUIRE(fnsym);
|
||||||
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
|
fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
|
||||||
|
|
||||||
uint64_t init_flags = 0;
|
uint64_t init_flags = 0;
|
||||||
rsmi_status_t retsmi_init;
|
rsmi_status_t retsmi_init;
|
||||||
retsmi_init = fntopo_init(init_flags);
|
retsmi_init = fntopo_init(init_flags);
|
||||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
|
REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
|
||||||
|
|
||||||
// Use rocm-smi API rsmi_topo_get_link_type() to validate
|
// Use rocm-smi API rsmi_topo_get_link_type() to validate
|
||||||
struct devicePair {
|
struct devicePair {
|
||||||
int device1;
|
int device1;
|
||||||
int device2;
|
int device2;
|
||||||
};
|
};
|
||||||
std::vector<struct devicePair> devicePairList;
|
std::vector<struct devicePair> devicePairList;
|
||||||
// Get the unique pair of devices
|
// Get the unique pair of devices
|
||||||
for (int x = 0; x < numDevices; x++) {
|
for (int x = 0; x < numDevices; x++) {
|
||||||
for (int y = x+1; y < numDevices; y++) {
|
for (int y = x+1; y < numDevices; y++) {
|
||||||
devicePairList.push_back({x, y});
|
devicePairList.push_back({x, y});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto pos=devicePairList.begin();
|
for (auto pos=devicePairList.begin();
|
||||||
pos != devicePairList.end(); pos++) {
|
pos != devicePairList.end(); pos++) {
|
||||||
uint32_t linktype1 = 0;
|
uint32_t linktype1 = 0;
|
||||||
uint32_t hopcount1 = 0;
|
uint32_t hopcount1 = 0;
|
||||||
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
|
RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
|
||||||
uint64_t hopcount2 = 0;
|
uint64_t hopcount2 = 0;
|
||||||
rsmi_status_t retsmi;
|
rsmi_status_t retsmi;
|
||||||
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
|
HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
|
||||||
(*pos).device2, &linktype1, &hopcount1));
|
(*pos).device2, &linktype1, &hopcount1));
|
||||||
retsmi = fntopo_get_link_type((*pos).device1,
|
retsmi = fntopo_get_link_type((*pos).device1,
|
||||||
(*pos).device2, &hopcount2, &linktype2);
|
(*pos).device2, &hopcount2, &linktype2);
|
||||||
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
|
REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
|
||||||
|
|
||||||
// Validate linktype
|
// Validate linktype
|
||||||
TestPassed = validateLinkType(linktype1, linktype2);
|
TestPassed = validateLinkType(linktype1, linktype2);
|
||||||
}
|
}
|
||||||
fntopo_shut_down();
|
fntopo_shut_down();
|
||||||
dlclose(lib_rocm_smi_hdl);
|
dlclose(lib_rocm_smi_hdl);
|
||||||
return TestPassed;
|
return TestPassed;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
|
* @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
|
||||||
* @{
|
* @{
|
||||||
* @ingroup p2pTest
|
* @ingroup p2pTest
|
||||||
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
|
* `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
|
||||||
* Returns the link type and hop count between two devices
|
* Returns the link type and hop count between two devices
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
|
* - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
|
||||||
* 1)Test Scenario to verify when device1 is visible and device2 is masked
|
* 1)Test Scenario to verify when device1 is visible and device2 is masked
|
||||||
* 2)Test Scenario to verify Invalid Device Number(s)
|
* 2)Test Scenario to verify Invalid Device Number(s)
|
||||||
* 3)Test Scenario to verify when linktype = NULL
|
* 3)Test Scenario to verify when linktype = NULL
|
||||||
* 4)Test Scenario to verify when hopcount = NULL
|
* 4)Test Scenario to verify when hopcount = NULL
|
||||||
* 5)Test Scenario to verify when device1 = device2
|
* 5)Test Scenario to verify when device1 = device2
|
||||||
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
|
* 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
|
||||||
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
|
* and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
|
||||||
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
|
* 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
|
||||||
* GPUs with the output of rocm_smi tool.
|
* GPUs with the output of rocm_smi tool.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
|
* - catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Entry point for the hipExtGetLinkTypeAndHopCount scenarios. Needs at least
// two devices; everything except the invalid-device scenario is Linux only.
TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
  int numDevices = 0;
  bool TestPassed = true;

  HIP_CHECK(hipGetDeviceCount(&numDevices));
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testhipInvalidDevice") {
    TestPassed = testhipInvalidDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

#ifdef __linux__
  // Replace the count with the one obtained by getDeviceCount() and apply
  // the two-device requirement again.
  getDeviceCount(&numDevices);
  if (numDevices < 2) {
    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
    return;
  }

  SECTION("Test running for testMaskedDevice") {
    TestPassed = testMaskedDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

  // Null output pointers.
  SECTION("Test running for testhipInvalidLinkType") {
    TestPassed = testhipInvalidLinkType();
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipInvalidHopcount") {
    TestPassed = testhipInvalidHopcount();
    REQUIRE(TestPassed == true);
  }

  // Source device equal to destination device.
  SECTION("Test running for testhipSameDevice") {
    TestPassed = testhipSameDevice(numDevices);
    REQUIRE(TestPassed == true);
  }

  // Queries over all unique device pairs.
  SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
    TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
    REQUIRE(TestPassed == true);
  }
  SECTION("Test running for testhipLinkTypeHopcountDevice") {
    TestPassed = testhipLinkTypeHopcountDevice(numDevices);
    REQUIRE(TestPassed == true);
  }
#else
  printf("This test is skipped due to non linux environment.\n");
#endif
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group p2pTest.
|
* End doxygen group p2pTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,110 +1,110 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||||
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||||
/**
|
/**
|
||||||
* rocm_smi.h enums
|
* rocm_smi.h enums
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
|
RSMI_STATUS_SUCCESS = 0x0, //!< Operation was successful
|
||||||
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
|
RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
|
||||||
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
|
RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
|
||||||
//!< action is not available for the
|
//!< action is not available for the
|
||||||
//!< given input, on the given system
|
//!< given input, on the given system
|
||||||
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
|
RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
|
||||||
//!< may be because the operation is not
|
//!< may be because the operation is not
|
||||||
//!< supported by the Linux kernel
|
//!< supported by the Linux kernel
|
||||||
//!< version running on the executing
|
//!< version running on the executing
|
||||||
//!< machine
|
//!< machine
|
||||||
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCES file
|
RSMI_STATUS_PERMISSION, //!< Permission denied/EACCES file
|
||||||
//!< error. Many functions require
|
//!< error. Many functions require
|
||||||
//!< root access to run.
|
//!< root access to run.
|
||||||
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
|
RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
|
||||||
//!< resource
|
//!< resource
|
||||||
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
|
RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
|
||||||
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
|
RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
|
||||||
//!< allowable or safe range
|
//!< allowable or safe range
|
||||||
RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi
|
RSMI_STATUS_INIT_ERROR, //!< An error occurred when rsmi
|
||||||
//!< was initializing internal data
|
//!< was initializing internal data
|
||||||
//!< structures
|
//!< structures
|
||||||
RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
|
RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
|
||||||
RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
|
RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
|
||||||
//!< yet been implemented in the
|
//!< yet been implemented in the
|
||||||
//!< current system for the current
|
//!< current system for the current
|
||||||
//!< devices
|
//!< devices
|
||||||
RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
|
RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
|
||||||
//!< found
|
//!< found
|
||||||
RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
|
RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
|
||||||
//!< available for the operation
|
//!< available for the operation
|
||||||
RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
|
RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
|
||||||
//!< execution of function
|
//!< execution of function
|
||||||
RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
|
RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
|
||||||
//!< was read
|
//!< was read
|
||||||
RSMI_STATUS_NO_DATA, //!< No data was found for a given
|
RSMI_STATUS_NO_DATA, //!< No data was found for a given
|
||||||
//!< input
|
//!< input
|
||||||
RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to
|
RSMI_STATUS_UNEXPECTED_DATA, //!< The data read or provided to
|
||||||
//!< function is not what was expected
|
//!< function is not what was expected
|
||||||
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
|
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
|
||||||
//!< acquired because it is already
|
//!< acquired because it is already
|
||||||
//!< being used
|
//!< being used
|
||||||
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
|
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
|
||||||
//!< exceeded INT32_MAX
|
//!< exceeded INT32_MAX
|
||||||
|
|
||||||
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
|
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
|
||||||
} rsmi_status_t;
|
} rsmi_status_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Types for IO Link returned from rocm_smi
|
* Types for IO Link returned from rocm_smi
|
||||||
*/
|
*/
|
||||||
typedef enum _RSMI_IO_LINK_TYPE {
|
typedef enum _RSMI_IO_LINK_TYPE {
|
||||||
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
|
RSMI_IOLINK_TYPE_UNDEFINED = 0, //!< unknown type.
|
||||||
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
|
RSMI_IOLINK_TYPE_PCIEXPRESS = 1, //!< PCI Express
|
||||||
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
|
RSMI_IOLINK_TYPE_XGMI = 2, //!< XGMI
|
||||||
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
|
RSMI_IOLINK_TYPE_NUMIOLINKTYPES, //!< Number of IO Link types
|
||||||
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
|
RSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF //!< Max of IO Link types
|
||||||
} RSMI_IO_LINK_TYPE;
|
} RSMI_IO_LINK_TYPE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Types for IO Link returned from rocm runtime
|
* Types for IO Link returned from rocm runtime
|
||||||
*/
|
*/
|
||||||
typedef enum {
|
typedef enum {
|
||||||
/**
|
/**
|
||||||
* Hyper-transport bus type.
|
* Hyper-transport bus type.
|
||||||
*/
|
*/
|
||||||
HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
|
HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
|
||||||
/**
|
/**
|
||||||
* QPI bus type.
|
* QPI bus type.
|
||||||
*/
|
*/
|
||||||
HSA_AMD_LINK_INFO_TYPE_QPI = 1,
|
HSA_AMD_LINK_INFO_TYPE_QPI = 1,
|
||||||
/**
|
/**
|
||||||
* PCIe bus type.
|
* PCIe bus type.
|
||||||
*/
|
*/
|
||||||
HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
|
HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
|
||||||
/**
|
/**
|
||||||
* Infiniband bus type.
|
* Infiniband bus type.
|
||||||
*/
|
*/
|
||||||
HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
|
HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
|
||||||
/**
|
/**
|
||||||
* xGMI link type.
|
* xGMI link type.
|
||||||
*/
|
*/
|
||||||
HSA_AMD_LINK_INFO_TYPE_XGMI = 4
|
HSA_AMD_LINK_INFO_TYPE_XGMI = 4
|
||||||
} hsa_amd_link_info_type_t;
|
} hsa_amd_link_info_type_t;
|
||||||
|
|
||||||
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
#endif // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
|
||||||
|
|||||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -1,178 +1,178 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
|
The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||||
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
bool check_architecture(const char** Combination_CO,
|
bool check_architecture(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_rdc(const char** Combination_CO,
|
bool check_rdc(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_denormals_enabled(const char** Combination_CO,
|
bool check_denormals_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_denormals_disabled(const char** Combination_CO,
|
bool check_denormals_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_ffp_contract_off(const char** Combination_CO,
|
bool check_ffp_contract_off(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_ffp_contract_on(const char** Combination_CO,
|
bool check_ffp_contract_on(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_ffp_contract_fast(const char** Combination_CO,
|
bool check_ffp_contract_fast(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_fast_math_enabled(const char** Combination_CO,
|
bool check_fast_math_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_fast_math_disabled(const char** Combination_CO,
|
bool check_fast_math_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_slp_vectorize_enabled(const char** Combination_CO,
|
bool check_slp_vectorize_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_slp_vectorize_disabled(const char** Combination_CO,
|
bool check_slp_vectorize_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_macro(const char** Combination_CO,
|
bool check_macro(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_undef_macro(const char** Combination_CO,
|
bool check_undef_macro(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_header_dir(const char** Combination_CO,
|
bool check_header_dir(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_warning(const char** Combination_CO,
|
bool check_warning(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_Rpass_inline(const char** Combination_CO,
|
bool check_Rpass_inline(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_conversionerror_enabled(const char** Combination_CO,
|
bool check_conversionerror_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_conversionerror_disabled(const char** Combination_CO,
|
bool check_conversionerror_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_conversionwarning_enabled(const char** Combination_CO,
|
bool check_conversionwarning_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_conversionwarning_disabled(const char** Combination_CO,
|
bool check_conversionwarning_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_max_thread(const char** Combination_CO,
|
bool check_max_thread(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_unsafe_atomic_enabled(const char** Combination_CO,
|
bool check_unsafe_atomic_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_unsafe_atomic_disabled(const char** Combination_CO,
|
bool check_unsafe_atomic_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_infinite_num_enabled(const char** Combination_CO,
|
bool check_infinite_num_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_infinite_num_disabled(const char** Combination_CO,
|
bool check_infinite_num_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_NAN_num_enabled(const char** Combination_CO,
|
bool check_NAN_num_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_NAN_num_disabled(const char** Combination_CO,
|
bool check_NAN_num_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_finite_math_enabled(const char** Combination_CO,
|
bool check_finite_math_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_finite_math_disabled(const char** Combination_CO,
|
bool check_finite_math_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_associative_math_enabled(const char** Combination_CO,
|
bool check_associative_math_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_associative_math_disabled(const char** Combination_CO,
|
bool check_associative_math_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_signed_zeros_enabled(const char** Combination_CO,
|
bool check_signed_zeros_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size,
|
int Combination_CO_size,
|
||||||
int max_thread_pos,
|
int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_signed_zeros_disabled(const char** Combination_CO,
|
bool check_signed_zeros_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_trapping_math_enabled(const char** Combination_CO,
|
bool check_trapping_math_enabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
bool check_trapping_math_disabled(const char** Combination_CO,
|
bool check_trapping_math_disabled(const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_pos,
|
int Combination_CO_size, int max_thread_pos,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
|
std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
|
||||||
int extra_CO_IRadded_size, const char** Combination_CO,
|
int extra_CO_IRadded_size, const char** Combination_CO,
|
||||||
int Combination_CO_size);
|
int Combination_CO_size);
|
||||||
|
|
||||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
#endif // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
|
||||||
|
|||||||
@@ -1,163 +1,163 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
RtcKernels.h contains the strings that hold the kernel source code.
|
RtcKernels.h contains the strings that hold the kernel source code.
|
||||||
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
|
They are utilized by the compiler option functions, defined in RtcFunctions.cpp
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||||
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||||
#include <hip/hiprtc.h>
|
#include <hip/hiprtc.h>
|
||||||
#include <hip/hip_runtime.h>
|
#include <hip/hip_runtime.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
static constexpr auto max_thread_string {
|
static constexpr auto max_thread_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void max_thread(int* a) {
|
__global__ void max_thread(int* a) {
|
||||||
int BD = blockDim.x;
|
int BD = blockDim.x;
|
||||||
*a = BD;
|
*a = BD;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto denormals_string {
|
static constexpr auto denormals_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void denormals(double* base, double* power, double* result) {
|
__global__ void denormals(double* base, double* power, double* result) {
|
||||||
float denorm = powf(*base, *power);
|
float denorm = powf(*base, *power);
|
||||||
if (*result == 0 || *result ==1 )
|
if (*result == 0 || *result ==1 )
|
||||||
*result = (denorm==0) ? 0 : 1;
|
*result = (denorm==0) ? 0 : 1;
|
||||||
else
|
else
|
||||||
*result = powf(*base, *power);
|
*result = powf(*base, *power);
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto warning_string {
|
static constexpr auto warning_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void warning() {
|
__global__ void warning() {
|
||||||
#warning "Just printing a WARNING message onto the terminal";
|
#warning "Just printing a WARNING message onto the terminal";
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto fp32_div_sqrt_string {
|
static constexpr auto fp32_div_sqrt_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void fp32_div_sqrt(float* result) {
|
__global__ void fp32_div_sqrt(float* result) {
|
||||||
float input = 109.6209;
|
float input = 109.6209;
|
||||||
*result = sqrt(input);
|
*result = sqrt(input);
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto error_string {
|
static constexpr auto error_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void error() {
|
__global__ void error() {
|
||||||
unsigned int a = -1;
|
unsigned int a = -1;
|
||||||
unsigned int b = +1;
|
unsigned int b = +1;
|
||||||
signed int c = -1;
|
signed int c = -1;
|
||||||
signed int d = +1;
|
signed int d = +1;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto macro_string {
|
static constexpr auto macro_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void macro(int *result) {
|
__global__ void macro(int *result) {
|
||||||
*result = PI;
|
*result = PI;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto undef_macro_string {
|
static constexpr auto undef_macro_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void undef_macro() {
|
__global__ void undef_macro() {
|
||||||
int a = Z;
|
int a = Z;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto header_dir_string {
|
static constexpr auto header_dir_string {
|
||||||
R"(
|
R"(
|
||||||
#include "RtcFact.h"
|
#include "RtcFact.h"
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void header_dir(int* a, int* val) {
|
__global__ void header_dir(int* a, int* val) {
|
||||||
*a = fact(*val);
|
*a = fact(*val);
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto rdc_string {
|
static constexpr auto rdc_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void rdc(float* a, float* b, float* c) {
|
__global__ void rdc(float* a, float* b, float* c) {
|
||||||
*c = *a * *b;
|
*c = *a * *b;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto ffp_contract_string {
|
static constexpr auto ffp_contract_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void ffp_contract(float* a, float* b, float* c) {
|
__global__ void ffp_contract(float* a, float* b, float* c) {
|
||||||
*c = *a * *b + *c;
|
*c = *a * *b + *c;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto slp_vectorize_string {
|
static constexpr auto slp_vectorize_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
|
__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
|
||||||
(*y).data.x = x.data.x + a.data.x;
|
(*y).data.x = x.data.x + a.data.x;
|
||||||
(*y).data.y = x.data.y + a.data.y;
|
(*y).data.y = x.data.y + a.data.y;
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
static constexpr auto unsafe_atomic_string {
|
static constexpr auto unsafe_atomic_string {
|
||||||
R"(
|
R"(
|
||||||
extern "C"
|
extern "C"
|
||||||
__global__ void unsafe_atomic(float* a) {
|
__global__ void unsafe_atomic(float* a) {
|
||||||
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
int id = threadIdx.x + blockIdx.x * blockDim.x;
|
||||||
if (id < 1000) {
|
if (id < 1000) {
|
||||||
unsafeAtomicAdd(&a[id], 0.2f);
|
unsafeAtomicAdd(&a[id], 0.2f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)"};
|
)"};
|
||||||
|
|
||||||
// Device source text for the `amdgpu_ieee` kernel: stores sqrt(*a / *b) into
// *c and prints the result from the device.
// The printed label previously read "sqrt(a * b)" although the kernel
// divides; the label now matches the computation.
static constexpr auto amdgpu_ieee_string {
R"(
extern "C"
__global__ void amdgpu_ieee(float* a, float* b, float* c) {
*c = sqrt(*a / *b);
printf("sqrt(a / b) = %f\n", *c);
}
)"};
|
||||||
|
|
||||||
// Device source text for the `associative_math` kernel: evaluates the product
// of three doubles with both groupings, (x*y)*z and x*(y*z), and reports
// through *check whether they differ (1) or are equal (0).
static constexpr auto associative_math_string = R"(
extern "C"
__global__ void associative_math(int* check) {
double x = 0.1f;
double y = 0.2f;
double z = 0.3f;
if((x*y)*z != x*(y*z))
*check = 1;
else *check = 0;
}
)";
|
||||||
|
|
||||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
#endif // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
|
||||||
|
|||||||
@@ -1,53 +1,53 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
|
The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||||
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||||
#include <picojson.h>
|
#include <picojson.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
std::vector<std::string> get_combi_string_vec();
|
std::vector<std::string> get_combi_string_vec();
|
||||||
|
|
||||||
int split_comb_string(std::string option);
|
int split_comb_string(std::string option);
|
||||||
|
|
||||||
int calling_combination_function(std::vector<std::string> combi_vec_list);
|
int calling_combination_function(std::vector<std::string> combi_vec_list);
|
||||||
|
|
||||||
int check_positive_CO_present(std::string find_string);
|
int check_positive_CO_present(std::string find_string);
|
||||||
|
|
||||||
int check_negative_CO_present(std::string find_string);
|
int check_negative_CO_present(std::string find_string);
|
||||||
|
|
||||||
bool calling_resp_function(const std::string block_name,
|
bool calling_resp_function(const std::string block_name,
|
||||||
const char** Combination_CO,
|
const char** Combination_CO,
|
||||||
int Combination_CO_size, int max_thread_position,
|
int Combination_CO_size, int max_thread_position,
|
||||||
int fast_math_present);
|
int fast_math_present);
|
||||||
|
|
||||||
picojson::array getblock_fromconfig();
|
picojson::array getblock_fromconfig();
|
||||||
|
|
||||||
std::string get_string_parameters(std::string para_name_to_retrieve,
|
std::string get_string_parameters(std::string para_name_to_retrieve,
|
||||||
std::string block_name);
|
std::string block_name);
|
||||||
|
|
||||||
picojson::array get_array_parameters(std::string para_name_to_retrieve,
|
picojson::array get_array_parameters(std::string para_name_to_retrieve,
|
||||||
std::string block_name);
|
std::string block_name);
|
||||||
|
|
||||||
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
#endif // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
|
||||||
|
|||||||
@@ -1,25 +1,25 @@
|
|||||||
# Sources built on every platform.
set(TEST_SRC copy_coherency.cc)

# Code object generated from memcpyIntDevice.cpp with --genco; the output is
# written next to the synchronization test binaries.
add_custom_target(memcpyInt.hsaco
                  COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
                  ${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
                  ${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
                  ${HIP_PATH}/include -I
                  ${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
                  ${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})

# The cache-coherency tests are AMD-only.
if(HIP_PLATFORM MATCHES "amd")
  set(AMD_SRC
      cache_coherency_cpu_gpu.cc
      cache_coherency_gpu_gpu.cc)
  list(APPEND TEST_SRC ${AMD_SRC})
endif()

hip_add_exe_to_target(NAME synchronizationTests
                      TEST_SRC ${TEST_SRC}
                      TEST_TARGET_NAME build_tests
                      COMPILE_OPTIONS -std=c++14)

# The test executable must not be built before the code object exists.
add_dependencies(synchronizationTests memcpyInt.hsaco)
|
||||||
|
|
||||||
|
|||||||
@@ -1,282 +1,282 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
// Simple test for Fine Grained CPU-GPU coherency.
|
// Simple test for Fine Grained CPU-GPU coherency.
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
typedef _Atomic(unsigned int) atomic_uint;
|
typedef _Atomic(unsigned int) atomic_uint;
|
||||||
|
|
||||||
// Helper function to spin on address until address equals value.
|
// Helper function to spin on address until address equals value.
|
||||||
// If the address holds the value of -1, abort because the other thread failed.
|
// If the address holds the value of -1, abort because the other thread failed.
|
||||||
__device__ int
|
__device__ int
|
||||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||||
unsigned int value) {
|
unsigned int value) {
|
||||||
unsigned int compare;
|
unsigned int compare;
|
||||||
bool check = false;
|
bool check = false;
|
||||||
do {
|
do {
|
||||||
compare = value;
|
compare = value;
|
||||||
check = __opencl_atomic_compare_exchange_strong(
|
check = __opencl_atomic_compare_exchange_strong(
|
||||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
if (compare == -1)
|
if (compare == -1)
|
||||||
return -1;
|
return -1;
|
||||||
} while (!check);
|
} while (!check);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This kernel requires a single block, single thread dispatch.
|
// This kernel requires a single block, single thread dispatch.
|
||||||
__global__ void
|
__global__ void
|
||||||
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
|
gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
|
||||||
unsigned int *AA1, unsigned int *AA2,
|
unsigned int *AA1, unsigned int *AA2,
|
||||||
unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
|
unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
// Store data into A, system fence, and atomically mark flag.
|
// Store data into A, system fence, and atomically mark flag.
|
||||||
// This guarantees this global write is visible by device 1.
|
// This guarantees this global write is visible by device 1.
|
||||||
A[i] = X[i];
|
A[i] = X[i];
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
// Wait on device 1's global write to B.
|
// Wait on device 1's global write to B.
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||||
*dresult = -1;
|
*dresult = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check device 1 properly stored Y into B.
|
// Check device 1 properly stored Y into B.
|
||||||
bool stored_data_matches = (B[i] == Y[i]);
|
bool stored_data_matches = (B[i] == Y[i]);
|
||||||
if (!stored_data_matches) {
|
if (!stored_data_matches) {
|
||||||
// If the data does not match, alert other thread and abort.
|
// If the data does not match, alert other thread and abort.
|
||||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||||
i, B[i], Y[i]);
|
i, B[i], Y[i]);
|
||||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
*dresult = -1;
|
*dresult = -1;
|
||||||
}
|
}
|
||||||
// Otherwise tell the other thread to continue.
|
// Otherwise tell the other thread to continue.
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||||
*dresult = -1;
|
*dresult = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*dresult = 0;
|
*dresult = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Host-side wait: polls the word at `address` until it holds `value`.
// Returns 0 once the value is observed, or -1 as soon as the word is seen to
// hold -1 (all bits set), which the peer stores to signal that it failed.
//
// The poll is a strong compare-exchange whose desired value equals the
// expected value: on a match the word is rewritten unchanged, on a mismatch
// the current contents are copied into `observed`.
__host__ int
cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
                                       unsigned int value) {
  const unsigned int abort_marker = static_cast<unsigned int>(-1);
  for (;;) {
    unsigned int observed = value;
    const bool matched = __atomic_compare_exchange_n(
        address, /*expected=*/ &observed, /*desired=*/ value,
        /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
    // The abort check comes first, exactly as in the do/while formulation.
    if (observed == abort_marker) {
      return -1;
    }
    if (matched) {
      return 0;
    }
  }
}
|
||||||
|
|
||||||
// This host thread runs only on a single CPU thread.
|
// This host thread runs only on a single CPU thread.
|
||||||
__host__ void
|
__host__ void
|
||||||
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
|
cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
|
||||||
unsigned int *AA1, unsigned int *AA2,
|
unsigned int *AA1, unsigned int *AA2,
|
||||||
unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
|
unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
B[i] = Y[i];
|
B[i] = Y[i];
|
||||||
__atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
|
__atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
|
||||||
if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||||
*hresult = -1;
|
*hresult = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool stored_data_matches = (A[i] == X[i]);
|
bool stored_data_matches = (A[i] == X[i]);
|
||||||
if (!stored_data_matches) {
|
if (!stored_data_matches) {
|
||||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||||
i, A[i], X[i]);
|
i, A[i], X[i]);
|
||||||
__atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
|
__atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
|
||||||
*hresult = -1;
|
*hresult = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
__atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
|
__atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
|
||||||
if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||||
*hresult = -1;
|
*hresult = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*hresult = 0;
|
*hresult = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool cpu_to_gpu_coherency() {
|
static bool cpu_to_gpu_coherency() {
|
||||||
int *A_d, *B_d, *X_d, *Y_d;
|
int *A_d, *B_d, *X_d, *Y_d;
|
||||||
int *A_res, *A_h, *B_h, *X_h, *Y_h;
|
int *A_res, *A_h, *B_h, *X_h, *Y_h;
|
||||||
unsigned int hresult, dresult;
|
unsigned int hresult, dresult;
|
||||||
size_t N = 1024;
|
size_t N = 1024;
|
||||||
size_t Nbytes = N * sizeof(int);
|
size_t Nbytes = N * sizeof(int);
|
||||||
int numDevices = 0;
|
int numDevices = 0;
|
||||||
|
|
||||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||||
if (numDevices < 1) {
|
if (numDevices < 1) {
|
||||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip this test if feature is not supported.
|
// Skip this test if feature is not supported.
|
||||||
static int device0 = 0;
|
static int device0 = 0;
|
||||||
hipDeviceProp_t props;
|
hipDeviceProp_t props;
|
||||||
HIP_CHECK(hipGetDeviceProperties(&props, device0));
|
HIP_CHECK(hipGetDeviceProperties(&props, device0));
|
||||||
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
|
if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
|
||||||
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
|
strncmp(props.gcnArchName, "gfx940", 6) != 0) {
|
||||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
|
// Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
|
||||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||||
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
|
HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
|
||||||
(hipHostMallocCoherent | hipHostMallocMapped)));
|
(hipHostMallocCoherent | hipHostMallocMapped)));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
|
||||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
|
|
||||||
// Initialize the arrays and atomic variables.
|
// Initialize the arrays and atomic variables.
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
X_h[i] = 100000000 + i;
|
X_h[i] = 100000000 + i;
|
||||||
Y_h[i] = 300000000 + i;
|
Y_h[i] = 300000000 + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize shared atomic flags between CPU and GPU.
|
// Initialize shared atomic flags between CPU and GPU.
|
||||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||||
AA1_h, 0));
|
AA1_h, 0));
|
||||||
*AA1_h = 0;
|
*AA1_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||||
AA2_h, 0));
|
AA2_h, 0));
|
||||||
*AA2_h = 0;
|
*AA2_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||||
BA1_h, 0));
|
BA1_h, 0));
|
||||||
*BA1_h = 0;
|
*BA1_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||||
BA2_h, 0));
|
BA2_h, 0));
|
||||||
*BA2_h = 0;
|
*BA2_h = 0;
|
||||||
|
|
||||||
// Skip the first stream, ensure stream is non-blocking.
|
// Skip the first stream, ensure stream is non-blocking.
|
||||||
hipStream_t stream[2];
|
hipStream_t stream[2];
|
||||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||||
HIP_CHECK(hipSetDevice(0));
|
HIP_CHECK(hipSetDevice(0));
|
||||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||||
|
|
||||||
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
|
// Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
|
||||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||||
Nbytes, hipDeviceMallocFinegrained);
|
Nbytes, hipDeviceMallocFinegrained);
|
||||||
REQUIRE(status == hipSuccess);
|
REQUIRE(status == hipSuccess);
|
||||||
// SVM memory - host pointer is the same as device pointer to array A.
|
// SVM memory - host pointer is the same as device pointer to array A.
|
||||||
A_h = A_d;
|
A_h = A_d;
|
||||||
HIP_CHECK(hipMalloc(&X_d, Nbytes));
|
HIP_CHECK(hipMalloc(&X_d, Nbytes));
|
||||||
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
|
HIP_CHECK(hipMalloc(&Y_d, Nbytes));
|
||||||
|
|
||||||
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// Launch the GPU kernel.
|
// Launch the GPU kernel.
|
||||||
const unsigned blocks = 1;
|
const unsigned blocks = 1;
|
||||||
const unsigned threadsPerBlock = 1;
|
const unsigned threadsPerBlock = 1;
|
||||||
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, stream[1],
|
0, stream[1],
|
||||||
A_d, B_d, X_d, Y_d, N,
|
A_d, B_d, X_d, Y_d, N,
|
||||||
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
|
AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
|
||||||
// Check if launch failed.
|
// Check if launch failed.
|
||||||
HIP_CHECK(hipGetLastError());
|
HIP_CHECK(hipGetLastError());
|
||||||
REQUIRE(dresult == 0);
|
REQUIRE(dresult == 0);
|
||||||
|
|
||||||
// Do not sync the launched stream, instead run the cpu_thread.
|
// Do not sync the launched stream, instead run the cpu_thread.
|
||||||
std::thread host_thread(cpu_thread,
|
std::thread host_thread(cpu_thread,
|
||||||
A_h, B_h, X_h, Y_h, N,
|
A_h, B_h, X_h, Y_h, N,
|
||||||
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
|
AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
|
||||||
host_thread.detach();
|
host_thread.detach();
|
||||||
REQUIRE(hresult == 0);
|
REQUIRE(hresult == 0);
|
||||||
// Wait for Device side to finish.
|
// Wait for Device side to finish.
|
||||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||||
|
|
||||||
// Evaluate the resultant arrays A and B.
|
// Evaluate the resultant arrays A and B.
|
||||||
A_res = reinterpret_cast<int*>(malloc(Nbytes));
|
A_res = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
REQUIRE(A_res[i] == (100000000 + i));
|
REQUIRE(A_res[i] == (100000000 + i));
|
||||||
REQUIRE(B_h[i] == (300000000 + i));
|
REQUIRE(B_h[i] == (300000000 + i));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free all the device and host memory allocated.
|
// Free all the device and host memory allocated.
|
||||||
HIP_CHECK(hipFree(A_d));
|
HIP_CHECK(hipFree(A_d));
|
||||||
HIP_CHECK(hipFree(X_d));
|
HIP_CHECK(hipFree(X_d));
|
||||||
HIP_CHECK(hipFree(Y_d));
|
HIP_CHECK(hipFree(Y_d));
|
||||||
HIP_CHECK(hipHostFree(AA1_h));
|
HIP_CHECK(hipHostFree(AA1_h));
|
||||||
HIP_CHECK(hipHostFree(AA2_h));
|
HIP_CHECK(hipHostFree(AA2_h));
|
||||||
HIP_CHECK(hipHostFree(BA1_h));
|
HIP_CHECK(hipHostFree(BA1_h));
|
||||||
HIP_CHECK(hipHostFree(BA2_h));
|
HIP_CHECK(hipHostFree(BA2_h));
|
||||||
HIP_CHECK(hipHostFree(B_h));
|
HIP_CHECK(hipHostFree(B_h));
|
||||||
free(X_h);
|
free(X_h);
|
||||||
free(Y_h);
|
free(Y_h);
|
||||||
free(A_res);
|
free(A_res);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
 * - This test runs on devices that support fine-grained (coherent) memory
 *    shared between the CPU and a GPU. This performs a message passing test.
|
||||||
* Array A is allocated on Device 0, and remotely on host.
|
* Array A is allocated on Device 0, and remotely on host.
|
||||||
* Device 0 also increments atomic ints AA1 and AA2.
|
* Device 0 also increments atomic ints AA1 and AA2.
|
||||||
* Array B is allocated on host, and remotely on Device 0.
|
* Array B is allocated on host, and remotely on Device 0.
|
||||||
* Host also increments atomic ints BA1 and BA2.
|
* Host also increments atomic ints BA1 and BA2.
|
||||||
* Kernel will launch on Device 0, and store array X into array A.
|
* Kernel will launch on Device 0, and store array X into array A.
|
||||||
* Host Thread will store array Y into array B.
|
* Host Thread will store array Y into array B.
|
||||||
* Kernel will validate that the correct values of array Y are stored in B.
|
* Kernel will validate that the correct values of array Y are stored in B.
|
||||||
* Host Thread will validate that the correct values of array X are stored in A.
|
* Host Thread will validate that the correct values of array X are stored in A.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
|
* - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
* - Test to be run only on AMD.
|
* - Test to be run only on AMD.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_cache_coherency_cpu_gpu") {
  // Coherency between CPU and GPU sharing host and device memory.
  // The helper returns true both when the check passes and when it is
  // skipped on an unsupported device.
  const bool coherent = cpu_to_gpu_coherency();
  REQUIRE(coherent);
}
|
||||||
|
|||||||
@@ -1,294 +1,294 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
// Simple test for Fine Grained GPU-GPU coherency.
|
// Simple test for Fine Grained GPU-GPU coherency.
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
typedef _Atomic(unsigned int) atomic_uint;
|
typedef _Atomic(unsigned int) atomic_uint;
|
||||||
|
|
||||||
// Helper function to spin on address until address equals value.
|
// Helper function to spin on address until address equals value.
|
||||||
// If the address holds the value of -1, abort because the other thread failed.
|
// If the address holds the value of -1, abort because the other thread failed.
|
||||||
__device__ int
|
__device__ int
|
||||||
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
|
||||||
unsigned int value) {
|
unsigned int value) {
|
||||||
unsigned int compare;
|
unsigned int compare;
|
||||||
bool check = false;
|
bool check = false;
|
||||||
do {
|
do {
|
||||||
compare = value;
|
compare = value;
|
||||||
check = __opencl_atomic_compare_exchange_strong(
|
check = __opencl_atomic_compare_exchange_strong(
|
||||||
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
|
||||||
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
/*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
|
||||||
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
/*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
if (compare == -1)
|
if (compare == -1)
|
||||||
return -1;
|
return -1;
|
||||||
} while (!check);
|
} while (!check);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This kernel requires a single block, single thread dispatch.
|
// This kernel requires a single block, single thread dispatch.
|
||||||
__global__ void
|
__global__ void
|
||||||
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
|
gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
|
||||||
unsigned int *AA1, unsigned int *AA2,
|
unsigned int *AA1, unsigned int *AA2,
|
||||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
|
unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
// Store data into A, system fence, and atomically mark flag.
|
// Store data into A, system fence, and atomically mark flag.
|
||||||
// This guarantees this global write is visible by device 1.
|
// This guarantees this global write is visible by device 1.
|
||||||
A[i] = X[i];
|
A[i] = X[i];
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
// Wait on device 1's global write to B.
|
// Wait on device 1's global write to B.
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
|
||||||
*cache0_result = -1;
|
*cache0_result = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check device 1 properly stored Y into B.
|
// Check device 1 properly stored Y into B.
|
||||||
bool stored_data_matches = (B[i] == Y[i]);
|
bool stored_data_matches = (B[i] == Y[i]);
|
||||||
if (!stored_data_matches) {
|
if (!stored_data_matches) {
|
||||||
// If the data does not match, alert other thread and abort.
|
// If the data does not match, alert other thread and abort.
|
||||||
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
|
||||||
i, B[i], Y[i]);
|
i, B[i], Y[i]);
|
||||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
*cache0_result = -1;
|
*cache0_result = -1;
|
||||||
}
|
}
|
||||||
// Otherwise tell the other thread to continue.
|
// Otherwise tell the other thread to continue.
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
// Wait on kernel gpu_cache1 to finish checking X is stored in A.
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
|
||||||
*cache0_result = -1;
|
*cache0_result = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*cache0_result = 0;
|
*cache0_result = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This kernel requires a single block, single thread dispatch.
|
// This kernel requires a single block, single thread dispatch.
|
||||||
__global__ void
|
__global__ void
|
||||||
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
|
gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
|
||||||
unsigned int *AA1, unsigned int *AA2,
|
unsigned int *AA1, unsigned int *AA2,
|
||||||
unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
|
unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
B[i] = Y[i];
|
B[i] = Y[i];
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
|
||||||
*cache1_result = -1;
|
*cache1_result = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool stored_data_matches = (A[i] == X[i]);
|
bool stored_data_matches = (A[i] == X[i]);
|
||||||
if (!stored_data_matches) {
|
if (!stored_data_matches) {
|
||||||
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
|
||||||
i, A[i], X[i]);
|
i, A[i], X[i]);
|
||||||
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
|
__opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
*cache1_result = -1;
|
*cache1_result = -1;
|
||||||
}
|
}
|
||||||
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
|
__opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
|
||||||
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
__ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
|
||||||
if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
|
||||||
*cache1_result = -1;
|
*cache1_result = -1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*cache1_result = 0;
|
*cache1_result = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool gpu_to_gpu_coherency() {
|
static bool gpu_to_gpu_coherency() {
|
||||||
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
|
int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
|
||||||
int *A_h, *B_h, *X_h, *Y_h;
|
int *A_h, *B_h, *X_h, *Y_h;
|
||||||
unsigned int cache0_result, cache1_result;
|
unsigned int cache0_result, cache1_result;
|
||||||
size_t N = 1024;
|
size_t N = 1024;
|
||||||
size_t Nbytes = N * sizeof(int);
|
size_t Nbytes = N * sizeof(int);
|
||||||
int numDevices = 0;
|
int numDevices = 0;
|
||||||
int numTestDevices = 2;
|
int numTestDevices = 2;
|
||||||
|
|
||||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||||
if (numDevices < numTestDevices) {
|
if (numDevices < numTestDevices) {
|
||||||
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
|
HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip this test if either device does not support this feature.
|
// Skip this test if either device does not support this feature.
|
||||||
hipDeviceProp_t props0, props1;
|
hipDeviceProp_t props0, props1;
|
||||||
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
|
HIP_CHECK(hipGetDeviceProperties(&props0, 0));
|
||||||
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
|
HIP_CHECK(hipGetDeviceProperties(&props1, 1));
|
||||||
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
|
if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
|
||||||
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
|
strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
|
||||||
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
|
(strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
|
||||||
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
|
strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
|
||||||
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
printf("info: skipping test on devices other than gfx90a and gfx940.\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate Host Side Memory.
|
// Allocate Host Side Memory.
|
||||||
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||||
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
A_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
B_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
X_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
Y_h = reinterpret_cast<int*>(malloc(Nbytes));
|
||||||
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
|
||||||
|
|
||||||
// Initialize the arrays and atomic variables.
|
// Initialize the arrays and atomic variables.
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
X_h[i] = 100000000 + i;
|
X_h[i] = 100000000 + i;
|
||||||
Y_h[i] = 300000000 + i;
|
Y_h[i] = 300000000 + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize shared atomic flags on host coherent memory.
|
// Initialize shared atomic flags on host coherent memory.
|
||||||
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
|
||||||
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
|
||||||
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
|
||||||
AA1_h, 0));
|
AA1_h, 0));
|
||||||
*AA1_h = 0;
|
*AA1_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
|
||||||
AA2_h, 0));
|
AA2_h, 0));
|
||||||
*AA2_h = 0;
|
*AA2_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
|
||||||
BA1_h, 0));
|
BA1_h, 0));
|
||||||
*BA1_h = 0;
|
*BA1_h = 0;
|
||||||
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
|
||||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
|
||||||
BA2_h, 0));
|
BA2_h, 0));
|
||||||
*BA2_h = 0;
|
*BA2_h = 0;
|
||||||
|
|
||||||
// Skip the first stream.
|
// Skip the first stream.
|
||||||
hipStream_t stream[3];
|
hipStream_t stream[3];
|
||||||
HIP_CHECK(hipStreamCreate(&stream[0]));
|
HIP_CHECK(hipStreamCreate(&stream[0]));
|
||||||
|
|
||||||
// Set-up Device 0.
|
// Set-up Device 0.
|
||||||
HIP_CHECK(hipSetDevice(0));
|
HIP_CHECK(hipSetDevice(0));
|
||||||
// Enable P2P access to Device 1.
|
// Enable P2P access to Device 1.
|
||||||
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
|
HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
|
||||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
|
||||||
// Allocating Coherent Memory for Array A_d on Device 0.
|
// Allocating Coherent Memory for Array A_d on Device 0.
|
||||||
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||||
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
|
||||||
Nbytes, hipDeviceMallocFinegrained);
|
Nbytes, hipDeviceMallocFinegrained);
|
||||||
REQUIRE(status == hipSuccess);
|
REQUIRE(status == hipSuccess);
|
||||||
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
|
HIP_CHECK(hipMalloc(&X_d0, Nbytes));
|
||||||
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
|
HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
|
||||||
|
|
||||||
// Set-up Device 1.
|
// Set-up Device 1.
|
||||||
HIP_CHECK(hipSetDevice(1));
|
HIP_CHECK(hipSetDevice(1));
|
||||||
// Enable P2P access to Device 0.
|
// Enable P2P access to Device 0.
|
||||||
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
|
HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
|
||||||
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
|
HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
|
||||||
// Allocating Coherent Memory for Array B_d on Device 1.
|
// Allocating Coherent Memory for Array B_d on Device 1.
|
||||||
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
|
||||||
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
|
status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
|
||||||
Nbytes, hipDeviceMallocFinegrained);
|
Nbytes, hipDeviceMallocFinegrained);
|
||||||
REQUIRE(status == hipSuccess);
|
REQUIRE(status == hipSuccess);
|
||||||
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
|
HIP_CHECK(hipMalloc(&X_d1, Nbytes));
|
||||||
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
|
HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
|
||||||
|
|
||||||
// Transfer initialized data onto the device arrays.
|
// Transfer initialized data onto the device arrays.
|
||||||
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// Prepare and launch the device kernels.
|
// Prepare and launch the device kernels.
|
||||||
const unsigned blocks = 1;
|
const unsigned blocks = 1;
|
||||||
const unsigned threadsPerBlock = 1;
|
const unsigned threadsPerBlock = 1;
|
||||||
HIP_CHECK(hipSetDevice(0));
|
HIP_CHECK(hipSetDevice(0));
|
||||||
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, stream[1],
|
0, stream[1],
|
||||||
A_d, B_d, X_d0, Y_d0, N,
|
A_d, B_d, X_d0, Y_d0, N,
|
||||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
|
AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
|
||||||
// Check if launch failed.
|
// Check if launch failed.
|
||||||
HIP_CHECK(hipGetLastError());
|
HIP_CHECK(hipGetLastError());
|
||||||
REQUIRE(cache0_result == 0);
|
REQUIRE(cache0_result == 0);
|
||||||
HIP_CHECK(hipSetDevice(1));
|
HIP_CHECK(hipSetDevice(1));
|
||||||
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, stream[2],
|
0, stream[2],
|
||||||
A_d, B_d, X_d1, Y_d1, N,
|
A_d, B_d, X_d1, Y_d1, N,
|
||||||
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
|
AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
|
||||||
HIP_CHECK(hipGetLastError());
|
HIP_CHECK(hipGetLastError());
|
||||||
REQUIRE(cache1_result == 0);
|
REQUIRE(cache1_result == 0);
|
||||||
|
|
||||||
// Wait for kernels on both devices.
|
// Wait for kernels on both devices.
|
||||||
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
HIP_CHECK(hipStreamSynchronize(stream[1]));
|
||||||
HIP_CHECK(hipStreamSynchronize(stream[2]));
|
HIP_CHECK(hipStreamSynchronize(stream[2]));
|
||||||
|
|
||||||
// Evaluate the resultant arrays A and B.
|
// Evaluate the resultant arrays A and B.
|
||||||
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||||
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
for (size_t i = 0; i < N; i++) {
|
for (size_t i = 0; i < N; i++) {
|
||||||
REQUIRE(A_h[i] == (100000000 + i));
|
REQUIRE(A_h[i] == (100000000 + i));
|
||||||
REQUIRE(B_h[i] == (300000000 + i));
|
REQUIRE(B_h[i] == (300000000 + i));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free all the device and host memory allocated.
|
// Free all the device and host memory allocated.
|
||||||
HIP_CHECK(hipFree(A_d));
|
HIP_CHECK(hipFree(A_d));
|
||||||
HIP_CHECK(hipFree(B_d));
|
HIP_CHECK(hipFree(B_d));
|
||||||
HIP_CHECK(hipFree(X_d0));
|
HIP_CHECK(hipFree(X_d0));
|
||||||
HIP_CHECK(hipFree(Y_d0));
|
HIP_CHECK(hipFree(Y_d0));
|
||||||
HIP_CHECK(hipFree(X_d1));
|
HIP_CHECK(hipFree(X_d1));
|
||||||
HIP_CHECK(hipFree(Y_d1));
|
HIP_CHECK(hipFree(Y_d1));
|
||||||
HIP_CHECK(hipHostFree(AA1_h));
|
HIP_CHECK(hipHostFree(AA1_h));
|
||||||
HIP_CHECK(hipHostFree(AA2_h));
|
HIP_CHECK(hipHostFree(AA2_h));
|
||||||
HIP_CHECK(hipHostFree(BA1_h));
|
HIP_CHECK(hipHostFree(BA1_h));
|
||||||
HIP_CHECK(hipHostFree(BA2_h));
|
HIP_CHECK(hipHostFree(BA2_h));
|
||||||
free(A_h);
|
free(A_h);
|
||||||
free(B_h);
|
free(B_h);
|
||||||
free(X_h);
|
free(X_h);
|
||||||
free(Y_h);
|
free(Y_h);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - This test runs on devices where XGMI enables fine-grained communication
|
* - This test runs on devices where XGMI enables fine-grained communication
|
||||||
* between GPUs. This performs a message passing test.
|
* between GPUs. This performs a message passing test.
|
||||||
* Array A is allocated on Device 0, and remotely on Device 1.
|
* Array A is allocated on Device 0, and remotely on Device 1.
|
||||||
* Device 0 also increments atomic ints AA1 and AA2.
|
* Device 0 also increments atomic ints AA1 and AA2.
|
||||||
* Array B is allocated on Device 1, and remotely on Device 0.
|
* Array B is allocated on Device 1, and remotely on Device 0.
|
||||||
* Device 1 also increments atomic ints BA1 and BA2.
|
* Device 1 also increments atomic ints BA1 and BA2.
|
||||||
* Kernel 0 will launch on Device 0, and store array X into array A.
|
* Kernel 0 will launch on Device 0, and store array X into array A.
|
||||||
* Kernel 1 will launch on Device 1, and store array Y into array B.
|
* Kernel 1 will launch on Device 1, and store array Y into array B.
|
||||||
* Kernel 0 will validate that the correct values of array Y are stored in B.
|
* Kernel 0 will validate that the correct values of array Y are stored in B.
|
||||||
* Kernel 1 will validate that the correct values of array X are stored in A.
|
* Kernel 1 will validate that the correct values of array X are stored in A.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
|
* - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
* - Test to be run only on AMD.
|
* - Test to be run only on AMD.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
|
TEST_CASE("Unit_cache_coherency_gpu_gpu") {
|
||||||
bool passed = true;
|
bool passed = true;
|
||||||
// Coherency between GPUs accessing local or remote FB.
|
// Coherency between GPUs accessing local or remote FB.
|
||||||
REQUIRE(passed == gpu_to_gpu_coherency());
|
REQUIRE(passed == gpu_to_gpu_coherency());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,340 +1,340 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
|
|
||||||
unsigned threadsPerBlock = 256;
|
unsigned threadsPerBlock = 256;
|
||||||
unsigned blocksPerCU = 6;
|
unsigned blocksPerCU = 6;
|
||||||
|
|
||||||
class MemcpyFunction {
|
class MemcpyFunction {
|
||||||
public:
|
public:
|
||||||
MemcpyFunction(const char* fileName, const char* functionName) {
|
MemcpyFunction(const char* fileName, const char* functionName) {
|
||||||
load(fileName, functionName);
|
load(fileName, functionName);
|
||||||
}
|
}
|
||||||
void load(const char* fileName, const char* functionName);
|
void load(const char* fileName, const char* functionName);
|
||||||
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
|
void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
hipFunction_t _function;
|
hipFunction_t _function;
|
||||||
hipModule_t _module;
|
hipModule_t _module;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void MemcpyFunction::load(const char* fileName, const char* functionName) {
|
void MemcpyFunction::load(const char* fileName, const char* functionName) {
|
||||||
HIP_CHECK(hipModuleLoad(&_module, fileName));
|
HIP_CHECK(hipModuleLoad(&_module, fileName));
|
||||||
HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
|
HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
|
||||||
}
|
}
|
||||||
|
|
||||||
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
|
void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
|
||||||
struct {
|
struct {
|
||||||
int* _dst;
|
int* _dst;
|
||||||
const int* _src;
|
const int* _src;
|
||||||
size_t _numElements;
|
size_t _numElements;
|
||||||
} args;
|
} args;
|
||||||
|
|
||||||
args._dst = dst;
|
args._dst = dst;
|
||||||
args._src = src;
|
args._src = src;
|
||||||
args._numElements = numElements;
|
args._numElements = numElements;
|
||||||
|
|
||||||
size_t size = sizeof(args);
|
size_t size = sizeof(args);
|
||||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
||||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
|
||||||
numElements);
|
numElements);
|
||||||
HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
|
HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
|
||||||
1, 1, 0, s, NULL,
|
1, 1, 0, s, NULL,
|
||||||
reinterpret_cast<void**>(&config)));
|
reinterpret_cast<void**>(&config)));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool g_warnOnFail = true;
|
bool g_warnOnFail = true;
|
||||||
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
|
int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
|
||||||
|
|
||||||
// Set value of array to specified 32-bit integer:
|
// Set value of array to specified 32-bit integer:
|
||||||
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
|
__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
|
||||||
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||||
int stride = blockDim.x * gridDim.x;
|
int stride = blockDim.x * gridDim.x;
|
||||||
for (size_t i = gid; i < numElements; i += stride) {
|
for (size_t i = gid; i < numElements; i += stride) {
|
||||||
ptr[i] = val;
|
ptr[i] = val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
|
__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
|
||||||
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||||
int stride = blockDim.x * gridDim.x;
|
int stride = blockDim.x * gridDim.x;
|
||||||
for (size_t i = gid; i < numElements; i += stride) {
|
for (size_t i = gid; i < numElements; i += stride) {
|
||||||
dst[i] = src[i];
|
dst[i] = src[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check arrays in reverse order, to more easily detect cases where
|
// Check arrays in reverse order, to more easily detect cases where
|
||||||
// the copy is "partially" done.
|
// the copy is "partially" done.
|
||||||
void checkReverse(const int* ptr, int numElements, int expected) {
|
void checkReverse(const int* ptr, int numElements, int expected) {
|
||||||
int mismatchCnt = 0;
|
int mismatchCnt = 0;
|
||||||
for (int i = numElements - 1; i >= 0; i--) {
|
for (int i = numElements - 1; i >= 0; i--) {
|
||||||
if (!g_warnOnFail) {
|
if (!g_warnOnFail) {
|
||||||
REQUIRE(ptr[i] == expected);
|
REQUIRE(ptr[i] == expected);
|
||||||
}
|
}
|
||||||
if (++mismatchCnt >= 10) {
|
if (++mismatchCnt >= 10) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ENUM_CASE_STR(x) \
|
#define ENUM_CASE_STR(x) \
|
||||||
case x: \
|
case x: \
|
||||||
return #x
|
return #x
|
||||||
|
|
||||||
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };
|
enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };
|
||||||
|
|
||||||
const char* CmdTypeStr(CmdType c) {
|
const char* CmdTypeStr(CmdType c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
ENUM_CASE_STR(COPY);
|
ENUM_CASE_STR(COPY);
|
||||||
ENUM_CASE_STR(KERNEL);
|
ENUM_CASE_STR(KERNEL);
|
||||||
ENUM_CASE_STR(MODULE_KERNEL);
|
ENUM_CASE_STR(MODULE_KERNEL);
|
||||||
default:
|
default:
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum SyncType {
|
enum SyncType {
|
||||||
NONE,
|
NONE,
|
||||||
EVENT_QUERY,
|
EVENT_QUERY,
|
||||||
EVENT_SYNC,
|
EVENT_SYNC,
|
||||||
STREAM_WAIT_EVENT,
|
STREAM_WAIT_EVENT,
|
||||||
STREAM_QUERY,
|
STREAM_QUERY,
|
||||||
STREAM_SYNC,
|
STREAM_SYNC,
|
||||||
DEVICE_SYNC,
|
DEVICE_SYNC,
|
||||||
MAX_SyncType
|
MAX_SyncType
|
||||||
};
|
};
|
||||||
|
|
||||||
const char* SyncTypeStr(SyncType s) {
|
const char* SyncTypeStr(SyncType s) {
|
||||||
switch (s) {
|
switch (s) {
|
||||||
ENUM_CASE_STR(NONE);
|
ENUM_CASE_STR(NONE);
|
||||||
ENUM_CASE_STR(EVENT_QUERY);
|
ENUM_CASE_STR(EVENT_QUERY);
|
||||||
ENUM_CASE_STR(EVENT_SYNC);
|
ENUM_CASE_STR(EVENT_SYNC);
|
||||||
ENUM_CASE_STR(STREAM_WAIT_EVENT);
|
ENUM_CASE_STR(STREAM_WAIT_EVENT);
|
||||||
ENUM_CASE_STR(STREAM_QUERY);
|
ENUM_CASE_STR(STREAM_QUERY);
|
||||||
ENUM_CASE_STR(STREAM_SYNC);
|
ENUM_CASE_STR(STREAM_SYNC);
|
||||||
ENUM_CASE_STR(DEVICE_SYNC);
|
ENUM_CASE_STR(DEVICE_SYNC);
|
||||||
default:
|
default:
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
|
void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
|
||||||
size_t numElements) {
|
size_t numElements) {
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case COPY:
|
case COPY:
|
||||||
HIP_CHECK(
|
HIP_CHECK(
|
||||||
hipMemcpyAsync(dst, src, numElements * sizeof(int),
|
hipMemcpyAsync(dst, src, numElements * sizeof(int),
|
||||||
hipMemcpyDeviceToDevice, s));
|
hipMemcpyDeviceToDevice, s));
|
||||||
break;
|
break;
|
||||||
case KERNEL: {
|
case KERNEL: {
|
||||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
||||||
threadsPerBlock, numElements);
|
threadsPerBlock, numElements);
|
||||||
hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, s, dst, src, numElements);
|
0, s, dst, src, numElements);
|
||||||
} break;
|
} break;
|
||||||
case MODULE_KERNEL: {
|
case MODULE_KERNEL: {
|
||||||
MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
|
MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
|
||||||
g_moduleMemcpy.launch(dst, src, numElements, s);
|
g_moduleMemcpy.launch(dst, src, numElements, s);
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
printf("Info:unknown cmd=%d type", cmd);
|
printf("Info:unknown cmd=%d type", cmd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void resetInputs(int* Ad, int* Bd, int* Ch,
|
void resetInputs(int* Ad, int* Bd, int* Ch,
|
||||||
size_t numElements, int expected) {
|
size_t numElements, int expected) {
|
||||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
|
||||||
threadsPerBlock, numElements);
|
threadsPerBlock, numElements);
|
||||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, hipStream_t(0), Ad, expected, numElements);
|
0, hipStream_t(0), Ad, expected, numElements);
|
||||||
// poison with bad value to ensure is overwritten correctly
|
// poison with bad value to ensure is overwritten correctly
|
||||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
|
0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
|
||||||
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
|
||||||
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
|
0, hipStream_t(0), Bd, 0xF000BA55, numElements);
|
||||||
memset(Ch, 13, numElements * sizeof(int));
|
memset(Ch, 13, numElements * sizeof(int));
|
||||||
HIP_CHECK(hipDeviceSynchronize());
|
HIP_CHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Intended to test proper synchronization and cache flushing
|
// Intended to test proper synchronization and cache flushing
|
||||||
// between CMDA and CMDB. CMD are of type CmdType. All command copy memory,
|
// between CMDA and CMDB. CMD are of type CmdType. All command copy memory,
|
||||||
// using either hipMemcpyAsync or kernel implementations.
|
// using either hipMemcpyAsync or kernel implementations.
|
||||||
// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
|
// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
|
||||||
// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
|
// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
|
||||||
// Correct result at the end is that Ch contains the
|
// Correct result at the end is that Ch contains the
|
||||||
// contents originally in Ad (integer 0x42)
|
// contents originally in Ad (integer 0x42)
|
||||||
|
|
||||||
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
|
void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
|
||||||
hipStream_t stream1, hipStream_t stream2, int numElements,
|
hipStream_t stream1, hipStream_t stream2, int numElements,
|
||||||
int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
|
int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
|
||||||
hipEvent_t e;
|
hipEvent_t e;
|
||||||
HIP_CHECK(hipEventCreateWithFlags(&e, 0));
|
HIP_CHECK(hipEventCreateWithFlags(&e, 0));
|
||||||
|
|
||||||
resetInputs(Ad, Bd, Ch, numElements, expected);
|
resetInputs(Ad, Bd, Ch, numElements, expected);
|
||||||
|
|
||||||
const size_t sizeElements = numElements * sizeof(int);
|
const size_t sizeElements = numElements * sizeof(int);
|
||||||
fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT
|
fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT
|
||||||
sizeElements, static_cast<double>(sizeElements / 1024.0),
|
sizeElements, static_cast<double>(sizeElements / 1024.0),
|
||||||
CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
|
CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
|
||||||
|
|
||||||
/*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
|
/*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
|
||||||
fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
|
fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
|
||||||
return;
|
return;
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
// Step A:
|
// Step A:
|
||||||
runCmd(cmdAType, Bd, Ad, stream1, numElements);
|
runCmd(cmdAType, Bd, Ad, stream1, numElements);
|
||||||
|
|
||||||
// Sync in-between?
|
// Sync in-between?
|
||||||
switch (syncType) {
|
switch (syncType) {
|
||||||
case NONE:
|
case NONE:
|
||||||
break;
|
break;
|
||||||
case EVENT_QUERY: {
|
case EVENT_QUERY: {
|
||||||
hipError_t st = hipErrorNotReady;
|
hipError_t st = hipErrorNotReady;
|
||||||
HIP_CHECK(hipEventRecord(e, stream1));
|
HIP_CHECK(hipEventRecord(e, stream1));
|
||||||
do {
|
do {
|
||||||
st = hipEventQuery(e);
|
st = hipEventQuery(e);
|
||||||
} while (st == hipErrorNotReady);
|
} while (st == hipErrorNotReady);
|
||||||
HIP_CHECK(st);
|
HIP_CHECK(st);
|
||||||
} break;
|
} break;
|
||||||
case EVENT_SYNC:
|
case EVENT_SYNC:
|
||||||
HIP_CHECK(hipEventRecord(e, stream1));
|
HIP_CHECK(hipEventRecord(e, stream1));
|
||||||
HIP_CHECK(hipEventSynchronize(e));
|
HIP_CHECK(hipEventSynchronize(e));
|
||||||
break;
|
break;
|
||||||
case STREAM_WAIT_EVENT:
|
case STREAM_WAIT_EVENT:
|
||||||
HIP_CHECK(hipEventRecord(e, stream1));
|
HIP_CHECK(hipEventRecord(e, stream1));
|
||||||
HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
|
HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
|
||||||
break;
|
break;
|
||||||
case STREAM_QUERY: {
|
case STREAM_QUERY: {
|
||||||
hipError_t st = hipErrorNotReady;
|
hipError_t st = hipErrorNotReady;
|
||||||
do {
|
do {
|
||||||
st = hipStreamQuery(stream1);
|
st = hipStreamQuery(stream1);
|
||||||
} while (st == hipErrorNotReady);
|
} while (st == hipErrorNotReady);
|
||||||
HIP_CHECK(st);
|
HIP_CHECK(st);
|
||||||
} break;
|
} break;
|
||||||
case STREAM_SYNC:
|
case STREAM_SYNC:
|
||||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||||
break;
|
break;
|
||||||
case DEVICE_SYNC:
|
case DEVICE_SYNC:
|
||||||
HIP_CHECK(hipDeviceSynchronize());
|
HIP_CHECK(hipDeviceSynchronize());
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
|
fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
runCmd(cmdBType, Cd, Bd, stream2, numElements);
|
runCmd(cmdBType, Cd, Bd, stream2, numElements);
|
||||||
|
|
||||||
// Copy back to host, use async copy to avoid any extra synchronization
|
// Copy back to host, use async copy to avoid any extra synchronization
|
||||||
// that might mask issues.
|
// that might mask issues.
|
||||||
HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
|
HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
|
||||||
stream2));
|
stream2));
|
||||||
HIP_CHECK(hipStreamSynchronize(stream2));
|
HIP_CHECK(hipStreamSynchronize(stream2));
|
||||||
|
|
||||||
checkReverse(Ch, numElements, expected);
|
checkReverse(Ch, numElements, expected);
|
||||||
|
|
||||||
HIP_CHECK(hipEventDestroy(e));
|
HIP_CHECK(hipEventDestroy(e));
|
||||||
}
|
}
|
||||||
|
|
||||||
void testWrapper(size_t numElements) {
|
void testWrapper(size_t numElements) {
|
||||||
const size_t sizeElements = numElements * sizeof(int);
|
const size_t sizeElements = numElements * sizeof(int);
|
||||||
const int expected = 0x42;
|
const int expected = 0x42;
|
||||||
int *Ad, *Bd, *Cd, *Ch;
|
int *Ad, *Bd, *Cd, *Ch;
|
||||||
|
|
||||||
HIP_CHECK(hipMalloc(&Ad, sizeElements));
|
HIP_CHECK(hipMalloc(&Ad, sizeElements));
|
||||||
HIP_CHECK(hipMalloc(&Bd, sizeElements));
|
HIP_CHECK(hipMalloc(&Bd, sizeElements));
|
||||||
HIP_CHECK(hipMalloc(&Cd, sizeElements));
|
HIP_CHECK(hipMalloc(&Cd, sizeElements));
|
||||||
HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
|
HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
|
||||||
|
|
||||||
hipStream_t stream1, stream2;
|
hipStream_t stream1, stream2;
|
||||||
|
|
||||||
HIP_CHECK(hipStreamCreate(&stream1));
|
HIP_CHECK(hipStreamCreate(&stream1));
|
||||||
HIP_CHECK(hipStreamCreate(&stream2));
|
HIP_CHECK(hipStreamCreate(&stream2));
|
||||||
HIP_CHECK(hipDeviceSynchronize());
|
HIP_CHECK(hipDeviceSynchronize());
|
||||||
|
|
||||||
runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
|
runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
|
||||||
Ad, Bd, Cd, Ch, expected);
|
Ad, Bd, Cd, Ch, expected);
|
||||||
|
|
||||||
for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
|
for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
|
||||||
for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
|
for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
|
||||||
for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
|
for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
|
||||||
switch (syncMode) {
|
switch (syncMode) {
|
||||||
// case NONE::
|
// case NONE::
|
||||||
case EVENT_QUERY:
|
case EVENT_QUERY:
|
||||||
case EVENT_SYNC:
|
case EVENT_SYNC:
|
||||||
case STREAM_WAIT_EVENT:
|
case STREAM_WAIT_EVENT:
|
||||||
// case STREAM_QUERY:
|
// case STREAM_QUERY:
|
||||||
case STREAM_SYNC:
|
case STREAM_SYNC:
|
||||||
case DEVICE_SYNC:
|
case DEVICE_SYNC:
|
||||||
runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
|
runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
|
||||||
stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
|
stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
|
runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
|
||||||
numElements, Ad, Bd, Cd, Ch, expected);
|
numElements, Ad, Bd, Cd, Ch, expected);
|
||||||
runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
|
runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
|
||||||
Ad, Bd, Cd, Ch, expected);
|
Ad, Bd, Cd, Ch, expected);
|
||||||
runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
|
runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
|
||||||
numElements, Ad, Bd, Cd, Ch, expected);
|
numElements, Ad, Bd, Cd, Ch, expected);
|
||||||
runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
|
runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
|
||||||
Ad, Bd, Cd, Ch, expected);
|
Ad, Bd, Cd, Ch, expected);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
HIP_CHECK(hipFree(Ad));
|
HIP_CHECK(hipFree(Ad));
|
||||||
HIP_CHECK(hipFree(Bd));
|
HIP_CHECK(hipFree(Bd));
|
||||||
HIP_CHECK(hipFree(Cd));
|
HIP_CHECK(hipFree(Cd));
|
||||||
HIP_CHECK(hipHostFree(Ch));
|
HIP_CHECK(hipHostFree(Ch));
|
||||||
|
|
||||||
HIP_CHECK(hipStreamDestroy(stream1));
|
HIP_CHECK(hipStreamDestroy(stream1));
|
||||||
HIP_CHECK(hipStreamDestroy(stream2));
|
HIP_CHECK(hipStreamDestroy(stream2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test cache management (fences) and synchronization between
|
* - Test cache management (fences) and synchronization between
|
||||||
* kernel and copy commands. Exhaustively tests 3 command types
|
* kernel and copy commands. Exhaustively tests 3 command types
|
||||||
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
|
* (copy, kernel, module kernel), many sync types (see SyncType), followed by
|
||||||
* another command, across a sweep of data sizes designed to stress
|
* another command, across a sweep of data sizes designed to stress
|
||||||
* various levels of the memory hierarchy.
|
* various levels of the memory hierarchy.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/synchronization/copy_coherency.cc
|
* - catch/unit/synchronization/copy_coherency.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.5
|
* - HIP_VERSION >= 5.5
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_Copy_Coherency") {
  // Runs the full coherency sweep once for every entry of g_elementSizes.
  // The entry count is derived from the array's own element size, so it stays
  // correct whatever the element type is, and the index is unsigned to match
  // the sizeof-based bound.
  const size_t numSizes = sizeof(g_elementSizes) / sizeof(g_elementSizes[0]);
  for (size_t index = 0; index < numSizes; index++) {
    const size_t numElements = g_elementSizes[index];
    testWrapper(numElements);
  }
}
|
||||||
|
|||||||
@@ -1,182 +1,182 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/hip_fp16.h>
|
#include <hip/hip_fp16.h>
|
||||||
|
|
||||||
// Edge length of the square test matrix; passed to the kernel and to the CPU
// reference as `width`.
#define WIDTH 4

// Total number of matrix elements; used for buffer sizes and loop bounds.
#define NUM (WIDTH * WIDTH)

// Block dimensions. runTest launches a single block of
// THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y threads.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
|
||||||
|
|
||||||
// Device (Kernel) function, it must be void
|
// Device (Kernel) function, it must be void
|
||||||
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
|
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
|
||||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
T val = in[x];
|
T val = in[x];
|
||||||
for (int i = 0; i < width; i++) {
|
for (int i = 0; i < width; i++) {
|
||||||
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
|
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Host reference for the transpose of a square, row-major width x width
// matrix: element (row, col) of input is written to (col, row) of output.
template <typename T>
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const T* srcRow = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = srcRow[col];
    }
  }
}
|
||||||
|
|
||||||
static void getFactor(int* fact) { *fact = 101; }
|
static void getFactor(int* fact) { *fact = 101; }
|
||||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||||
static void getFactor(float* fact) { *fact = 2.5; }
|
static void getFactor(float* fact) { *fact = 2.5; }
|
||||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||||
static void getFactor(double* fact) { *fact = 2.5; }
|
static void getFactor(double* fact) { *fact = 2.5; }
|
||||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||||
|
|
||||||
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
|
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
for (int i = 0; i < NUM; i++) {
|
for (int i = 0; i < NUM; i++) {
|
||||||
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
|
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
|
||||||
errors++;
|
errors++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return errors;
|
return errors;
|
||||||
}
|
}
|
||||||
|
|
||||||
// __half specialization: both elements are converted with __half2float before
// the comparison. Returns the number of mismatches among the first NUM
// elements.
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
  int mismatches = 0;
  for (int idx = 0; idx < NUM; ++idx) {
    const float gpuValue = __half2float(TransposeMatrix[idx]);
    const float cpuValue = __half2float(cpuTransposeMatrix[idx]);
    if (gpuValue != cpuValue) {
      ++mismatches;
    }
  }
  return mismatches;
}
|
||||||
|
|
||||||
template <typename T> void init(T* Matrix) {
|
template <typename T> void init(T* Matrix) {
|
||||||
// initialize the input data
|
// initialize the input data
|
||||||
T factor;
|
T factor;
|
||||||
getFactor(&factor);
|
getFactor(&factor);
|
||||||
for (int i = 0; i < NUM; i++) {
|
for (int i = 0; i < NUM; i++) {
|
||||||
Matrix[i] = (T)i + factor;
|
Matrix[i] = (T)i + factor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// __half specialization: the sum index + factor is computed in float (the
// factor goes through __half2float) and then stored into the __half element.
template <> void init(__half* Matrix) {
  __half factor;
  getFactor(&factor);
  const float base = __half2float(factor);
  for (int idx = 0; idx < NUM; ++idx) {
    Matrix[idx] = idx + base;
  }
}
|
||||||
|
|
||||||
template <typename T> static void runTest() {
|
template <typename T> static void runTest() {
|
||||||
T* Matrix;
|
T* Matrix;
|
||||||
T* TransposeMatrix;
|
T* TransposeMatrix;
|
||||||
T* cpuTransposeMatrix;
|
T* cpuTransposeMatrix;
|
||||||
|
|
||||||
T* gpuMatrix;
|
T* gpuMatrix;
|
||||||
T* gpuTransposeMatrix;
|
T* gpuTransposeMatrix;
|
||||||
|
|
||||||
hipDeviceProp_t devProp;
|
hipDeviceProp_t devProp;
|
||||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||||
|
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
|
|
||||||
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||||
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||||
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
|
||||||
|
|
||||||
init(Matrix);
|
init(Matrix);
|
||||||
|
|
||||||
// allocate the memory on the device side
|
// allocate the memory on the device side
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
|
||||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
|
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
|
||||||
|
|
||||||
// Memory transfer from host to device
|
// Memory transfer from host to device
|
||||||
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
|
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
|
||||||
|
|
||||||
// Lauching kernel from host
|
// Lauching kernel from host
|
||||||
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
|
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
|
||||||
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||||
|
|
||||||
// Memory transfer from device to host
|
// Memory transfer from device to host
|
||||||
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
// CPU MatrixTranspose computation
|
// CPU MatrixTranspose computation
|
||||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||||
|
|
||||||
// verify the results
|
// verify the results
|
||||||
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
|
REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
|
||||||
// free the resources on device side
|
// free the resources on device side
|
||||||
HIP_CHECK(hipFree(gpuMatrix));
|
HIP_CHECK(hipFree(gpuMatrix));
|
||||||
HIP_CHECK(hipFree(gpuTransposeMatrix));
|
HIP_CHECK(hipFree(gpuTransposeMatrix));
|
||||||
|
|
||||||
// free the resources on host side
|
// free the resources on host side
|
||||||
free(Matrix);
|
free(Matrix);
|
||||||
free(TransposeMatrix);
|
free(TransposeMatrix);
|
||||||
free(cpuTransposeMatrix);
|
free(cpuTransposeMatrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup __shfl __shfl
|
* @addtogroup __shfl __shfl
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ShflTest
|
* @ingroup ShflTest
|
||||||
* `T __shfl(T var, int srcLane, int width=warpSize)` -
|
* `T __shfl(T var, int srcLane, int width=warpSize)` -
|
||||||
* Contains warp __shfl functions.
|
* Contains warp __shfl functions.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify __shfl warp functions for different datatypes.
|
* - Test case to verify __shfl warp functions for different datatypes.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipShflTests.cc
|
* - catch/unit/kernel/hipShflTests.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_hipShflTests") {
  // One section per element type; each runs the same transpose check.
  SECTION("run test for int") {
    runTest<int>();
  }
  SECTION("run test for float") {
    runTest<float>();
  }
  SECTION("run test for double") {
    runTest<double>();
  }
  // Test added to support half datatype.
  SECTION("run test for __half") {
    runTest<__half>();
  }
  SECTION("run test for int64_t") {
    runTest<int64_t>();
  }
  SECTION("run test for unsigned int") {
    runTest<unsigned int>();
  }
  SECTION("run test for uint64_t") {
    runTest<uint64_t>();
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* End doxygen group ShflTest.
|
* End doxygen group ShflTest.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,241 +1,241 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <hip_test_kernels.hh>
|
#include <hip_test_kernels.hh>
|
||||||
#include <hip_test_checkers.hh>
|
#include <hip_test_checkers.hh>
|
||||||
#include <hip_test_common.hh>
|
#include <hip_test_common.hh>
|
||||||
#include <hip/hip_fp16.h>
|
#include <hip/hip_fp16.h>
|
||||||
|
|
||||||
// Number of elements that sum() generates and accumulates. The shuffle kernels
// take their own `size` parameter, which shadows this constant inside them.
const int size = 32;
|
||||||
|
|
||||||
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned by
// __shfl_down at offsets size/2, size/4, ..., 1 (shuffle width = size), and
// finally writes its accumulated value back to a[threadIdx.x].
template <typename T> __global__ void shflDownSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T val = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    val += __shfl_down(val, offset, size);
    offset /= 2;
  }
  a[lane] = val;
}
|
||||||
|
|
||||||
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned by
// __shfl_up at offsets size/2, size/4, ..., 1 (shuffle width = size), and
// finally writes its accumulated value back to a[threadIdx.x].
template <typename T> __global__ void shflUpSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T val = a[lane];
  int offset = size / 2;
  while (offset > 0) {
    val += __shfl_up(val, offset, size);
    offset /= 2;
  }
  a[lane] = val;
}
|
||||||
|
|
||||||
// Each thread loads a[threadIdx.x], then repeatedly adds the value returned by
// __shfl_xor with lane masks size/2, size/4, ..., 1 (shuffle width = size),
// and finally writes its accumulated value back to a[threadIdx.x].
template <typename T> __global__ void shflXorSum(T* a, int size) {
  const unsigned int lane = threadIdx.x;
  T val = a[lane];
  int laneMask = size / 2;
  while (laneMask > 0) {
    val += __shfl_xor(val, laneMask, size);
    laneMask /= 2;
  }
  a[lane] = val;
}
|
||||||
|
|
||||||
static void getFactor(int* fact) { *fact = 101; }
|
static void getFactor(int* fact) { *fact = 101; }
|
||||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||||
static void getFactor(float* fact) { *fact = 2.5; }
|
static void getFactor(float* fact) { *fact = 2.5; }
|
||||||
static void getFactor(double* fact) { *fact = 2.5; }
|
static void getFactor(double* fact) { *fact = 2.5; }
|
||||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||||
|
|
||||||
template <typename T> T sum(T* a) {
|
template <typename T> T sum(T* a) {
|
||||||
T cpuSum = 0;
|
T cpuSum = 0;
|
||||||
T factor;
|
T factor;
|
||||||
getFactor(&factor);
|
getFactor(&factor);
|
||||||
for (int i = 0; i < size; i++) {
|
for (int i = 0; i < size; i++) {
|
||||||
a[i] = i + factor;
|
a[i] = i + factor;
|
||||||
cpuSum += a[i];
|
cpuSum += a[i];
|
||||||
}
|
}
|
||||||
return cpuSum;
|
return cpuSum;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * __half specialisation of sum(): fills a[0 .. size) with (index + factor)
 * and returns the running total. The arithmetic is carried out in float via
 * __half2float and the results are stored back as __half.
 *
 * @param a  host array with at least `size` elements; overwritten
 * @return   sum of all values written to `a`, as __half
 */
template <> __half sum(__half* a) {
  __half factor;
  getFactor(&factor);

  __half total = 0;
  int idx = 0;
  while (idx < size) {
    a[idx] = idx + __half2float(factor);
    // Read the element back so the total uses the value as stored in __half.
    total = __half2float(total) + __half2float(a[idx]);
    ++idx;
  }
  return total;
}
|
||||||
|
|
||||||
/**
 * Mismatch predicate: note the polarity.
 *
 * @return true when gpuSum and cpuSum DIFFER, false when they are equal.
 */
template <typename T> bool compare(T gpuSum, T cpuSum) {
  const bool mismatch = (gpuSum != cpuSum);
  return mismatch;
}
|
||||||
|
|
||||||
/**
 * __half specialisation of the mismatch predicate; both operands are
 * converted with __half2float before being compared.
 *
 * @return true when the two values DIFFER, false when they are equal.
 */
template <> bool compare(__half gpuSum, __half cpuSum) {
  const float gpuValue = __half2float(gpuSum);
  const float cpuValue = __half2float(cpuSum);
  return gpuValue != cpuValue;
}
|
||||||
|
|
||||||
template <typename T> static void runTestShflUp() {
|
template <typename T> static void runTestShflUp() {
|
||||||
const int size = 32;
|
const int size = 32;
|
||||||
T a[size];
|
T a[size];
|
||||||
T cpuSum = sum(a);
|
T cpuSum = sum(a);
|
||||||
T* d_a;
|
T* d_a;
|
||||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
|
hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
|
||||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
|
REQUIRE((compare(a[size - 1], cpuSum)) == 0);
|
||||||
HIP_CHECK(hipFree(d_a));
|
HIP_CHECK(hipFree(d_a));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> static void runTestShflDown() {
|
template <typename T> static void runTestShflDown() {
|
||||||
T a[size];
|
T a[size];
|
||||||
T cpuSum = sum(a);
|
T cpuSum = sum(a);
|
||||||
T* d_a;
|
T* d_a;
|
||||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
|
hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
|
||||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||||
HIP_CHECK(hipFree(d_a));
|
HIP_CHECK(hipFree(d_a));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> static void runTestShflXor() {
|
template <typename T> static void runTestShflXor() {
|
||||||
T a[size];
|
T a[size];
|
||||||
T cpuSum = sum(a);
|
T cpuSum = sum(a);
|
||||||
T* d_a;
|
T* d_a;
|
||||||
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
|
||||||
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
|
hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
|
||||||
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
|
||||||
REQUIRE((compare(a[0], cpuSum)) == 0);
|
REQUIRE((compare(a[0], cpuSum)) == 0);
|
||||||
HIP_CHECK(hipFree(d_a));
|
HIP_CHECK(hipFree(d_a));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup __shfl __shfl
|
* @addtogroup __shfl __shfl
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ShflTest
|
* @ingroup ShflTest
|
||||||
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||||
* Contains warp __shfl_up function
|
* Contains warp __shfl_up function
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify __shfl_up warp functions for different datatypes.
|
* - Test case to verify __shfl_up warp functions for different datatypes.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
* - Guarding this test against CUDA with reference to the mentioned
|
* - Guarding this test against CUDA with reference to the mentioned
|
||||||
* ticket SWDEV-379177
|
* ticket SWDEV-379177
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_runTestShfl_up") {
  // One section per element type; each simply invokes runTestShflUp<T>().
  SECTION("runTestShflUp for int") {
    runTestShflUp<int>();
  }
  SECTION("runTestShflUp for float") {
    runTestShflUp<float>();
  }
  SECTION("runTestShflUp for double") {
    runTestShflUp<double>();
  }
  SECTION("runTestShflUp for __half") {
    runTestShflUp<__half>();
  }
  SECTION("runTestShflUp for int64_t") {
    runTestShflUp<int64_t>();
  }
  SECTION("runTestShflUp for unsigned int") {
    runTestShflUp<unsigned int>();
  }
  SECTION("runTestShflUp for uint64_t") {
    runTestShflUp<uint64_t>();
  }
}
|
||||||
/**
|
/**
|
||||||
* End doxygen group __shfl.
|
* End doxygen group __shfl.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup __shfl __shfl
|
* @addtogroup __shfl __shfl
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ShflTest
|
* @ingroup ShflTest
|
||||||
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||||
* Contains warp __shfl_down function
|
* Contains warp __shfl_down function
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify __shfl_down warp functions for different datatypes.
|
* - Test case to verify __shfl_down warp functions for different datatypes.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
* - Guarding this test against CUDA with reference to the mentioned
|
* - Guarding this test against CUDA with reference to the mentioned
|
||||||
* ticket SWDEV-379177
|
* ticket SWDEV-379177
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_runTestShfl_Down") {
  // One section per element type; each simply invokes runTestShflDown<T>().
  SECTION("runTestShflDown for int") {
    runTestShflDown<int>();
  }
  SECTION("runTestShflDown for float") {
    runTestShflDown<float>();
  }
  SECTION("runTestShflDown for double") {
    runTestShflDown<double>();
  }
  SECTION("runTestShflDown for __half") {
    runTestShflDown<__half>();
  }
  SECTION("runTestShflDown for int64_t") {
    runTestShflDown<int64_t>();
  }
  SECTION("runTestShflDown for unsigned int") {
    runTestShflDown<unsigned int>();
  }
  SECTION("runTestShflDown for uint64_t") {
    runTestShflDown<uint64_t>();
  }
}
|
||||||
/**
|
/**
|
||||||
* End doxygen group __shfl.
|
* End doxygen group __shfl.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @addtogroup __shfl __shfl
|
* @addtogroup __shfl __shfl
|
||||||
* @{
|
* @{
|
||||||
* @ingroup ShflTest
|
* @ingroup ShflTest
|
||||||
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
||||||
* Contains warp __shfl_xor function
|
* Contains warp __shfl_xor function
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test Description
|
* Test Description
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - Test case to verify __shfl_xor warp functions for different datatypes.
|
* - Test case to verify __shfl_xor warp functions for different datatypes.
|
||||||
|
|
||||||
* Test source
|
* Test source
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - catch/unit/kernel/hipShflUpDownTest.cc
|
* - catch/unit/kernel/hipShflUpDownTest.cc
|
||||||
* Test requirements
|
* Test requirements
|
||||||
* ------------------------
|
* ------------------------
|
||||||
* - HIP_VERSION >= 5.6
|
* - HIP_VERSION >= 5.6
|
||||||
* - Guarding this test against CUDA with reference to the mentioned
|
* - Guarding this test against CUDA with reference to the mentioned
|
||||||
* ticket SWDEV-379177
|
* ticket SWDEV-379177
|
||||||
*/
|
*/
|
||||||
|
|
||||||
TEST_CASE("Unit_runTestShfl_Xor") {
  // One section per element type; each simply invokes runTestShflXor<T>().
  SECTION("runTestShflXor for int") {
    runTestShflXor<int>();
  }
  SECTION("runTestShflXor for float") {
    runTestShflXor<float>();
  }
  SECTION("runTestShflXor for double") {
    runTestShflXor<double>();
  }
  SECTION("runTestShflXor for __half") {
    runTestShflXor<__half>();
  }
  SECTION("runTestShflXor for int64_t") {
    runTestShflXor<int64_t>();
  }
  SECTION("runTestShflXor for unsigned int") {
    runTestShflXor<unsigned int>();
  }
  SECTION("runTestShflXor for uint64_t") {
    runTestShflXor<uint64_t>();
  }
}
|
||||||
/**
|
/**
|
||||||
* End doxygen group __shfl.
|
* End doxygen group __shfl.
|
||||||
* @}
|
* @}
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -1,437 +1,437 @@
|
|||||||
/*
|
/*
|
||||||
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
in the Software without restriction, including without limitation the rights
|
in the Software without restriction, including without limitation the rights
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
furnished to do so, subject to the following conditions:
|
furnished to do so, subject to the following conditions:
|
||||||
The above copyright notice and this permission notice shall be included in
|
The above copyright notice and this permission notice shall be included in
|
||||||
all copies or substantial portions of the Software.
|
all copies or substantial portions of the Software.
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
THE SOFTWARE.
|
THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* HIT_START
|
/* HIT_START
|
||||||
* BUILD: %t %s ../../src/test_common.cpp
|
* BUILD: %t %s ../../src/test_common.cpp
|
||||||
* TEST: %t
|
* TEST: %t
|
||||||
* HIT_END
|
* HIT_END
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "test_common.h"
|
#include "test_common.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
|
||||||
// Edge lengths for the 2D/3D sub-tests: run2D/run3D pick one entry and use
// it as both the width (in bytes) and the height of the region being set.
static unsigned int sizeList[] = {
  1u << 8,   //  256
  1u << 9,   //  512
  1u << 10,  // 1024
  1u << 11,  // 2048
  1u << 12,  // 4096
  1u << 13,  // 8192
};
|
||||||
|
|
||||||
// Element counts for the 1D sub-tests: run1D picks one entry and multiplies
// it by sizeof(uint32_t) to obtain the buffer size in bytes.
static unsigned int eleNumList[] = {
  1u << 8,   // 0x100
  1u << 10,  // 0x400
  1u << 12,  // 0x1000
  1u << 14,  // 0x4000
  1u << 16,  // 0x10000
  1u << 17,  // 0x20000
  1u << 18,  // 0x40000
  1u << 19,  // 0x80000
  1u << 20,  // 0x100000
  1u << 21,  // 0x200000
  1u << 22,  // 0x400000
  1u << 23,  // 0x800000
  1u << 24   // 0x1000000
};
|
||||||
|
|
||||||
// Fill patterns, one per memset flavour. Each hex constant is converted to
// the member's type; the casts only make that conversion explicit.
typedef struct _dataType {
  char memsetval = static_cast<char>(0x42);
  char memsetD8val = static_cast<char>(0xDE);
  int16_t memsetD16val = static_cast<int16_t>(0xDEAD);
  int memsetD32val = static_cast<int>(0xDEADBEEF);
} dataType;
|
||||||
|
|
||||||
#define NUM_ITER 1000
|
#define NUM_ITER 1000
|
||||||
|
|
||||||
// Selects which memset API family a sub-test exercises.
enum MemsetType {
  hipMemsetTypeDefault = 0,  // hipMemset / hipMemsetAsync (and the 2D forms)
  hipMemsetTypeD8 = 1,       // hipMemsetD8 / hipMemsetD8Async
  hipMemsetTypeD16 = 2,      // hipMemsetD16 / hipMemsetD16Async
  hipMemsetTypeD32 = 3,      // hipMemsetD32 / hipMemsetD32Async
  hipMemsetTypeMax = 4       // number of real types above; not a type itself
};
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
class hipPerfMemset {
|
class hipPerfMemset {
|
||||||
private:
|
private:
|
||||||
uint64_t bufSize_;
|
uint64_t bufSize_;
|
||||||
unsigned int num_elements_;
|
unsigned int num_elements_;
|
||||||
unsigned int testNumEle_;
|
unsigned int testNumEle_;
|
||||||
unsigned int _numSubTests = 0;
|
unsigned int _numSubTests = 0;
|
||||||
unsigned int _numSubTests2D = 0;
|
unsigned int _numSubTests2D = 0;
|
||||||
unsigned int _numSubTests3D = 0;
|
unsigned int _numSubTests3D = 0;
|
||||||
unsigned int num_sizes_ =0;
|
unsigned int num_sizes_ =0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
hipPerfMemset() {
|
hipPerfMemset() {
|
||||||
num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
|
num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
|
||||||
_numSubTests = num_elements_ * hipMemsetTypeMax;
|
_numSubTests = num_elements_ * hipMemsetTypeMax;
|
||||||
|
|
||||||
num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
|
num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
|
||||||
_numSubTests2D = num_sizes_;
|
_numSubTests2D = num_sizes_;
|
||||||
_numSubTests3D = _numSubTests2D;
|
_numSubTests3D = _numSubTests2D;
|
||||||
};
|
};
|
||||||
|
|
||||||
~hipPerfMemset() {};
|
~hipPerfMemset() {};
|
||||||
|
|
||||||
void open(int deviceID);
|
void open(int deviceID);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
|
||||||
|
|
||||||
uint getNumTests() {
|
uint getNumTests() {
|
||||||
return _numSubTests;
|
return _numSubTests;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint getNumTests2D() {
|
uint getNumTests2D() {
|
||||||
return _numSubTests2D;
|
return _numSubTests2D;
|
||||||
}
|
}
|
||||||
uint getNumTests3D() {
|
uint getNumTests3D() {
|
||||||
return _numSubTests3D;
|
return _numSubTests3D;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
void hipPerfMemset::open(int deviceId) {
|
void hipPerfMemset::open(int deviceId) {
|
||||||
int nGpu = 0;
|
int nGpu = 0;
|
||||||
HIPCHECK(hipGetDeviceCount(&nGpu));
|
HIPCHECK(hipGetDeviceCount(&nGpu));
|
||||||
if (nGpu < 1) {
|
if (nGpu < 1) {
|
||||||
failed("No GPU!");
|
failed("No GPU!");
|
||||||
}
|
}
|
||||||
|
|
||||||
HIPCHECK(hipSetDevice(deviceId));
|
HIPCHECK(hipSetDevice(deviceId));
|
||||||
hipDeviceProp_t props = {0};
|
hipDeviceProp_t props = {0};
|
||||||
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
HIPCHECK(hipGetDeviceProperties(&props, deviceId));
|
||||||
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
|
||||||
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
<< " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||||
|
|
||||||
T * A_h;
|
T * A_h;
|
||||||
T * A_d;
|
T * A_d;
|
||||||
|
|
||||||
testNumEle_ = eleNumList[test % num_elements_];
|
testNumEle_ = eleNumList[test % num_elements_];
|
||||||
|
|
||||||
bufSize_ = testNumEle_ * sizeof(uint32_t);
|
bufSize_ = testNumEle_ * sizeof(uint32_t);
|
||||||
|
|
||||||
HIPCHECK(hipMalloc(&A_d, bufSize_));
|
HIPCHECK(hipMalloc(&A_d, bufSize_));
|
||||||
|
|
||||||
A_h = reinterpret_cast<T*> (malloc(bufSize_));
|
A_h = reinterpret_cast<T*> (malloc(bufSize_));
|
||||||
|
|
||||||
hipStream_t stream;
|
hipStream_t stream;
|
||||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||||
|
|
||||||
// Warm-up
|
// Warm-up
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
|
HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto start = chrono::high_resolution_clock::now();
|
auto start = chrono::high_resolution_clock::now();
|
||||||
for (uint i = 0; i < NUM_ITER; i++) {
|
for (uint i = 0; i < NUM_ITER; i++) {
|
||||||
if (type == hipMemsetTypeDefault && !async) {
|
if (type == hipMemsetTypeDefault && !async) {
|
||||||
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeDefault && async) {
|
else if (type == hipMemsetTypeDefault && async) {
|
||||||
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
|
HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD8 && !async){
|
else if (type == hipMemsetTypeD8 && !async){
|
||||||
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
|
HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD8 && async) {
|
else if (type == hipMemsetTypeD8 && async) {
|
||||||
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
|
HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD16 && !async) {
|
else if (type == hipMemsetTypeD16 && !async) {
|
||||||
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD16 && async) {
|
else if (type == hipMemsetTypeD16 && async) {
|
||||||
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD32 && !async) {
|
else if (type == hipMemsetTypeD32 && !async) {
|
||||||
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeD32 && async) {
|
else if (type == hipMemsetTypeD32 && async) {
|
||||||
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto end = chrono::high_resolution_clock::now();
|
auto end = chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
|
HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
|
||||||
|
|
||||||
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
|
for (int i = 0; i < bufSize_ / sizeof(T); i++) {
|
||||||
if (A_h[i] != memsetval) {
|
if (A_h[i] != memsetval) {
|
||||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HIPCHECK(hipFree(A_d));
|
HIPCHECK(hipFree(A_d));
|
||||||
free(A_h);
|
free(A_h);
|
||||||
|
|
||||||
auto diff = std::chrono::duration<double>(end - start);
|
auto diff = std::chrono::duration<double>(end - start);
|
||||||
auto sec = diff.count();
|
auto sec = diff.count();
|
||||||
|
|
||||||
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
|
auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
|
||||||
|
|
||||||
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
|
cout << "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
|
||||||
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
|
<< " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||||
|
|
||||||
bufSize_ = sizeList[test % num_sizes_];
|
bufSize_ = sizeList[test % num_sizes_];
|
||||||
|
|
||||||
size_t numH = bufSize_;
|
size_t numH = bufSize_;
|
||||||
size_t numW = bufSize_;
|
size_t numW = bufSize_;
|
||||||
size_t pitch_A;
|
size_t pitch_A;
|
||||||
size_t width = numW * sizeof(char);
|
size_t width = numW * sizeof(char);
|
||||||
size_t sizeElements = width * numH;
|
size_t sizeElements = width * numH;
|
||||||
size_t elements = numW* numH;
|
size_t elements = numW* numH;
|
||||||
|
|
||||||
T * A_h;
|
T * A_h;
|
||||||
T * A_d;
|
T * A_d;
|
||||||
|
|
||||||
HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
|
HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
|
||||||
numH));
|
numH));
|
||||||
A_h = reinterpret_cast<char*>(malloc(sizeElements));
|
A_h = reinterpret_cast<char*>(malloc(sizeElements));
|
||||||
|
|
||||||
for (size_t i=0; i < elements; i++) {
|
for (size_t i=0; i < elements; i++) {
|
||||||
A_h[i] = 1;
|
A_h[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
hipStream_t stream;
|
hipStream_t stream;
|
||||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||||
|
|
||||||
// Warm-up
|
// Warm-up
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto start = chrono::steady_clock::now();
|
auto start = chrono::steady_clock::now();
|
||||||
|
|
||||||
for (uint i = 0; i < NUM_ITER; i++) {
|
for (uint i = 0; i < NUM_ITER; i++) {
|
||||||
if (type == hipMemsetTypeDefault && !async) {
|
if (type == hipMemsetTypeDefault && !async) {
|
||||||
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeDefault && async) {
|
else if (type == hipMemsetTypeDefault && async) {
|
||||||
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto end = chrono::steady_clock::now();
|
auto end = chrono::steady_clock::now();
|
||||||
|
|
||||||
HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
|
HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
|
||||||
hipMemcpyDeviceToHost));
|
hipMemcpyDeviceToHost));
|
||||||
|
|
||||||
for (int i=0; i < elements; i++) {
|
for (int i=0; i < elements; i++) {
|
||||||
if (A_h[i] != memsetval) {
|
if (A_h[i] != memsetval) {
|
||||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
chrono::duration<double> diff = end - start;
|
chrono::duration<double> diff = end - start;
|
||||||
|
|
||||||
auto sec = diff.count();
|
auto sec = diff.count();
|
||||||
|
|
||||||
auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
|
auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
|
||||||
|
|
||||||
cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
|
cout << " hipPerf2DMemset" << (async ? "Async" : " ") << "[" << test << "] "
|
||||||
<< " " << "(GB/s) for " << setw(5) << bufSize_
|
<< " " << "(GB/s) for " << setw(5) << bufSize_
|
||||||
<< " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;
|
<< " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf << endl;
|
||||||
|
|
||||||
HIPCHECK(hipStreamDestroy(stream));
|
HIPCHECK(hipStreamDestroy(stream));
|
||||||
HIPCHECK(hipFree(A_d));
|
HIPCHECK(hipFree(A_d));
|
||||||
free(A_h);
|
free(A_h);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
|
||||||
|
|
||||||
bufSize_ = sizeList[test % num_sizes_];
|
bufSize_ = sizeList[test % num_sizes_];
|
||||||
|
|
||||||
size_t numH = bufSize_;
|
size_t numH = bufSize_;
|
||||||
size_t numW = bufSize_;
|
size_t numW = bufSize_;
|
||||||
size_t depth = 10;
|
size_t depth = 10;
|
||||||
size_t width = numW * sizeof(char);
|
size_t width = numW * sizeof(char);
|
||||||
size_t sizeElements = width * numH * depth;
|
size_t sizeElements = width * numH * depth;
|
||||||
size_t elements = numW* numH* depth;
|
size_t elements = numW* numH* depth;
|
||||||
|
|
||||||
hipStream_t stream;
|
hipStream_t stream;
|
||||||
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
|
||||||
|
|
||||||
T *A_h;
|
T *A_h;
|
||||||
|
|
||||||
hipExtent extent = make_hipExtent(width, numH, depth);
|
hipExtent extent = make_hipExtent(width, numH, depth);
|
||||||
hipPitchedPtr devPitchedPtr;
|
hipPitchedPtr devPitchedPtr;
|
||||||
|
|
||||||
HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
|
HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
|
||||||
A_h = (char*)malloc(sizeElements);
|
A_h = (char*)malloc(sizeElements);
|
||||||
HIPASSERT(A_h != NULL);
|
HIPASSERT(A_h != NULL);
|
||||||
|
|
||||||
for (size_t i=0; i<elements; i++) {
|
for (size_t i=0; i<elements; i++) {
|
||||||
A_h[i] = 1;
|
A_h[i] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Warm-up
|
// Warm-up
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
|
HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
auto start = chrono::steady_clock::now();
|
auto start = chrono::steady_clock::now();
|
||||||
|
|
||||||
for (uint i = 0; i < NUM_ITER; i++) {
|
for (uint i = 0; i < NUM_ITER; i++) {
|
||||||
if (type == hipMemsetTypeDefault && !async) {
|
if (type == hipMemsetTypeDefault && !async) {
|
||||||
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
|
||||||
}
|
}
|
||||||
else if (type == hipMemsetTypeDefault && async) {
|
else if (type == hipMemsetTypeDefault && async) {
|
||||||
HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (async) {
|
if (async) {
|
||||||
HIPCHECK(hipStreamSynchronize(stream));
|
HIPCHECK(hipStreamSynchronize(stream));
|
||||||
} else {
|
} else {
|
||||||
HIPCHECK(hipDeviceSynchronize());
|
HIPCHECK(hipDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto end = chrono::steady_clock::now();
|
auto end = chrono::steady_clock::now();
|
||||||
|
|
||||||
hipMemcpy3DParms myparms = {0};
|
hipMemcpy3DParms myparms = {0};
|
||||||
myparms.srcPos = make_hipPos(0,0,0);
|
myparms.srcPos = make_hipPos(0,0,0);
|
||||||
myparms.dstPos = make_hipPos(0,0,0);
|
myparms.dstPos = make_hipPos(0,0,0);
|
||||||
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
|
myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
|
||||||
myparms.srcPtr = devPitchedPtr;
|
myparms.srcPtr = devPitchedPtr;
|
||||||
myparms.extent = extent;
|
myparms.extent = extent;
|
||||||
|
|
||||||
myparms.kind = hipMemcpyDeviceToHost;
|
myparms.kind = hipMemcpyDeviceToHost;
|
||||||
|
|
||||||
HIPCHECK(hipMemcpy3D(&myparms));
|
HIPCHECK(hipMemcpy3D(&myparms));
|
||||||
|
|
||||||
for (int i=0; i<elements; i++) {
|
for (int i=0; i<elements; i++) {
|
||||||
if (A_h[i] != memsetval) {
|
if (A_h[i] != memsetval) {
|
||||||
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
|
||||||
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
<< ", memsetval: " << static_cast<int> (memsetval) << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
chrono::duration<double> diff = end - start;
|
chrono::duration<double> diff = end - start;
|
||||||
|
|
||||||
auto sec = diff.count();
|
auto sec = diff.count();
|
||||||
|
|
||||||
auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
|
auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
|
||||||
|
|
||||||
cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
|
cout << " hipPerf3DMemset" << (async ? "Async" : " ") << "[" << test << "] " << " "
|
||||||
<< "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
|
<< "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
|
||||||
<< bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;
|
<< bufSize_ << " x " << depth << " bytes : " << setw(7) << perf << endl;
|
||||||
HIPCHECK(hipFree(devPitchedPtr.ptr));
|
HIPCHECK(hipFree(devPitchedPtr.ptr));
|
||||||
free(A_h);
|
free(A_h);
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
hipPerfMemset hipPerfMemset;
|
hipPerfMemset hipPerfMemset;
|
||||||
|
|
||||||
dataType pattern;
|
dataType pattern;
|
||||||
int deviceId = 0;
|
int deviceId = 0;
|
||||||
hipPerfMemset.open(deviceId);
|
hipPerfMemset.open(deviceId);
|
||||||
MemsetType type;
|
MemsetType type;
|
||||||
|
|
||||||
int numTests = hipPerfMemset.getNumTests();
|
int numTests = hipPerfMemset.getNumTests();
|
||||||
int numTests2D = hipPerfMemset.getNumTests2D();
|
int numTests2D = hipPerfMemset.getNumTests2D();
|
||||||
int numTests3D = hipPerfMemset.getNumTests3D();
|
int numTests3D = hipPerfMemset.getNumTests3D();
|
||||||
|
|
||||||
|
|
||||||
cout << "--------------------- 1D buffer -------------------" << endl;
|
cout << "--------------------- 1D buffer -------------------" << endl;
|
||||||
bool async= false;
|
bool async= false;
|
||||||
for (uint i = 0; i < 2 ; i++) {
|
for (uint i = 0; i < 2 ; i++) {
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
|
||||||
for (auto testCase = 0; testCase < numTests; testCase++) {
|
for (auto testCase = 0; testCase < numTests; testCase++) {
|
||||||
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
|
if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||||
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
|
cout << "API: hipMemsetD8" << (async ? "Async " : " ");
|
||||||
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
|
hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||||
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
|
cout << "API: hipMemsetD16" << (async ? "Async" : " ");
|
||||||
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
|
hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
|
||||||
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
|
cout << "API: hipMemsetD32" << (async ? "Async" : " ");
|
||||||
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
|
hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
cout << "API: hipMemset" << (async ? "Async " : " ");
|
cout << "API: hipMemset" << (async ? "Async " : " ");
|
||||||
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
|
hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
async = true;
|
async = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << endl;
|
cout << endl;
|
||||||
cout << "------------------ 2D buffer arrays ---------------" << endl;
|
cout << "------------------ 2D buffer arrays ---------------" << endl;
|
||||||
|
|
||||||
async = false;
|
async = false;
|
||||||
for (uint i = 0; i < 2; i++) {
|
for (uint i = 0; i < 2; i++) {
|
||||||
cout << endl;
|
cout << endl;
|
||||||
for (uint test = 0; test < numTests2D; test++) {
|
for (uint test = 0; test < numTests2D; test++) {
|
||||||
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||||
}
|
}
|
||||||
async = true;
|
async = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << endl;
|
cout << endl;
|
||||||
cout << "------------------ 3D buffer arrays ---------------" << endl;
|
cout << "------------------ 3D buffer arrays ---------------" << endl;
|
||||||
|
|
||||||
async = false;
|
async = false;
|
||||||
for (uint i = 0; i < 2; i++) {
|
for (uint i = 0; i < 2; i++) {
|
||||||
cout << endl;
|
cout << endl;
|
||||||
for (uint test =0; test < numTests3D; test++) {
|
for (uint test =0; test < numTests3D; test++) {
|
||||||
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
|
||||||
}
|
}
|
||||||
async = true;
|
async = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
passed();
|
passed();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,4 +41,4 @@ cmake ../samples
|
|||||||
|
|
||||||
make package_samples
|
make package_samples
|
||||||
|
|
||||||
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt.
|
## Note: sample 2_Cookbook/22_cmake_hip_lang is currently not included in the top-level CMake build. To build this sample from the top-level CMake build, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt.
|
||||||
|
|||||||
مرجع در شماره جدید
Block a user