EXSWHTEC-281 - Implement Unit Tests for assert functions #210

Change-Id: I6c05915c957d9b67951b3e97cc35cb1ca72a945f


[ROCm/hip-tests commit: 044a59496c]
This commit is contained in:
Mirza Halilcevic
2023-12-28 17:41:54 +00:00
zatwierdzone przez Rakesh Roy
rodzic 1cd4e6a55b
commit dcade635c6
31 zmienionych plików z 674 dodań i 463 usunięć
@@ -309,6 +309,8 @@
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_Assert_Positive_Basic_KernelFail",
#endif
#if defined VEGA20
"=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===",
@@ -440,6 +440,11 @@
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_StaticAssert_Positive_Basic_RTC",
"Unit_Assert_Positive_Basic_KernelFail",
"Unit_StaticAssert_Positive_Basic",
"Unit_StaticAssert_Negative_Basic",
#endif
"End of json"
]
@@ -89,6 +89,9 @@
"Performance_hipMemsetD32",
"Performance_hipMemsetD32Async",
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
"Unit_StaticAssert_Positive_Basic",
"Unit_StaticAssert_Negative_Basic"
]
}
@@ -129,6 +129,19 @@ THE SOFTWARE.
} \
}
// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError.
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \
{ \
auto localError = errorExpr; \
INFO("Matching Errors: " \
<< "\n Expected Error: " << hiprtcGetErrorString(expectedError) \
<< "\n Expected Code: " << expectedError << '\n' \
<< " Actual Error: " << hiprtcGetErrorString(localError) \
<< "\n Actual Code: " << localError << "\nStr: " << #errorExpr \
<< "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \
REQUIRE(localError == expectedError); \
}
#define HIPASSERT(condition) \
if (!(condition)) { \
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
@@ -59,3 +59,4 @@ add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246
endif()
add_subdirectory(synchronization)
add_subdirectory(launchBounds)
add_subdirectory(assertion)
@@ -0,0 +1,48 @@
# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
if(HIP_PLATFORM MATCHES "nvidia")
set(TEST_SRC
assert.cc
)
hip_add_exe_to_target(NAME AssertionTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS nvrtc)
elseif(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
static_assert.cc
assert.cc
)
hip_add_exe_to_target(NAME AssertionTest
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS hiprtc)
endif()
add_test(NAME Unit_StaticAssert_Positive_Basic
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
static_assert_kernels_positive.cc 2)
add_test(NAME Unit_StaticAssert_Negative_Basic
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
static_assert_kernels_negative.cc 2)
@@ -0,0 +1,118 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <csetjmp>
#include <csignal>
/**
* @addtogroup assert assert
* @{
* @ingroup DeviceLanguageTest
* `void assert(int expression)` -
* Stops the kernel execution if expression is equal to zero.
*/
jmp_buf env_ignore_abort;
volatile int abort_raised_flag = 0;
void on_sigabrt(int signum) {
signal(signum, SIG_DFL);
abort_raised_flag = 1;
longjmp(env_ignore_abort, 1);
}
void try_and_catch_abort(void (*func)()) {
if (!setjmp(env_ignore_abort)) {
signal(SIGABRT, &on_sigabrt);
(*func)();
signal(SIGABRT, SIG_DFL);
}
}
__global__ void AssertPassKernel() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
// expected always to be true
assert(tid >= 0);
}
__global__ void AssertFailKernel() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
// expected to fail for the even thread indices
assert(tid % 2 == 1);
}
template <bool should_abort> void LaunchAssertKernel() {
const int num_blocks = 2;
const int num_threads = 16;
if constexpr (should_abort) {
AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>();
#if HT_AMD
HIP_CHECK(hipDeviceSynchronize());
#else
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
#endif
} else {
AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>();
HIP_CHECK(hipDeviceSynchronize());
}
}
/**
* Test Description
* ------------------------
* - Launches kernels with asserts that have an expression equal to 1.
* - Expects that SIGABRT is not raised and kernels have executed successfully.
* Test source
* ------------------------
* - unit/assertion/assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
try_and_catch_abort(&LaunchAssertKernel<false>);
REQUIRE(abort_raised_flag == 0);
}
/**
* Test Description
* ------------------------
* - Launches kernels with asserts that have an expression equal to 0.
* - Expects that SIGABRT is raised and kernels have been stopped on AMD.
* - The HIP runtime also aborts the host code, so this test case uses signal handlers
* to avoid host code abortion.
* - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
* - The host code is not aborted.
* Test source
* ------------------------
* - unit/assertion/assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
try_and_catch_abort(&LaunchAssertKernel<true>);
#if HT_AMD
REQUIRE(abort_raised_flag == 1);
#else
REQUIRE(abort_raised_flag == 0);
#endif
}
@@ -0,0 +1,88 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include "static_assert_kernels_rtc.hh"
/**
* @addtogroup static_assert static_assert
* @{
* @ingroup DeviceLanguageTest
* `void static_assert(constexpr expression, const char* message)` -
* Stops the compilation if expression is equal to zero, and displays the specified message.
*/
void StaticAssertWrapper(const char* program_source) {
hiprtcProgram program{};
HIPRTC_CHECK(
hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr));
hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
// Get the compile log and count compiler error messages
size_t log_size{};
HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
std::string log(log_size, ' ');
HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
int error_count{0};
int expected_error_count{2};
std::string error_message{"error:"};
size_t n_pos = log.find(error_message, 0);
while (n_pos != std::string::npos) {
++error_count;
n_pos = log.find(error_message, n_pos + 1);
}
HIPRTC_CHECK(hiprtcDestroyProgram(&program));
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
REQUIRE(error_count == expected_error_count);
}
/**
* Test Description
* ------------------------
* - Compiles kernels with static_assert calls:
* -# Expected that static_assert passes and compilation is successful.
* -# Expected that static_assert fails and compilation has errors.
* - Uses RTC to perform compilation.
* Test source
* ------------------------
* - unit/assertion/static_assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); }
/**
* Test Description
* ------------------------
* - Passes invalidly formed expressions to static_assert calls.
* - Uses expressions that are not constexpr and values that are not known during compilation.
* - Uses RTC to perform compilation.
* Test source
* ------------------------
* - unit/assertion/static_assert.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); }
@@ -0,0 +1,30 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
__global__ void StaticAssertErrorKernel1() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}
__global__ void StaticAssertErrorKernel2() {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
@@ -0,0 +1,32 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
__global__ void StaticAssertPassKernel1() {
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
}
__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); }
__global__ void StaticAssertFailKernel1() {
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}
__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); }
@@ -0,0 +1,56 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
/*
Positive and negative kernels used for the static_assert Test Cases that are using RTC.
*/
static constexpr auto kStaticAssert_Positive{
R"(
__global__ void StaticAssertPassKernel1() {
static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]");
}
__global__ void StaticAssertPassKernel2() {
static_assert(10 > 5, "[StaticAssertPassKernel2]");
}
__global__ void StaticAssertFailKernel1() {
static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}
__global__ void StaticAssertFailKernel2() {
static_assert(10 < 5, "[StaticAssertFailKernel2]");
}
)"};
static constexpr auto kStaticAssert_Negative{
R"(
__global__ void StaticAssertErrorKernel1() {
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}
__global__ void StaticAssertErrorKernel2() {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
)"};
@@ -22,7 +22,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
@@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector<T
* ------------------------
* - Verify that data is correctly copied from a symbol. A graph is constructed to which a
* MemcpyFromSymbol node is added. After graph execution, values in destination memory are compared
* against values known to be in symbol memory.
* against values known to be in symbol memory.
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
* Verification is performed for destination memory allocated on host and device.
@@ -106,7 +105,7 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {
/**
* Test Description
* ------------------------
* ------------------------
* - Verify API behavior with invalid arguments:
* -# pGraphNodes is nullptr
* -# graph is nullptr
@@ -122,12 +121,12 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {
* -# kind is illogical (hipMemcpyHostToDevice)
* -# kind is an invalid enum value
* Test source
* ------------------------
* ------------------------
* - unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc
* Test requirements
* ------------------------
* ------------------------
* - HIP_VERSION >= 5.2
*/
*/
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") {
using namespace std::placeholders;
hipGraph_t graph = nullptr;
@@ -23,7 +23,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
@@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect
* - Verify that data is correctly copied to a symbol. A graph is constructed to which a
* MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed and
* the copied values are compared against values known to have been copied to symbol memory
* previously.
* previously.
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
* Verification is performed for source memory allocated on host and device.
@@ -22,7 +22,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#include <utils.hh>
@@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") {
* Allocate a 2D array using hipMallocPitch. Initialize the allocated memory
* using hipGraphAddMemsetNode. Copy the values in device memory to host using
* hipGraphAddMemcpyNode. Verify the results.
*/
*/
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
CHECK_IMAGE_SUPPORT
@@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
}
}
// 2D Memory allocation hipMallocPitch
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
numH));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, numH));
// Create Graph
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t memsetNode, memcpyNode;
// Add MemSet Node
hipMemsetParams memsetParams{};
memset(&memsetParams, 0, sizeof(memsetParams));
memsetParams.dst = reinterpret_cast<void *>(A_d);
memsetParams.dst = reinterpret_cast<void*>(A_d);
memsetParams.value = memSetVal;
memsetParams.pitch = pitch_A;
memsetParams.elementSize = sizeof(char);
memsetParams.width = numW;
memsetParams.height = numH;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// Add MemCpy Node
hipMemcpy3DParms myparms{};
@@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
myparms.extent = make_hipExtent(width, numH, 1);
myparms.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verfication
for (size_t i = 0; i < numW; i++) {
for (size_t j = 0; j < numH; j++) {
REQUIRE(*(A_h + i*numH + j) == memSetVal);
REQUIRE(*(A_h + i * numH + j) == memSetVal);
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
* Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using
* hipGraphAddMemsetNode. Copy the values in device memory to host using
* hipGraphAddMemcpyNode. Verify the results.
*/
*/
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
CHECK_IMAGE_SUPPORT
size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A;
char *A_d;
char* A_d;
// Initialize the host memory
std::vector<char> A_h(numW, ' ');
@@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
hipGraph_t graph;
std::vector<hipGraphNode_t> nodeDependencies;
// 1D Memory allocation hipMallocPitch
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
1));
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, 1));
// Create Graph
HIP_CHECK(hipGraphCreate(&graph, 0));
hipGraphNode_t memsetNode, memcpyNode;
// Add MemSet Node
hipMemsetParams memsetParams{};
memset(&memsetParams, 0, sizeof(memsetParams));
memsetParams.dst = reinterpret_cast<void *>(A_d);
memsetParams.dst = reinterpret_cast<void*>(A_d);
memsetParams.value = memSetVal;
memsetParams.pitch = pitch_A;
memsetParams.elementSize = sizeof(char);
memsetParams.width = numW;
memsetParams.height = 1;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// Add MemCpy Node
hipMemcpy3DParms myparms{};
@@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
myparms.extent = make_hipExtent(width, 1, 1);
myparms.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
@@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
* Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using
* hipGraphAddMemsetNode. Copy the values in device memory to host using
* hipGraphAddMemcpyNode. Verify the results.
*/
*/
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
CHECK_IMAGE_SUPPORT
@@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
memsetParams.elementSize = sizeof(char);
memsetParams.width = numW;
memsetParams.height = numH;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// MemCpy params
@@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
// Add MemCpy Node
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verfication
for (size_t i = 0; i < numW; i++) {
for (size_t j = 0; j < numH; j++) {
REQUIRE(*(A_h + i*numH + j) == memSetVal);
REQUIRE(*(A_h + i * numH + j) == memSetVal);
}
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -343,7 +334,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
* Allocate a 1D array using hipMalloc3D. Initialize the allocated
* memory using hipGraphAddMemsetNode. Copy the values in device
* memory to host using hipGraphAddMemcpyNode. Verify the results.
*/
*/
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
CHECK_IMAGE_SUPPORT
@@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
memsetParams.elementSize = sizeof(char);
memsetParams.width = numW;
memsetParams.height = 1;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// MemCpy params
@@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
// Add MemCpy Node
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verfication
for (size_t i = 0; i < numW; i++) {
REQUIRE(A_h[i] == memSetVal);
REQUIRE(A_h[i] == memSetVal);
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
@@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
* Allocate a 1D array using hipMalloc. Initialize the allocated memory using
* hipGraphAddMemsetNode. Copy the values in device memory to host using
* hipGraphAddMemcpyNode. Verify the results.
*/
*/
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
char *A_d;
char* A_d;
size_t NumW = SIZE;
size_t Nbytes1D = SIZE * sizeof(char);
@@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
// Add Memset node
hipMemsetParams memsetParams{};
memset(&memsetParams, 0, sizeof(memsetParams));
memsetParams.dst = reinterpret_cast<void *>(A_d);
memsetParams.dst = reinterpret_cast<void*>(A_d);
memsetParams.value = memSetVal;
memsetParams.pitch = Nbytes1D;
memsetParams.elementSize = sizeof(char);
memsetParams.width = NumW;
memsetParams.height = 1;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// Add MemCpy Node
hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0};
@@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
myparms.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verfication
for (size_t i = 0; i < NumW; i++) {
REQUIRE(A_h[i] == memSetVal);
REQUIRE(A_h[i] == memSetVal);
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
@@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
int managed = 0;
HIP_CHECK(hipDeviceGetAttribute(&managed,
hipDeviceAttributeManagedMemory, 0));
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
INFO("hipDeviceAttributeManagedMemory: " << managed);
if (managed != 1) {
WARN(
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
"so defaulting to system memory.");
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
"so defaulting to system memory.");
}
size_t Nbytes1D = SIZE * sizeof(char);
char *A_d;
char* A_d;
// Initialize the host memory
std::vector<char> A_h(SIZE, ' ');
// Device Memory
@@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
// Add Memset node
hipMemsetParams memsetParams{};
memset(&memsetParams, 0, sizeof(memsetParams));
memsetParams.dst = reinterpret_cast<void *>(A_d);
memsetParams.dst = reinterpret_cast<void*>(A_d);
memsetParams.value = memSetVal;
memsetParams.pitch = Nbytes1D;
memsetParams.elementSize = sizeof(char);
memsetParams.width = SIZE;
memsetParams.height = 1;
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
&memsetParams));
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
nodeDependencies.push_back(memsetNode);
// Add MemCpy Node
@@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
myparms.kind = hipMemcpyDeviceToHost;
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
nodeDependencies.size(), &myparms));
nodeDependencies.size(), &myparms));
nodeDependencies.clear();
// Create executable graph
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
HIP_CHECK(hipStreamCreate(&streamForGraph));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
// Verfication
for (size_t i = 0; i < SIZE; i++) {
REQUIRE(A_h[i] == memSetVal);
REQUIRE(A_h[i] == memSetVal);
}
HIP_CHECK(hipGraphExecDestroy(graphExec));
@@ -22,7 +22,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
@@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s
* node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
* incorrect parameters. After the graph is instantiated the parameters are updated to correct
* values and the graph executed. After graph execution, a MemcpyFromSymbol is performed and the
* copied values are compared against values known to have been copied to symbol memory previously.
* copied values are compared against values known to have been copied to symbol memory previously.
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
* Verification is performed for destination memory allocated on host and device.
@@ -21,7 +21,6 @@ THE SOFTWARE.
#include <functional>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include "graph_memset_node_test_common.hh"
@@ -46,7 +45,7 @@ THE SOFTWARE.
* which also constitutes a test for said API.
* The test is repeated for all valid element sizes(1,
* 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes
* can be updated), both on host and device
* can be updated), both on host and device
* Test source
* ------------------------
* - unit/graph/hipGraphExecMemsetNodeSetParams.cc
@@ -22,7 +22,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
@@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym
* - Verify that data is correctly copied from a symbol after node parameters are set following
* node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but
* incorrect parameters. The parameters are then updated to correct values and the graph executed.
* Values in destination memory are compared against values known to be in symbol memory.
* Values in destination memory are compared against values known to be in symbol memory.
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
* Verification is performed for destination memory allocated on host and device.
@@ -22,7 +22,6 @@ THE SOFTWARE.
#include <functional>
#include <vector>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
@@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo
* node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
* incorrect parameters. The parameters are then updated to correct values and the graph executed.
* After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against
* values known to have been copied to symbol memory previously.
* values known to have been copied to symbol memory previously.
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
* Verification is performed for destination memory allocated on host and device.
@@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
@@ -21,7 +21,6 @@ THE SOFTWARE.
#include <functional>
#include <hip_test_defgroups.hh>
#include <hip_test_common.hh>
#include "graph_memset_node_test_common.hh"
@@ -44,7 +43,7 @@ THE SOFTWARE.
* The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test
* for said API.
* The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different
* height and width both on host and device
* height and width both on host and device
* Test source
* ------------------------
* - unit/graph/hipGraphMemsetNodeSetParams.cc
@@ -18,7 +18,6 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh"
@@ -19,8 +19,7 @@ THE SOFTWARE.
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh" // NOLINT
#include "stream_capture_common.hh" // NOLINT
#pragma clang diagnostic ignored "-Wunused-variable"
/**
@@ -56,8 +55,7 @@ static void hostNodeCallback(void* data) {
}
template <typename T, typename F>
void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
hipStream_t stream) {
void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) {
constexpr size_t N = 1000000;
size_t Nbytes = N * sizeof(T);
@@ -89,8 +87,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
HIP_CHECK(hipGraphLaunch(graphExec, stream));
HIP_CHECK(hipStreamSynchronize(stream));
ArrayFindIfNot(B_h.host_ptr(),
static_cast<float>(i) * static_cast<float>(i), N);
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * static_cast<float>(i), N);
}
HIP_CHECK(hipGraphExecDestroy(graphExec))
@@ -117,16 +114,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {
StreamGuard stream_guard(stream_type);
hipStream_t stream = stream_guard.stream();
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
EventsGuard events_guard(3);
StreamsGuard streams_guard(2);
SECTION("Linear graph capture") {
captureStreamAndLaunchGraph<float>(
[](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
hipStream_t stream) {
[](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) {
return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
},
captureMode, stream);
@@ -134,10 +130,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {
SECTION("Branched graph capture") {
captureStreamAndLaunchGraph<float>(
[&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h,
float* B_d, size_t N, hipStream_t stream) {
captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream,
streams_guard.stream_list(), events_guard.event_list());
[&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
hipStream_t stream) {
captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(),
events_guard.event_list());
},
captureMode, stream);
}
@@ -173,8 +169,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
hipErrorIllegalState);
}
SECTION("Creating hipStream with invalid mode") {
HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)),
hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue);
}
#if HT_NVIDIA // EXSWHTEC-216
SECTION("Stream capture on uninitialized stream returns error code.") {
@@ -182,8 +177,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
StreamGuard sg(Streams::created);
return sg.stream();
};
HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(),
hipStreamCaptureModeGlobal),
HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal),
hipErrorContextIsDestroyed);
}
#endif
@@ -207,8 +201,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {
StreamGuard stream_guard(stream_type);
hipStream_t s = stream_guard.stream();
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
HIP_CHECK(hipStreamBeginCapture(s, captureMode));
@@ -218,8 +212,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {
/* Local function for inter stream event synchronization
*/
static void interStrmEventSyncCapture(const hipStream_t& stream1,
const hipStream_t& stream2) {
static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
hipGraph_t graph1{nullptr}, graph2{nullptr};
hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};
@@ -266,8 +259,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1,
/* Local function for colligated stream capture
*/
static void colligatedStrmCapture(const hipStream_t& stream1,
const hipStream_t& stream2) {
static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
hipGraph_t graph1{nullptr}, graph2{nullptr};
hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};
@@ -310,8 +302,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1,
/* Local function for colligated stream capture functionality
*/
static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
const hipStream_t& stream2) {
static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) {
constexpr size_t N = 1000000;
size_t Nbytes = N * sizeof(int);
@@ -331,10 +322,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
// Capture 2 streams
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal));
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(),
N, stream1);
captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(),
N, stream2);
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
HIP_CHECK(hipStreamEndCapture(stream1, &graph1));
@@ -370,9 +359,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
/* Stream Capture thread function
*/
static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d,
int* B_h, int* B_d, hipGraph_t* graph,
size_t N, hipStreamCaptureMode mode) {
static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d,
hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) {
// Capture stream
HIP_CHECK(hipStreamBeginCapture(stream, mode));
captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
@@ -404,10 +392,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) {
LinearAllocGuard<int> D_d(LinearAllocs::hipMalloc, Nbytes);
// Launch 2 threads to capture the 2 streams into graphs
std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(),
B_h.host_ptr(), B_d.ptr(), &graph1, N, mode);
std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(),
D_h.host_ptr(), D_d.ptr(), &graph2, N, mode);
std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(),
B_d.ptr(), &graph1, N, mode);
std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(),
D_d.ptr(), &graph2, N, mode);
t1.join();
t2.join();
@@ -480,11 +468,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") {
TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") {
int minPriority = 0, maxPriority = 0;
HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
minPriority);
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
hipStream_t stream1 = stream_guard1.stream();
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
maxPriority);
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
hipStream_t stream2 = stream_guard2.stream();
interStrmEventSyncCapture(stream1, stream2);
}
@@ -533,11 +519,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") {
TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") {
int minPriority = 0, maxPriority = 0;
HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
minPriority);
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
hipStream_t stream1 = stream_guard1.stream();
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
maxPriority);
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
hipStream_t stream2 = stream_guard2.stream();
colligatedStrmCapture(stream1, stream2);
}
@@ -578,8 +562,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") {
* - HIP_VERSION >= 5.2
*/
TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") {
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
multithreadedTest(captureMode);
}
@@ -708,8 +692,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") {
HIP_CHECK(hipEventRecord(events[2], streams[2]));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
streams[0]));
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0]));
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
// Reset device memory
HIP_CHECK(hipMemset(devMem, 0, sizeof(int)));
@@ -751,8 +734,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") {
dummyKernel<<<1, 1, 0, streams[0]>>>();
// Since stream[1] is already in capture mode due to event wait
// hipStreamBeginCapture on stream[1] is expected to return error.
HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1],
hipStreamCaptureModeGlobal),
HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal),
hipErrorIllegalState);
}
@@ -785,8 +767,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") {
for (int i = 0; i < 3; i++) {
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem);
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
stream1));
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1));
HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i]));
}
// Instantiate and execute all graphs
@@ -825,22 +806,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") {
EventsGuard events_guard(1);
hipEvent_t e = events_guard[0];
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
const hipStreamCaptureMode captureMode = GENERATE(
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
SECTION("Synchronize stream during capture") {
HIP_CHECK_ERROR(hipStreamSynchronize(stream),
hipErrorStreamCaptureUnsupported);
HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported);
}
SECTION("Query stream during capture") {
HIP_CHECK_ERROR(hipStreamQuery(stream),
hipErrorStreamCaptureUnsupported);
HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported);
}
#if HT_NVIDIA
SECTION("Synchronize device during capture") {
HIP_CHECK_ERROR(hipDeviceSynchronize(),
hipErrorStreamCaptureUnsupported);
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported);
}
SECTION("Synchronize event during capture") {
HIP_CHECK(hipEventRecord(e, stream));
@@ -884,17 +862,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") {
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
SECTION("hipMalloc during capture") {
HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)),
hipErrorStreamCaptureUnsupported);
HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported);
}
SECTION("hipMemcpy during capture") {
HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int),
hipMemcpyHostToDevice),
HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice),
hipErrorStreamCaptureImplicit);
}
SECTION("hipMemset during capture") {
HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)),
hipErrorStreamCaptureImplicit);
HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit);
}
}
#endif
@@ -931,8 +906,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
HIP_CHECK(hipEventRecord(e, stream1));
HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
dummyKernel<<<1, 1, 0, stream2>>>();
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
hipErrorStreamCaptureUnjoined);
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
}
SECTION("End strm capture when forked strm still has operations") {
EventsGuard events_guard(2);
@@ -946,8 +920,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
HIP_CHECK(hipEventRecord(e2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0));
dummyKernel<<<1, 1, 0, stream2>>>();
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
hipErrorStreamCaptureUnjoined);
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
}
}
/**
@@ -970,19 +943,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
SUCCEED("skipping the testcases as numDevices < 2");
return;
}
hipStream_t* stream = reinterpret_cast<hipStream_t*>
(malloc(devcount * sizeof(hipStream_t)));
hipStream_t* stream = reinterpret_cast<hipStream_t*>(malloc(devcount * sizeof(hipStream_t)));
REQUIRE(stream != nullptr);
hipGraph_t* graph = reinterpret_cast<hipGraph_t*>
(malloc(devcount * sizeof(hipGraph_t)));
hipGraph_t* graph = reinterpret_cast<hipGraph_t*>(malloc(devcount * sizeof(hipGraph_t)));
REQUIRE(graph != nullptr);
int **devMem{nullptr}, **hostMem{nullptr};
hostMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
REQUIRE(hostMem != nullptr);
devMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
REQUIRE(devMem != nullptr);
hipGraphExec_t* graphExec = reinterpret_cast<hipGraphExec_t*>
(malloc(devcount * sizeof(hipGraphExec_t)));
hipGraphExec_t* graphExec =
reinterpret_cast<hipGraphExec_t*>(malloc(devcount * sizeof(hipGraphExec_t)));
// Capture stream in each device
for (int dev = 0; dev < devcount; dev++) {
HIP_CHECK(hipSetDevice(dev));
@@ -994,15 +965,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
for (int i = 0; i < (dev + 1); i++) {
incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]);
}
HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int),
hipMemcpyDefault, stream[dev]));
HIP_CHECK(
hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev]));
HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev]));
}
// Launch the captured graphs in the respective device
for (int dev = 0; dev < devcount; dev++) {
HIP_CHECK(hipSetDevice(dev));
HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr,
nullptr, 0));
HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev]));
}
// Validate output
@@ -1069,8 +1039,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") {
HIP_CHECK(hipEventRecord(events[3], streams[2]));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int),
hipMemcpyDefault, streams[0]));
HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault,
streams[0]));
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
// Reset device memory
HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int)));
@@ -1108,23 +1078,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
hipGraph_t graphs[3];
StreamsGuard streams(3);
EventsGuard events(4);
LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>
(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>
(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>
(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>
(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>
(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>
(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(),
hostMem_g3.host_ptr()};
std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(),
devMem_g3.ptr()};
std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()};
std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()};
// Create a device memory of size int and initialize it to 0
for (int i = 0; i < 3; i++) {
memset(hostMem[i], 0, sizeof(int));
@@ -1148,16 +1110,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
HIP_CHECK(hipEventRecord(events[3], streams[2]));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int),
hipMemcpyDefault, streams[0]));
HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0]));
HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture
// Start capturing graph2 from stream 2
HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal));
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int),
hipMemcpyDefault, streams[1]));
HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1]));
HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture
// Start capturing graph3 from stream 3
HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal));
@@ -1166,8 +1126,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int),
hipMemcpyDefault, streams[2]));
HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2]));
HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture
// Reset device memory
HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int)));
@@ -1211,40 +1170,32 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
EventsGuard events(7);
// Allocate Device memory and Host memory
size_t N = GRIDSIZE * BLOCKSIZE;
LinearAllocGuard<int> Ah = LinearAllocGuard<int>
(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>
(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Ch = LinearAllocGuard<int>
(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Ad = LinearAllocGuard<int>
(LinearAllocs::hipMalloc, N * sizeof(int));
LinearAllocGuard<int> Bd = LinearAllocGuard<int>
(LinearAllocs::hipMalloc, N * sizeof(int));
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Ch = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));
// Capture streams into graph
HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal));
HIP_CHECK(hipEventRecord(events[0], streams[0]));
HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0));
HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)),
hipMemcpyDefault, streams[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)),
hipMemcpyDefault, streams[4]));
HIP_CHECK(
hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
HIP_CHECK(
hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4]));
hipHostFn_t fn = hostNodeCallback;
HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr));
HIP_CHECK(hipEventRecord(events[1], streams[0]));
HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0));
int* Ad_2nd_half = Ad.ptr() + N / 2;
int* Ad_1st_half = Ad.ptr();
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half,
CONST_KER2_VAL);
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
CONST_KER1_VAL);
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half, CONST_KER2_VAL);
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER1_VAL);
HIP_CHECK(hipEventRecord(events[2], streams[1]));
HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0));
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
CONST_KER3_VAL);
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER3_VAL);
HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr));
HIP_CHECK(hipEventRecord(events[6], streams[1]));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0));
@@ -1255,8 +1206,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
HIP_CHECK(hipEventRecord(events[4], streams[3]));
HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0));
HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)),
hipMemcpyDefault, streams[0]));
HIP_CHECK(
hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
// Execute and test the graph
hipGraphExec_t graphExec{nullptr};
@@ -1269,11 +1220,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
HIP_CHECK(hipStreamSynchronize(streams[0]));
for (size_t i = 0; i < N; i++) {
if (i > (N / 2 - 1)) {
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
Ah.host_ptr()[i] * CONST_KER2_VAL));
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL));
} else {
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
REQUIRE(Ch.host_ptr()[i] ==
(Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
}
}
}
@@ -1340,14 +1290,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
hipGraph_t graph{nullptr};
hipGraphExec_t graphExec{nullptr};
// Allocate device memory
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ad =
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bd =
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
// Fill input data
std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
@@ -1357,10 +1305,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
SECTION("Stream Creation Before Capture") {
StreamsGuard stream1(1);
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipStreamSynchronize(stream1[0]));
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
@@ -1368,10 +1316,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
SECTION("Synchronizing multiple streams during Capture") {
StreamsGuard stream1(1), stream2(1);
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream2[0]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream2[0]));
HIP_CHECK(hipStreamSynchronize(stream1[0]));
HIP_CHECK(hipStreamSynchronize(stream2[0]));
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
@@ -1380,20 +1328,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
SECTION("Stream Creation After Capture") {
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
StreamsGuard stream1(1);
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipStreamSynchronize(stream1[0]));
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
}
SECTION("Stream Synchronize Before Capture") {
StreamsGuard stream1(1);
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipStreamSynchronize(stream1[0]));
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
@@ -1404,10 +1352,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
StreamsGuard stream1(1);
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream1[0]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream1[0]));
HIP_CHECK(hipStreamSynchronize(stream1[0]));
}
// Execute and test the graph
@@ -1415,8 +1363,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
HIP_CHECK(hipGraphLaunch(graphExec, stream0[0]));
HIP_CHECK(hipStreamSynchronize(stream0[0]));
// Check output
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
for (int idx = 0; idx < BLOCKSIZE; idx++) {
REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
}
@@ -1437,20 +1384,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
* - HIP_VERSION >= 5.6
*/
// Local function executed as thread
static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd,
int BLOCKSIZE, hipError_t *error) {
static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) {
StreamsGuard stream(1);
HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream[0]));
HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int),
hipMemcpyDefault, stream[0]));
HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
*error = hipStreamSynchronize(stream[0]);
}
// Local function executed as thread
static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad,
int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE,
hipStreamCaptureMode flag, hipError_t *error) {
static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE,
int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) {
StreamsGuard stream(1);
// Capture streams into graph
HIP_CHECK(hipStreamBeginCapture(stream[0], flag));
@@ -1466,14 +1409,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
constexpr int VALUE1 = 7, VALUE2 = 11;
hipGraph_t graph{nullptr};
// Allocate device memory
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Ad =
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
LinearAllocGuard<int> Bd =
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
// Fill input data
std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
@@ -1483,10 +1424,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
StreamsGuard stream(2);
// Capture streams into graph
HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream[1]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream[1]));
error = hipStreamSynchronize(stream[1]);
REQUIRE(error == hipErrorStreamCaptureUnsupported);
}
@@ -1494,34 +1435,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") {
StreamsGuard stream(2);
// Capture streams into graph
HIP_CHECK(hipStreamBeginCapture(stream[0],
hipStreamCaptureModeThreadLocal));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal));
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream[1]));
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
stream[1]));
error = hipStreamSynchronize(stream[1]);
REQUIRE(error == hipErrorStreamCaptureUnsupported);
}
#endif
#if HT_AMD
SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") {
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeGlobal, &error);
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeGlobal, &error);
REQUIRE(error == hipErrorStreamCaptureUnsupported);
}
#endif
SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") {
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeThreadLocal, &error);
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeThreadLocal, &error);
REQUIRE(error == hipSuccess);
}
SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") {
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeRelaxed, &error);
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
hipStreamCaptureModeRelaxed, &error);
REQUIRE(error == hipSuccess);
}
if (graph != nullptr) {
@@ -1532,8 +1469,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
HIP_CHECK(hipGraphLaunch(graphExec, stream[0]));
HIP_CHECK(hipStreamSynchronize(stream[0]));
// Check output
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
for (int idx = 0; idx < BLOCKSIZE; idx++) {
REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
}
@@ -19,7 +19,6 @@ THE SOFTWARE.
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh"
@@ -18,7 +18,6 @@ THE SOFTWARE.
*/
#include <hip_test_checkers.hh>
#include <hip_test_defgroups.hh>
#include <hip_test_kernels.hh>
#include "stream_capture_common.hh"
@@ -19,7 +19,6 @@ THE SOFTWARE.
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh"
@@ -18,7 +18,6 @@ THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_defgroups.hh>
#include <hip_test_kernels.hh>
#include "stream_capture_common.hh"
@@ -20,7 +20,6 @@ THE SOFTWARE.
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh"
@@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") {
const hipStreamUpdateCaptureDependenciesFlags flag =
GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies);
HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); // hipStreamCaptureModeGlobal));
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag));
@@ -20,7 +20,6 @@ THE SOFTWARE.
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_defgroups.hh>
#include "stream_capture_common.hh"
@@ -21,7 +21,6 @@ THE SOFTWARE.
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
#include <hip_test_defgroups.hh>
#define WIDTH 4
@@ -32,20 +31,17 @@ THE SOFTWARE.
#define THREADS_PER_BLOCK_Z 1
// Device (Kernel) function, it must be void
template <typename T>
__global__ void matrixTranspose(T* out, T* in, const int width) {
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
int x = blockDim.x * blockIdx.x + threadIdx.x;
T val = in[x];
for (int i = 0; i < width; i++) {
for (int j = 0; j < width; j++)
out[i * width + j] = __shfl(val, j * width + i);
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
}
}
// CPU implementation of matrix transpose
template <typename T>
void matrixTransposeCPUReference(T* output,
T* input, const unsigned int width) {
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
for (unsigned int j = 0; j < width; j++) {
for (unsigned int i = 0; i < width; i++) {
output[i * width + j] = input[j * width + i];
@@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output,
}
static void getFactor(int* fact) { *fact = 101; }
static void getFactor(unsigned int* fact) {
*fact = static_cast<unsigned int>(INT32_MAX)+1;
}
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
static void getFactor(uint64_t* fact) {
*fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
}
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
template <typename T>
int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
int errors = 0;
for (int i = 0; i < NUM; i++) {
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
errors++;
}
}
return errors;
}
template <>
int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
int errors = 0;
for (int i = 0; i < NUM; i++) {
if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
errors++;
}
}
return errors;
}
template <typename T>
void init(T* Matrix) {
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
int errors = 0;
for (int i = 0; i < NUM; i++) {
if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT
errors++;
}
}
return errors;
}
template <typename T> void init(T* Matrix) {
// initialize the input data
T factor;
getFactor(&factor);
for (int i = 0; i < NUM; i++) {
Matrix[i] = (T)i + factor;
Matrix[i] = (T)i + factor;
}
}
template <>
void init(__half* Matrix) {
template <> void init(__half* Matrix) {
// initialize the input data
__half factor;
getFactor(&factor);
for (int i = 0; i < NUM; i++) {
Matrix[i] = i + __half2float(factor);
Matrix[i] = i + __half2float(factor);
}
}
template<typename T>
static void runTest() {
template <typename T> static void runTest() {
T* Matrix;
T* TransposeMatrix;
T* cpuTransposeMatrix;
@@ -129,21 +116,17 @@ static void runTest() {
// allocate the memory on the device side
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix),
NUM * sizeof(T)));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T),
hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
// Lauching kernel from host
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1),
dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
gpuTransposeMatrix, gpuMatrix, WIDTH);
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix,
NUM * sizeof(T), hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
@@ -183,26 +166,12 @@ static void runTest() {
*/
TEST_CASE("Unit_hipShflTests") {
SECTION("run test for int") {
runTest<int>();
}
SECTION("run test for float") {
runTest<float>();
}
SECTION("run test for double") {
runTest<double>();
}
SECTION("run test for int") { runTest<int>(); }
SECTION("run test for float") { runTest<float>(); }
SECTION("run test for double") { runTest<double>(); }
// Test added to support half datatype.
SECTION("run test for __half") {
runTest<__half>();
}
SECTION("run test for int64_t") {
runTest<int64_t>();
}
SECTION("run test for unsigned int") {
runTest<unsigned int>();
}
SECTION("run test for uint64_t") {
runTest<uint64_t>();
}
SECTION("run test for __half") { runTest<__half>(); }
SECTION("run test for int64_t") { runTest<int64_t>(); }
SECTION("run test for unsigned int") { runTest<unsigned int>(); }
SECTION("run test for uint64_t") { runTest<uint64_t>(); }
}
@@ -21,12 +21,10 @@ THE SOFTWARE.
#include <hip_test_checkers.hh>
#include <hip_test_common.hh>
#include <hip/hip_fp16.h>
#include <hip_test_defgroups.hh>
const int size = 32;
template <typename T>
__global__ void shflDownSum(T* a, int size) {
template <typename T> __global__ void shflDownSum(T* a, int size) {
T val = a[threadIdx.x];
for (int i = size / 2; i > 0; i /= 2) {
val += __shfl_down(val, i, size);
@@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) {
a[threadIdx.x] = val;
}
template <typename T>
__global__ void shflUpSum(T* a, int size) {
template <typename T> __global__ void shflUpSum(T* a, int size) {
T val = a[threadIdx.x];
for (int i = size / 2; i > 0; i /= 2) {
val += __shfl_up(val, i, size);
@@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) {
a[threadIdx.x] = val;
}
template <typename T>
__global__ void shflXorSum(T* a, int size) {
template <typename T> __global__ void shflXorSum(T* a, int size) {
T val = a[threadIdx.x];
for (int i = size/2; i > 0; i /= 2) {
for (int i = size / 2; i > 0; i /= 2) {
val += __shfl_xor(val, i, size);
}
a[threadIdx.x] = val;
}
static void getFactor(int* fact) { *fact = 101; }
static void getFactor(unsigned int* fact) {
*fact = static_cast<unsigned int>(INT32_MAX)+1;
}
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
static void getFactor(float* fact) { *fact = 2.5; }
static void getFactor(double* fact) { *fact = 2.5; }
static void getFactor(__half* fact) { *fact = 2.5; }
static void getFactor(int64_t* fact) { *fact = 303; }
static void getFactor(uint64_t* fact) {
*fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
}
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
template <typename T> T sum(T* a) {
T cpuSum = 0;
T factor;
getFactor(&factor);
for (int i = 0; i < size; i++) {
a[i] = i + factor;
cpuSum += a[i];
a[i] = i + factor;
cpuSum += a[i];
}
return cpuSum;
}
@@ -80,8 +72,8 @@ template <> __half sum(__half* a) {
__half factor;
getFactor(&factor);
for (int i = 0; i < size; i++) {
a[i] = i + __half2float(factor);
cpuSum = __half2float(cpuSum) + __half2float(a[i]);
a[i] = i + __half2float(factor);
cpuSum = __half2float(cpuSum) + __half2float(a[i]);
}
return cpuSum;
}
@@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) {
return false;
}
template <typename T>
static void runTestShflUp() {
template <typename T> static void runTestShflUp() {
const int size = 32;
T a[size];
T cpuSum = sum(a);
@@ -114,8 +105,7 @@ static void runTestShflUp() {
HIP_CHECK(hipFree(d_a));
}
template <typename T>
static void runTestShflDown() {
template <typename T> static void runTestShflDown() {
T a[size];
T cpuSum = sum(a);
T* d_a;
@@ -127,8 +117,7 @@ static void runTestShflDown() {
HIP_CHECK(hipFree(d_a));
}
template <typename T>
static void runTestShflXor() {
template <typename T> static void runTestShflXor() {
T a[size];
T cpuSum = sum(a);
T* d_a;
@@ -141,12 +130,12 @@ static void runTestShflXor() {
}
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_up function
*/
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_up function
*/
/**
* Test Description
@@ -164,27 +153,13 @@ static void runTestShflXor() {
*/
TEST_CASE("Unit_runTestShfl_up") {
SECTION("runTestShflUp for int") {
runTestShflUp<int>();
}
SECTION("runTestShflUp for float") {
runTestShflUp<float>();
}
SECTION("runTestShflUp for double") {
runTestShflUp<double>();
}
SECTION("runTestShflUp for __half") {
runTestShflUp<__half>();
}
SECTION("runTestShflUp for int64_t") {
runTestShflUp<int64_t>();
}
SECTION("runTestShflUp for unsigned int") {
runTestShflUp<unsigned int>();
}
SECTION("runTestShflUp for uint64_t") {
runTestShflUp<uint64_t>();
}
SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
}
/**
* End doxygen group __shfl.
@@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") {
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_down function
*/
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
* Contains warp __shfl_down function
*/
/**
* Test Description
@@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") {
*/
TEST_CASE("Unit_runTestShfl_Down") {
SECTION("runTestShflDown for int") {
runTestShflDown<int>();
}
SECTION("runTestShflDown for float") {
runTestShflDown<float>();
}
SECTION("runTestShflDown for double") {
runTestShflDown<double>();
}
SECTION("runTestShflDown for __half") {
runTestShflDown<__half>();
}
SECTION("runTestShflDown for int64_t") {
runTestShflDown<int64_t>();
}
SECTION("runTestShflDown for unsigned int") {
runTestShflDown<unsigned int>();
}
SECTION("runTestShflDown for uint64_t") {
runTestShflDown<uint64_t>();
}
SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
}
/**
* End doxygen group __shfl.
@@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") {
*/
/**
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
* Contains warp __shfl_xor function
*/
* @addtogroup __shfl __shfl
* @{
* @ingroup ShflTest
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
* Contains warp __shfl_xor function
*/
/**
* Test Description
@@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") {
*/
TEST_CASE("Unit_runTestShfl_Xor") {
SECTION("runTestShflXor for int") {
runTestShflXor<int>();
}
SECTION("runTestShflXor for float") {
runTestShflXor<float>();
}
SECTION("runTestShflXor for double") {
runTestShflXor<double>();
}
SECTION("runTestShflXor for __half") {
runTestShflXor<__half>();
}
SECTION("runTestShflXor for int64_t") {
runTestShflXor<int64_t>();
}
SECTION("runTestShflXor for unsigned int") {
runTestShflXor<unsigned int>();
}
SECTION("runTestShflXor for uint64_t") {
runTestShflXor<uint64_t>();
}
SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
}
/**
* End doxygen group __shfl.
@@ -20,7 +20,6 @@ THE SOFTWARE.
#include <hip_test_kernels.hh>
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_defgroups.hh>
#define NUMBER_OF_THREADS 10
static bool thread_results[NUMBER_OF_THREADS];
@@ -54,8 +53,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") {
HIP_CHECK(hipStreamCreate(&stream));
HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr),
hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue);
HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue);
HIP_CHECK(hipStreamDestroy(stream));
}
@@ -145,9 +143,7 @@ static bool validateStreamGetDevice() {
return true;
}
static void thread_Test(int threadNum) {
thread_results[threadNum] = validateStreamGetDevice();
}
static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); }
static bool test_hipStreamGetDevice_MThread() {
std::vector<std::thread> tests;
@@ -158,7 +154,7 @@ static bool test_hipStreamGetDevice_MThread() {
tests.push_back(std::thread(thread_Test, idx));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
for (std::thread& t : tests) {
t.join();
}
// Wait for thread
@@ -169,9 +165,7 @@ static bool test_hipStreamGetDevice_MThread() {
return status;
}
TEST_CASE("Unit_hipStreamGetDevice_MThread") {
REQUIRE(true == test_hipStreamGetDevice_MThread());
}
TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); }
/**
* Test Description