EXSWHTEC-281 - Implement Unit Tests for assert functions #210
Change-Id: I6c05915c957d9b67951b3e97cc35cb1ca72a945f
[ROCm/hip-tests commit: 044a59496c]
This commit is contained in:
zatwierdzone przez
Rakesh Roy
rodzic
1cd4e6a55b
commit
dcade635c6
@@ -309,6 +309,8 @@
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
#endif
|
||||
#if defined VEGA20
|
||||
"=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===",
|
||||
|
||||
@@ -440,6 +440,11 @@
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - float",
|
||||
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double",
|
||||
"Unit_Thread_Block_Tile_Getters_Positive_Basic",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_StaticAssert_Positive_Basic_RTC",
|
||||
"Unit_Assert_Positive_Basic_KernelFail",
|
||||
"Unit_StaticAssert_Positive_Basic",
|
||||
"Unit_StaticAssert_Negative_Basic",
|
||||
#endif
|
||||
"End of json"
|
||||
]
|
||||
|
||||
@@ -89,6 +89,9 @@
|
||||
"Performance_hipMemsetD32",
|
||||
"Performance_hipMemsetD32Async",
|
||||
"Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior",
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior"
|
||||
"Unit_hipMemcpy_Positive_Synchronization_Behavior",
|
||||
"=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===",
|
||||
"Unit_StaticAssert_Positive_Basic",
|
||||
"Unit_StaticAssert_Negative_Basic"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -129,6 +129,19 @@ THE SOFTWARE.
|
||||
} \
|
||||
}
|
||||
|
||||
// Evaluates errorExpr once, stores the result in a block-local variable, and requires that it
// compares equal to expectedError. Before the comparison an INFO message is registered that
// carries the expected/actual error strings and codes, the stringified expression, and the
// file and line of the macro expansion.
// Note: expectedError is expanded several times, so it should be a side-effect-free value.
#define HIPRTC_CHECK_ERROR(errorExpr, expectedError)                                               \
  {                                                                                                \
    auto localError = errorExpr;                                                                   \
    INFO("Matching Errors: "                                                                       \
         << "\n Expected Error: " << hiprtcGetErrorString(expectedError)                           \
         << "\n Expected Code: " << expectedError << '\n'                                          \
         << " Actual Error: " << hiprtcGetErrorString(localError)                                  \
         << "\n Actual Code: " << localError << "\nStr: " << #errorExpr                            \
         << "\n In File: " << __FILE__ << "\n At line: " << __LINE__);                             \
    REQUIRE(localError == expectedError);                                                          \
  }
|
||||
|
||||
#define HIPASSERT(condition) \
|
||||
if (!(condition)) { \
|
||||
printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \
|
||||
|
||||
@@ -59,3 +59,4 @@ add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246
|
||||
endif()
|
||||
add_subdirectory(synchronization)
|
||||
add_subdirectory(launchBounds)
|
||||
add_subdirectory(assertion)
|
||||
@@ -0,0 +1,48 @@
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
# Select the sources and the runtime-compilation library for the active platform.
# static_assert.cc is only built for the AMD platform.
if(HIP_PLATFORM MATCHES "nvidia")
  set(TEST_SRC assert.cc)
  set(ASSERTION_RTC_LIB nvrtc)
elseif(HIP_PLATFORM MATCHES "amd")
  set(TEST_SRC static_assert.cc assert.cc)
  set(ASSERTION_RTC_LIB hiprtc)
endif()

# The executable is only registered for the two platforms handled above.
if(HIP_PLATFORM MATCHES "nvidia|amd")
  hip_add_exe_to_target(NAME AssertionTest
                        TEST_SRC ${TEST_SRC}
                        TEST_TARGET_NAME build_tests
                        LINKER_LIBS ${ASSERTION_RTC_LIB})
endif()
|
||||
|
||||
# Registers Unit_StaticAssert_Positive_Basic and Unit_StaticAssert_Negative_Basic.
# Each test runs compileAndCaptureOutput.py on the matching static_assert_kernels_*.cc file;
# the trailing 2 is passed through to the script unchanged.
foreach(STATIC_ASSERT_KIND Positive Negative)
  string(TOLOWER ${STATIC_ASSERT_KIND} STATIC_ASSERT_KIND_LOWER)
  add_test(NAME Unit_StaticAssert_${STATIC_ASSERT_KIND}_Basic
           COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
                   ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
                   static_assert_kernels_${STATIC_ASSERT_KIND_LOWER}.cc 2)
endforeach()
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <csetjmp>
|
||||
#include <csignal>
|
||||
|
||||
/**
|
||||
* @addtogroup assert assert
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* `void assert(int expression)` -
|
||||
* Stops the kernel execution if expression is equal to zero.
|
||||
*/
|
||||
|
||||
// Jump target that on_sigabrt returns to; armed by try_and_catch_abort.
jmp_buf env_ignore_abort;

// Set to 1 by on_sigabrt when SIGABRT was intercepted during the most recent call to
// try_and_catch_abort, 0 otherwise. volatile std::sig_atomic_t is the object type that a
// signal handler is allowed to write.
volatile std::sig_atomic_t abort_raised_flag = 0;

/**
 * Signal handler installed for SIGABRT by try_and_catch_abort.
 *
 * Restores the default disposition for the signal, records that the signal was raised and
 * jumps back to the setjmp point in try_and_catch_abort instead of returning.
 *
 * @param signum Number of the signal being handled.
 */
void on_sigabrt(int signum) {
  signal(signum, SIG_DFL);
  abort_raised_flag = 1;
  longjmp(env_ignore_abort, 1);
}

/**
 * Runs func while intercepting SIGABRT, so that an abort raised by func does not terminate
 * the calling process.
 *
 * abort_raised_flag is cleared before func runs, which means that after this function
 * returns the flag describes this call only and not any earlier one.
 *
 * NOTE(review): when SIGABRT is raised, control leaves func through longjmp, so objects
 * living in the frames between func and this function are not destroyed.
 *
 * @param func Function to execute; must not be null.
 */
void try_and_catch_abort(void (*func)()) {
  // Drop any result left behind by a previous invocation.
  abort_raised_flag = 0;

  if (!setjmp(env_ignore_abort)) {
    signal(SIGABRT, &on_sigabrt);
    (*func)();
    // func returned normally: the handler did not run, so restore the default here.
    signal(SIGABRT, SIG_DFL);
  }
}
|
||||
|
||||
// Kernel whose device-side assert is expected to hold for every launched thread.
__global__ void AssertPassKernel() {
  // Global linear thread index within the 1D launch.
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  // The index is never negative, so this condition is expected to be true.
  assert(tid >= 0);
}
|
||||
|
||||
// Kernel whose device-side assert is expected to fail for part of the launched threads.
__global__ void AssertFailKernel() {
  // Global linear thread index within the 1D launch.
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  // Only odd indices satisfy the condition; threads with an even index trip the assert.
  assert(tid % 2 == 1);
}
|
||||
|
||||
template <bool should_abort> void LaunchAssertKernel() {
|
||||
const int num_blocks = 2;
|
||||
const int num_threads = 16;
|
||||
|
||||
if constexpr (should_abort) {
|
||||
AssertFailKernel<<<num_blocks, num_threads, 0, 0>>>();
|
||||
#if HT_AMD
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
#else
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert);
|
||||
#endif
|
||||
} else {
|
||||
AssertPassKernel<<<num_blocks, num_threads, 0, 0>>>();
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels with asserts that have an expression equal to 1.
|
||||
* - Expects that SIGABRT is not raised and kernels have executed successfully.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") {
|
||||
try_and_catch_abort(&LaunchAssertKernel<false>);
|
||||
REQUIRE(abort_raised_flag == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Launches kernels with asserts that have an expression equal to 0.
|
||||
* - Expects that SIGABRT is raised and kernels have been stopped on AMD.
|
||||
* - The HIP runtime also aborts the host code, so this test case uses signal handlers
|
||||
* to avoid host code abortion.
|
||||
* - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA.
|
||||
* - The host code is not aborted.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") {
|
||||
try_and_catch_abort(&LaunchAssertKernel<true>);
|
||||
#if HT_AMD
|
||||
REQUIRE(abort_raised_flag == 1);
|
||||
#else
|
||||
REQUIRE(abort_raised_flag == 0);
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include "static_assert_kernels_rtc.hh"
|
||||
|
||||
/**
|
||||
* @addtogroup static_assert static_assert
|
||||
* @{
|
||||
* @ingroup DeviceLanguageTest
|
||||
* `void static_assert(constexpr expression, const char* message)` -
|
||||
* Stops the compilation if expression is equal to zero, and displays the specified message.
|
||||
*/
|
||||
|
||||
void StaticAssertWrapper(const char* program_source) {
|
||||
hiprtcProgram program{};
|
||||
|
||||
HIPRTC_CHECK(
|
||||
hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr));
|
||||
hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
|
||||
|
||||
// Get the compile log and count compiler error messages
|
||||
size_t log_size{};
|
||||
HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
|
||||
std::string log(log_size, ' ');
|
||||
HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
|
||||
int error_count{0};
|
||||
|
||||
int expected_error_count{2};
|
||||
std::string error_message{"error:"};
|
||||
|
||||
size_t n_pos = log.find(error_message, 0);
|
||||
while (n_pos != std::string::npos) {
|
||||
++error_count;
|
||||
n_pos = log.find(error_message, n_pos + 1);
|
||||
}
|
||||
|
||||
HIPRTC_CHECK(hiprtcDestroyProgram(&program));
|
||||
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
|
||||
REQUIRE(error_count == expected_error_count);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Compiles kernels with static_assert calls:
|
||||
* -# Expected that static_assert passes and compilation is successful.
|
||||
* -# Expected that static_assert fails and compilation has errors.
|
||||
* - Uses RTC to perform compilation.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/static_assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); }
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Passes invalidly formed expressions to static_assert calls.
|
||||
* - Uses expressions that are not constexpr and values that are not known during compilation.
|
||||
* - Uses RTC to perform compilation.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/assertion/static_assert.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); }
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Intentionally ill-formed: the static_assert condition reads a value that is only known at
// run time, so this kernel is expected to be rejected by the compiler.
__global__ void StaticAssertErrorKernel1() {
  // Global linear thread index; const, but not a constant expression.
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}
|
||||
|
||||
// Intentionally ill-formed: the static_assert condition modifies a run-time variable, so
// this kernel is expected to be rejected by the compiler.
__global__ void StaticAssertErrorKernel2() {
  // Global linear thread index; a mutable run-time value.
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
// Kernels whose static_assert condition holds; they are expected to compile cleanly.

__global__ void StaticAssertPassKernel1() {
  // long long is compared instead of long: long is not guaranteed to be wider than int
  // (both are 32 bits under the LLP64 data model used by 64-bit Windows), which would turn
  // this "pass" kernel into a compile error there.
  static_assert(sizeof(int) < sizeof(long long), "[StaticAssertPassKernel1]");
}

__global__ void StaticAssertPassKernel2() {
  static_assert(10 > 5, "[StaticAssertPassKernel2]");
}

// Kernels whose static_assert condition is false; each is expected to produce a compiler
// error.

__global__ void StaticAssertFailKernel1() {
  static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}

__global__ void StaticAssertFailKernel2() {
  static_assert(10 < 5, "[StaticAssertFailKernel2]");
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Positive and negative kernels used for the static_assert Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
// RTC program with two kernels whose static_assert holds and two whose static_assert is
// false, so compiling it is expected to report one error per failing kernel.
// StaticAssertPassKernel1 compares int against long long rather than long: long is not
// guaranteed to be wider than int (both are 32 bits under the LLP64 data model used by
// 64-bit Windows), which would make the "pass" kernel fail to compile there.
static constexpr auto kStaticAssert_Positive{
    R"(
__global__ void StaticAssertPassKernel1() {
  static_assert(sizeof(int) < sizeof(long long), "[StaticAssertPassKernel1]");
}

__global__ void StaticAssertPassKernel2() {
  static_assert(10 > 5, "[StaticAssertPassKernel2]");
}

__global__ void StaticAssertFailKernel1() {
  static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]");
}

__global__ void StaticAssertFailKernel2() {
  static_assert(10 < 5, "[StaticAssertFailKernel2]");
}
)"};
|
||||
|
||||
// RTC program with two kernels that hand static_assert an expression depending on run-time
// values; both kernels are expected to be rejected by the compiler.
static constexpr auto kStaticAssert_Negative =
    R"(
__global__ void StaticAssertErrorKernel1() {
  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
  static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]");
}

__global__ void StaticAssertErrorKernel2() {
  int tid = threadIdx.x + blockIdx.x * blockDim.x;
  static_assert(++tid > 2, "[StaticAssertErrorKernel2]");
}
)";
|
||||
@@ -22,7 +22,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
@@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector<T
|
||||
* ------------------------
|
||||
* - Verify that data is correctly copied from a symbol. A graph is constructed to which a
|
||||
* MemcpyFromSymbol node is added. After graph execution, values in destination memory are compared
|
||||
* against values known to be in symbol memory.
|
||||
* against values known to be in symbol memory.
|
||||
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
|
||||
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
|
||||
* Verification is performed for destination memory allocated on host and device.
|
||||
@@ -106,7 +105,7 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* ------------------------
|
||||
* - Verify API behavior with invalid arguments:
|
||||
* -# pGraphNodes is nullptr
|
||||
* -# graph is nullptr
|
||||
@@ -122,12 +121,12 @@ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Positive_Basic") {
|
||||
* -# kind is illogical (hipMemcpyHostToDevice)
|
||||
* -# kind is an invalid enum value
|
||||
* Test source
|
||||
* ------------------------
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") {
|
||||
using namespace std::placeholders;
|
||||
hipGraph_t graph = nullptr;
|
||||
|
||||
@@ -23,7 +23,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
@@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect
|
||||
* - Verify that data is correctly copied to a symbol. A graph is constructed to which a
|
||||
* MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed and
|
||||
* the copied values are compared against values known to have been copied to symbol memory
|
||||
* previously.
|
||||
* previously.
|
||||
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
|
||||
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
|
||||
* Verification is performed for source memory allocated on host and device.
|
||||
|
||||
@@ -22,7 +22,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
#include <utils.hh>
|
||||
@@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") {
|
||||
* Allocate a 2D array using hipMallocPitch. Initialize the allocated memory
|
||||
* using hipGraphAddMemsetNode. Copy the values in device memory to host using
|
||||
* hipGraphAddMemcpyNode. Verify the results.
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
@@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
|
||||
}
|
||||
}
|
||||
// 2D Memory allocation hipMallocPitch
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
|
||||
numH));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, numH));
|
||||
// Create Graph
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t memsetNode, memcpyNode;
|
||||
// Add MemSet Node
|
||||
hipMemsetParams memsetParams{};
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void *>(A_d);
|
||||
memsetParams.dst = reinterpret_cast<void*>(A_d);
|
||||
memsetParams.value = memSetVal;
|
||||
memsetParams.pitch = pitch_A;
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = numW;
|
||||
memsetParams.height = numH;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
// Add MemCpy Node
|
||||
hipMemcpy3DParms myparms{};
|
||||
@@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
|
||||
myparms.extent = make_hipExtent(width, numH, 1);
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verification
|
||||
for (size_t i = 0; i < numW; i++) {
|
||||
for (size_t j = 0; j < numH; j++) {
|
||||
REQUIRE(*(A_h + i*numH + j) == memSetVal);
|
||||
REQUIRE(*(A_h + i * numH + j) == memSetVal);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
@@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") {
|
||||
* Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using
|
||||
* hipGraphAddMemsetNode. Copy the values in device memory to host using
|
||||
* hipGraphAddMemcpyNode. Verify the results.
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A;
|
||||
char *A_d;
|
||||
char* A_d;
|
||||
|
||||
// Initialize the host memory
|
||||
std::vector<char> A_h(numW, ' ');
|
||||
@@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
|
||||
hipGraph_t graph;
|
||||
std::vector<hipGraphNode_t> nodeDependencies;
|
||||
// 1D Memory allocation hipMallocPitch
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width,
|
||||
1));
|
||||
HIP_CHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width, 1));
|
||||
// Create Graph
|
||||
HIP_CHECK(hipGraphCreate(&graph, 0));
|
||||
hipGraphNode_t memsetNode, memcpyNode;
|
||||
// Add MemSet Node
|
||||
hipMemsetParams memsetParams{};
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void *>(A_d);
|
||||
memsetParams.dst = reinterpret_cast<void*>(A_d);
|
||||
memsetParams.value = memSetVal;
|
||||
memsetParams.pitch = pitch_A;
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = numW;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
// Add MemCpy Node
|
||||
hipMemcpy3DParms myparms{};
|
||||
@@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
|
||||
myparms.extent = make_hipExtent(width, 1, 1);
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
@@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") {
|
||||
* Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using
|
||||
* hipGraphAddMemsetNode. Copy the values in device memory to host using
|
||||
* hipGraphAddMemcpyNode. Verify the results.
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
@@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = numW;
|
||||
memsetParams.height = numH;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
|
||||
// MemCpy params
|
||||
@@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
|
||||
|
||||
// Add MemCpy Node
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verification
|
||||
for (size_t i = 0; i < numW; i++) {
|
||||
for (size_t j = 0; j < numH; j++) {
|
||||
REQUIRE(*(A_h + i*numH + j) == memSetVal);
|
||||
REQUIRE(*(A_h + i * numH + j) == memSetVal);
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
@@ -343,7 +334,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") {
|
||||
* Allocate a 1D array using hipMalloc3D. Initialize the allocated
|
||||
* memory using hipGraphAddMemsetNode. Copy the values in device
|
||||
* memory to host using hipGraphAddMemcpyNode. Verify the results.
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
|
||||
CHECK_IMAGE_SUPPORT
|
||||
|
||||
@@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = numW;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
|
||||
// MemCpy params
|
||||
@@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
|
||||
|
||||
// Add MemCpy Node
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verification
|
||||
for (size_t i = 0; i < numW; i++) {
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
@@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") {
|
||||
* Allocate a 1D array using hipMalloc. Initialize the allocated memory using
|
||||
* hipGraphAddMemsetNode. Copy the values in device memory to host using
|
||||
* hipGraphAddMemcpyNode. Verify the results.
|
||||
*/
|
||||
*/
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
|
||||
char *A_d;
|
||||
char* A_d;
|
||||
size_t NumW = SIZE;
|
||||
size_t Nbytes1D = SIZE * sizeof(char);
|
||||
|
||||
@@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
|
||||
// Add Memset node
|
||||
hipMemsetParams memsetParams{};
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void *>(A_d);
|
||||
memsetParams.dst = reinterpret_cast<void*>(A_d);
|
||||
memsetParams.value = memSetVal;
|
||||
memsetParams.pitch = Nbytes1D;
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = NumW;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
// Add MemCpy Node
|
||||
hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0};
|
||||
@@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
|
||||
myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verfication
|
||||
for (size_t i = 0; i < NumW; i++) {
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
}
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
HIP_CHECK(hipGraphDestroy(graph));
|
||||
@@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") {
|
||||
|
||||
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
|
||||
int managed = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed,
|
||||
hipDeviceAttributeManagedMemory, 0));
|
||||
HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0));
|
||||
INFO("hipDeviceAttributeManagedMemory: " << managed);
|
||||
if (managed != 1) {
|
||||
WARN(
|
||||
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
|
||||
"so defaulting to system memory.");
|
||||
"GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute"
|
||||
"so defaulting to system memory.");
|
||||
}
|
||||
size_t Nbytes1D = SIZE * sizeof(char);
|
||||
char *A_d;
|
||||
char* A_d;
|
||||
// Initialize the host memory
|
||||
std::vector<char> A_h(SIZE, ' ');
|
||||
// Device Memory
|
||||
@@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
|
||||
// Add Memset node
|
||||
hipMemsetParams memsetParams{};
|
||||
memset(&memsetParams, 0, sizeof(memsetParams));
|
||||
memsetParams.dst = reinterpret_cast<void *>(A_d);
|
||||
memsetParams.dst = reinterpret_cast<void*>(A_d);
|
||||
memsetParams.value = memSetVal;
|
||||
memsetParams.pitch = Nbytes1D;
|
||||
memsetParams.elementSize = sizeof(char);
|
||||
memsetParams.width = SIZE;
|
||||
memsetParams.height = 1;
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0,
|
||||
&memsetParams));
|
||||
HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams));
|
||||
nodeDependencies.push_back(memsetNode);
|
||||
|
||||
// Add MemCpy Node
|
||||
@@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") {
|
||||
myparms.extent = make_hipExtent(Nbytes1D, 1, 1);
|
||||
myparms.kind = hipMemcpyDeviceToHost;
|
||||
HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(),
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.size(), &myparms));
|
||||
nodeDependencies.clear();
|
||||
|
||||
// Create executable graph
|
||||
hipStream_t streamForGraph;
|
||||
hipGraphExec_t graphExec;
|
||||
HIP_CHECK(hipStreamCreate(&streamForGraph));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
|
||||
HIP_CHECK(hipStreamSynchronize(streamForGraph));
|
||||
|
||||
// Verfication
|
||||
for (size_t i = 0; i < SIZE; i++) {
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
REQUIRE(A_h[i] == memSetVal);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec));
|
||||
|
||||
@@ -22,7 +22,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
@@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s
|
||||
* node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
|
||||
* incorrect parameters. After the graph is instantiated the parameters are updated to correct
|
||||
* values and the graph executed. After graph execution, a MemcpyFromSymbol is performed and the
|
||||
* copied values are compared against values known to have been copied to symbol memory previously.
|
||||
* copied values are compared against values known to have been copied to symbol memory previously.
|
||||
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
|
||||
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
|
||||
* Verification is performed for destination memory allocated on host and device.
|
||||
|
||||
@@ -21,7 +21,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "graph_memset_node_test_common.hh"
|
||||
@@ -46,7 +45,7 @@ THE SOFTWARE.
|
||||
* which also constitutes a test for said API.
|
||||
* The test is repeated for all valid element sizes(1,
|
||||
* 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes
|
||||
* can be updated), both on host and device
|
||||
* can be updated), both on host and device
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphExecMemsetNodeSetParams.cc
|
||||
|
||||
@@ -22,7 +22,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
@@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym
|
||||
* - Verify that data is correctly copied from a symbol after node parameters are set following
|
||||
* node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but
|
||||
* incorrect parameters. The parameters are then updated to correct values and the graph executed.
|
||||
* Values in destination memory are compared against values known to be in symbol memory.
|
||||
* Values in destination memory are compared against values known to be in symbol memory.
|
||||
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
|
||||
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
|
||||
* Verification is performed for destination memory allocated on host and device.
|
||||
|
||||
@@ -22,7 +22,6 @@ THE SOFTWARE.
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
@@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo
|
||||
* node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but
|
||||
* incorrect parameters. The parameters are then updated to correct values and the graph executed.
|
||||
* After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against
|
||||
* values known to have been copied to symbol memory previously.
|
||||
* values known to have been copied to symbol memory previously.
|
||||
* The test is run for scalar, const scalar, array, and const array symbols of types char, int,
|
||||
* float and double. For array symbols, the test is repeated for zero and non-zero offset values.
|
||||
* Verification is performed for destination memory allocated on host and device.
|
||||
|
||||
@@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <resource_guards.hh>
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include "graph_memset_node_test_common.hh"
|
||||
@@ -44,7 +43,7 @@ THE SOFTWARE.
|
||||
* The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test
|
||||
* for said API.
|
||||
* The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different
|
||||
* height and width both on host and device
|
||||
* height and width both on host and device
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/graph/hipGraphMemsetNodeSetParams.cc
|
||||
|
||||
@@ -18,7 +18,6 @@ THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
|
||||
@@ -19,8 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include "stream_capture_common.hh" // NOLINT
|
||||
#include "stream_capture_common.hh" // NOLINT
|
||||
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
/**
|
||||
@@ -56,8 +55,7 @@ static void hostNodeCallback(void* data) {
|
||||
}
|
||||
|
||||
template <typename T, typename F>
|
||||
void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
|
||||
hipStream_t stream) {
|
||||
void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) {
|
||||
constexpr size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
@@ -89,8 +87,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode,
|
||||
std::fill_n(A_h.host_ptr(), N, static_cast<float>(i));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
ArrayFindIfNot(B_h.host_ptr(),
|
||||
static_cast<float>(i) * static_cast<float>(i), N);
|
||||
ArrayFindIfNot(B_h.host_ptr(), static_cast<float>(i) * static_cast<float>(i), N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGraphExecDestroy(graphExec))
|
||||
@@ -117,16 +114,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t stream = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
|
||||
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
EventsGuard events_guard(3);
|
||||
StreamsGuard streams_guard(2);
|
||||
|
||||
SECTION("Linear graph capture") {
|
||||
captureStreamAndLaunchGraph<float>(
|
||||
[](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
|
||||
hipStream_t stream) {
|
||||
[](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) {
|
||||
return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
|
||||
},
|
||||
captureMode, stream);
|
||||
@@ -134,10 +130,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") {
|
||||
|
||||
SECTION("Branched graph capture") {
|
||||
captureStreamAndLaunchGraph<float>(
|
||||
[&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h,
|
||||
float* B_d, size_t N, hipStream_t stream) {
|
||||
captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream,
|
||||
streams_guard.stream_list(), events_guard.event_list());
|
||||
[&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N,
|
||||
hipStream_t stream) {
|
||||
captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(),
|
||||
events_guard.event_list());
|
||||
},
|
||||
captureMode, stream);
|
||||
}
|
||||
@@ -173,8 +169,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
|
||||
hipErrorIllegalState);
|
||||
}
|
||||
SECTION("Creating hipStream with invalid mode") {
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)),
|
||||
hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue);
|
||||
}
|
||||
#if HT_NVIDIA // EXSWHTEC-216
|
||||
SECTION("Stream capture on uninitialized stream returns error code.") {
|
||||
@@ -182,8 +177,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") {
|
||||
StreamGuard sg(Streams::created);
|
||||
return sg.stream();
|
||||
};
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(),
|
||||
hipStreamCaptureModeGlobal),
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal),
|
||||
hipErrorContextIsDestroyed);
|
||||
}
|
||||
#endif
|
||||
@@ -207,8 +201,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {
|
||||
StreamGuard stream_guard(stream_type);
|
||||
hipStream_t s = stream_guard.stream();
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
|
||||
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(s, captureMode));
|
||||
|
||||
@@ -218,8 +212,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") {
|
||||
|
||||
/* Local function for inter stream event synchronization
|
||||
*/
|
||||
static void interStrmEventSyncCapture(const hipStream_t& stream1,
|
||||
const hipStream_t& stream2) {
|
||||
static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
|
||||
hipGraph_t graph1{nullptr}, graph2{nullptr};
|
||||
hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};
|
||||
|
||||
@@ -266,8 +259,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1,
|
||||
|
||||
/* Local function for colligated stream capture
|
||||
*/
|
||||
static void colligatedStrmCapture(const hipStream_t& stream1,
|
||||
const hipStream_t& stream2) {
|
||||
static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) {
|
||||
hipGraph_t graph1{nullptr}, graph2{nullptr};
|
||||
hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr};
|
||||
|
||||
@@ -310,8 +302,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1,
|
||||
|
||||
/* Local function for colligated stream capture functionality
|
||||
*/
|
||||
static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
|
||||
const hipStream_t& stream2) {
|
||||
static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) {
|
||||
constexpr size_t N = 1000000;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
|
||||
@@ -331,10 +322,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
|
||||
// Capture 2 streams
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal));
|
||||
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(),
|
||||
N, stream1);
|
||||
captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(),
|
||||
N, stream2);
|
||||
captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
|
||||
captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
|
||||
captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1);
|
||||
captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2);
|
||||
HIP_CHECK(hipStreamEndCapture(stream1, &graph1));
|
||||
@@ -370,9 +359,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1,
|
||||
|
||||
/* Stream Capture thread function
|
||||
*/
|
||||
static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d,
|
||||
int* B_h, int* B_d, hipGraph_t* graph,
|
||||
size_t N, hipStreamCaptureMode mode) {
|
||||
static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d,
|
||||
hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) {
|
||||
// Capture stream
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, mode));
|
||||
captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream);
|
||||
@@ -404,10 +392,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) {
|
||||
LinearAllocGuard<int> D_d(LinearAllocs::hipMalloc, Nbytes);
|
||||
|
||||
// Launch 2 threads to capture the 2 streams into graphs
|
||||
std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(),
|
||||
B_h.host_ptr(), B_d.ptr(), &graph1, N, mode);
|
||||
std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(),
|
||||
D_h.host_ptr(), D_d.ptr(), &graph2, N, mode);
|
||||
std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(),
|
||||
B_d.ptr(), &graph1, N, mode);
|
||||
std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(),
|
||||
D_d.ptr(), &graph2, N, mode);
|
||||
t1.join();
|
||||
t2.join();
|
||||
|
||||
@@ -480,11 +468,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") {
|
||||
TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") {
|
||||
int minPriority = 0, maxPriority = 0;
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
|
||||
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
|
||||
minPriority);
|
||||
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
|
||||
hipStream_t stream1 = stream_guard1.stream();
|
||||
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
|
||||
maxPriority);
|
||||
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
|
||||
hipStream_t stream2 = stream_guard2.stream();
|
||||
interStrmEventSyncCapture(stream1, stream2);
|
||||
}
|
||||
@@ -533,11 +519,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") {
|
||||
TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") {
|
||||
int minPriority = 0, maxPriority = 0;
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority));
|
||||
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault,
|
||||
minPriority);
|
||||
StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority);
|
||||
hipStream_t stream1 = stream_guard1.stream();
|
||||
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault,
|
||||
maxPriority);
|
||||
StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority);
|
||||
hipStream_t stream2 = stream_guard2.stream();
|
||||
colligatedStrmCapture(stream1, stream2);
|
||||
}
|
||||
@@ -578,8 +562,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") {
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") {
|
||||
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
|
||||
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
multithreadedTest(captureMode);
|
||||
}
|
||||
|
||||
@@ -708,8 +692,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") {
|
||||
HIP_CHECK(hipEventRecord(events[2], streams[2]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
|
||||
streams[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
|
||||
// Reset device memory
|
||||
HIP_CHECK(hipMemset(devMem, 0, sizeof(int)));
|
||||
@@ -751,8 +734,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") {
|
||||
dummyKernel<<<1, 1, 0, streams[0]>>>();
|
||||
// Since stream[1] is already in capture mode due to event wait
|
||||
// hipStreamBeginCapture on stream[1] is expected to return error.
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1],
|
||||
hipStreamCaptureModeGlobal),
|
||||
HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal),
|
||||
hipErrorIllegalState);
|
||||
}
|
||||
|
||||
@@ -785,8 +767,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal));
|
||||
for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem);
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault,
|
||||
stream1));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1));
|
||||
HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i]));
|
||||
}
|
||||
// Instantiate and execute all graphs
|
||||
@@ -825,22 +806,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") {
|
||||
EventsGuard events_guard(1);
|
||||
hipEvent_t e = events_guard[0];
|
||||
|
||||
const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal,
|
||||
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
const hipStreamCaptureMode captureMode = GENERATE(
|
||||
hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
SECTION("Synchronize stream during capture") {
|
||||
HIP_CHECK_ERROR(hipStreamSynchronize(stream),
|
||||
hipErrorStreamCaptureUnsupported);
|
||||
HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
SECTION("Query stream during capture") {
|
||||
HIP_CHECK_ERROR(hipStreamQuery(stream),
|
||||
hipErrorStreamCaptureUnsupported);
|
||||
HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
#if HT_NVIDIA
|
||||
SECTION("Synchronize device during capture") {
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(),
|
||||
hipErrorStreamCaptureUnsupported);
|
||||
HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
SECTION("Synchronize event during capture") {
|
||||
HIP_CHECK(hipEventRecord(e, stream));
|
||||
@@ -884,17 +862,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") {
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode));
|
||||
SECTION("hipMalloc during capture") {
|
||||
HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)),
|
||||
hipErrorStreamCaptureUnsupported);
|
||||
HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
SECTION("hipMemcpy during capture") {
|
||||
HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int),
|
||||
hipMemcpyHostToDevice),
|
||||
HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice),
|
||||
hipErrorStreamCaptureImplicit);
|
||||
}
|
||||
SECTION("hipMemset during capture") {
|
||||
HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)),
|
||||
hipErrorStreamCaptureImplicit);
|
||||
HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -931,8 +906,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
|
||||
HIP_CHECK(hipEventRecord(e, stream1));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
|
||||
dummyKernel<<<1, 1, 0, stream2>>>();
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
|
||||
hipErrorStreamCaptureUnjoined);
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
|
||||
}
|
||||
SECTION("End strm capture when forked strm still has operations") {
|
||||
EventsGuard events_guard(2);
|
||||
@@ -946,8 +920,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") {
|
||||
HIP_CHECK(hipEventRecord(e2, stream2));
|
||||
HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0));
|
||||
dummyKernel<<<1, 1, 0, stream2>>>();
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph),
|
||||
hipErrorStreamCaptureUnjoined);
|
||||
HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined);
|
||||
}
|
||||
}
|
||||
/**
|
||||
@@ -970,19 +943,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
|
||||
SUCCEED("skipping the testcases as numDevices < 2");
|
||||
return;
|
||||
}
|
||||
hipStream_t* stream = reinterpret_cast<hipStream_t*>
|
||||
(malloc(devcount * sizeof(hipStream_t)));
|
||||
hipStream_t* stream = reinterpret_cast<hipStream_t*>(malloc(devcount * sizeof(hipStream_t)));
|
||||
REQUIRE(stream != nullptr);
|
||||
hipGraph_t* graph = reinterpret_cast<hipGraph_t*>
|
||||
(malloc(devcount * sizeof(hipGraph_t)));
|
||||
hipGraph_t* graph = reinterpret_cast<hipGraph_t*>(malloc(devcount * sizeof(hipGraph_t)));
|
||||
REQUIRE(graph != nullptr);
|
||||
int **devMem{nullptr}, **hostMem{nullptr};
|
||||
hostMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
|
||||
REQUIRE(hostMem != nullptr);
|
||||
devMem = reinterpret_cast<int**>(malloc(sizeof(int*) * devcount));
|
||||
REQUIRE(devMem != nullptr);
|
||||
hipGraphExec_t* graphExec = reinterpret_cast<hipGraphExec_t*>
|
||||
(malloc(devcount * sizeof(hipGraphExec_t)));
|
||||
hipGraphExec_t* graphExec =
|
||||
reinterpret_cast<hipGraphExec_t*>(malloc(devcount * sizeof(hipGraphExec_t)));
|
||||
// Capture stream in each device
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipSetDevice(dev));
|
||||
@@ -994,15 +965,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") {
|
||||
for (int i = 0; i < (dev + 1); i++) {
|
||||
incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]);
|
||||
}
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int),
|
||||
hipMemcpyDefault, stream[dev]));
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev]));
|
||||
HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev]));
|
||||
}
|
||||
// Launch the captured graphs in the respective device
|
||||
for (int dev = 0; dev < devcount; dev++) {
|
||||
HIP_CHECK(hipSetDevice(dev));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr,
|
||||
nullptr, 0));
|
||||
HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0));
|
||||
HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev]));
|
||||
}
|
||||
// Validate output
|
||||
@@ -1069,8 +1039,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") {
|
||||
HIP_CHECK(hipEventRecord(events[3], streams[2]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int),
|
||||
hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault,
|
||||
streams[0]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
|
||||
// Reset device memory
|
||||
HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int)));
|
||||
@@ -1108,23 +1078,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
|
||||
hipGraph_t graphs[3];
|
||||
StreamsGuard streams(3);
|
||||
EventsGuard events(4);
|
||||
LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>
|
||||
(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>
|
||||
(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>
|
||||
(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> hostMem_g1 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> hostMem_g2 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> hostMem_g3 = LinearAllocGuard<int>(LinearAllocs::malloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g1 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g2 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
|
||||
LinearAllocGuard<int> devMem_g3 = LinearAllocGuard<int>(LinearAllocs::hipMalloc, sizeof(int));
|
||||
|
||||
std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(),
|
||||
hostMem_g3.host_ptr()};
|
||||
std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(),
|
||||
devMem_g3.ptr()};
|
||||
std::vector<int*> hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()};
|
||||
std::vector<int*> devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()};
|
||||
// Create a device memory of size int and initialize it to 0
|
||||
for (int i = 0; i < 3; i++) {
|
||||
memset(hostMem[i], 0, sizeof(int));
|
||||
@@ -1148,16 +1110,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
|
||||
HIP_CHECK(hipEventRecord(events[3], streams[2]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int),
|
||||
hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture
|
||||
// Start capturing graph2 from stream 2
|
||||
HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal));
|
||||
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
|
||||
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
|
||||
incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]);
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int),
|
||||
hipMemcpyDefault, streams[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture
|
||||
// Start capturing graph3 from stream 3
|
||||
HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal));
|
||||
@@ -1166,8 +1126,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") {
|
||||
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
|
||||
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
|
||||
incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]);
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int),
|
||||
hipMemcpyDefault, streams[2]));
|
||||
HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture
|
||||
// Reset device memory
|
||||
HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int)));
|
||||
@@ -1211,40 +1170,32 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
|
||||
EventsGuard events(7);
|
||||
// Allocate Device memory and Host memory
|
||||
size_t N = GRIDSIZE * BLOCKSIZE;
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Ch = LinearAllocGuard<int>
|
||||
(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Ad = LinearAllocGuard<int>
|
||||
(LinearAllocs::hipMalloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Bd = LinearAllocGuard<int>
|
||||
(LinearAllocs::hipMalloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Ch = LinearAllocGuard<int>(LinearAllocs::malloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));
|
||||
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc, N * sizeof(int));
|
||||
|
||||
// Capture streams into graph
|
||||
HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipEventRecord(events[0], streams[0]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)),
|
||||
hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)),
|
||||
hipMemcpyDefault, streams[4]));
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4]));
|
||||
hipHostFn_t fn = hostNodeCallback;
|
||||
HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr));
|
||||
HIP_CHECK(hipEventRecord(events[1], streams[0]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0));
|
||||
int* Ad_2nd_half = Ad.ptr() + N / 2;
|
||||
int* Ad_1st_half = Ad.ptr();
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half,
|
||||
CONST_KER2_VAL);
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
|
||||
CONST_KER1_VAL);
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[0]>>>(Ad_2nd_half, CONST_KER2_VAL);
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER1_VAL);
|
||||
HIP_CHECK(hipEventRecord(events[2], streams[1]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0));
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half,
|
||||
CONST_KER3_VAL);
|
||||
mymul<<<GRIDSIZE / 2, BLOCKSIZE, 0, streams[1]>>>(Ad_1st_half, CONST_KER3_VAL);
|
||||
HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr));
|
||||
HIP_CHECK(hipEventRecord(events[6], streams[1]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0));
|
||||
@@ -1255,8 +1206,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0));
|
||||
HIP_CHECK(hipEventRecord(events[4], streams[3]));
|
||||
HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0));
|
||||
HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)),
|
||||
hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0]));
|
||||
HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture
|
||||
// Execute and test the graph
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
@@ -1269,11 +1220,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") {
|
||||
HIP_CHECK(hipStreamSynchronize(streams[0]));
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (i > (N / 2 - 1)) {
|
||||
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
|
||||
Ah.host_ptr()[i] * CONST_KER2_VAL));
|
||||
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL));
|
||||
} else {
|
||||
REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] +
|
||||
Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
|
||||
REQUIRE(Ch.host_ptr()[i] ==
|
||||
(Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1340,14 +1290,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
hipGraph_t graph{nullptr};
|
||||
hipGraphExec_t graphExec{nullptr};
|
||||
// Allocate device memory
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ad =
|
||||
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bd =
|
||||
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
|
||||
// Fill input data
|
||||
std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
|
||||
std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
|
||||
@@ -1357,10 +1305,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
SECTION("Stream Creation Before Capture") {
|
||||
StreamsGuard stream1(1);
|
||||
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1[0]));
|
||||
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
|
||||
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
|
||||
@@ -1368,10 +1316,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
SECTION("Synchronizing multiple streams during Capture") {
|
||||
StreamsGuard stream1(1), stream2(1);
|
||||
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream2[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream2[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream2[0]));
|
||||
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
|
||||
@@ -1380,20 +1328,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
SECTION("Stream Creation After Capture") {
|
||||
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
|
||||
StreamsGuard stream1(1);
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1[0]));
|
||||
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
|
||||
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
|
||||
}
|
||||
SECTION("Stream Synchronize Before Capture") {
|
||||
StreamsGuard stream1(1);
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1[0]));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream0[0], flag));
|
||||
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
|
||||
@@ -1404,10 +1352,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
myadd<<<GRIDSIZE, BLOCKSIZE, 0, stream0[0]>>>(Ad.ptr(), Bd.ptr());
|
||||
HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture
|
||||
StreamsGuard stream1(1);
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream1[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1[0]));
|
||||
}
|
||||
// Execute and test the graph
|
||||
@@ -1415,8 +1363,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream0[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream0[0]));
|
||||
// Check output
|
||||
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
|
||||
for (int idx = 0; idx < BLOCKSIZE; idx++) {
|
||||
REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
|
||||
}
|
||||
@@ -1437,20 +1384,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") {
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
// Local function executed as thread
|
||||
static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd,
|
||||
int BLOCKSIZE, hipError_t *error) {
|
||||
static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) {
|
||||
StreamsGuard stream(1);
|
||||
HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDefault, stream[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0]));
|
||||
*error = hipStreamSynchronize(stream[0]);
|
||||
}
|
||||
|
||||
// Local function executed as thread
|
||||
static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad,
|
||||
int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE,
|
||||
hipStreamCaptureMode flag, hipError_t *error) {
|
||||
static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE,
|
||||
int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) {
|
||||
StreamsGuard stream(1);
|
||||
// Capture streams into graph
|
||||
HIP_CHECK(hipStreamBeginCapture(stream[0], flag));
|
||||
@@ -1466,14 +1409,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
|
||||
constexpr int VALUE1 = 7, VALUE2 = 11;
|
||||
hipGraph_t graph{nullptr};
|
||||
// Allocate device memory
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ad = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bd = LinearAllocGuard<int>(LinearAllocs::hipMalloc,
|
||||
BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ah = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Ad =
|
||||
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bh = LinearAllocGuard<int>(LinearAllocs::malloc, BLOCKSIZE * sizeof(int));
|
||||
LinearAllocGuard<int> Bd =
|
||||
LinearAllocGuard<int>(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int));
|
||||
// Fill input data
|
||||
std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1);
|
||||
std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2);
|
||||
@@ -1483,10 +1424,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
|
||||
StreamsGuard stream(2);
|
||||
// Capture streams into graph
|
||||
HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
|
||||
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
|
||||
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream[1]));
|
||||
error = hipStreamSynchronize(stream[1]);
|
||||
REQUIRE(error == hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
@@ -1494,34 +1435,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
|
||||
SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") {
|
||||
StreamsGuard stream(2);
|
||||
// Capture streams into graph
|
||||
HIP_CHECK(hipStreamBeginCapture(stream[0],
|
||||
hipStreamCaptureModeThreadLocal));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(),
|
||||
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(),
|
||||
BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1]));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal));
|
||||
HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream[1]));
|
||||
HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault,
|
||||
stream[1]));
|
||||
error = hipStreamSynchronize(stream[1]);
|
||||
REQUIRE(error == hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
#endif
|
||||
#if HT_AMD
|
||||
SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") {
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
|
||||
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeGlobal, &error);
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeGlobal, &error);
|
||||
REQUIRE(error == hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
#endif
|
||||
SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") {
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
|
||||
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeThreadLocal, &error);
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeThreadLocal, &error);
|
||||
REQUIRE(error == hipSuccess);
|
||||
}
|
||||
SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") {
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(),
|
||||
Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeRelaxed, &error);
|
||||
captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE,
|
||||
hipStreamCaptureModeRelaxed, &error);
|
||||
REQUIRE(error == hipSuccess);
|
||||
}
|
||||
if (graph != nullptr) {
|
||||
@@ -1532,8 +1469,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") {
|
||||
HIP_CHECK(hipGraphLaunch(graphExec, stream[0]));
|
||||
HIP_CHECK(hipStreamSynchronize(stream[0]));
|
||||
// Check output
|
||||
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost));
|
||||
for (int idx = 0; idx < BLOCKSIZE; idx++) {
|
||||
REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2));
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
@@ -19,7 +19,6 @@ THE SOFTWARE.
|
||||
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
@@ -20,7 +20,6 @@ THE SOFTWARE.
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
@@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") {
|
||||
const hipStreamUpdateCaptureDependenciesFlags flag =
|
||||
GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies);
|
||||
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal));
|
||||
HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); // hipStreamCaptureModeGlobal));
|
||||
|
||||
HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag));
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ THE SOFTWARE.
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#include "stream_capture_common.hh"
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ THE SOFTWARE.
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
#define WIDTH 4
|
||||
|
||||
@@ -32,20 +31,17 @@ THE SOFTWARE.
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
template <typename T>
|
||||
__global__ void matrixTranspose(T* out, T* in, const int width) {
|
||||
template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
T val = in[x];
|
||||
for (int i = 0; i < width; i++) {
|
||||
for (int j = 0; j < width; j++)
|
||||
out[i * width + j] = __shfl(val, j * width + i);
|
||||
for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
|
||||
}
|
||||
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
template <typename T>
|
||||
void matrixTransposeCPUReference(T* output,
|
||||
T* input, const unsigned int width) {
|
||||
void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
@@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output,
|
||||
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) {
|
||||
*fact = static_cast<unsigned int>(INT32_MAX)+1;
|
||||
}
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) {
|
||||
*fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
|
||||
}
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
template <typename T>
|
||||
int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
|
||||
template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
|
||||
int errors = 0;
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
|
||||
template <>
|
||||
int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
|
||||
int errors = 0;
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT
|
||||
if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void init(T* Matrix) {
|
||||
template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
|
||||
int errors = 0;
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
|
||||
template <typename T> void init(T* Matrix) {
|
||||
// initialize the input data
|
||||
T factor;
|
||||
getFactor(&factor);
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (T)i + factor;
|
||||
Matrix[i] = (T)i + factor;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
void init(__half* Matrix) {
|
||||
template <> void init(__half* Matrix) {
|
||||
// initialize the input data
|
||||
__half factor;
|
||||
getFactor(&factor);
|
||||
for (int i = 0; i < NUM; i++) {
|
||||
Matrix[i] = i + __half2float(factor);
|
||||
Matrix[i] = i + __half2float(factor);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void runTest() {
|
||||
template <typename T> static void runTest() {
|
||||
T* Matrix;
|
||||
T* TransposeMatrix;
|
||||
T* cpuTransposeMatrix;
|
||||
@@ -129,21 +116,17 @@ static void runTest() {
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix),
|
||||
NUM * sizeof(T)));
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T),
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1),
|
||||
dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
|
||||
0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix,
|
||||
NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
@@ -183,26 +166,12 @@ static void runTest() {
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_hipShflTests") {
|
||||
SECTION("run test for int") {
|
||||
runTest<int>();
|
||||
}
|
||||
SECTION("run test for float") {
|
||||
runTest<float>();
|
||||
}
|
||||
SECTION("run test for double") {
|
||||
runTest<double>();
|
||||
}
|
||||
SECTION("run test for int") { runTest<int>(); }
|
||||
SECTION("run test for float") { runTest<float>(); }
|
||||
SECTION("run test for double") { runTest<double>(); }
|
||||
// Test added to support half datatype.
|
||||
SECTION("run test for __half") {
|
||||
runTest<__half>();
|
||||
}
|
||||
SECTION("run test for int64_t") {
|
||||
runTest<int64_t>();
|
||||
}
|
||||
SECTION("run test for unsigned int") {
|
||||
runTest<unsigned int>();
|
||||
}
|
||||
SECTION("run test for uint64_t") {
|
||||
runTest<uint64_t>();
|
||||
}
|
||||
SECTION("run test for __half") { runTest<__half>(); }
|
||||
SECTION("run test for int64_t") { runTest<int64_t>(); }
|
||||
SECTION("run test for unsigned int") { runTest<unsigned int>(); }
|
||||
SECTION("run test for uint64_t") { runTest<uint64_t>(); }
|
||||
}
|
||||
|
||||
@@ -21,12 +21,10 @@ THE SOFTWARE.
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <hip_test_defgroups.hh>
|
||||
|
||||
const int size = 32;
|
||||
|
||||
template <typename T>
|
||||
__global__ void shflDownSum(T* a, int size) {
|
||||
template <typename T> __global__ void shflDownSum(T* a, int size) {
|
||||
T val = a[threadIdx.x];
|
||||
for (int i = size / 2; i > 0; i /= 2) {
|
||||
val += __shfl_down(val, i, size);
|
||||
@@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) {
|
||||
a[threadIdx.x] = val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void shflUpSum(T* a, int size) {
|
||||
template <typename T> __global__ void shflUpSum(T* a, int size) {
|
||||
T val = a[threadIdx.x];
|
||||
for (int i = size / 2; i > 0; i /= 2) {
|
||||
val += __shfl_up(val, i, size);
|
||||
@@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) {
|
||||
a[threadIdx.x] = val;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void shflXorSum(T* a, int size) {
|
||||
template <typename T> __global__ void shflXorSum(T* a, int size) {
|
||||
T val = a[threadIdx.x];
|
||||
for (int i = size/2; i > 0; i /= 2) {
|
||||
for (int i = size / 2; i > 0; i /= 2) {
|
||||
val += __shfl_xor(val, i, size);
|
||||
}
|
||||
a[threadIdx.x] = val;
|
||||
}
|
||||
|
||||
static void getFactor(int* fact) { *fact = 101; }
|
||||
static void getFactor(unsigned int* fact) {
|
||||
*fact = static_cast<unsigned int>(INT32_MAX)+1;
|
||||
}
|
||||
static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
|
||||
static void getFactor(float* fact) { *fact = 2.5; }
|
||||
static void getFactor(double* fact) { *fact = 2.5; }
|
||||
static void getFactor(__half* fact) { *fact = 2.5; }
|
||||
static void getFactor(int64_t* fact) { *fact = 303; }
|
||||
static void getFactor(uint64_t* fact) {
|
||||
*fact = static_cast<uint64_t>(__LONG_LONG_MAX__)+1;
|
||||
}
|
||||
static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
|
||||
|
||||
template <typename T> T sum(T* a) {
|
||||
T cpuSum = 0;
|
||||
T factor;
|
||||
getFactor(&factor);
|
||||
for (int i = 0; i < size; i++) {
|
||||
a[i] = i + factor;
|
||||
cpuSum += a[i];
|
||||
a[i] = i + factor;
|
||||
cpuSum += a[i];
|
||||
}
|
||||
return cpuSum;
|
||||
}
|
||||
@@ -80,8 +72,8 @@ template <> __half sum(__half* a) {
|
||||
__half factor;
|
||||
getFactor(&factor);
|
||||
for (int i = 0; i < size; i++) {
|
||||
a[i] = i + __half2float(factor);
|
||||
cpuSum = __half2float(cpuSum) + __half2float(a[i]);
|
||||
a[i] = i + __half2float(factor);
|
||||
cpuSum = __half2float(cpuSum) + __half2float(a[i]);
|
||||
}
|
||||
return cpuSum;
|
||||
}
|
||||
@@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void runTestShflUp() {
|
||||
template <typename T> static void runTestShflUp() {
|
||||
const int size = 32;
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
@@ -114,8 +105,7 @@ static void runTestShflUp() {
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void runTestShflDown() {
|
||||
template <typename T> static void runTestShflDown() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
@@ -127,8 +117,7 @@ static void runTestShflDown() {
|
||||
HIP_CHECK(hipFree(d_a));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void runTestShflXor() {
|
||||
template <typename T> static void runTestShflXor() {
|
||||
T a[size];
|
||||
T cpuSum = sum(a);
|
||||
T* d_a;
|
||||
@@ -141,12 +130,12 @@ static void runTestShflXor() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_up function
|
||||
*/
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_up function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
@@ -164,27 +153,13 @@ static void runTestShflXor() {
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_runTestShfl_up") {
|
||||
SECTION("runTestShflUp for int") {
|
||||
runTestShflUp<int>();
|
||||
}
|
||||
SECTION("runTestShflUp for float") {
|
||||
runTestShflUp<float>();
|
||||
}
|
||||
SECTION("runTestShflUp for double") {
|
||||
runTestShflUp<double>();
|
||||
}
|
||||
SECTION("runTestShflUp for __half") {
|
||||
runTestShflUp<__half>();
|
||||
}
|
||||
SECTION("runTestShflUp for int64_t") {
|
||||
runTestShflUp<int64_t>();
|
||||
}
|
||||
SECTION("runTestShflUp for unsigned int") {
|
||||
runTestShflUp<unsigned int>();
|
||||
}
|
||||
SECTION("runTestShflUp for uint64_t") {
|
||||
runTestShflUp<uint64_t>();
|
||||
}
|
||||
SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
|
||||
SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
|
||||
SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
|
||||
SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
|
||||
SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
|
||||
SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
|
||||
SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
|
||||
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
@@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") {
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_down function
|
||||
*/
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
|
||||
* Contains warp __shfl_down function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
@@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") {
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_runTestShfl_Down") {
|
||||
SECTION("runTestShflDown for int") {
|
||||
runTestShflDown<int>();
|
||||
}
|
||||
SECTION("runTestShflDown for float") {
|
||||
runTestShflDown<float>();
|
||||
}
|
||||
SECTION("runTestShflDown for double") {
|
||||
runTestShflDown<double>();
|
||||
}
|
||||
SECTION("runTestShflDown for __half") {
|
||||
runTestShflDown<__half>();
|
||||
}
|
||||
SECTION("runTestShflDown for int64_t") {
|
||||
runTestShflDown<int64_t>();
|
||||
}
|
||||
SECTION("runTestShflDown for unsigned int") {
|
||||
runTestShflDown<unsigned int>();
|
||||
}
|
||||
SECTION("runTestShflDown for uint64_t") {
|
||||
runTestShflDown<uint64_t>();
|
||||
}
|
||||
SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
|
||||
SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
|
||||
SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
|
||||
SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
|
||||
SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
|
||||
SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
|
||||
SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
|
||||
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
@@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") {
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
||||
* Contains warp __shfl_xor function
|
||||
*/
|
||||
* @addtogroup __shfl __shfl
|
||||
* @{
|
||||
* @ingroup ShflTest
|
||||
* `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
|
||||
* Contains warp __shfl_xor function
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
@@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") {
|
||||
*/
|
||||
|
||||
TEST_CASE("Unit_runTestShfl_Xor") {
|
||||
SECTION("runTestShflXor for int") {
|
||||
runTestShflXor<int>();
|
||||
}
|
||||
SECTION("runTestShflXor for float") {
|
||||
runTestShflXor<float>();
|
||||
}
|
||||
SECTION("runTestShflXor for double") {
|
||||
runTestShflXor<double>();
|
||||
}
|
||||
SECTION("runTestShflXor for __half") {
|
||||
runTestShflXor<__half>();
|
||||
}
|
||||
SECTION("runTestShflXor for int64_t") {
|
||||
runTestShflXor<int64_t>();
|
||||
}
|
||||
SECTION("runTestShflXor for unsigned int") {
|
||||
runTestShflXor<unsigned int>();
|
||||
}
|
||||
SECTION("runTestShflXor for uint64_t") {
|
||||
runTestShflXor<uint64_t>();
|
||||
}
|
||||
SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
|
||||
SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
|
||||
SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
|
||||
SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
|
||||
SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
|
||||
SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
|
||||
SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
|
||||
}
|
||||
/**
|
||||
* End doxygen group __shfl.
|
||||
|
||||
@@ -20,7 +20,6 @@ THE SOFTWARE.
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_defgroups.hh>
|
||||
#define NUMBER_OF_THREADS 10
|
||||
static bool thread_results[NUMBER_OF_THREADS];
|
||||
|
||||
@@ -54,8 +53,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") {
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr),
|
||||
hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue);
|
||||
HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue);
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
@@ -145,9 +143,7 @@ static bool validateStreamGetDevice() {
|
||||
return true;
|
||||
}
|
||||
|
||||
static void thread_Test(int threadNum) {
|
||||
thread_results[threadNum] = validateStreamGetDevice();
|
||||
}
|
||||
static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); }
|
||||
|
||||
static bool test_hipStreamGetDevice_MThread() {
|
||||
std::vector<std::thread> tests;
|
||||
@@ -158,7 +154,7 @@ static bool test_hipStreamGetDevice_MThread() {
|
||||
tests.push_back(std::thread(thread_Test, idx));
|
||||
}
|
||||
// Wait for all threads to complete
|
||||
for (std::thread &t : tests) {
|
||||
for (std::thread& t : tests) {
|
||||
t.join();
|
||||
}
|
||||
// Wait for thread
|
||||
@@ -169,9 +165,7 @@ static bool test_hipStreamGetDevice_MThread() {
|
||||
return status;
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipStreamGetDevice_MThread") {
|
||||
REQUIRE(true == test_hipStreamGetDevice_MThread());
|
||||
}
|
||||
TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); }
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
|
||||
Reference in New Issue
Block a user