diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 43bbdd1c86..1f38291d20 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -309,6 +309,8 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_Assert_Positive_Basic_KernelFail", #endif #if defined VEGA20 "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index d7229c1927..74c5bca32a 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -440,6 +440,11 @@ "Unit_Thread_Block_Tile_Shfl_Positive_Basic - float", "Unit_Thread_Block_Tile_Shfl_Positive_Basic - double", "Unit_Thread_Block_Tile_Getters_Positive_Basic", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_StaticAssert_Positive_Basic_RTC", + "Unit_Assert_Positive_Basic_KernelFail", + "Unit_StaticAssert_Positive_Basic", + "Unit_StaticAssert_Negative_Basic", #endif "End of json" ] diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json index 3fdf6d03a6..ada918a267 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json @@ -89,6 +89,9 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior", - "Unit_hipMemcpy_Positive_Synchronization_Behavior" + "Unit_hipMemcpy_Positive_Synchronization_Behavior", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_StaticAssert_Positive_Basic", + "Unit_StaticAssert_Negative_Basic" ] } diff --git a/projects/hip-tests/catch/include/hip_test_common.hh b/projects/hip-tests/catch/include/hip_test_common.hh index 147abe0941..c2d19650bf 100644 --- a/projects/hip-tests/catch/include/hip_test_common.hh +++ b/projects/hip-tests/catch/include/hip_test_common.hh @@ -129,6 +129,19 @@ THE SOFTWARE. } \ } +// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError. +#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \ + { \ + auto localError = errorExpr; \ + INFO("Matching Errors: " \ + << "\n Expected Error: " << hiprtcGetErrorString(expectedError) \ + << "\n Expected Code: " << expectedError << '\n' \ + << " Actual Error: " << hiprtcGetErrorString(localError) \ + << "\n Actual Code: " << localError << "\nStr: " << #errorExpr \ + << "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \ + REQUIRE(localError == expectedError); \ + } + #define HIPASSERT(condition) \ if (!(condition)) { \ printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \ diff --git a/projects/hip-tests/catch/unit/CMakeLists.txt b/projects/hip-tests/catch/unit/CMakeLists.txt index 9d158dad4d..46f79b4d1d 100644 --- a/projects/hip-tests/catch/unit/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/CMakeLists.txt @@ -59,3 +59,4 @@ add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246 endif() add_subdirectory(synchronization) add_subdirectory(launchBounds) +add_subdirectory(assertion) \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/assertion/CMakeLists.txt b/projects/hip-tests/catch/unit/assertion/CMakeLists.txt new file mode 100644 index 0000000000..f7b38de221 --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +if(HIP_PLATFORM MATCHES "nvidia") + set(TEST_SRC + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + static_assert.cc + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +add_test(NAME Unit_StaticAssert_Positive_Basic + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + static_assert_kernels_positive.cc 2) + +add_test(NAME Unit_StaticAssert_Negative_Basic + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + static_assert_kernels_negative.cc 2) diff --git a/projects/hip-tests/catch/unit/assertion/assert.cc b/projects/hip-tests/catch/unit/assertion/assert.cc new file mode 100644 index 0000000000..1be0569f2a --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/assert.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup assert assert + * @{ + * @ingroup DeviceLanguageTest + * `void assert(int expression)` - + * Stops the kernel execution if expression is equal to zero. + */ + +jmp_buf env_ignore_abort; +volatile int abort_raised_flag = 0; + +void on_sigabrt(int signum) { + signal(signum, SIG_DFL); + abort_raised_flag = 1; + longjmp(env_ignore_abort, 1); +} + +void try_and_catch_abort(void (*func)()) { + if (!setjmp(env_ignore_abort)) { + signal(SIGABRT, &on_sigabrt); + (*func)(); + signal(SIGABRT, SIG_DFL); + } +} + +__global__ void AssertPassKernel() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + // expected always to be true + assert(tid >= 0); +} + +__global__ void AssertFailKernel() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + // expected to fail for the even thread indices + assert(tid % 2 == 1); +} + +template void LaunchAssertKernel() { + const int num_blocks = 2; + const int num_threads = 16; + + if constexpr (should_abort) { + AssertFailKernel<<>>(); +#if HT_AMD + HIP_CHECK(hipDeviceSynchronize()); +#else + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert); +#endif + } else { + AssertPassKernel<<>>(); + HIP_CHECK(hipDeviceSynchronize()); + } +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 1. + * - Expects that SIGABRT is not raised and kernels have executed successfully. + * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") { + try_and_catch_abort(&LaunchAssertKernel); + REQUIRE(abort_raised_flag == 0); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 0. + * - Expects that SIGABRT is raised and kernels have been stopped on AMD. + * - The HIP runtime also aborts the host code, so this test case uses signal handlers + * to avoid host code abortion. + * - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA. + * - The host code is not aborted. + * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") { + try_and_catch_abort(&LaunchAssertKernel); +#if HT_AMD + REQUIRE(abort_raised_flag == 1); +#else + REQUIRE(abort_raised_flag == 0); +#endif +} diff --git a/projects/hip-tests/catch/unit/assertion/static_assert.cc b/projects/hip-tests/catch/unit/assertion/static_assert.cc new file mode 100644 index 0000000000..508db295b7 --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/static_assert.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "static_assert_kernels_rtc.hh" + +/** + * @addtogroup static_assert static_assert + * @{ + * @ingroup DeviceLanguageTest + * `void static_assert(constexpr expression, const char* message)` - + * Stops the compilation if expression is equal to zero, and displays the specified message. + */ + +void StaticAssertWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} + +/** + * Test Description + * ------------------------ + * - Compiles kernels with static_assert calls: + * -# Expected that static_assert passes and compilation is successful. + * -# Expected that static_assert fails and compilation has errors. + * - Uses RTC to perform compilation. + * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); } + +/** + * Test Description + * ------------------------ + * - Passes invalidly formed expressions to static_assert calls. + * - Uses expressions that are not constexpr and values that are not known during compilation. + * - Uses RTC to perform compilation. + * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); } diff --git a/projects/hip-tests/catch/unit/assertion/static_assert_kernels_negative.cc b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_negative.cc new file mode 100644 index 0000000000..777f27855c --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_negative.cc @@ -0,0 +1,30 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); +} + +__global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); +} diff --git a/projects/hip-tests/catch/unit/assertion/static_assert_kernels_positive.cc b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_positive.cc new file mode 100644 index 0000000000..2ed0d7b68c --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_positive.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); +} + +__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); } + +__global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); +} + +__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); } diff --git a/projects/hip-tests/catch/unit/assertion/static_assert_kernels_rtc.hh b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_rtc.hh new file mode 100644 index 0000000000..5bb7419e30 --- /dev/null +++ b/projects/hip-tests/catch/unit/assertion/static_assert_kernels_rtc.hh @@ -0,0 +1,56 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Positive and negative kernels used for the static_assert Test Cases that are using RTC. +*/ + +static constexpr auto kStaticAssert_Positive{ + R"( + __global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); + } + + __global__ void StaticAssertPassKernel2() { + static_assert(10 > 5, "[StaticAssertPassKernel2]"); + } + + __global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); + } + + __global__ void StaticAssertFailKernel2() { + static_assert(10 < 5, "[StaticAssertFailKernel2]"); + } + )"}; + +static constexpr auto kStaticAssert_Negative{ + R"( + __global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); + } + + __global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); + } + )"}; diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc index 4bceaa41b7..effb4f68e6 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector= 5.2 - */ + */ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") { using namespace std::placeholders; hipGraph_t graph = nullptr; diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc index 1c8c047f9e..3163443944 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc @@ -23,7 +23,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect * - Verify that data is correctly copied to a symbol. A graph is constructed to which a * MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed and * the copied values are compared against values known to have been copied to symbol memory - * previously. + * previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for source memory allocated on host and device. diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemsetNode.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemsetNode.cc index 4d4359b2b1..af502ab07a 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddMemsetNode.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemsetNode.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include #include @@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") { * Allocate a 2D array using hipMallocPitch. Initialize the allocated memory * using hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { CHECK_IMAGE_SUPPORT @@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { } } // 2D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - numH)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, numH)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { myparms.extent = make_hipExtent(width, numH, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { * Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { CHECK_IMAGE_SUPPORT size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A; - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(numW, ' '); @@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { hipGraph_t graph; std::vector nodeDependencies; // 1D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - 1)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, 1)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { myparms.extent = make_hipExtent(width, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); @@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { * Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { CHECK_IMAGE_SUPPORT @@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -343,7 +334,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { * Allocate a 1D array using hipMalloc3D. Initialize the allocated * memory using hipGraphAddMemsetNode. Copy the values in device * memory to host using hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { CHECK_IMAGE_SUPPORT @@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { * Allocate a 1D array using hipMalloc. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { - char *A_d; + char* A_d; size_t NumW = SIZE; size_t Nbytes1D = SIZE * sizeof(char); @@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = NumW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0}; @@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < NumW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { int managed = 0; - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeManagedMemory, 0)); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); INFO("hipDeviceAttributeManagedMemory: " << managed); if (managed != 1) { WARN( - "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" - "so defaulting to system memory."); + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" + "so defaulting to system memory."); } size_t Nbytes1D = SIZE * sizeof(char); - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(SIZE, ' '); // Device Memory @@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = SIZE; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node @@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < SIZE; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); diff --git a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc index 585435e684..3d8681eeb4 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. After the graph is instantiated the parameters are updated to correct * values and the graph executed. After graph execution, a MemcpyFromSymbol is performed and the - * copied values are compared against values known to have been copied to symbol memory previously. + * copied values are compared against values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/projects/hip-tests/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc index edecbfad9a..ee2282b425 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -46,7 +45,7 @@ THE SOFTWARE. * which also constitutes a test for said API. * The test is repeated for all valid element sizes(1, * 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes - * can be updated), both on host and device + * can be updated), both on host and device * Test source * ------------------------ * - unit/graph/hipGraphExecMemsetNodeSetParams.cc diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc index 7f1ac7fe3c..b8c10c3900 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym * - Verify that data is correctly copied from a symbol after node parameters are set following * node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. - * Values in destination memory are compared against values known to be in symbol memory. + * Values in destination memory are compared against values known to be in symbol memory. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc index 0f84b6b283..b62b01cf5b 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. * After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against - * values known to have been copied to symbol memory previously. + * values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeGetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeGetParams.cc index 25fe849206..1c640db2de 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeGetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeGetParams.cc @@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include #include #include diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeSetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeSetParams.cc index d8f7cac249..af8e6d50da 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeSetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -44,7 +43,7 @@ THE SOFTWARE. * The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test * for said API. * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different - * height and width both on host and device + * height and width both on host and device * Test source * ------------------------ * - unit/graph/hipGraphMemsetNodeSetParams.cc diff --git a/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc b/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc index 1d9ea95b00..fe82055061 100644 --- a/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc +++ b/projects/hip-tests/catch/unit/graph/hipLaunchHostFunc.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include #include -#include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture.cc b/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture.cc index 9814a05097..21a2edec0b 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamBeginCapture.cc @@ -19,8 +19,7 @@ THE SOFTWARE. #include #include -#include -#include "stream_capture_common.hh" // NOLINT +#include "stream_capture_common.hh" // NOLINT #pragma clang diagnostic ignored "-Wunused-variable" /** @@ -56,8 +55,7 @@ static void hostNodeCallback(void* data) { } template -void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, - hipStream_t stream) { +void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(T); @@ -89,8 +87,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, std::fill_n(A_h.host_ptr(), N, static_cast(i)); HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); - ArrayFindIfNot(B_h.host_ptr(), - static_cast(i) * static_cast(i), N); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); } HIP_CHECK(hipGraphExecDestroy(graphExec)) @@ -117,16 +114,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { StreamGuard stream_guard(stream_type); hipStream_t stream = stream_guard.stream(); - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); EventsGuard events_guard(3); StreamsGuard streams_guard(2); SECTION("Linear graph capture") { captureStreamAndLaunchGraph( - [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, - hipStream_t stream) { + [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); }, captureMode, stream); @@ -134,10 +130,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { SECTION("Branched graph capture") { captureStreamAndLaunchGraph( - [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, - float* B_d, size_t N, hipStream_t stream) { - captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, - streams_guard.stream_list(), events_guard.event_list()); + [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, + hipStream_t stream) { + captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(), + events_guard.event_list()); }, captureMode, stream); } @@ -173,8 +169,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { hipErrorIllegalState); } SECTION("Creating hipStream with invalid mode") { - HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue); } #if HT_NVIDIA // EXSWHTEC-216 SECTION("Stream capture on uninitialized stream returns error code.") { @@ -182,8 +177,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { StreamGuard sg(Streams::created); return sg.stream(); }; - HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), - hipStreamCaptureModeGlobal), + HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal), hipErrorContextIsDestroyed); } #endif @@ -207,8 +201,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { StreamGuard stream_guard(stream_type); hipStream_t s = stream_guard.stream(); - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(s, captureMode)); @@ -218,8 +212,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { /* Local function for inter stream event synchronization */ -static void interStrmEventSyncCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -266,8 +259,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1, /* Local function for colligated stream capture */ -static void colligatedStrmCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -310,8 +302,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1, /* Local function for colligated stream capture functionality */ -static void colligatedStrmCaptureFunc(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(int); @@ -331,10 +322,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, // Capture 2 streams HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); - captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), - N, stream1); - captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), - N, stream2); + captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); + captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); @@ -370,9 +359,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, /* Stream Capture thread function */ -static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, - int* B_h, int* B_d, hipGraph_t* graph, - size_t N, hipStreamCaptureMode mode) { +static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d, + hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) { // Capture stream HIP_CHECK(hipStreamBeginCapture(stream, mode)); captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); @@ -404,10 +392,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) { LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); // Launch 2 threads to capture the 2 streams into graphs - std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), - B_h.host_ptr(), B_d.ptr(), &graph1, N, mode); - std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), - D_h.host_ptr(), D_d.ptr(), &graph2, N, mode); + std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), + B_d.ptr(), &graph1, N, mode); + std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), + D_d.ptr(), &graph2, N, mode); t1.join(); t2.join(); @@ -480,11 +468,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); interStrmEventSyncCapture(stream1, stream2); } @@ -533,11 +519,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCapture(stream1, stream2); } @@ -578,8 +562,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") { * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") { - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); multithreadedTest(captureMode); } @@ -708,8 +692,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") { HIP_CHECK(hipEventRecord(events[2], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); @@ -751,8 +734,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") { dummyKernel<<<1, 1, 0, streams[0]>>>(); // Since stream[1] is already in capture mode due to event wait // hipStreamBeginCapture on stream[1] is expected to return error. - HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], - hipStreamCaptureModeGlobal), + HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal), hipErrorIllegalState); } @@ -785,8 +767,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") { for (int i = 0; i < 3; i++) { HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - stream1)); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1)); HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i])); } // Instantiate and execute all graphs @@ -825,22 +806,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") { EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("Synchronize stream during capture") { - HIP_CHECK_ERROR(hipStreamSynchronize(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported); } SECTION("Query stream during capture") { - HIP_CHECK_ERROR(hipStreamQuery(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported); } #if HT_NVIDIA SECTION("Synchronize device during capture") { - HIP_CHECK_ERROR(hipDeviceSynchronize(), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported); } SECTION("Synchronize event during capture") { HIP_CHECK(hipEventRecord(e, stream)); @@ -884,17 +862,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") { HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("hipMalloc during capture") { - HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported); } SECTION("hipMemcpy during capture") { - HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), - hipMemcpyHostToDevice), + HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice), hipErrorStreamCaptureImplicit); } SECTION("hipMemset during capture") { - HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), - hipErrorStreamCaptureImplicit); + HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit); } } #endif @@ -931,8 +906,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } SECTION("End strm capture when forked strm still has operations") { EventsGuard events_guard(2); @@ -946,8 +920,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e2, stream2)); HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } } /** @@ -970,19 +943,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { SUCCEED("skipping the testcases as numDevices < 2"); return; } - hipStream_t* stream = reinterpret_cast - (malloc(devcount * sizeof(hipStream_t))); + hipStream_t* stream = reinterpret_cast(malloc(devcount * sizeof(hipStream_t))); REQUIRE(stream != nullptr); - hipGraph_t* graph = reinterpret_cast - (malloc(devcount * sizeof(hipGraph_t))); + hipGraph_t* graph = reinterpret_cast(malloc(devcount * sizeof(hipGraph_t))); REQUIRE(graph != nullptr); int **devMem{nullptr}, **hostMem{nullptr}; hostMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(hostMem != nullptr); devMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(devMem != nullptr); - hipGraphExec_t* graphExec = reinterpret_cast - (malloc(devcount * sizeof(hipGraphExec_t))); + hipGraphExec_t* graphExec = + reinterpret_cast(malloc(devcount * sizeof(hipGraphExec_t))); // Capture stream in each device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); @@ -994,15 +965,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { for (int i = 0; i < (dev + 1); i++) { incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]); } - HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), - hipMemcpyDefault, stream[dev])); + HIP_CHECK( + hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev])); HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev])); } // Launch the captured graphs in the respective device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); - HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev])); } // Validate output @@ -1069,8 +1039,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") { HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault, + streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); @@ -1108,23 +1078,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { hipGraph_t graphs[3]; StreamsGuard streams(3); EventsGuard events(4); - LinearAllocGuard hostMem_g1 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g2 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g3 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard devMem_g1 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g2 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g3 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard hostMem_g1 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g2 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g3 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g1 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g2 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g3 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); - std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), - hostMem_g3.host_ptr()}; - std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), - devMem_g3.ptr()}; + std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()}; + std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()}; // Create a device memory of size int and initialize it to 0 for (int i = 0; i < 3; i++) { memset(hostMem[i], 0, sizeof(int)); @@ -1148,16 +1110,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture // Start capturing graph2 from stream 2 HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); - HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), - hipMemcpyDefault, streams[1])); + HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1])); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture // Start capturing graph3 from stream 3 HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); @@ -1166,8 +1126,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); - HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), - hipMemcpyDefault, streams[2])); + HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2])); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int))); @@ -1211,40 +1170,32 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { EventsGuard events(7); // Allocate Device memory and Host memory size_t N = GRIDSIZE * BLOCKSIZE; - LinearAllocGuard Ah = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ch = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ch = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[4])); + HIP_CHECK( + hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4])); hipHostFn_t fn = hostNodeCallback; HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr)); HIP_CHECK(hipEventRecord(events[1], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0)); int* Ad_2nd_half = Ad.ptr() + N / 2; int* Ad_1st_half = Ad.ptr(); - mymul<<>>(Ad_2nd_half, - CONST_KER2_VAL); - mymul<<>>(Ad_1st_half, - CONST_KER1_VAL); + mymul<<>>(Ad_2nd_half, CONST_KER2_VAL); + mymul<<>>(Ad_1st_half, CONST_KER1_VAL); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0)); - mymul<<>>(Ad_1st_half, - CONST_KER3_VAL); + mymul<<>>(Ad_1st_half, CONST_KER3_VAL); HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr)); HIP_CHECK(hipEventRecord(events[6], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0)); @@ -1255,8 +1206,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipEventRecord(events[4], streams[3])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0)); - HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Execute and test the graph hipGraphExec_t graphExec{nullptr}; @@ -1269,11 +1220,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamSynchronize(streams[0])); for (size_t i = 0; i < N; i++) { if (i > (N / 2 - 1)) { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER2_VAL)); + REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL)); } else { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); + REQUIRE(Ch.host_ptr()[i] == + (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); } } } @@ -1340,14 +1290,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1357,10 +1305,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation Before Capture") { StreamsGuard stream1(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture @@ -1368,10 +1316,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Synchronizing multiple streams during Capture") { StreamsGuard stream1(1), stream2(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream2[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream2[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamSynchronize(stream2[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1380,20 +1328,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation After Capture") { HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Synchronize Before Capture") { StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1404,10 +1352,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); } // Execute and test the graph @@ -1415,8 +1363,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { HIP_CHECK(hipGraphLaunch(graphExec, stream0[0])); HIP_CHECK(hipStreamSynchronize(stream0[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } @@ -1437,20 +1384,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { * - HIP_VERSION >= 5.6 */ // Local function executed as thread -static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd, - int BLOCKSIZE, hipError_t *error) { +static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) { StreamsGuard stream(1); - HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); - HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); *error = hipStreamSynchronize(stream[0]); } // Local function executed as thread -static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad, - int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE, - hipStreamCaptureMode flag, hipError_t *error) { +static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, + int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) { StreamsGuard stream(1); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], flag)); @@ -1466,14 +1409,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { constexpr int VALUE1 = 7, VALUE2 = 11; hipGraph_t graph{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1483,10 +1424,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { StreamsGuard stream(2); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } @@ -1494,34 +1435,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") { StreamsGuard stream(2); // Capture streams into graph - HIP_CHECK(hipStreamBeginCapture(stream[0], - hipStreamCaptureModeThreadLocal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif #if HT_AMD SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeGlobal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeGlobal, &error); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeThreadLocal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeThreadLocal, &error); REQUIRE(error == hipSuccess); } SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeRelaxed, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeRelaxed, &error); REQUIRE(error == hipSuccess); } if (graph != nullptr) { @@ -1532,8 +1469,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { HIP_CHECK(hipGraphLaunch(graphExec, stream[0])); HIP_CHECK(hipStreamSynchronize(stream[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } diff --git a/projects/hip-tests/catch/unit/graph/hipStreamEndCapture.cc b/projects/hip-tests/catch/unit/graph/hipStreamEndCapture.cc index 39f25ce2a6..7ec1ea88a6 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamEndCapture.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamEndCapture.cc @@ -19,7 +19,6 @@ THE SOFTWARE. #include #include -#include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo.cc b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo.cc index d8f8cb5d55..9c3317ed85 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include -#include #include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc index ea67318ef9..0dde7247b1 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc @@ -19,7 +19,6 @@ THE SOFTWARE. #include #include -#include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing.cc b/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing.cc index c6a77c316e..256d20f21d 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamIsCapturing.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include -#include #include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc b/projects/hip-tests/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc index e35dd317d6..e11e1c3e24 100644 --- a/projects/hip-tests/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc +++ b/projects/hip-tests/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #include "stream_capture_common.hh" @@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") { const hipStreamUpdateCaptureDependenciesFlags flag = GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies); - HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); // hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag)); diff --git a/projects/hip-tests/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc b/projects/hip-tests/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc index c35fc18900..5ac784bc79 100644 --- a/projects/hip-tests/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc +++ b/projects/hip-tests/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #include "stream_capture_common.hh" diff --git a/projects/hip-tests/catch/unit/kernel/hipShflTests.cc b/projects/hip-tests/catch/unit/kernel/hipShflTests.cc index 89c529c16b..3525602bd0 100644 --- a/projects/hip-tests/catch/unit/kernel/hipShflTests.cc +++ b/projects/hip-tests/catch/unit/kernel/hipShflTests.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include #include #include -#include #define WIDTH 4 @@ -32,20 +31,17 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Z 1 // Device (Kernel) function, it must be void -template -__global__ void matrixTranspose(T* out, T* in, const int width) { +template __global__ void matrixTranspose(T* out, T* in, const int width) { int x = blockDim.x * blockIdx.x + threadIdx.x; T val = in[x]; for (int i = 0; i < width; i++) { - for (int j = 0; j < width; j++) - out[i * width + j] = __shfl(val, j * width + i); + for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); } } // CPU implementation of matrix transpose template -void matrixTransposeCPUReference(T* output, - T* input, const unsigned int width) { +void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { for (unsigned int j = 0; j < width; j++) { for (unsigned int i = 0; i < width; i++) { output[i * width + j] = input[j * width + i]; @@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output, } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } -template -int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { +template int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { int errors = 0; for (int i = 0; i < NUM; i++) { - if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { - errors++; - } - } - return errors; -} - -template <> -int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { errors++; } } return errors; } -template -void init(T* Matrix) { +template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { + int errors = 0; + for (int i = 0; i < NUM; i++) { + if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + errors++; + } + } + return errors; +} + +template void init(T* Matrix) { // initialize the input data T factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = (T)i + factor; + Matrix[i] = (T)i + factor; } } -template <> -void init(__half* Matrix) { +template <> void init(__half* Matrix) { // initialize the input data __half factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = i + __half2float(factor); + Matrix[i] = i + __half2float(factor); } } -template -static void runTest() { +template static void runTest() { T* Matrix; T* TransposeMatrix; T* cpuTransposeMatrix; @@ -129,21 +116,17 @@ static void runTest() { // allocate the memory on the device side HIP_CHECK(hipMalloc(reinterpret_cast(&gpuMatrix), NUM * sizeof(T))); - HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), - NUM * sizeof(T))); + HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), NUM * sizeof(T))); // Memory transfer from host to device - HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), - hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice)); // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(1), - dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, - gpuTransposeMatrix, gpuMatrix, WIDTH); + hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), + 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, - NUM * sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -183,26 +166,12 @@ static void runTest() { */ TEST_CASE("Unit_hipShflTests") { - SECTION("run test for int") { - runTest(); - } - SECTION("run test for float") { - runTest(); - } - SECTION("run test for double") { - runTest(); - } + SECTION("run test for int") { runTest(); } + SECTION("run test for float") { runTest(); } + SECTION("run test for double") { runTest(); } // Test added to support half datatype. - SECTION("run test for __half") { - runTest<__half>(); - } - SECTION("run test for int64_t") { - runTest(); - } - SECTION("run test for unsigned int") { - runTest(); - } - SECTION("run test for uint64_t") { - runTest(); - } + SECTION("run test for __half") { runTest<__half>(); } + SECTION("run test for int64_t") { runTest(); } + SECTION("run test for unsigned int") { runTest(); } + SECTION("run test for uint64_t") { runTest(); } } diff --git a/projects/hip-tests/catch/unit/kernel/hipShflUpDownTest.cc b/projects/hip-tests/catch/unit/kernel/hipShflUpDownTest.cc index ab80dd51b1..a06216f03d 100644 --- a/projects/hip-tests/catch/unit/kernel/hipShflUpDownTest.cc +++ b/projects/hip-tests/catch/unit/kernel/hipShflUpDownTest.cc @@ -21,12 +21,10 @@ THE SOFTWARE. #include #include #include -#include const int size = 32; -template -__global__ void shflDownSum(T* a, int size) { +template __global__ void shflDownSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_down(val, i, size); @@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflUpSum(T* a, int size) { +template __global__ void shflUpSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_up(val, i, size); @@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflXorSum(T* a, int size) { +template __global__ void shflXorSum(T* a, int size) { T val = a[threadIdx.x]; - for (int i = size/2; i > 0; i /= 2) { + for (int i = size / 2; i > 0; i /= 2) { val += __shfl_xor(val, i, size); } a[threadIdx.x] = val; } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } template T sum(T* a) { T cpuSum = 0; T factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + factor; - cpuSum += a[i]; + a[i] = i + factor; + cpuSum += a[i]; } return cpuSum; } @@ -80,8 +72,8 @@ template <> __half sum(__half* a) { __half factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + __half2float(factor); - cpuSum = __half2float(cpuSum) + __half2float(a[i]); + a[i] = i + __half2float(factor); + cpuSum = __half2float(cpuSum) + __half2float(a[i]); } return cpuSum; } @@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) { return false; } -template -static void runTestShflUp() { +template static void runTestShflUp() { const int size = 32; T a[size]; T cpuSum = sum(a); @@ -114,8 +105,7 @@ static void runTestShflUp() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflDown() { +template static void runTestShflDown() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -127,8 +117,7 @@ static void runTestShflDown() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflXor() { +template static void runTestShflXor() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -141,12 +130,12 @@ static void runTestShflXor() { } /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_up function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_up function + */ /** * Test Description @@ -164,27 +153,13 @@ static void runTestShflXor() { */ TEST_CASE("Unit_runTestShfl_up") { - SECTION("runTestShflUp for int") { - runTestShflUp(); - } - SECTION("runTestShflUp for float") { - runTestShflUp(); - } - SECTION("runTestShflUp for double") { - runTestShflUp(); - } - SECTION("runTestShflUp for __half") { - runTestShflUp<__half>(); - } - SECTION("runTestShflUp for int64_t") { - runTestShflUp(); - } - SECTION("runTestShflUp for unsigned int") { - runTestShflUp(); - } - SECTION("runTestShflUp for uint64_t") { - runTestShflUp(); - } + SECTION("runTestShflUp for int") { runTestShflUp(); } + SECTION("runTestShflUp for float") { runTestShflUp(); } + SECTION("runTestShflUp for double") { runTestShflUp(); } + SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); } + SECTION("runTestShflUp for int64_t") { runTestShflUp(); } + SECTION("runTestShflUp for unsigned int") { runTestShflUp(); } + SECTION("runTestShflUp for uint64_t") { runTestShflUp(); } } /** * End doxygen group __shfl. @@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_down function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_down function + */ /** * Test Description @@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") { */ TEST_CASE("Unit_runTestShfl_Down") { - SECTION("runTestShflDown for int") { - runTestShflDown(); - } - SECTION("runTestShflDown for float") { - runTestShflDown(); - } - SECTION("runTestShflDown for double") { - runTestShflDown(); - } - SECTION("runTestShflDown for __half") { - runTestShflDown<__half>(); - } - SECTION("runTestShflDown for int64_t") { - runTestShflDown(); - } - SECTION("runTestShflDown for unsigned int") { - runTestShflDown(); - } - SECTION("runTestShflDown for uint64_t") { - runTestShflDown(); - } + SECTION("runTestShflDown for int") { runTestShflDown(); } + SECTION("runTestShflDown for float") { runTestShflDown(); } + SECTION("runTestShflDown for double") { runTestShflDown(); } + SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); } + SECTION("runTestShflDown for int64_t") { runTestShflDown(); } + SECTION("runTestShflDown for unsigned int") { runTestShflDown(); } + SECTION("runTestShflDown for uint64_t") { runTestShflDown(); } } /** * End doxygen group __shfl. @@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_xor(T var, int laneMask, int width=warpSize)` - -* Contains warp __shfl_xor function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_xor(T var, int laneMask, int width=warpSize)` - + * Contains warp __shfl_xor function + */ /** * Test Description @@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") { */ TEST_CASE("Unit_runTestShfl_Xor") { - SECTION("runTestShflXor for int") { - runTestShflXor(); - } - SECTION("runTestShflXor for float") { - runTestShflXor(); - } - SECTION("runTestShflXor for double") { - runTestShflXor(); - } - SECTION("runTestShflXor for __half") { - runTestShflXor<__half>(); - } - SECTION("runTestShflXor for int64_t") { - runTestShflXor(); - } - SECTION("runTestShflXor for unsigned int") { - runTestShflXor(); - } - SECTION("runTestShflXor for uint64_t") { - runTestShflXor(); - } + SECTION("runTestShflXor for int") { runTestShflXor(); } + SECTION("runTestShflXor for float") { runTestShflXor(); } + SECTION("runTestShflXor for double") { runTestShflXor(); } + SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); } + SECTION("runTestShflXor for int64_t") { runTestShflXor(); } + SECTION("runTestShflXor for unsigned int") { runTestShflXor(); } + SECTION("runTestShflXor for uint64_t") { runTestShflXor(); } } /** * End doxygen group __shfl. diff --git a/projects/hip-tests/catch/unit/stream/hipStreamGetDevice.cc b/projects/hip-tests/catch/unit/stream/hipStreamGetDevice.cc index 9f2eef521e..1fe87eaeeb 100644 --- a/projects/hip-tests/catch/unit/stream/hipStreamGetDevice.cc +++ b/projects/hip-tests/catch/unit/stream/hipStreamGetDevice.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #define NUMBER_OF_THREADS 10 static bool thread_results[NUMBER_OF_THREADS]; @@ -54,8 +53,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") { HIP_CHECK(hipStreamCreate(&stream)); HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue); - HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue); HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue); HIP_CHECK(hipStreamDestroy(stream)); } @@ -145,9 +143,7 @@ static bool validateStreamGetDevice() { return true; } -static void thread_Test(int threadNum) { - thread_results[threadNum] = validateStreamGetDevice(); -} +static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); } static bool test_hipStreamGetDevice_MThread() { std::vector tests; @@ -158,7 +154,7 @@ static bool test_hipStreamGetDevice_MThread() { tests.push_back(std::thread(thread_Test, idx)); } // Wait for all threads to complete - for (std::thread &t : tests) { + for (std::thread& t : tests) { t.join(); } // Wait for thread @@ -169,9 +165,7 @@ static bool test_hipStreamGetDevice_MThread() { return status; } -TEST_CASE("Unit_hipStreamGetDevice_MThread") { - REQUIRE(true == test_hipStreamGetDevice_MThread()); -} +TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); } /** * Test Description