From dfda77be9c76c05acc9a0eb2b386145f7a3bcda6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:36:27 +0100 Subject: [PATCH 01/19] EXSWHTEC-170 - Implement tests for Kernel Graph Node Attribute APIs #8 Change-Id: I75271f0d1906d31b16d09ca6acb94ee864e87d92 [ROCm/hip-tests commit: 55b925f3ff8f5858482b13e72ccbf989f2158791] --- .../graph/hipGraphKernelNodeGetAttribute.cc | 203 ++----- .../graph/hipGraphKernelNodeSetAttribute.cc | 494 +++++++----------- .../hipGraphKernelNodeSetAttribute_old.cc | 370 +++++++++++++ 3 files changed, 596 insertions(+), 471 deletions(-) create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute_old.cc diff --git a/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeGetAttribute.cc b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeGetAttribute.cc index faed511f7b..7051853e7d 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeGetAttribute.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeGetAttribute.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -18,178 +21,66 @@ THE SOFTWARE. */ #include -#include #include -/** -* @addtogroup hipGraphKernelNodeGetAttribute hipGraphKernelNodeGetAttribute -* @{ -* @ingroup GraphTest -* `hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, -* hipKernelNodeAttrID attr, hipKernelNodeAttrValue* value_out )` - -* Queries node attribute. -*/ +#define THREADS_PER_BLOCK 512 -/** -* Test Description -* ------------------------ -*  - Functional Test for API - hipGraphKernelNodeGetAttribute -* 1) GetKernelAttribute for ID hipKernelNodeAttributeCooperative -* 2) GetKernelAttribute for ID hipKernelNodeAttributeAccessPolicyWindow -* Test source -* ------------------------ -*  - unit/graph/hipGraphKernelNodeGetAttribute.cc -* Test requirements -* ------------------------ -*  - HIP_VERSION >= 5.6 -*/ +TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters") { + constexpr int N = 1024; -TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Functional") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - hipGraph_t graph; - hipGraphExec_t graphExec; - hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; - hipKernelNodeParams kNodeParams{}; - hipStream_t stream; int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; + HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N)); - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - 
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kNodeParams.gridDim = dim3(blocks); - kNodeParams.blockDim = dim3(threadsPerBlock); - kNodeParams.sharedMemBytes = 0; - kNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kNodeParams)); - - // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1)); - - hipKernelNodeAttrValue value_out; - memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); - - SECTION("GetKernelAttribute for hipKernelNodeAttributeCooperative") { - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeCooperative, &value_out)); - } - SECTION("GetKernelAttribute for hipKernelNodeAttributeAccessPolicyWindow") { - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_out)); - } - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Verify graph execution result - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); -} - -/** -* Test Description -* ------------------------ -*  - Negative Test for API - hipGraphKernelNodeGetAttribute -* 1) Pass kernel node as nullptr for Get attribute api & verify -* 2) Pass KernelNodeAttrID as negative value for Get attribute api & verify -* 3) 
Pass KernelNodeAttrID as INT_MAX value for Get attribute api & verify -* 4) Pass KernelNodeAttrValue as nullptr for Get attribute api & verify -* Test source -* ------------------------ -*  - unit/graph/hipGraphKernelNodeGetAttribute.cc -* Test requirements -* ------------------------ -*  - HIP_VERSION >= 5.6 -*/ - -TEST_CASE("Unit_hipGraphKernelNodeGetAttribute_Negative") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; hipGraph_t graph; - hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; - hipKernelNodeParams kNodeParams{}; - hipStream_t stream; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; - hipError_t ret; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kNodeParams.gridDim = dim3(blocks); - kNodeParams.blockDim = dim3(threadsPerBlock); - kNodeParams.sharedMemBytes = 0; - kNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kNodeParams)); + hipKernelNodeParams node_params{}; + node_params.func = reinterpret_cast(HipTest::vectorADD); + node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1); + node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - hipKernelNodeAttrValue value_out; 
- memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + size_t N_elem{N}; + void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast(&N_elem)}; + node_params.kernelParams = reinterpret_cast(kernel_params); - SECTION("Pass kernel node as nullptr for Get attribute api") { - ret = hipGraphKernelNodeGetAttribute(nullptr, - hipKernelNodeAttributeAccessPolicyWindow, &value_out); - REQUIRE(hipErrorInvalidValue == ret); + hipGraphNode_t graph_node; + HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params)); + + hipKernelNodeAttrValue node_attribute; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute( + nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); } - SECTION("Pass KernelNodeAttrID as negative value for Get attribute api") { - ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttrID(-1), &value_out); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("node is not a kernel node") { + hipGraphNode_t empty_node; + HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0)); + HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute( + empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); } - SECTION("Pass KernelNodeAttrID as INT_MAX value for Get attribute api") { - ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttrID(INT_MAX), &value_out); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("invalid attribute") { + HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute(graph_node, static_cast(-1), + &node_attribute), + hipErrorInvalidValue); } -#if HT_AMD // getting SIGSEGV error in Cuda Setup - SECTION("Pass KernelNodeAttrValue as nullptr for Get attribute api") { - ret = hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, nullptr); - REQUIRE(hipErrorInvalidValue == ret); + +#if HT_AMD // segfaults on NVIDIA + SECTION("value == nullptr") { + 
HIP_CHECK_ERROR(hipGraphKernelNodeGetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr), + hipErrorInvalidValue); } #endif - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(C_d)); } diff --git a/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute.cc b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute.cc index ac476b2546..8f2c101b96 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute.cc @@ -1,13 +1,16 @@ /* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -18,353 +21,214 @@ THE SOFTWARE. */ #include -#include #include -/** -* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute -* @{ -* @ingroup GraphTest -* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, -* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` - -* Sets node attribute. 
-*/ +#define THREADS_PER_BLOCK 512 -/** -* Test Description -* ------------------------ -*  - Functional Test for API - hipGraphKernelNodeSetAttribute -* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes -* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes -* 3) Check hipGraphKernelNodeSetAttribute for window cooperative attributes -* Test source -* ------------------------ -*  - unit/graph/hipGraphKernelNodeGetAttribute.cc -* Test requirements -* ------------------------ -*  - HIP_VERSION >= 5.6 -*/ +namespace { +constexpr std::array kAccessProperties{ + hipAccessPropertyNormal, hipAccessPropertyStreaming, hipAccessPropertyPersisting}; +} // anonymous namespace -static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in, - hipKernelNodeAttrValue out) { - if ((in.accessPolicyWindow.base_ptr != out.accessPolicyWindow.base_ptr) || - (in.accessPolicyWindow.hitProp != out.accessPolicyWindow.hitProp) || - (in.accessPolicyWindow.hitRatio != out.accessPolicyWindow.hitRatio) || - (in.accessPolicyWindow.missProp != out.accessPolicyWindow.missProp) || - (in.accessPolicyWindow.num_bytes != out.accessPolicyWindow.num_bytes) || - (in.cooperative != out.cooperative)) { - return false; - } - return true; +static bool CompareAccessPolicyWindow(const hipKernelNodeAttrValue& lhs, + const hipKernelNodeAttrValue& rhs) { + return lhs.accessPolicyWindow.base_ptr == rhs.accessPolicyWindow.base_ptr && + lhs.accessPolicyWindow.num_bytes == rhs.accessPolicyWindow.num_bytes && + lhs.accessPolicyWindow.hitRatio == rhs.accessPolicyWindow.hitRatio && + lhs.accessPolicyWindow.hitProp == rhs.accessPolicyWindow.hitProp && + lhs.accessPolicyWindow.missProp == rhs.accessPolicyWindow.missProp; } -TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - hipGraph_t graph; - 
hipGraphExec_t graphExec; - hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; - hipKernelNodeParams kNodeParams{}; - hipStream_t stream; +TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_AccessPolicyWindow") { + constexpr int N = 1024; + + const auto hit_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties))); + const auto miss_prop = GENERATE(from_range(begin(kAccessProperties), end(kAccessProperties) - 1)); + int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N)); + hipGraph_t graph; HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kNodeParams.gridDim = dim3(blocks); - kNodeParams.blockDim = dim3(threadsPerBlock); - kNodeParams.sharedMemBytes = 0; - kNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kNodeParams)); + hipKernelNodeParams node_params{}; + node_params.func = reinterpret_cast(HipTest::vectorADD); + node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1); + node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1)); - 
HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1)); + size_t N_elem{N}; + void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast(&N_elem)}; + node_params.kernelParams = reinterpret_cast(kernel_params); - hipKernelNodeAttrValue value_in, value_out; + hipGraphNode_t graph_node; + HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params)); - SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") { - memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); - memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + int max_window_size; + HIP_CHECK( + hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0)); - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + hipKernelNodeAttrValue node_attribute_1; + node_attribute_1.accessPolicyWindow.base_ptr = reinterpret_cast(A_d); + node_attribute_1.accessPolicyWindow.num_bytes = + std::min(static_cast(max_window_size), sizeof(int) * N); + node_attribute_1.accessPolicyWindow.hitRatio = 0.6; + node_attribute_1.accessPolicyWindow.hitProp = hit_prop; + node_attribute_1.accessPolicyWindow.missProp = miss_prop; - value_in.accessPolicyWindow.hitRatio = 0.8; - value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; - value_in.accessPolicyWindow.missProp = hipAccessPropertyStreaming; + HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow, + &node_attribute_1)); - HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + hipKernelNodeAttrValue node_attribute_2; + HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeAccessPolicyWindow, + &node_attribute_2)); - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_out)); - REQUIRE(true 
== validateKernelNodeAttrValue(value_in, value_out)); - } - SECTION("Check hipGraphKernelNodeSetAttribute for cooperative") { - memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); - memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + REQUIRE(CompareAccessPolicyWindow(node_attribute_1, node_attribute_2)); - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); - - value_in.cooperative = 2; - - HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); - - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_out)); - REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out)); - } - - SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") { - memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); - memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); - - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); - - value_in.cooperative = 8; - value_in.accessPolicyWindow.hitRatio = 0.1; - value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; - value_in.accessPolicyWindow.missProp = hipAccessPropertyNormal; - - HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); - - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_out)); - REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out)); - } - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Verify graph execution result - HipTest::checkVectorADD(A_h, B_h, C_h, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIP_CHECK(hipGraphExecDestroy(graphExec)); 
HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(C_d)); } -/** -* Test Description -* ------------------------ -*  - Negative/argument Test for API - hipGraphKernelNodeSetAttribute -* 1) Pass kernel node as nullptr for Set attribute api and verify -* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify -* 3) Pass KernelNodeAttrID as INT_MAX value for Get attribute api and verify -* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify -* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value missProp as hipAccessPropertyPersisting -* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value hitProp as hipAccessPropertyPersisting -* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.hitRatio as 1.4 -* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.hitRatio as 0 -* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.hitRatio as 1 -* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.hitRatio as -1.8 -* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.hitRatio as -0.6 -* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6 -* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" -* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6 -* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value accessPolicyWindow.num_bytes as 1 MB -* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow -* and pass value base_ptr as nullptr -* Test source 
-* ------------------------ -*  - unit/graph/hipGraphKernelNodeSetAttribute.cc -* Test requirements -* ------------------------ -*  - HIP_VERSION >= 5.6 -*/ +TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Positive_Cooperative") { + constexpr int N = 1024; -TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - hipGraph_t graph; - hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; - hipKernelNodeParams kNodeParams{}; - hipStream_t stream; int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; - hipError_t ret; - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N)); + hipGraph_t graph; HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); - void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kNodeParams.gridDim = dim3(blocks); - kNodeParams.blockDim = dim3(threadsPerBlock); - kNodeParams.sharedMemBytes = 0; - kNodeParams.kernelParams = reinterpret_cast(kernelArgs); - kNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kNodeParams)); + hipKernelNodeParams node_params{}; + node_params.func = reinterpret_cast(HipTest::vectorADD); + node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 
1, 1); + node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); - hipKernelNodeAttrValue value_in, value_out; - memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); - memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); - HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in)); - memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue)); + size_t N_elem{N}; + void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast(&N_elem)}; + node_params.kernelParams = reinterpret_cast(kernel_params); - SECTION("Pass kernel node as nullptr for Set attribute api") { - ret = hipGraphKernelNodeSetAttribute(nullptr, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); + hipGraphNode_t graph_node; + HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params)); + + hipKernelNodeAttrValue node_attribute_1; + node_attribute_1.cooperative = 2; + + HIP_CHECK(hipGraphKernelNodeSetAttribute(graph_node, hipKernelNodeAttributeCooperative, + &node_attribute_1)); + + hipKernelNodeAttrValue node_attribute_2; + HIP_CHECK(hipGraphKernelNodeGetAttribute(graph_node, hipKernelNodeAttributeCooperative, + &node_attribute_2)); + + REQUIRE(node_attribute_1.cooperative == node_attribute_2.cooperative); + + HIP_CHECK(hipGraphDestroy(graph)); + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(C_d)); +} + +TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative_Parameters") { + constexpr int N = 1024; + + int *A_d, *B_d, *C_d; + HIP_CHECK(hipMalloc(&A_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&B_d, sizeof(int) * N)); + HIP_CHECK(hipMalloc(&C_d, sizeof(int) * N)); + + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipKernelNodeParams node_params{}; + node_params.func = reinterpret_cast(HipTest::vectorADD); + node_params.gridDim = dim3(N / THREADS_PER_BLOCK, 1, 1); + node_params.blockDim = dim3(THREADS_PER_BLOCK, 1, 1); + + size_t N_elem{N}; + 
void* kernel_params[] = {&A_d, &B_d, &C_d, reinterpret_cast(&N_elem)}; + node_params.kernelParams = reinterpret_cast(kernel_params); + + hipGraphNode_t graph_node; + HIP_CHECK(hipGraphAddKernelNode(&graph_node, graph, nullptr, 0, &node_params)); + + int max_window_size; + HIP_CHECK( + hipDeviceGetAttribute(&max_window_size, hipDeviceAttributeAccessPolicyMaxWindowSize, 0)); + + hipKernelNodeAttrValue node_attribute; + node_attribute.accessPolicyWindow.base_ptr = reinterpret_cast(A_d); + node_attribute.accessPolicyWindow.num_bytes = + std::min(static_cast(max_window_size), sizeof(int) * N); + node_attribute.accessPolicyWindow.hitRatio = 0.6; + node_attribute.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; + node_attribute.accessPolicyWindow.missProp = hipAccessPropertyStreaming; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + nullptr, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); } - SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") { - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttrID(-1), &value_in); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("node is not a kernel node") { + hipGraphNode_t empty_node; + HIP_CHECK(hipGraphAddEmptyNode(&empty_node, graph, nullptr, 0)); + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + empty_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); } - SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") { - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttrID(INT_MAX), &value_in); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("invalid attribute") { + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute(graph_node, static_cast(-1), + &node_attribute), + hipErrorInvalidValue); } -#if HT_AMD // getting SIGSEGV error in Cuda Setup - SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") { - ret = 
hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, nullptr); - REQUIRE(hipErrorInvalidValue == ret); + +#if HT_AMD // segfaults on NVIDIA + SECTION("value == nullptr") { + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, nullptr), + hipErrorInvalidValue); } #endif - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value missProp as hipAccessPropertyPersisting") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value hitProp as hipAccessPropertyPersisting") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipSuccess == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.hitRatio as 1.4") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitRatio = 1.4; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.hitRatio as 0") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitRatio = 0; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipSuccess == ret); - } - 
SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.hitRatio as 1") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitRatio = 1; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipSuccess == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.hitRatio as -1.8") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitRatio = -1.8; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.hitRatio as -0.6") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.hitRatio = -0.6; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.num_bytes = 1024; - value_in.accessPolicyWindow.hitRatio = 0.6; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024; - value_in.accessPolicyWindow.hitRatio = -0.6; - ret = 
hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value accessPolicyWindow.num_bytes as 1 MB") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.num_bytes = 1024 * 1024; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" - " and pass value base_ptr as nullptr") { - memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); - value_in.accessPolicyWindow.base_ptr = nullptr; - ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, - hipKernelNodeAttributeAccessPolicyWindow, &value_in); - REQUIRE(hipSuccess == ret); + + SECTION("accessPolicyWindow.num_bytes > accessPolicyMaxWindowSize") { + node_attribute.accessPolicyWindow.num_bytes = max_window_size + 1; + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); + } + + SECTION("accessPolicyWindow.hitRatio < 0") { + node_attribute.accessPolicyWindow.hitRatio = -0.6; + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); + } + + SECTION("accessPolicyWindow.hitRatio > 1.0") { + node_attribute.accessPolicyWindow.hitRatio = 1.1; + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, &node_attribute), + hipErrorInvalidValue); + } + + SECTION("accessPolicyWindow.missProp == hipAccessPropertyPersisting") { + node_attribute.accessPolicyWindow.missProp = hipAccessPropertyPersisting; + HIP_CHECK_ERROR(hipGraphKernelNodeSetAttribute( + graph_node, hipKernelNodeAttributeAccessPolicyWindow, 
&node_attribute), + hipErrorInvalidValue); } - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(stream)); -} + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + HIP_CHECK(hipFree(C_d)); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute_old.cc new file mode 100644 index 0000000000..ac476b2546 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphKernelNodeSetAttribute_old.cc @@ -0,0 +1,370 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +/** +* @addtogroup hipGraphKernelNodeSetAttribute hipGraphKernelNodeSetAttribute +* @{ +* @ingroup GraphTest +* `hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, +* hipKernelNodeAttrID attr, const hipKernelNodeAttrValue* value )` - +* Sets node attribute. +*/ + +/** +* Test Description +* ------------------------ +*  - Functional Test for API - hipGraphKernelNodeSetAttribute +* 1) Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow attributes +* 2) Check hipGraphKernelNodeSetAttribute for cooperative attributes +* 3) Check hipGraphKernelNodeSetAttribute for window cooperative attributes +* Test source +* ------------------------ +*  - unit/graph/hipGraphKernelNodeGetAttribute.cc +* Test requirements +* ------------------------ +*  - HIP_VERSION >= 5.6 +*/ + +static bool validateKernelNodeAttrValue(hipKernelNodeAttrValue in, + hipKernelNodeAttrValue out) { + if ((in.accessPolicyWindow.base_ptr != out.accessPolicyWindow.base_ptr) || + (in.accessPolicyWindow.hitProp != out.accessPolicyWindow.hitProp) || + (in.accessPolicyWindow.hitRatio != out.accessPolicyWindow.hitRatio) || + (in.accessPolicyWindow.missProp != out.accessPolicyWindow.missProp) || + (in.accessPolicyWindow.num_bytes != out.accessPolicyWindow.num_bytes) || + (in.cooperative != out.cooperative)) { + return false; + } + return true; +} + +TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphExec_t graphExec; + hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; + hipKernelNodeParams kNodeParams{}; + hipStream_t stream; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + 
HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kNodeParams.gridDim = dim3(blocks); + kNodeParams.blockDim = dim3(threadsPerBlock); + kNodeParams.sharedMemBytes = 0; + kNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpy_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpy_C, 1)); + + hipKernelNodeAttrValue value_in, value_out; + + SECTION("Check hipGraphKernelNodeSetAttribute for AccessPolicyWindow") { + memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); + memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + value_in.accessPolicyWindow.hitRatio = 0.8; + value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; + value_in.accessPolicyWindow.missProp = hipAccessPropertyStreaming; + + HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_out)); + REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out)); + } + SECTION("Check hipGraphKernelNodeSetAttribute for 
cooperative") { + memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); + memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + value_in.cooperative = 2; + + HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_out)); + REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out)); + } + + SECTION("Check hipGraphKernelNodeSetAttribute for window and cooperative") { + memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); + memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + value_in.cooperative = 8; + value_in.accessPolicyWindow.hitRatio = 0.1; + value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; + value_in.accessPolicyWindow.missProp = hipAccessPropertyNormal; + + HIP_CHECK(hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_out)); + REQUIRE(true == validateKernelNodeAttrValue(value_in, value_out)); + } + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, C_h, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** +* Test Description +* ------------------------ +*  - Negative/argument Test for API - 
hipGraphKernelNodeSetAttribute +* 1) Pass kernel node as nullptr for Set attribute api and verify +* 2) Pass KernelNodeAttrID as invalid value for Set attribute api and verify +* 3) Pass KernelNodeAttrID as INT_MAX value for Get attribute api and verify +* 4) Pass KernelNodeAttrValue as nullptr for Set attribute api and verify +* 5) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value missProp as hipAccessPropertyPersisting +* 6) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value hitProp as hipAccessPropertyPersisting +* 7) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.hitRatio as 1.4 +* 8) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.hitRatio as 0 +* 9) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.hitRatio as 1 +* 10) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.hitRatio as -1.8 +* 11) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.hitRatio as -0.6 +* 12) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6 +* 13) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" +* and pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6 +* 14) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value accessPolicyWindow.num_bytes as 1 MB +* 15) Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow +* and pass value base_ptr as nullptr +* Test source +* ------------------------ +*  - unit/graph/hipGraphKernelNodeSetAttribute.cc +* Test requirements +* ------------------------ +*  - HIP_VERSION >= 5.6 +*/ + +TEST_CASE("Unit_hipGraphKernelNodeSetAttribute_Negative") { + constexpr size_t N = 1024; + 
constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + hipGraph_t graph; + hipGraphNode_t memcpy_A, memcpy_B, memcpy_C, kernel_vecAdd; + hipKernelNodeParams kNodeParams{}; + hipStream_t stream; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + hipError_t ret; + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_B, graph, nullptr, 0, B_d, B_h, + Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpy_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kNodeParams.gridDim = dim3(blocks); + kNodeParams.blockDim = dim3(threadsPerBlock); + kNodeParams.sharedMemBytes = 0; + kNodeParams.kernelParams = reinterpret_cast(kernelArgs); + kNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kNodeParams)); + + hipKernelNodeAttrValue value_in, value_out; + memset(&value_in, 0, sizeof(hipKernelNodeAttrValue)); + memset(&value_out, 0, sizeof(hipKernelNodeAttrValue)); + HIP_CHECK(hipGraphKernelNodeGetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in)); + memcpy(&value_out, &value_in, sizeof(hipKernelNodeAttrValue)); + + SECTION("Pass kernel node as nullptr for Set attribute api") { + ret = hipGraphKernelNodeSetAttribute(nullptr, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as invalid value for Set attribute api") { + ret = 
hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttrID(-1), &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as INT_MAX value for Set attribute api") { + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttrID(INT_MAX), &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } +#if HT_AMD // getting SIGSEGV error in Cuda Setup + SECTION("Pass KernelNodeAttrValue as nullptr for Set attribute api") { + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, nullptr); + REQUIRE(hipErrorInvalidValue == ret); + } +#endif + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value missProp as hipAccessPropertyPersisting") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.missProp = hipAccessPropertyPersisting; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value hitProp as hipAccessPropertyPersisting") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitProp = hipAccessPropertyPersisting; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipSuccess == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value accessPolicyWindow.hitRatio as 1.4") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitRatio = 1.4; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value 
accessPolicyWindow.hitRatio as 0") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitRatio = 0; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipSuccess == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value accessPolicyWindow.hitRatio as 1") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitRatio = 1; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipSuccess == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value accessPolicyWindow.hitRatio as -1.8") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitRatio = -1.8; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value accessPolicyWindow.hitRatio as -0.6") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.hitRatio = -0.6; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " & pass accessPolicyWindow.num_bytes as 1024 & hitRatio as 0.6") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.num_bytes = 1024; + value_in.accessPolicyWindow.hitRatio = 0.6; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as 
hipKernelNodeAttributeAccessPolicyWindow" + " & pass accessPolicyWindow.num_bytes as 1 GB & hitRatio as -0.6") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.num_bytes = 1024 * 1024 * 1024; + value_in.accessPolicyWindow.hitRatio = -0.6; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value accessPolicyWindow.num_bytes as 1 MB") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.num_bytes = 1024 * 1024; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass KernelNodeAttrID as hipKernelNodeAttributeAccessPolicyWindow" + " and pass value base_ptr as nullptr") { + memcpy(&value_in, &value_out, sizeof(hipKernelNodeAttrValue)); + value_in.accessPolicyWindow.base_ptr = nullptr; + ret = hipGraphKernelNodeSetAttribute(kernel_vecAdd, + hipKernelNodeAttributeAccessPolicyWindow, &value_in); + REQUIRE(hipSuccess == ret); + } + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(stream)); +} From 6931cb5bfddfac77066e6caca18b080b8770ff7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 22:12:09 +0100 Subject: [PATCH 02/19] EXSWHTEC-189 - Implement new and update existing tests for the hipGraph*MemcpyNode1D family of APIs #14 Change-Id: I0f5e936fee6912ea24cc80c1013cf38ed41ff851 [ROCm/hip-tests commit: efddf0908226a3f65ffaeefbf5414f69a58e5ca7] --- .../catch/include/memcpy1d_tests_common.hh | 5 +- .../hip-tests/catch/unit/graph/CMakeLists.txt | 4 + .../unit/graph/hipGraphAddMemcpyNode1D.cc | 362 
++++++++---------- .../unit/graph/hipGraphAddMemcpyNode1D_old.cc | 242 ++++++++++++ .../hipGraphExecMemcpyNodeSetParams1D.cc | 357 +++++++++-------- .../hipGraphExecMemcpyNodeSetParams1D_old.cc | 201 ++++++++++ .../graph/hipGraphMemcpyNodeSetParams1D.cc | 285 +++++++------- .../hipGraphMemcpyNodeSetParams1D_old.cc | 172 +++++++++ 8 files changed, 1128 insertions(+), 500 deletions(-) create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D_old.cc create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D_old.cc create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D_old.cc diff --git a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh index c14e6db444..d357d992cb 100644 --- a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh +++ b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh @@ -24,10 +24,10 @@ THE SOFTWARE. #include -#include #include -#include +#include #include +#include static inline unsigned int GenerateLinearAllocationFlagCombinations( const LinearAllocs allocation_type) { @@ -141,7 +141,6 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = int can_access_peer = 0; HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); if (!can_access_peer) { - INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device); return; } HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); diff --git a/projects/hip-tests/catch/unit/graph/CMakeLists.txt b/projects/hip-tests/catch/unit/graph/CMakeLists.txt index e1ee3f1907..2d0a3c50ff 100644 --- a/projects/hip-tests/catch/unit/graph/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/graph/CMakeLists.txt @@ -54,6 +54,7 @@ set(TEST_SRC hipGraphAddMemcpyNode1D.cc hipGraphAddChildGraphNode.cc hipGraphNodeGetType.cc + hipGraphExecMemcpyNodeSetParams1D_old.cc 
hipGraphExecMemcpyNodeSetParams1D.cc hipGraphGetEdges.cc hipGraphGetEdges_old.cc @@ -72,6 +73,8 @@ set(TEST_SRC hipGraphEventWaitNodeGetEvent.cc hipGraphExecMemcpyNodeSetParams.cc hipStreamBeginCapture.cc + hipGraphAddMemcpyNode1D_old.cc + hipGraphAddMemcpyNode1D.cc hipStreamBeginCapture_old.cc hipStreamIsCapturing.cc hipStreamIsCapturing_old.cc) @@ -105,6 +108,7 @@ set(TEST_SRC hipGraphLaunch.cc hipGraphLaunch_old.cc hipGraphMemcpyNodeSetParams1D.cc + hipGraphMemcpyNodeSetParams1D_old.cc hipGraphExecMemcpyNodeSetParamsToSymbol_old.cc hipGraphExecMemcpyNodeSetParamsToSymbol.cc hipGraphNodeGetDependentNodes.cc diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D.cc index 40c8ef1847..83ae815019 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D.cc @@ -6,237 +6,179 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : -Functional - -1) Add 1D memcpy node to graph and verify memcpy operation is success for all memcpy kinds(H2D, D2H and D2D). - Memcpy nodes are added and assigned to default device. -2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and - verify the results. -3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer - kind hipMemcpyHostToHost. Validate the output. - -Negative - -1) Pass pGraphNode as nullptr and check if api returns error. -2) When graph is un-initialized argument(skipping graph creation), api should return error code. -3) Passing pDependencies as nullptr, api should return success. -4) When numDependencies is max(size_t) and pDependencies is not valid ptr, api expected to return error code. -5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error. -6) When destination ptr is nullptr, api expected to return error code. -7) When source ptr is nullptr, api expected to return error code. -8) If count is more than allocated size for source and destination ptr, error code is returned. -9) If count is less than or equal to allocated size of source and destination ptr, api should return success. 
-*/ +#include #include -#include -#include -#include - -static void validateMemcpyNode1DArray(bool peerAccess) { - constexpr int SIZE{32}; - int harray1D[SIZE]{}; - int harray1Dres[SIZE]{}; - hipGraph_t graph; - hipArray_t devArray1, devArray2; - hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D; - constexpr int numBytes{SIZE * sizeof(int)}; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipMalloc(&devArray1, numBytes)); - HIP_CHECK(hipMalloc(&devArray2, numBytes)); - - // Initialize 1D object - for (int i = 0; i < SIZE; i++) { - harray1D[i] = i + 1; - } - - HIP_CHECK(hipGraphCreate(&graph, 0)); - - // For peer access test, Memory is allocated on device(0) - // while memcpy nodes are allocated and assigned to peer device(1) - if (peerAccess) { - HIP_CHECK(hipSetDevice(1)); - } - - // Host to Device (harray1D -> devArray1) - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, - devArray1, harray1D, numBytes, hipMemcpyHostToDevice)); - - // Device to Device (devArray1 -> devArray2) - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1, - devArray2, devArray1, numBytes, hipMemcpyDeviceToDevice)); - - // Device to host (devArray2 -> harray1Dres) - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1, - harray1Dres, devArray2, numBytes, hipMemcpyDeviceToHost)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Validate result - for (int i = 0; i < SIZE; i++) { - if (harray1D[i] != harray1Dres[i]) { - INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i] - << " mismatch at : " << i); - REQUIRE(false); - } - } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - 
HIP_CHECK(hipFree(devArray1)); - HIP_CHECK(hipFree(devArray2)); -} +#include +#include +#include "graph_tests_common.hh" /** - * Functional Tests adds memcpy 1D nodes of types H2D, D2D and D2H to graph - * and verifies execution sequence by launching graph. - * - * For Default device test: Memory allocations and memory operations - * are performed from device(0). - * For Peer device test: Memory allocations happen on device(0) and memcpy operations - * are performed from device(1). + * @addtogroup hipGraphAddMemcpyNode1D hipGraphAddMemcpyNode1D + * @{ + * @ingroup GraphTest + * `hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t + * *pDependencies, size_t numDependencies, void *dst, const void *src, size_t count, hipMemcpyKind + * kind)` - Creates a 1D memcpy node and adds it to a graph */ -TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") { - SECTION("Memcpy with 1D array on default device") { - validateMemcpyNode1DArray(false); - } - - SECTION("Memcpy with 1D array on peer device") { - int numDevices{}, peerAccess{}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - } - - if (!peerAccess) { - WARN("Skipping test as peer device access is not found!"); - return; - } - validateMemcpyNode1DArray(true); - } -} - - /** - * Negative Test for API hipGraphAddMemcpyNode1D + * Test Description + * ------------------------ + * - Verify basic API behavior. A Memcpy1D node is created with parameters set according to the + * test run, after which the graph is run and the memcpy results are verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. 
+ * Test source + * ------------------------ + * - unit/graph/hipGraphAddMemcpyNode1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - int *A_d, *A_h; - hipGraph_t graph; - hipGraphNode_t memcpyNode{}; - hipError_t ret; +TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Positive_Basic") { + constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, count, direction)); + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&A_h, Nbytes)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; + }; + +#if HT_NVIDIA + MemcpyWithDirectionCommonTests(f); +#else + using namespace std::placeholders; + + SECTION("Device to host") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost)); + } + + SECTION("Device to host with default kind") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + + SECTION("Host to device") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice)); + } + + SECTION("Host to device with default kind") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + +// Disabled on AMD due to defect - EXSWHTEC-209 +#if 0 + SECTION("Host to host") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyHostToHost)); + } + + SECTION("Host to host with default kind") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } 
+#endif + + SECTION("Device to device") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + } + + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + } +#endif +} + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# kind is an invalid enum value + * -# count is zero + * -# count is larger than dst allocation size + * -# count is larger than src allocation size + * Test source + * ------------------------ + * - unit/graph/hipGraphAddMemcpyNode1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + int src[2] = {}, dst[2] = {}; - SECTION("Pass pGraphNode as nullptr") { - ret = hipGraphAddMemcpyNode1D(nullptr, graph, - nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + GraphAddNodeCommonNegativeTests( + std::bind(hipGraphAddMemcpyNode1D, _1, _2, _3, _4, dst, src, sizeof(dst), hipMemcpyDefault), + graph); + + MemcpyWithDirectionCommonNegativeTests( + std::bind(hipGraphAddMemcpyNode1D, &node, graph, nullptr, 0, _1, _2, _3, _4), dst, 
src, + sizeof(dst), hipMemcpyDefault); + +// Disabled on AMD due to defect - EXSWHTEC-211 +#if HT_NVIDIA + SECTION("count == 0") { + HIP_CHECK_ERROR( + hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, 0, hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass graph as nullptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, nullptr, - nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); +#endif + + SECTION("count larger than dst allocation size") { + LinearAllocGuard dev_dst(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dev_dst.ptr(), src, + sizeof(src), hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass pDependencies as nullptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipSuccess == ret); + + SECTION("count larger than src allocation size") { + LinearAllocGuard dev_src(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, dev_src.ptr(), + sizeof(dst), hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass numDependencies is max and pDependencies is not valid ptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, INT_MAX, A_d, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 9, A_d, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass destination ptr as nullptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 0, nullptr, A_h, Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass source ptr as nullptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 0, A_d, nullptr, Nbytes, 
hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass count as more than allocated size for source ptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 0, A_d, A_h, Nbytes+10, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass count as less than allocated size for destination ptr") { - ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, - nullptr, 0, A_d, A_h, Nbytes-10, hipMemcpyHostToDevice); - REQUIRE(hipSuccess == ret); - } - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(A_h)); + HIP_CHECK(hipGraphDestroy(graph)); } -/* - * Create two host pointers, copy the data between them by the api - * hipGraphAddMemcpyNode1D with data transfer kind hipMemcpyHostToHost. - * Validate the output. -*/ -TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") { - constexpr size_t size = 1024; - size_t numBytes{size * sizeof(int)}; - - // Host Vectors - std::vector A_h(size); - std::vector B_h(size); - // Initialization - std::iota(A_h.begin(), A_h.end(), 0); - std::fill_n(B_h.begin(), size, 0); - - hipGraph_t graph; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - hipGraphNode_t memcpyH2H; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&streamForGraph)); - - // Host to Host - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0, - B_h.data(), A_h.data(), numBytes, hipMemcpyHostToHost)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - - // Validation - REQUIRE(std::equal(A_h.begin(), A_h.end(), B_h.begin(), B_h.end())); -} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D_old.cc 
b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D_old.cc new file mode 100644 index 0000000000..40c8ef1847 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode1D_old.cc @@ -0,0 +1,242 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : +Functional - +1) Add 1D memcpy node to graph and verify memcpy operation is success for all memcpy kinds(H2D, D2H and D2D). + Memcpy nodes are added and assigned to default device. +2) Allocate memory on default device(Dev 0), Perform memcpy operation for 1D arrays on Peer device(Dev 1) and + verify the results. +3) Create two host pointers, copy the data between them by the api hipGraphAddMemcpyNode1D with data transfer + kind hipMemcpyHostToHost. Validate the output. + +Negative - +1) Pass pGraphNode as nullptr and check if api returns error. 
+2) When graph is un-initialized argument(skipping graph creation), api should return error code. +3) Passing pDependencies as nullptr, api should return success. +4) When numDependencies is max(size_t) and pDependencies is not valid ptr, api expected to return error code. +5) When pDependencies is nullptr, but numDependencies is non-zero, api expected to return error. +6) When destination ptr is nullptr, api expected to return error code. +7) When source ptr is nullptr, api expected to return error code. +8) If count is more than allocated size for source and destination ptr, error code is returned. +9) If count is less than or equal to allocated size of source and destination ptr, api should return success. +*/ + +#include +#include +#include +#include + +static void validateMemcpyNode1DArray(bool peerAccess) { + constexpr int SIZE{32}; + int harray1D[SIZE]{}; + int harray1Dres[SIZE]{}; + hipGraph_t graph; + hipArray_t devArray1, devArray2; + hipGraphNode_t memcpyH2D, memcpyD2H, memcpyD2D; + constexpr int numBytes{SIZE * sizeof(int)}; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipMalloc(&devArray1, numBytes)); + HIP_CHECK(hipMalloc(&devArray2, numBytes)); + + // Initialize 1D object + for (int i = 0; i < SIZE; i++) { + harray1D[i] = i + 1; + } + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // For peer access test, Memory is allocated on device(0) + // while memcpy nodes are allocated and assigned to peer device(1) + if (peerAccess) { + HIP_CHECK(hipSetDevice(1)); + } + + // Host to Device (harray1D -> devArray1) + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, + devArray1, harray1D, numBytes, hipMemcpyHostToDevice)); + + // Device to Device (devArray1 -> devArray2) + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2D, graph, &memcpyH2D, 1, + devArray2, devArray1, numBytes, hipMemcpyDeviceToDevice)); + + // Device to host (devArray2 -> harray1Dres) 
+ HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H, graph, &memcpyD2D, 1, + harray1Dres, devArray2, numBytes, hipMemcpyDeviceToHost)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate result + for (int i = 0; i < SIZE; i++) { + if (harray1D[i] != harray1Dres[i]) { + INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i] + << " mismatch at : " << i); + REQUIRE(false); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(devArray1)); + HIP_CHECK(hipFree(devArray2)); +} + + +/** + * Functional Tests adds memcpy 1D nodes of types H2D, D2D and D2H to graph + * and verifies execution sequence by launching graph. + * + * For Default device test: Memory allocations and memory operations + * are performed from device(0). + * For Peer device test: Memory allocations happen on device(0) and memcpy operations + * are performed from device(1). 
+ */ +TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Functional") { + SECTION("Memcpy with 1D array on default device") { + validateMemcpyNode1DArray(false); + } + + SECTION("Memcpy with 1D array on peer device") { + int numDevices{}, peerAccess{}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + } + + if (!peerAccess) { + WARN("Skipping test as peer device access is not found!"); + return; + } + validateMemcpyNode1DArray(true); + } +} + + + +/** + * Negative Test for API hipGraphAddMemcpyNode1D + */ +TEST_CASE("Unit_hipGraphAddMemcpyNode1D_Negative") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + int *A_d, *A_h; + hipGraph_t graph; + hipGraphNode_t memcpyNode{}; + hipError_t ret; + + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&A_h, Nbytes)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + SECTION("Pass pGraphNode as nullptr") { + ret = hipGraphAddMemcpyNode1D(nullptr, graph, + nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass graph as nullptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, nullptr, + nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass pDependencies as nullptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 0, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipSuccess == ret); + } + SECTION("Pass numDependencies is max and pDependencies is not valid ptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, INT_MAX, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass pDependencies as nullptr, but numDependencies is non-zero") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 9, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass destination ptr as nullptr") { + ret 
= hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 0, nullptr, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass source ptr as nullptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 0, A_d, nullptr, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass count as more than allocated size for source ptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 0, A_d, A_h, Nbytes+10, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass count as less than allocated size for destination ptr") { + ret = hipGraphAddMemcpyNode1D(&memcpyNode, graph, + nullptr, 0, A_d, A_h, Nbytes-10, hipMemcpyHostToDevice); + REQUIRE(hipSuccess == ret); + } + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(A_h)); + HIP_CHECK(hipGraphDestroy(graph)); +} +/* + * Create two host pointers, copy the data between them by the api + * hipGraphAddMemcpyNode1D with data transfer kind hipMemcpyHostToHost. + * Validate the output. 
+*/ +TEST_CASE("Unit_hipGraphAddMemcpyNode1D_HostToHost") { + constexpr size_t size = 1024; + size_t numBytes{size * sizeof(int)}; + + // Host Vectors + std::vector A_h(size); + std::vector B_h(size); + // Initialization + std::iota(A_h.begin(), A_h.end(), 0); + std::fill_n(B_h.begin(), size, 0); + + hipGraph_t graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + hipGraphNode_t memcpyH2H; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + // Host to Host + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0, + B_h.data(), A_h.data(), numBytes, hipMemcpyHostToHost)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + + // Validation + REQUIRE(std::equal(A_h.begin(), A_h.end(), B_h.begin(), B_h.end())); +} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc index 03e97d32e6..9a01d6d0ae 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc @@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,182 +19,235 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* -Testcase Scenarios : -Functional- -1) Instantiate a graph with memcpy node, obtain executable graph and update the - node params with set exec api call. Make sure they are taking effect. -Negative- -1) Pass hGraphExec as nullptr and check if api returns error. -2) Pass GraphNode as nullptr and check if api returns error. -3) Pass destination ptr is nullptr, api expected to return error code. -4) Pass source ptr is nullptr, api expected to return error code. -5) Pass count as zero, api expected to return error code. -6) Pass same pointer as source ptr and destination ptr, api expected to return error code. -7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code. -7) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code. -8) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code. -9) If count is more than allocated size for source and destination ptr, api should return error code. -10) If count is less than allocated size for source and destination ptr, api should return error code. -11) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code. -*/ +#include #include -#include -#include +#include +#include -/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios. 
+#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphExecMemcpyNodeSetParams1D hipGraphExecMemcpyNodeSetParams1D + * @{ + * @ingroup GraphTest + * `hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipGraphNode_t node, void *dst, + * const void *src, size_t count, hipMemcpyKind kind)` - Sets the parameters for a memcpy node in + * the given graphExec to perform a 1-dimensional copy */ -TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - int *A_d; - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - int *A_h = reinterpret_cast(malloc(Nbytes)); - REQUIRE(A_h != nullptr); - memset(A_h, 0, Nbytes); +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * executable graph is run and the results of the memcpy verified. The test is run for all possible + * memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as well as half + * page and full page allocation sizes. 
Test source + * ------------------------ + * - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Positive_Basic") { + constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + const auto offset_src = reinterpret_cast(src) + 1; + const auto offset_dst = reinterpret_cast(dst) + 1; + HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, offset_dst, offset_src, count - 1, + direction)); + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, count, direction)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); - hipError_t ret; - hipGraphNode_t memcpyH2D; - hipGraph_t graph; - hipGraphExec_t graphExec; + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + return hipSuccess; + }; + +#if HT_NVIDIA + MemcpyWithDirectionCommonTests(f); +#else + using namespace std::placeholders; + + SECTION("Device to host") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost)); + } + + SECTION("Host to device") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + } + + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + SECTION("Peer access disabled") { + 
MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + } + +// Disabled on AMD due to defect - EXSWHTEC-209 +#if 0 + SECTION("Host to host") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyHostToHost)); + } + + SECTION("Host to host with default kind") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-210 +#if 0 + SECTION("Device to host with default kind") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + + SECTION("Host to device with default kind") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } +#endif + +#endif +} + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# pGraphExec is nullptr + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# kind is an invalid enum value + * -# count is zero + * -# count is larger than dst allocation size + * -# count is larger than src allocation size + * Test source + * ------------------------ + * - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); - // Instantiate the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - SECTION("Pass hGraphExec as nullptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d, A_h, - Nbytes, hipMemcpyHostToDevice); - 
REQUIRE(hipErrorInvalidValue == ret); + int src[2] = {}, dst[2] = {}; + + hipGraphNode_t node = nullptr; + HIP_CHECK( + hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR( + hipGraphExecMemcpyNodeSetParams1D(nullptr, node, dst, src, sizeof(dst), hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass GraphNode as nullptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d, A_h, - Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, nullptr, dst, src, sizeof(dst), + hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass destination ptr is nullptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, nullptr, A_h, - Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + MemcpyWithDirectionCommonNegativeTests( + std::bind(hipGraphExecMemcpyNodeSetParams1D, graph_exec, node, _1, _2, _3, _4), dst, src, + sizeof(dst), hipMemcpyDefault); + + SECTION("count == 0") { + HIP_CHECK_ERROR( + hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass source ptr is nullptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, nullptr, - Nbytes, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("count larger than dst allocation size") { + LinearAllocGuard dev_dst(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dev_dst.ptr(), src, + sizeof(src), hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass count as zero") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h, - 0, 
hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("count larger than src allocation size") { + LinearAllocGuard dev_src(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, dev_src.ptr(), + sizeof(dst), hipMemcpyDefault), + hipErrorInvalidValue); } - SECTION("Pass same pointer as source ptr and destination ptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d, - Nbytes, hipMemcpyDeviceToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass overlap memory where destination ptr is ahead of source ptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_d-5, - Nbytes, hipMemcpyDeviceToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass overlap memory where source ptr is ahead of destination ptr") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d+5, A_d, - Nbytes, hipMemcpyDeviceToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Copy more than allocated memory") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h, - Nbytes+8, hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Copy less than allocated memory") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h, - Nbytes-8, hipMemcpyHostToDevice); - REQUIRE(hipSuccess == ret); - } - SECTION("Change the hipMemcpyKind from H2D to D2H") { - ret = hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d, A_h, - Nbytes, hipMemcpyDeviceToHost); - REQUIRE(hipSuccess != ret); - } - HIP_CHECK(hipFree(A_d)); - free(A_h); - HIP_CHECK(hipGraphExecDestroy(graphExec)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); HIP_CHECK(hipGraphDestroy(graph)); } -/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios. +/** + * Test Description + * ------------------------ + * - Verify that memcpy direction cannot be altered in an executable graph. 
The test is run for + * all memcpy directions with appropriate memory allocations. + * Test source + * ------------------------ + * - unit/graph/hipGraphExecMemcpyNodeSetParams1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative_Changing_Memcpy_Direction") { + int host; + LinearAllocGuard dev(LinearAllocs::hipMalloc, sizeof(int)); - int *hData = reinterpret_cast(malloc(Nbytes)); - REQUIRE(hData != nullptr); - memset(hData, 0, Nbytes); - - hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; - hipGraphNode_t kernel_vecAdd; - hipKernelNodeParams kernelNodeParams{}; - hipGraph_t graph; - hipGraphExec_t graphExec; - hipStream_t streamForGraph; - - HIP_CHECK(hipStreamCreate(&streamForGraph)); - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + const auto [dir, src, dst] = + GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host), + std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()), + std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host), + std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr())); + hipGraph_t graph = nullptr; HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(int), dir)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); + hipGraphExec_t graph_exec = nullptr; + 
HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, hipMemcpyDeviceToHost)); + const auto set_dir = GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost, + hipMemcpyDeviceToDevice, hipMemcpyDefault); + if (dir == set_dir) { + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + return; + } - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); + HIP_CHECK_ERROR( + hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, sizeof(int), set_dir), + hipErrorInvalidValue); - // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); - - // Instantiate the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - - HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData, - C_d, Nbytes, hipMemcpyDeviceToHost)); - - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Verify graph execution result - HipTest::checkVectorADD(A_h, B_h, hData, N); - - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); HIP_CHECK(hipGraphDestroy(graph)); - free(hData); } diff --git 
a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D_old.cc new file mode 100644 index 0000000000..0a28e6f31e --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams1D_old.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +Testcase Scenarios : +Functional- +1) Instantiate a graph with memcpy node, obtain executable graph and update the + node params with set exec api call. Make sure they are taking effect. +Negative- +1) Pass hGraphExec as nullptr and check if api returns error. +2) Pass GraphNode as nullptr and check if api returns error. +3) Pass destination ptr is nullptr, api expected to return error code. +4) Pass source ptr is nullptr, api expected to return error code. +5) Pass count as zero, api expected to return error code. 
+6) Pass same pointer as source ptr and destination ptr, api expected to return error code. +7) Pass overlap memory address as source ptr and destination ptr, api expected to return error code. +7) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code. +8) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code. +9) If count is more than allocated size for source and destination ptr, api should return error code. +10) If count is less than allocated size for source and destination ptr, api should return error code. +11) Change the hipMemcpyKind from H2D to D2H but allocate pointer memory for H2D, api should return error code. +*/ + +#include +#include +#include +#include + +/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Functional scenarios. + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + + int *hData = reinterpret_cast(malloc(Nbytes)); + REQUIRE(hData != nullptr); + memset(hData, 0, Nbytes); + + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + hipGraph_t graph; + hipGraphExec_t graphExec; + hipStream_t streamForGraph; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, + Nbytes, hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, + Nbytes, 
hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, + Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, + &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + + // Instantiate the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyD2H_C, hData, + C_d, Nbytes, hipMemcpyDeviceToHost)); + + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, hData, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphDestroy(graph)); + free(hData); +} + +/* Test verifies hipGraphExecMemcpyNodeSetParams1D API Negative scenarios. 
+ */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + + LinearAllocGuard A_d(LinearAllocs::hipMalloc, Nbytes); + LinearAllocGuard A_h(LinearAllocs::malloc, Nbytes); + memset(A_h.ptr(), 0, Nbytes); + + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memcpyH2D; + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D, graph, nullptr, 0, A_d.ptr(), A_h.ptr(), + Nbytes, hipMemcpyHostToDevice)); + // Instantiate the graph + hipGraphExec_t graphExec; + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + + SECTION("Pass hGraphExec as nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(nullptr, memcpyH2D, A_d.ptr(), + A_h.ptr(), Nbytes, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass GraphNode as nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, nullptr, A_d.ptr(), + A_h.ptr(), Nbytes, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass destination ptr is nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, + nullptr, A_h.ptr(), Nbytes, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass source ptr is nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(), + nullptr, Nbytes, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass count as zero") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(), + A_h.ptr(), 0, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass same pointer as source ptr and destination ptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(), + A_d.ptr(), Nbytes, + hipMemcpyDeviceToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass overlap memory where destination ptr is ahead of source ptr") { + 
HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(), + A_d.ptr() - 5, Nbytes, + hipMemcpyDeviceToDevice), + hipErrorInvalidValue); + } + + SECTION("Pass overlap memory where source ptr is ahead of destination ptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, + A_d.ptr() + 5, A_d.ptr(), Nbytes, + hipMemcpyDeviceToDevice), + hipErrorInvalidValue); + } + + SECTION("Copy more than allocated memory") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams1D(graphExec, memcpyH2D, A_d.ptr(), + A_h.ptr(), Nbytes + 8, + hipMemcpyHostToDevice), + hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D.cc index 86e439e528..fa22c6ee2c 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D.cc @@ -6,8 +6,10 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,169 +19,180 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : -Functional- -1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure they are taking effect. 
-Negative- -1) Pass pGraphNode as nullptr and check if api returns error. -2) Pass destination ptr is nullptr, api expected to return error code. -3) Pass source ptr is nullptr, api expected to return error code. -4) Pass count as zero, api expected to return error code. -5) Pass same pointer as source ptr and destination ptr, api expected to return error code. -6) Pass overlap memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected to return error code. -7) Pass overlap memory as source ptr and destination ptr where destination ptr is ahead of source ptr, api expected to return error code. -8) If count is more than allocated size for source and destination ptr, api should return error code. -9) If count is less than allocated size for source and destination ptr, api should return error code. -*/ +#include #include -#include -#include +#include +#include -/* Test verifies hipGraphMemcpyNodeSetParams1D API Negative scenarios. +#include "graph_tests_common.hh" + +static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction) { + switch (direction) { + case hipMemcpyHostToDevice: + return hipMemcpyDeviceToHost; + case hipMemcpyDeviceToHost: + return hipMemcpyHostToDevice; + default: + return direction; + } +}; + +/** + * @addtogroup hipGraphMemcpyNodeSetParams1D hipGraphMemcpyNodeSetParams1D + * @{ + * @ingroup GraphTest + * `hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, const void *src, size_t count, + * hipMemcpyKind kind)` - Sets a memcpy node's parameters to perform a 1-dimensional copy */ -TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - int *A_d, *A_h; - hipGraphNode_t memcpyNode{}; - hipError_t ret; - HIP_CHECK(hipMalloc(&A_d, Nbytes)); - HIP_CHECK(hipMalloc(&A_h, Nbytes)); +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a 
node with valid but + * incorrect parameters, and then setting them to the correct values after which the graph is + * executed and the results of the memcpy verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. + * Test source + * ------------------------ + * - unit/graph/hipGraphMemcpyNodeSetParams1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Positive_Basic") { + constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, src, dst, count / 2, + ReverseMemcpyDirection(direction))); + HIP_CHECK(hipGraphMemcpyNodeSetParams1D(node, dst, src, count, direction)); + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); - hipGraph_t graph; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); - SECTION("Pass pGraphNode as nullptr") { - ret = hipGraphMemcpyNodeSetParams1D(nullptr, A_d, A_h, Nbytes, - hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + return hipSuccess; + }; + +#if HT_NVIDIA + MemcpyWithDirectionCommonTests(f); +#else + using namespace std::placeholders; + + SECTION("Device to host") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost)); } - SECTION("Pass destination ptr is nullptr") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, nullptr, A_h, Nbytes, - 
hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("Host to device") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice)); } - SECTION("Pass source ptr is nullptr") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, nullptr, Nbytes, - hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("Device to device") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } } - SECTION("Pass count as zero") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, 0, - hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); + + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } } -#if HT_AMD - SECTION("Pass same pointer as source ptr and destination ptr") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d, Nbytes, - hipMemcpyDeviceToDevice); - REQUIRE(hipErrorInvalidValue == ret); + +// Disabled on AMD due to defect - EXSWHTEC-209 +#if 0 + SECTION("Host to host") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyHostToHost)); + } + + SECTION("Host to host with default kind") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); } #endif - SECTION("Pass overlap memory where destination ptr is ahead of source ptr") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d-5, Nbytes, - hipMemcpyDeviceToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass overlap memory where source ptr is ahead of destination ptr") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d+5, A_d, Nbytes-5, - hipMemcpyDeviceToDevice); - 
REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Copy more than allocated memory") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes+8, - hipMemcpyHostToDevice); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Copy less than allocated memory") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes-8, - hipMemcpyHostToDevice); - REQUIRE(hipSuccess == ret); - } - SECTION("Change the kind from H2D to D2H") { - ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes, - hipMemcpyDeviceToHost); - REQUIRE(hipSuccess == ret); + +// Disabled on AMD due to defect - EXSWHTEC-210 +#if 0 + SECTION("Device to host with default kind") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); } - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(A_h)); - HIP_CHECK(hipGraphDestroy(graph)); + SECTION("Host to device with default kind") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } +#endif + +#endif } -/* Test verifies hipGraphMemcpyNodeSetParams1D API Functional scenarios. 
+/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# dst is nullptr + * -# src is nullptr + * -# kind is an invalid enum value + * -# count is zero + * -# count is larger than dst allocation size + * -# count is larger than src allocation size + * Test source + * ------------------------ + * - unit/graph/hipGraphMemcpyNodeSetParams1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") { - constexpr size_t N = 1024; - constexpr size_t Nbytes = N * sizeof(int); - constexpr auto blocksPerCU = 6; // to hide latency - constexpr auto threadsPerBlock = 256; - int *A_d, *B_d, *C_d; - int *A_h, *B_h, *C_h; - size_t NElem{N}; - - int *hData = reinterpret_cast(malloc(Nbytes)); - REQUIRE(hData != nullptr); - memset(hData, 0, Nbytes); - - hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; - hipGraphNode_t kernel_vecAdd; - hipKernelNodeParams kernelNodeParams{}; - hipGraph_t graph; - hipGraphExec_t graphExec; - hipStream_t streamForGraph; - - HIP_CHECK(hipStreamCreate(&streamForGraph)); - - HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); - unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); - +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, - Nbytes, hipMemcpyHostToDevice)); + int src[2] = {}, dst[2] = {}; - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, - Nbytes, hipMemcpyHostToDevice)); + hipGraphNode_t node = nullptr; + HIP_CHECK( + hipGraphAddMemcpyNode1D(&node, graph, nullptr, 0, dst, src, sizeof(dst), hipMemcpyDefault)); - HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, - Nbytes, 
hipMemcpyDeviceToHost)); - HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_C, hData, C_d, Nbytes, - hipMemcpyDeviceToHost)); + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(nullptr, dst, src, sizeof(dst), hipMemcpyDefault), + hipErrorInvalidValue); + } - void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; - kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); - kernelNodeParams.gridDim = dim3(blocks); - kernelNodeParams.blockDim = dim3(threadsPerBlock); - kernelNodeParams.sharedMemBytes = 0; - kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); - kernelNodeParams.extra = nullptr; - HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, - &kernelNodeParams)); + MemcpyWithDirectionCommonNegativeTests( + std::bind(hipGraphMemcpyNodeSetParams1D, node, _1, _2, _3, _4), dst, src, sizeof(dst), + hipMemcpyDefault); - // Create dependencies - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); - HIP_CHECK(hipGraphAddDependencies(graph, &kernel_vecAdd, &memcpyD2H_C, 1)); + SECTION("count == 0") { + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams1D(node, dst, src, 0, hipMemcpyDefault), + hipErrorInvalidValue); + } - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); + SECTION("count larger than dst allocation size") { + LinearAllocGuard dev_dst(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR( + hipGraphMemcpyNodeSetParams1D(node, dev_dst.ptr(), src, sizeof(src), hipMemcpyDefault), + hipErrorInvalidValue); + } - // Verify graph execution result - HipTest::checkVectorADD(A_h, B_h, hData, N); + SECTION("count larger than src allocation size") { + LinearAllocGuard dev_src(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK_ERROR( + 
hipGraphMemcpyNodeSetParams1D(node, dst, dev_src.ptr(), sizeof(dst), hipMemcpyDefault), + hipErrorInvalidValue); + } - HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); HIP_CHECK(hipGraphDestroy(graph)); - free(hData); } - diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D_old.cc new file mode 100644 index 0000000000..414eda51e9 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams1D_old.cc @@ -0,0 +1,172 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : +Functional- +1) Create a graph, add Memcpy node to graph, update the Memcpy node params with set and make sure +they are taking effect. Negative- 1) Pass pGraphNode as nullptr and check if api returns error. 
2) +Pass destination ptr is nullptr, api expected to return error code. 3) Pass source ptr is nullptr, +api expected to return error code. 4) Pass count as zero, api expected to return error code. 5) Pass +same pointer as source ptr and destination ptr, api expected to return error code. 6) Pass overlap +memory as source ptr and destination ptr where source ptr is ahead of destination ptr, api expected +to return error code. 7) Pass overlap memory as source ptr and destination ptr where destination ptr +is ahead of source ptr, api expected to return error code. 8) If count is more than allocated size +for source and destination ptr, api should return error code. 9) If count is less than allocated +size for source and destination ptr, api should return error code. +*/ + +#include +#include +#include + +/* Test verifies hipGraphMemcpyNodeSetParams1D API Negative scenarios. + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Negative") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + int *A_d, *A_h; + hipGraphNode_t memcpyNode{}; + hipError_t ret; + + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipMalloc(&A_h, Nbytes)); + + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyNode, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + + SECTION("Pass pGraphNode as nullptr") { + ret = hipGraphMemcpyNodeSetParams1D(nullptr, A_d, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass destination ptr is nullptr") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, nullptr, A_h, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass source ptr is nullptr") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, nullptr, Nbytes, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass count as zero") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, 0, 
hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } +#if HT_AMD + SECTION("Pass same pointer as source ptr and destination ptr") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d, Nbytes, hipMemcpyDeviceToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } +#endif + SECTION("Pass overlap memory where destination ptr is ahead of source ptr") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_d - 5, Nbytes, hipMemcpyDeviceToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass overlap memory where source ptr is ahead of destination ptr") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d + 5, A_d, Nbytes - 5, + hipMemcpyDeviceToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Copy more than allocated memory") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes + 8, hipMemcpyHostToDevice); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Copy less than allocated memory") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes - 8, hipMemcpyHostToDevice); + REQUIRE(hipSuccess == ret); + } + SECTION("Change the kind from H2D to D2H") { + ret = hipGraphMemcpyNodeSetParams1D(memcpyNode, A_d, A_h, Nbytes, hipMemcpyDeviceToHost); + REQUIRE(hipSuccess == ret); + } + + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(A_h)); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/* Test verifies hipGraphMemcpyNodeSetParams1D API Functional scenarios. 
+ */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams1D_Functional") { + constexpr size_t N = 1024; + constexpr size_t Nbytes = N * sizeof(int); + constexpr auto blocksPerCU = 6; // to hide latency + constexpr auto threadsPerBlock = 256; + int *A_d, *B_d, *C_d; + int *A_h, *B_h, *C_h; + size_t NElem{N}; + + int* hData = reinterpret_cast(malloc(Nbytes)); + REQUIRE(hData != nullptr); + memset(hData, 0, Nbytes); + + hipGraphNode_t memcpyH2D_A, memcpyH2D_B, memcpyD2H_C; + hipGraphNode_t kernel_vecAdd; + hipKernelNodeParams kernelNodeParams{}; + hipGraph_t graph; + hipGraphExec_t graphExec; + hipStream_t streamForGraph; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); + unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); + + HIP_CHECK(hipGraphCreate(&graph, 0)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, Nbytes, + hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, graph, nullptr, 0, B_d, B_h, Nbytes, + hipMemcpyHostToDevice)); + + HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_C, graph, nullptr, 0, C_h, C_d, Nbytes, + hipMemcpyDeviceToHost)); + + HIP_CHECK(hipGraphMemcpyNodeSetParams1D(memcpyD2H_C, hData, C_d, Nbytes, hipMemcpyDeviceToHost)); + + void* kernelArgs2[] = {&A_d, &B_d, &C_d, reinterpret_cast(&NElem)}; + kernelNodeParams.func = reinterpret_cast(HipTest::vectorADD); + kernelNodeParams.gridDim = dim3(blocks); + kernelNodeParams.blockDim = dim3(threadsPerBlock); + kernelNodeParams.sharedMemBytes = 0; + kernelNodeParams.kernelParams = reinterpret_cast(kernelArgs2); + kernelNodeParams.extra = nullptr; + HIP_CHECK(hipGraphAddKernelNode(&kernel_vecAdd, graph, nullptr, 0, &kernelNodeParams)); + + // Create dependencies + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_B, &kernel_vecAdd, 1)); + HIP_CHECK(hipGraphAddDependencies(graph, 
&kernel_vecAdd, &memcpyD2H_C, 1)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verify graph execution result + HipTest::checkVectorADD(A_h, B_h, hData, N); + + HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipGraphDestroy(graph)); + free(hData); +} From 25d0b6864026f4aa11255cd5e400af878619f3da Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 28 Dec 2023 21:24:09 +0000 Subject: [PATCH 03/19] EXSWHTEC-109 - Implement tests for the hipModuleLoad family of APIs and hipModuleUnload #20 Change-Id: I19d6534af2c33046dc3862372d45f32b6ccc6ad1 [ROCm/hip-tests commit: 2c6d940ac05b4981acb4e9aa70c84d0de2443e0a] --- .../catch/hipTestMain/config/config_amd_linux | 5 ++ .../hipTestMain/config/config_amd_windows | 7 ++ .../config/config_nvidia_linux.json | 1 + .../config/config_nvidia_windows.json | 1 + projects/hip-tests/catch/unit/CMakeLists.txt | 5 +- .../catch/unit/module/CMakeLists.txt | 32 ++++++++- .../catch/unit/module/empty_module.cc | 20 ++++++ .../catch/unit/module/hipModuleLoad.cc | 59 +++++++++++++++ .../catch/unit/module/hipModuleLoadData.cc | 67 +++++++++++++++++ .../catch/unit/module/hipModuleLoadDataEx.cc | 69 ++++++++++++++++++ .../catch/unit/module/hipModuleUnload.cc | 38 ++++++++++ .../catch/unit/module/hip_module_common.cc | 71 +++++++++++++++++++ .../catch/unit/module/hip_module_common.hh | 57 +++++++++++++++ .../catch/unit/module/not_a_module.txt | 0 14 files changed, 426 insertions(+), 6 deletions(-) create mode 100644 projects/hip-tests/catch/unit/module/empty_module.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleLoad.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleLoadData.cc create mode 100644 
projects/hip-tests/catch/unit/module/hipModuleLoadDataEx.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleUnload.cc create mode 100644 projects/hip-tests/catch/unit/module/hip_module_common.cc create mode 100644 projects/hip-tests/catch/unit/module/hip_module_common.hh create mode 100644 projects/hip-tests/catch/unit/module/not_a_module.txt diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 5c4cce0510..ddb82cfb77 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -257,6 +257,11 @@ "Unit_Device_Complex_hipCfma_Negative_Parameters_RTC", "Unit_Device_make_Complex_Negative_Parameters_RTC", "Unit_Device_Complex_Cast_Negative_Parameters_RTC", + "Note: Test disabled due to defect - EXSWHTEC-151", + "Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module", + "Note: Following two tests disabled due to defect - EXSWHTEC-153", + "Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String", + "Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String", #endif #if defined VEGA20 "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 9b2639734a..25436e508f 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -356,6 +356,13 @@ "Unit_hipGetMipmappedArrayLevel_Negative", "Unit_hipFreeMipmappedArray_Negative_DoubleFree", "Unit_hipFreeMipmappedArrayMultiTArray - int", + "Note: Test disabled due to defect - EXSWHTEC-151", + "Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module", + "Note: Test disabled due to defect - EXSWHTEC-152", + "Unit_hipModuleUnload_Negative_Module_Is_Nullptr", + "Note: Following two tests 
disabled due to defect - EXSWHTEC-153", + "Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String", + "Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String", #endif "End of json" ] diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json index f5d75df1b1..79d9d577ee 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux.json @@ -53,6 +53,7 @@ "Unit_atomicExch_system_Positive_Host_And_GPU - unsigned int", "Unit_atomicExch_system_Positive_Host_And_GPU - unsigned long long", "Unit_atomicExch_system_Positive_Host_And_GPU - float", + "Unit_hipModuleUnload_Negative_Double_Unload", "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===", "Unit_Device_Complex_Unary_Negative_Parameters_RTC", "Unit_Device_Complex_Binary_Negative_Parameters_RTC", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows.json index 4bd8c6dde7..3ae7e43a08 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows.json +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows.json @@ -16,6 +16,7 @@ "Unit_ChannelDescriptor_Positive_Basic_3D - long3", "Unit_ChannelDescriptor_Positive_Basic_4D - ulong4", "Unit_ChannelDescriptor_Positive_Basic_4D - long4", + "Unit_hipModuleUnload_Negative_Double_Unload", "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/356 ===", "Unit_Device_Complex_Unary_Negative_Parameters_RTC", "Unit_Device_Complex_Binary_Negative_Parameters_RTC", diff --git a/projects/hip-tests/catch/unit/CMakeLists.txt b/projects/hip-tests/catch/unit/CMakeLists.txt index 8fac51f5c6..ab39835390 100644 --- a/projects/hip-tests/catch/unit/CMakeLists.txt +++ 
b/projects/hip-tests/catch/unit/CMakeLists.txt @@ -35,12 +35,11 @@ add_subdirectory(multiThread) add_subdirectory(compiler) add_subdirectory(errorHandling) add_subdirectory(cooperativeGrps) -add_subdirectory(warp) add_subdirectory(context) -add_subdirectory(device_memory) +add_subdirectory(module) +add_subdirectory(warp) add_subdirectory(dynamicLoading) add_subdirectory(g++) -add_subdirectory(module) add_subdirectory(channelDescriptor) add_subdirectory(executionControl) add_subdirectory(vector_types) diff --git a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index afdf8c4f0f..262f8ff7dd 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -19,14 +19,29 @@ # SOFTWARE. # Common Tests - Test independent of all platforms -if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC - hipExtModuleLaunchKernel.cc + hip_module_common.cc + hipModuleLoad.cc + hipModuleLoadData.cc + hipModuleLoadDataEx.cc + hipModuleUnload.cc ) +add_custom_target(empty_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} + ${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc + -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/module/empty_module.code + -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ + -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) + # Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906" # having space at the start/end of OFFLOAD_ARCH_STR can cause build failures +if(HIP_PLATFORM MATCHES "amd") +set(TEST_SRC + ${TEST_SRC} + hipExtModuleLaunchKernel.cc) + add_custom_target(copyKernel.code COMMAND ${CMAKE_CXX_COMPILER} -mcode-object-version=5 --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/copyKernel.cc @@ -100,11 +115,22 @@ add_custom_target(copiousArgKernel17.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include 
--rocm-path=${ROCM_PATH}) endif() +endif() +if(HIP_PLATFORM MATCHES "amd") + set(RTCLIB "hiprtc") +else() + set(RTCLIB "nvrtc") +endif() hip_add_exe_to_target(NAME ModuleTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) + TEST_TARGET_NAME build_tests + LINKER_LIBS ${RTCLIB} + COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) +add_dependencies(build_tests empty_module.code) + +if(HIP_PLATFORM MATCHES "amd") add_dependencies(build_tests copyKernel.code copyKernel.s) if(UNIX) add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code diff --git a/projects/hip-tests/catch/unit/module/empty_module.cc b/projects/hip-tests/catch/unit/module/empty_module.cc new file mode 100644 index 0000000000..d77f3e5733 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/empty_module.cc @@ -0,0 +1,20 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleLoad.cc b/projects/hip-tests/catch/unit/module/hipModuleLoad.cc new file mode 100644 index 0000000000..5812c6a698 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleLoad.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +TEST_CASE("Unit_hipModuleLoad_Positive_Basic") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module = nullptr; + HIP_CHECK(hipModuleLoad(&module, "empty_module.code")); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); +} + +TEST_CASE("Unit_hipModuleLoad_Negative_Parameters") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module; + + SECTION("module == nullptr") { + HIP_CHECK_ERROR(hipModuleLoad(nullptr, "empty_module.code"), hipErrorInvalidValue); + } + + SECTION("fname == nullptr") { + HIP_CHECK_ERROR(hipModuleLoad(&module, nullptr), hipErrorInvalidValue); + } + + SECTION("fname == empty string") { + HIP_CHECK_ERROR(hipModuleLoad(&module, ""), hipErrorInvalidValue); + } + + SECTION("fname == non existent file") { + HIP_CHECK_ERROR(hipModuleLoad(&module, "non existent file"), hipErrorFileNotFound); + } +} + +TEST_CASE("Unit_hipModuleLoad_Negative_Load_From_A_File_That_Is_Not_A_Module") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module; + + HIP_CHECK_ERROR(hipModuleLoad(&module, "not_a_module.txt"), hipErrorInvalidImage); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc b/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc new file mode 100644 index 0000000000..4d364f1d33 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleLoadData.cc @@ -0,0 +1,67 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include + + +TEST_CASE("Unit_hipModuleLoadData_Positive_Basic") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module = nullptr; + + SECTION("Load compiled module from file") { + const auto loaded_module = LoadModuleIntoBuffer("empty_module.code"); + HIP_CHECK(hipModuleLoadData(&module, loaded_module.data())); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } + + SECTION("Load RTCd module") { + const auto rtc = CreateRTCCharArray(R"(extern "C" __global__ void kernel() {})"); + HIP_CHECK(hipModuleLoadData(&module, rtc.data())); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } +} + +TEST_CASE("Unit_hipModuleLoadData_Negative_Parameters") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module; + + SECTION("module == nullptr") { + const auto loaded_module = LoadModuleIntoBuffer("empty_module.code"); + HIP_CHECK_ERROR(hipModuleLoadData(nullptr, loaded_module.data()), hipErrorInvalidValue); + LoadModuleIntoBuffer("empty_module.code"); + } + + SECTION("image == nullptr") { + HIP_CHECK_ERROR(hipModuleLoadData(&module, nullptr), hipErrorInvalidValue); + } +} + +TEST_CASE("Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module; + + HIP_CHECK_ERROR(hipModuleLoadData(&module, ""), hipErrorInvalidImage); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleLoadDataEx.cc b/projects/hip-tests/catch/unit/module/hipModuleLoadDataEx.cc new file mode 100644 index 0000000000..2ee3f833fe --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleLoadDataEx.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include + + +TEST_CASE("Unit_hipModuleLoadDataEx_Positive_Basic") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module = nullptr; + + SECTION("Load compiled module from file") { + const auto loaded_module = LoadModuleIntoBuffer("empty_module.code"); + HIP_CHECK(hipModuleLoadDataEx(&module, loaded_module.data(), 0, nullptr, nullptr)); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } + + SECTION("Load RTCd module") { + const auto rtc = CreateRTCCharArray(R"(extern "C" __global__ void kernel() {})"); + HIP_CHECK(hipModuleLoadDataEx(&module, rtc.data(), 0, nullptr, nullptr)); + REQUIRE(module != nullptr); + HIP_CHECK(hipModuleUnload(module)); + } +} + +TEST_CASE("Unit_hipModuleLoadDataEx_Negative_Parameters") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module = nullptr; + + SECTION("module == nullptr") { + const auto loaded_module = LoadModuleIntoBuffer("empty_module.code"); + HIP_CHECK_ERROR(hipModuleLoadDataEx(nullptr, loaded_module.data(), 0, nullptr, nullptr), + hipErrorInvalidValue); + LoadModuleIntoBuffer("empty_module.code"); + } + + SECTION("image == nullptr") { + HIP_CHECK_ERROR(hipModuleLoadDataEx(&module, nullptr, 0, nullptr, nullptr), + hipErrorInvalidValue); + } +} + +TEST_CASE("Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String") { + HIP_CHECK(hipFree(nullptr)); + hipModule_t module; + + HIP_CHECK_ERROR(hipModuleLoadDataEx(&module, "", 0, nullptr, nullptr), hipErrorInvalidImage); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleUnload.cc b/projects/hip-tests/catch/unit/module/hipModuleUnload.cc new file mode 100644 index 0000000000..914b66c89f --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleUnload.cc @@ -0,0 +1,38 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +TEST_CASE("Unit_hipModuleUnload_Negative_Module_Is_Nullptr") { + HIP_CHECK(hipFree(nullptr)); + + HIP_CHECK_ERROR(hipModuleUnload(nullptr), hipErrorInvalidResourceHandle); +} + +TEST_CASE("Unit_hipModuleUnload_Negative_Double_Unload") { + HIP_CHECK(hipFree(nullptr)); + + hipModule_t module = nullptr; + HIP_CHECK(hipModuleLoad(&module, "empty_module.code")); + HIP_CHECK(hipModuleUnload(module)); + HIP_CHECK_ERROR(hipModuleUnload(module), hipErrorNotFound); +} diff --git a/projects/hip-tests/catch/unit/module/hip_module_common.cc b/projects/hip-tests/catch/unit/module/hip_module_common.cc new file mode 100644 index 0000000000..4e5e7de581 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hip_module_common.cc @@ -0,0 +1,71 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include + +#include +#include + +ModuleGuard ModuleGuard::LoadModule(const char* fname) { + hipModule_t module = nullptr; + HIP_CHECK(hipModuleLoad(&module, fname)); + return ModuleGuard{module}; +} + +ModuleGuard ModuleGuard::LoadModuleDataFile(const char* fname) { + const auto loaded_module = LoadModuleIntoBuffer(fname); + hipModule_t module = nullptr; + HIP_CHECK(hipModuleLoadData(&module, loaded_module.data())); + return ModuleGuard{module}; +} + +ModuleGuard ModuleGuard::LoadModuleDataRTC(const char* code) { + const auto rtc = CreateRTCCharArray(code); + hipModule_t module = nullptr; + HIP_CHECK(hipModuleLoadData(&module, rtc.data())); + return ModuleGuard{module}; +} + +// Load module into buffer instead of mapping file to avoid platform specific mechanisms +std::vector LoadModuleIntoBuffer(const char* path_string) { + std::experimental::filesystem::path p(path_string); + const auto file_size = std::experimental::filesystem::file_size(p); + std::ifstream f(p, std::ios::binary | std::ios::in); + REQUIRE(f); + std::vector empty_module(file_size); + REQUIRE(f.read(empty_module.data(), file_size)); + return empty_module; +} + +std::vector CreateRTCCharArray(const char* src) { + hiprtcProgram prog; + HIPRTC_CHECK(hiprtcCreateProgram(&prog, src, "prog", 0, nullptr, nullptr)); + HIPRTC_CHECK(hiprtcCompileProgram(prog, 0, nullptr)); + size_t code_size = 0; + HIPRTC_CHECK(hiprtcGetCodeSize(prog, &code_size)); + std::vector code(code_size, '\0'); + HIPRTC_CHECK(hiprtcGetCode(prog, code.data())); + HIPRTC_CHECK(hiprtcDestroyProgram(&prog)); + return code; +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hip_module_common.hh b/projects/hip-tests/catch/unit/module/hip_module_common.hh new file mode 100644 index 0000000000..41b153b3a3 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hip_module_common.hh @@ -0,0 +1,57 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. 
All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +#include + +class ModuleGuard { + public: + ~ModuleGuard() { static_cast(hipModuleUnload(module_)); } + + ModuleGuard(const ModuleGuard&) = delete; + ModuleGuard(ModuleGuard&&) = delete; + + static ModuleGuard LoadModule(const char* fname); + + static ModuleGuard LoadModuleDataFile(const char* fname); + + static ModuleGuard LoadModuleDataRTC(const char* code); + + hipModule_t module() const { return module_; } + + private: + ModuleGuard(const hipModule_t module) : module_{module} {} + hipModule_t module_ = nullptr; +}; + +// Load module into buffer instead of mapping file to avoid platform specific mechanisms +std::vector LoadModuleIntoBuffer(const char* path_string); + +std::vector CreateRTCCharArray(const char* src); + +inline hipFunction_t GetKernel(const hipModule_t module, const char* kname) { + hipFunction_t kernel = nullptr; + HIP_CHECK(hipModuleGetFunction(&kernel, module, kname)); + return kernel; +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/not_a_module.txt b/projects/hip-tests/catch/unit/module/not_a_module.txt new file mode 100644 index 0000000000..e69de29bb2 From b1e2ab3345efaac2da3a104bcc37f1e8d6302e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:14:45 +0100 Subject: [PATCH 04/19] EXSWHTEC-112 - Implement tests for hipModuleGetFunction #21 Change-Id: Id0eee32b3f330c8d9156df30d3b0733082a6ba0a [ROCm/hip-tests commit: d143e4c4865109b26be0cd65b0dfbbf52b3bb82d] --- .../catch/unit/module/CMakeLists.txt | 15 +++- .../catch/unit/module/get_function_module.cc | 28 +++++++ .../catch/unit/module/hipModuleGetFunction.cc | 74 +++++++++++++++++++ 3 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 projects/hip-tests/catch/unit/module/get_function_module.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleGetFunction.cc diff --git 
a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index 262f8ff7dd..5e52e4575b 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -21,12 +21,18 @@ # Common Tests - Test independent of all platforms set(TEST_SRC hip_module_common.cc - hipModuleLoad.cc - hipModuleLoadData.cc - hipModuleLoadDataEx.cc - hipModuleUnload.cc + hipModuleLoad.cc + hipModuleLoadData.cc + hipModuleLoadDataEx.cc + hipModuleUnload.cc + hipModuleGetFunction.cc ) +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc -o get_function_module.code + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc) +add_custom_target(get_function_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code) + add_custom_target(empty_module.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc @@ -129,6 +135,7 @@ hip_add_exe_to_target(NAME ModuleTest COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) add_dependencies(build_tests empty_module.code) +add_dependencies(ModuleTest get_function_module) if(HIP_PLATFORM MATCHES "amd") add_dependencies(build_tests copyKernel.code copyKernel.s) diff --git a/projects/hip-tests/catch/unit/module/get_function_module.cc b/projects/hip-tests/catch/unit/module/get_function_module.cc new file mode 100644 index 0000000000..2c5a8a5636 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/get_function_module.cc @@ -0,0 +1,28 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +extern "C" { +__global__ void GlobalKernel() {} + +__device__ void DeviceKernel() {} +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleGetFunction.cc b/projects/hip-tests/catch/unit/module/hipModuleGetFunction.cc new file mode 100644 index 0000000000..676a61c33d --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleGetFunction.cc @@ -0,0 +1,74 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include + +static hipModule_t GetModule() { + HIP_CHECK(hipFree(nullptr)); + static const auto mg = ModuleGuard::LoadModule("get_function_module.code"); + return mg.module(); +} + +TEST_CASE("Unit_hipModuleGetFunction_Positive_Basic") { + hipFunction_t kernel = nullptr; + HIP_CHECK(hipModuleGetFunction(&kernel, GetModule(), "GlobalKernel")); + REQUIRE(kernel != nullptr); +} + +TEST_CASE("Unit_hipModuleGetFunction_Negative_Parameters") { + hipFunction_t kernel = nullptr; + + SECTION("function == nullptr") { + HIP_CHECK_ERROR(hipModuleGetFunction(nullptr, GetModule(), "GlobalKernel"), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-154 +#if HT_NVIDIA + SECTION("module == nullptr") { + HIP_CHECK_ERROR(hipModuleGetFunction(&kernel, nullptr, "GlobalKernel"), + hipErrorInvalidResourceHandle); + } +#endif + + SECTION("kname == nullptr") { + HIP_CHECK_ERROR(hipModuleGetFunction(&kernel, GetModule(), nullptr), hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-155 +#if HT_NVIDIA + SECTION("kname == empty string") { + HIP_CHECK_ERROR(hipModuleGetFunction(&kernel, GetModule(), ""), hipErrorInvalidValue); + } +#endif + + SECTION("kname == non existent kernel") { + HIP_CHECK_ERROR(hipModuleGetFunction(&kernel, GetModule(), "NonExistentKernel"), + hipErrorNotFound); + } + + SECTION("kname == __device__ kernel") { + HIP_CHECK_ERROR(hipModuleGetFunction(&kernel, GetModule(), "DeviceKernel"), hipErrorNotFound); + } +} \ No newline at end of file From b2f572d4a70f2651ce0de3eedde08a479023c385 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 28 Dec 2023 13:58:39 +0000 Subject: [PATCH 05/19] EXSWHTEC-111 - Implement tests for the hipModuleLaunchKernel family of APIs #22 Change-Id: I963e17c413eb0976a1073e2f02a7e5eff1db3b42 [ROCm/hip-tests commit: e7016b99cec8e3913f5a1c8a32af6523b59e9f03] --- .../catch/unit/module/CMakeLists.txt | 11 +- 
.../unit/module/hipExtModuleLaunchKernel.cc | 82 ++++-- .../unit/module/hipModuleLaunchKernel.cc | 49 ++++ .../module/hip_module_launch_kernel_common.hh | 269 ++++++++++++++++++ .../catch/unit/module/launch_kernel_module.cc | 37 +++ 5 files changed, 429 insertions(+), 19 deletions(-) create mode 100644 projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc create mode 100644 projects/hip-tests/catch/unit/module/hip_module_launch_kernel_common.hh create mode 100644 projects/hip-tests/catch/unit/module/launch_kernel_module.cc diff --git a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index 5e52e4575b..27f368f5ed 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -26,13 +26,19 @@ set(TEST_SRC hipModuleLoadDataEx.cc hipModuleUnload.cc hipModuleGetFunction.cc + hipModuleLaunchKernel.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc -o get_function_module.code - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc) + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc) add_custom_target(get_function_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code) +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc -o launch_kernel_module.code + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc) +add_custom_target(launch_kernel_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code) + add_custom_target(empty_module.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc @@ -136,6 +142,7 @@ hip_add_exe_to_target(NAME ModuleTest add_dependencies(build_tests 
empty_module.code) add_dependencies(ModuleTest get_function_module) +add_dependencies(ModuleTest launch_kernel_module) if(HIP_PLATFORM MATCHES "amd") add_dependencies(build_tests copyKernel.code copyKernel.s) @@ -143,4 +150,4 @@ if(UNIX) add_dependencies(build_tests copiousArgKernel.code copiousArgKernel0.code copiousArgKernel1.code copiousArgKernel2.code copiousArgKernel3.code copiousArgKernel16.code copiousArgKernel17.code) endif() -endif() \ No newline at end of file +endif() diff --git a/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc b/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc index b8bceb26ee..8c77b796d1 100644 --- a/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/projects/hip-tests/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -50,6 +50,8 @@ THE SOFTWARE. #include "hip/hip_ext.h" #include // NOLINT +#include "hip_module_launch_kernel_common.hh" + static constexpr auto totalWorkGroups{1024}; static constexpr auto localWorkSize{512}; static constexpr auto lastWorkSizeEven{256}; @@ -69,7 +71,7 @@ static bool searchRegExpr(const std::regex& expr, const char* filename) { assemblyfile.seekg(0, assemblyfile.end); int len = assemblyfile.tellg(); assemblyfile.seekg(0, assemblyfile.beg); - char *fbuf = new char[len + 1]; + char* fbuf = new char[len + 1]; assemblyfile.read(fbuf, len); fbuf[len] = '\0'; @@ -124,8 +126,7 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup") { auto isEven = GENERATE(0, 1); // Calculate size auto lastWorkSize = isEven ? 
lastWorkSizeEven : lastWorkSizeOdd; - size_t arraylength = - (totalWorkGroups - 1)*localWorkSize + lastWorkSize; + size_t arraylength = (totalWorkGroups - 1) * localWorkSize + lastWorkSize; size_t sizeBytes{arraylength * sizeof(int)}; // Get module and function from module hipModule_t Module; @@ -133,9 +134,9 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup") { HIP_CHECK(hipModuleLoad(&Module, fileName)); HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); // Allocate resources - int *A = new int[arraylength]; + int* A = new int[arraylength]; REQUIRE(A != nullptr); - int *B = new int[arraylength]; + int* B = new int[arraylength]; REQUIRE(B != nullptr); // Inititialize data for (size_t i = 0; i < arraylength; i++) { @@ -155,14 +156,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup") { args.buffersize = arraylength; size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; // Memcpy from A to Ad HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault)); - REQUIRE(hipErrorInvalidValue == hipExtModuleLaunchKernel(Function, - arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL, - reinterpret_cast(&config), 0)); + REQUIRE(hipErrorInvalidValue == + hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL, + reinterpret_cast(&config), 0)); HIP_CHECK(hipDeviceSynchronize()); HIP_CHECK(hipFree(Ad)); HIP_CHECK(hipFree(Bd)); @@ -194,9 +194,9 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { HIP_CHECK(hipModuleLoad(&Module, fileName)); HIP_CHECK(hipModuleGetFunction(&Function, Module, kernel_name)); // Allocate resources - int *A = new int[arraylength]; + int* A = new int[arraylength]; REQUIRE(A != nullptr); - int *B = new int[arraylength]; + int* B = new int[arraylength]; REQUIRE(B != nullptr); // 
Inititialize data for (size_t i = 0; i < arraylength; i++) { @@ -216,14 +216,12 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { args.buffersize = arraylength; size_t size = sizeof(args); - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END}; // Memcpy from A to Ad HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault)); - HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, - localWorkSize, 1, 1, 0, 0, NULL, - reinterpret_cast(&config), 0)); + HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL, + reinterpret_cast(&config), 0)); // Memcpy results back to host HIP_CHECK(hipMemcpy(B, Bd, sizeBytes, hipMemcpyDefault)); HIP_CHECK(hipDeviceSynchronize()); @@ -237,3 +235,53 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") { delete[] B; HIP_CHECK(hipModuleUnload(Module)); } + +TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Basic") { + ModuleLaunchKernelPositiveBasic(); + + SECTION("Timed kernel launch with events") { + hipEvent_t start_event = nullptr, stop_event = nullptr; + HIP_CHECK(hipEventCreate(&start_event)); + HIP_CHECK(hipEventCreate(&stop_event)); + const auto kernel = GetKernel(mg.module(), "Delay"); + int clock_rate = 0; + HIP_CHECK(hipDeviceGetAttribute(&clock_rate, hipDeviceAttributeClockRate, 0)); + uint32_t interval = 100; + uint32_t ticks_per_second = clock_rate; + void* kernel_params[2] = {&interval, &ticks_per_second}; + HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_params, nullptr, + start_event, stop_event)); + HIP_CHECK(hipDeviceSynchronize()); + auto elapsed = 0.0f; + HIP_CHECK(hipEventElapsedTime(&elapsed, start_event, stop_event)); + REQUIRE(static_cast(elapsed) >= interval); + } +} + +TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") { + 
ModuleLaunchKernelPositiveParameters(); + + SECTION("Pass only start event") { + hipEvent_t start_event = nullptr; + HIP_CHECK(hipEventCreate(&start_event)); + const auto kernel = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, + start_event, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipEventQuery(start_event)); + } + + SECTION("Pass only stop event") { + hipEvent_t stop_event = nullptr; + HIP_CHECK(hipEventCreate(&stop_event)); + const auto kernel = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, + nullptr, stop_event)); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipEventQuery(stop_event)); + } +} + +TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative_Parameters") { + ModuleLaunchKernelNegativeParameters(); +} diff --git a/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc b/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc new file mode 100644 index 0000000000..f440e8c013 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleLaunchKernel.cc @@ -0,0 +1,49 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip_module_launch_kernel_common.hh" + +#include +#include + +static hipError_t hipModuleLaunchKernelWrapper(hipFunction_t f, uint32_t gridX, uint32_t gridY, + uint32_t gridZ, uint32_t blockX, uint32_t blockY, + uint32_t blockZ, size_t sharedMemBytes, + hipStream_t hStream, void** kernelParams, + void** extra, hipEvent_t, hipEvent_t, uint32_t) { + return hipModuleLaunchKernel(f, gridX, gridY, gridZ, blockX, blockY, blockZ, sharedMemBytes, + hStream, kernelParams, extra); +} + +TEST_CASE("Unit_hipModuleLaunchKernel_Positive_Basic") { + HIP_CHECK(hipFree(nullptr)); + ModuleLaunchKernelPositiveBasic(); +} + +TEST_CASE("Unit_hipModuleLaunchKernel_Positive_Parameters") { + HIP_CHECK(hipFree(nullptr)); + ModuleLaunchKernelPositiveParameters(); +} + +TEST_CASE("Unit_hipModuleLaunchKernel_Negative_Parameters") { + HIP_CHECK(hipFree(nullptr)); + ModuleLaunchKernelNegativeParameters(); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hip_module_launch_kernel_common.hh b/projects/hip-tests/catch/unit/module/hip_module_launch_kernel_common.hh new file mode 100644 index 0000000000..91ce3a9fd2 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hip_module_launch_kernel_common.hh @@ -0,0 +1,269 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "hip_module_common.hh" + +#include +#include +#include + +inline int GetDeviceAttribute(const int device, + const hipDeviceAttribute_t attr) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); + return value; +} + +inline ModuleGuard InitModule() { + HIP_CHECK(hipFree(nullptr)); + return ModuleGuard::LoadModule("launch_kernel_module.code"); +} + +inline ModuleGuard mg{InitModule()}; + +using ExtModuleLaunchKernelSig = hipError_t(hipFunction_t, uint32_t, uint32_t, uint32_t, uint32_t, + uint32_t, uint32_t, size_t, hipStream_t, void**, void**, + hipEvent_t, hipEvent_t, uint32_t); + +template void ModuleLaunchKernelPositiveBasic() { + SECTION("Kernel with no arguments") { + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK(func(f, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(func(f, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_args, nullptr, nullptr, nullptr, 0u)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } + + SECTION("Kernel with arguments using extra") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + size_t size = sizeof(result_ptr); + // clang-format off + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &result_ptr, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + // clang-format on + 
HIP_CHECK(func(f, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, extra, nullptr, nullptr, 0u)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +template void ModuleLaunchKernelPositiveParameters() { + const auto LaunchNOPKernel = [=](unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, + unsigned int blockDimY, unsigned int blockDimZ) { + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK(func(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, nullptr, + nullptr, nullptr, nullptr, nullptr, 0u)); + HIP_CHECK(hipDeviceSynchronize()); + }; + + SECTION("gridDimX == maxGridDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimX); + LaunchNOPKernel(x, 1, 1, 1, 1, 1); + } + + SECTION("gridDimY == maxGridDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimY); + LaunchNOPKernel(1, y, 1, 1, 1, 1); + } + + SECTION("gridDimZ == maxGridDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimZ); + LaunchNOPKernel(1, 1, z, 1, 1, 1); + } + + SECTION("blockDimX == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + LaunchNOPKernel(1, 1, 1, x, 1, 1); + } + + SECTION("blockDimY == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + LaunchNOPKernel(1, 1, 1, 1, y, 1); + } + + SECTION("blockDimZ == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + LaunchNOPKernel(1, 1, 1, 1, 1, z); + } +} + +template void ModuleLaunchKernelNegativeParameters() { + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + +// Disabled on AMD due to defect - EXSWHTEC-157 +#if HT_NVIDIA + SECTION("f == nullptr") { + HIP_CHECK_ERROR( + func(nullptr, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + 
hipErrorInvalidResourceHandle); + } +#endif + + SECTION("gridDimX == 0") { + HIP_CHECK_ERROR(func(f, 0, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDimY == 0") { + HIP_CHECK_ERROR(func(f, 1, 0, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDimZ == 0") { + HIP_CHECK_ERROR(func(f, 1, 1, 0, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDimX == 0") { + HIP_CHECK_ERROR(func(f, 1, 1, 1, 0, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDimY == 0") { + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 0, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDimZ == 0") { + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 0, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-158 +#if HT_NVIDIA + SECTION("gridDimX > maxGridDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimX) + 1u; + HIP_CHECK_ERROR(func(f, x, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDimY > maxGridDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimY) + 1u; + HIP_CHECK_ERROR(func(f, 1, y, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDimZ > maxGridDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxGridDimZ) + 1u; + HIP_CHECK_ERROR(func(f, 1, 1, z, 1, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-156 +#if HT_NVIDIA + SECTION("blockDimX > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 
1u; + HIP_CHECK_ERROR(func(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDimY > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDimZ > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-162 +#if HT_NVIDIA + SECTION("blockDimX * blockDimY * blockDimZ > MaxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int dim = std::ceil(std::cbrt(max)) + 1; + HIP_CHECK_ERROR( + func(f, 1, 1, 1, dim, dim, dim, 0, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-159 +#if HT_NVIDIA + SECTION("sharedMemBytes > max shared memory per block") { + const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 1, max, nullptr, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-160 +#if HT_NVIDIA + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 0, 0, stream, nullptr, nullptr, nullptr, nullptr, 0u), + hipErrorContextIsDestroyed); + } +#endif + + SECTION("Passing kernel_args and extra simultaneously") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + int* result_ptr = result_dev.ptr(); 
+ size_t size = sizeof(result_ptr); + void* kernel_args[1] = {&result_ptr}; + // clang-format off + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &result_ptr, + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + // clang-format on + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_args, extra, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-161 +#if HT_NVIDIA + SECTION("Invalid extra") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + void* extra[0] = {}; + HIP_CHECK_ERROR(func(f, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, extra, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/launch_kernel_module.cc b/projects/hip-tests/catch/unit/module/launch_kernel_module.cc new file mode 100644 index 0000000000..01c04b45d6 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/launch_kernel_module.cc @@ -0,0 +1,37 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +extern "C" { +__global__ void NOPKernel() {} + +__global__ void Kernel42(int* out) { *out = 42; } + +// Interval is in millisecond +__global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { + while (interval--) { + uint64_t start = clock(); + while (clock() - start < ticks_per_ms) { + } + } +} +} \ No newline at end of file From de6db484605023b046fc3f80cf2e377f60bb926f Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 28 Dec 2023 21:35:27 +0000 Subject: [PATCH 06/19] EXSWHTEC-80 - Implement tests for hipModuleGetGlobal #23 Change-Id: I873ef5c977ec252425b81e3ed7ea33c861277efb [ROCm/hip-tests commit: ce7e67ac39f2b84b997471bbc693b391a085dd41] --- .../catch/hipTestMain/config/config_amd_linux | 6 + .../hipTestMain/config/config_amd_windows | 6 + .../catch/unit/module/CMakeLists.txt | 8 + .../unit/module/get_global_test_module.cc | 42 +++++ .../catch/unit/module/hipModuleGetGlobal.cc | 145 ++++++++++++++++++ .../catch/unit/module/hipModuleGetGlobal.hh | 28 ++++ 6 files changed, 235 insertions(+) create mode 100644 projects/hip-tests/catch/unit/module/get_global_test_module.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleGetGlobal.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleGetGlobal.hh diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index ddb82cfb77..5964a7aad9 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -262,6 +262,12 @@ "Note: Following two tests disabled due to defect - EXSWHTEC-153", 
"Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String", "Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String", + "Note: Test disabled due to defect - EXSWHTEC-163", + "Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-164", + "Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String", + "Note: Test disabled due to defect - EXSWHTEC-165", + "Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr", #endif #if defined VEGA20 "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 25436e508f..c26e5cdd67 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -363,6 +363,12 @@ "Note: Following two tests disabled due to defect - EXSWHTEC-153", "Unit_hipModuleLoadData_Negative_Image_Is_An_Empty_String", "Unit_hipModuleLoadDataEx_Negative_Image_Is_An_Empty_String", + "Note: Test disabled due to defect - EXSWHTEC-163", + "Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-164", + "Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String", + "Note: Test disabled due to defect - EXSWHTEC-165", + "Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr", #endif "End of json" ] diff --git a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index 27f368f5ed..3d2611753c 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -27,6 +27,7 @@ set(TEST_SRC hipModuleUnload.cc hipModuleGetFunction.cc hipModuleLaunchKernel.cc + hipModuleGetGlobal.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code @@ -39,6 +40,12 @@ add_custom_command(OUTPUT 
${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/launch_kernel_module.cc) add_custom_target(launch_kernel_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/launch_kernel_module.code) +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc -o get_global_test_module.code + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_global_test_module.cc) +add_custom_target(get_global_test_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code) + + add_custom_target(empty_module.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc @@ -143,6 +150,7 @@ hip_add_exe_to_target(NAME ModuleTest add_dependencies(build_tests empty_module.code) add_dependencies(ModuleTest get_function_module) add_dependencies(ModuleTest launch_kernel_module) +add_dependencies(ModuleTest get_global_test_module) if(HIP_PLATFORM MATCHES "amd") add_dependencies(build_tests copyKernel.code copyKernel.s) diff --git a/projects/hip-tests/catch/unit/module/get_global_test_module.cc b/projects/hip-tests/catch/unit/module/get_global_test_module.cc new file mode 100644 index 0000000000..98e58c7c54 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/get_global_test_module.cc @@ -0,0 +1,42 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" + +#include "hipModuleGetGlobal.hh" + +#define HIP_MODULE_GET_GLOBAL_TEST_DEFINE_DEVICE_GLOBALS(type) \ + __device__ type type##_var = 0; \ + __device__ type type##_arr[kArraySize] = {}; \ + extern "C" { \ + __global__ void type##_var_address_validation_kernel(void* ptr, bool* out) { \ + *out = static_cast(&type##_var) == ptr; \ + } \ + __global__ void type##_arr_address_validation_kernel(void* ptr, bool* out) { \ + *out = static_cast(type##_arr) == ptr; \ + } \ + } + +HIP_MODULE_GET_GLOBAL_TEST_DEFINE_DEVICE_GLOBALS(int) +HIP_MODULE_GET_GLOBAL_TEST_DEFINE_DEVICE_GLOBALS(float) +HIP_MODULE_GET_GLOBAL_TEST_DEFINE_DEVICE_GLOBALS(char) +HIP_MODULE_GET_GLOBAL_TEST_DEFINE_DEVICE_GLOBALS(double) \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.cc b/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.cc new file mode 100644 index 0000000000..32f46f0caf --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.cc @@ -0,0 +1,145 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "hip_module_common.hh" +#include "hipModuleGetGlobal.hh" + +template +static void HipModuleGetGlobalTest(hipModule_t module, const std::string global_name) { + constexpr auto size = N * sizeof(T); + + hipDeviceptr_t global; + size_t global_size = 0; + HIP_CHECK(hipModuleGetGlobal(&global, &global_size, module, global_name.c_str())); + REQUIRE(global != 0); + REQUIRE(size == global_size); + + hipFunction_t kernel = nullptr; + const auto kernel_name = global_name + "_address_validation_kernel"; + HIP_CHECK(hipModuleGetFunction(&kernel, module, kernel_name.c_str())); + LinearAllocGuard equal_addresses(LinearAllocs::hipMalloc, sizeof(bool)); + HIP_CHECK(hipMemset(equal_addresses.ptr(), false, sizeof(*equal_addresses.ptr()))); + bool* equal_addresses_ptr = equal_addresses.ptr(); + void* kernel_args[2] = {&global, &equal_addresses_ptr}; + HIP_CHECK(hipModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_args, nullptr)); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + bool ok; + HIP_CHECK(hipMemcpy(&ok, equal_addresses_ptr, sizeof(ok), hipMemcpyDeviceToHost)); + REQUIRE(ok); + + constexpr T expected_value = 42; + std::array fill_buffer; + std::fill_n(fill_buffer.begin(), N, expected_value); + HIP_CHECK(hipMemcpyHtoD(global, fill_buffer.data(), size)); + + + std::array read_buffer; + HIP_CHECK(hipMemcpyDtoH(read_buffer.data(), global, size)); + ArrayFindIfNot(read_buffer.data(), expected_value, read_buffer.size()); +} + +#define HIP_MODULE_GET_GLOBAL_S(expr) #expr +#define HIP_MODULE_GET_GLOBAL_TEST(type, module) \ + SECTION("array") { \ + HipModuleGetGlobalTest(module, 
HIP_MODULE_GET_GLOBAL_S(type##_arr)); \ + } \ + SECTION("scalar") { \ + HipModuleGetGlobalTest(module, HIP_MODULE_GET_GLOBAL_S(type##_var)); \ + } + +static inline hipModule_t GetModule() { + HIP_CHECK(hipFree(nullptr)); + const static auto mg = ModuleGuard::LoadModule("get_global_test_module.code"); + return mg.module(); +} + +TEST_CASE("Unit_hipModuleGetGlobal_Positive_Basic") { + hipModule_t module = GetModule(); + + SECTION("int") { HIP_MODULE_GET_GLOBAL_TEST(int, module); } + + SECTION("float") { HIP_MODULE_GET_GLOBAL_TEST(float, module); } + + SECTION("char") { HIP_MODULE_GET_GLOBAL_TEST(char, module); } + + SECTION("double") { HIP_MODULE_GET_GLOBAL_TEST(double, module); } +} + +TEST_CASE("Unit_hipModuleGetGlobal_Positive_Parameters") { + hipModule_t module = GetModule(); + hipDeviceptr_t global = 0; + size_t global_size = 0; + + SECTION("dptr == nullptr") { + HIP_CHECK(hipModuleGetGlobal(nullptr, &global_size, module, "int_var")); + } + + SECTION("bytes == nullptr") { + HIP_CHECK(hipModuleGetGlobal(&global, nullptr, module, "int_var")); + } +} + +TEST_CASE("Unit_hipModuleGetGlobal_Negative_Parameters") { + hipModule_t module = GetModule(); + hipDeviceptr_t global = 0; + size_t global_size = 0; + + SECTION("name == nullptr") { + HIP_CHECK_ERROR(hipModuleGetGlobal(&global, &global_size, module, nullptr), + hipErrorInvalidValue); + } + + SECTION("name == invalid name") { + HIP_CHECK_ERROR(hipModuleGetGlobal(&global, &global_size, module, "dummy"), hipErrorNotFound); + } +} + +TEST_CASE("Unit_hipModuleGetGlobal_Negative_Hmod_Is_Nullptr") { + hipDeviceptr_t global = 0; + size_t global_size = 0; + + HIP_CHECK_ERROR(hipModuleGetGlobal(&global, &global_size, nullptr, "int_var"), + hipErrorInvalidResourceHandle); +} + +TEST_CASE("Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String") { + hipModule_t module = GetModule(); + hipDeviceptr_t global = 0; + size_t global_size = 0; + + HIP_CHECK_ERROR(hipModuleGetGlobal(&global, &global_size, module, ""), 
hipErrorInvalidValue); +} + +TEST_CASE("Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr") { + hipModule_t module = GetModule(); + HIP_CHECK_ERROR(hipModuleGetGlobal(nullptr, nullptr, module, "int_var"), hipErrorInvalidValue); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.hh b/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.hh new file mode 100644 index 0000000000..8bd773f032 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleGetGlobal.hh @@ -0,0 +1,28 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +namespace { +constexpr size_t kArraySize = 5; +} // anonymous namespace \ No newline at end of file From d30d6a5a6236c19b03db95d8ad793cd17ca3984f Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 28 Dec 2023 21:44:41 +0000 Subject: [PATCH 07/19] EXSWHTEC-113 - Implement tests for hipModuleGetTexRef #24 Change-Id: I5fe18f2ca977a740e29dda356b68ea4eb6e0565f [ROCm/hip-tests commit: 72aa591328f6eb38cebca297862b9a96339a6d28] --- .../catch/hipTestMain/config/config_amd_linux | 4 ++ .../hipTestMain/config/config_amd_windows | 4 ++ .../catch/unit/module/CMakeLists.txt | 8 +++ .../catch/unit/module/get_tex_ref_module.cc | 24 +++++++ .../catch/unit/module/hipModuleGetTexRef.cc | 67 +++++++++++++++++++ 5 files changed, 107 insertions(+) create mode 100644 projects/hip-tests/catch/unit/module/get_tex_ref_module.cc create mode 100644 projects/hip-tests/catch/unit/module/hipModuleGetTexRef.cc diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 5964a7aad9..d24f21a772 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -268,6 +268,10 @@ "Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String", "Note: Test disabled due to defect - EXSWHTEC-165", "Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-166", + "Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-167", + "Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String", #endif #if defined VEGA20 "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index c26e5cdd67..9a780a8730 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ 
b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -369,6 +369,10 @@ "Unit_hipModuleGetGlobal_Negative_Name_Is_Empty_String", "Note: Test disabled due to defect - EXSWHTEC-165", "Unit_hipModuleGetGlobal_Negative_Dptr_And_Bytes_Are_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-166", + "Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr", + "Note: Test disabled due to defect - EXSWHTEC-167", + "Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String", #endif "End of json" ] diff --git a/projects/hip-tests/catch/unit/module/CMakeLists.txt b/projects/hip-tests/catch/unit/module/CMakeLists.txt index 3d2611753c..f8ab77055c 100644 --- a/projects/hip-tests/catch/unit/module/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/module/CMakeLists.txt @@ -28,6 +28,7 @@ set(TEST_SRC hipModuleGetFunction.cc hipModuleLaunchKernel.cc hipModuleGetGlobal.cc + hipModuleGetTexRef.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code @@ -46,6 +47,12 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.cod add_custom_target(get_global_test_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_global_test_module.code) +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc -o get_tex_ref_module.code + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_tex_ref_module.cc) +add_custom_target(get_tex_ref_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get_tex_ref_module.code) + + add_custom_target(empty_module.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/empty_module.cc @@ -151,6 +158,7 @@ add_dependencies(build_tests empty_module.code) add_dependencies(ModuleTest get_function_module) add_dependencies(ModuleTest launch_kernel_module) add_dependencies(ModuleTest get_global_test_module) +add_dependencies(ModuleTest get_tex_ref_module) if(HIP_PLATFORM MATCHES 
"amd") add_dependencies(build_tests copyKernel.code copyKernel.s) diff --git a/projects/hip-tests/catch/unit/module/get_tex_ref_module.cc b/projects/hip-tests/catch/unit/module/get_tex_ref_module.cc new file mode 100644 index 0000000000..ffe2213ea0 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/get_tex_ref_module.cc @@ -0,0 +1,24 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +texture tex; \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/module/hipModuleGetTexRef.cc b/projects/hip-tests/catch/unit/module/hipModuleGetTexRef.cc new file mode 100644 index 0000000000..1b3c8a50b4 --- /dev/null +++ b/projects/hip-tests/catch/unit/module/hipModuleGetTexRef.cc @@ -0,0 +1,67 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "hip_module_common.hh" + +#include +#include + +static hipModule_t GetModule() { + HIP_CHECK(hipFree(nullptr)); + static const auto mg = ModuleGuard::LoadModule("get_tex_ref_module.code"); + return mg.module(); +} + +TEST_CASE("Unit_hipModuleGetTexRef_Positive_Basic") { + hipTexRef tex_ref = nullptr; + HIP_CHECK(hipModuleGetTexRef(&tex_ref, GetModule(), "tex")); + REQUIRE(tex_ref != nullptr); +} + +TEST_CASE("Unit_hipModuleGetTexRef_Negative_Parameters") { + hipModule_t module = GetModule(); + hipTexRef tex_ref = nullptr; + + SECTION("texRef == nullptr") { + HIP_CHECK_ERROR(hipModuleGetTexRef(nullptr, module, "tex"), hipErrorInvalidValue); + } + + SECTION("name == nullptr") { + HIP_CHECK_ERROR(hipModuleGetTexRef(&tex_ref, module, nullptr), hipErrorInvalidValue); + } + + SECTION("name == non existent texture") { + HIP_CHECK_ERROR(hipModuleGetTexRef(&tex_ref, module, "non_existent_texture"), hipErrorNotFound); + } +} + +TEST_CASE("Unit_hipModuleGetTexRef_Negative_Hmod_Is_Nullptr") { + hipTexRef tex_ref = nullptr; + + HIP_CHECK_ERROR(hipModuleGetTexRef(&tex_ref, nullptr, "tex"), hipErrorInvalidResourceHandle); +} + +TEST_CASE("Unit_hipModuleGetTexRef_Negative_Name_Is_Empty_String") { + hipModule_t module = GetModule(); + hipTexRef tex_ref = nullptr; + + HIP_CHECK_ERROR(hipModuleGetTexRef(&tex_ref, module, ""), hipErrorInvalidValue); +} \ No newline at end of file From 836505f7b300aee6d892900c6a4276f1d7880c56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:33:15 +0100 Subject: [PATCH 08/19] EXSWHTEC-97 - Implement tests for hipMemcpy2D APIs #53 Change-Id: I674741e641b1ebd6adb5c60c05c2d8ade27510c6 [ROCm/hip-tests commit: c9e664b8e646404731b40cfc2e4d38f07a62e3aa] --- .../catch/unit/memory/CMakeLists.txt | 2 + .../catch/unit/memory/hipMemcpy2D.cc | 569 +++----------- .../catch/unit/memory/hipMemcpy2DAsync.cc | 691 ++++-------------- 
.../catch/unit/memory/hipMemcpy2DAsync_old.cc | 555 ++++++++++++++ .../catch/unit/memory/hipMemcpy2D_old.cc | 496 +++++++++++++ .../unit/memory/memcpy2d_tests_common.hh | 325 ++++++++ 6 files changed, 1652 insertions(+), 986 deletions(-) create mode 100644 projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync_old.cc create mode 100644 projects/hip-tests/catch/unit/memory/hipMemcpy2D_old.cc create mode 100644 projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh diff --git a/projects/hip-tests/catch/unit/memory/CMakeLists.txt b/projects/hip-tests/catch/unit/memory/CMakeLists.txt index 09e7277aac..2b4e0e6dd9 100644 --- a/projects/hip-tests/catch/unit/memory/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/memory/CMakeLists.txt @@ -40,7 +40,9 @@ set(TEST_SRC hipMemcpyParam2D.cc hipMemcpyParam2DAsync.cc hipMemcpy2D.cc + hipMemcpy2D_old.cc hipMemcpy2DAsync.cc + hipMemcpy2DAsync_old.cc hipMemcpy2DFromArray.cc hipMemcpy2DFromArray_old.cc hipMemcpy2DFromArrayAsync.cc diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2D.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2D.cc index d7e21e42f2..2ae89fc5cb 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy2D.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2D.cc @@ -1,496 +1,151 @@ /* -Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * @addtogroup hipMemcpy2D hipMemcpy2D - * @{ - * @ingroup MemcpyTest - * `hipMemcpy2D(void* dst, size_t dpitch, const void* src, - * size_t spitch, size_t width, size_t height, - * hipMemcpyKind kind)` - - * Copies data between host and device. 
- */ - -// Testcase Description: -// 1) Verifies the working of Memcpy2D API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2D API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on same GPU -// 4) Verifies working of Memcpy2D API for the following scenarios -// H2D-D2D-D2H on host and device memory -// H2D-D2D-D2H on pinned host and device memory -// H2D-D2D-D2H functionalities where memory is allocated in GPU-0 -// and API is triggered from GPU-1 +#include "memcpy2d_tests_common.hh" #include -#include +#include +#include +#include -static constexpr auto NUM_W{16}; -static constexpr auto NUM_H{16}; -static constexpr auto COLUMNS{8}; -static constexpr auto ROWS{8}; +TEST_CASE("Unit_hipMemcpy2D_Positive_Basic") { + constexpr bool async = false; -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2D API on same GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + SECTION("Device to Host") { Memcpy2DDeviceToHostShell(hipMemcpy2D); } - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + SECTION("Device to Device") { + SECTION("Peer access disabled") { Memcpy2DDeviceToDeviceShell(hipMemcpy2D); } + SECTION("Peer access enabled") { Memcpy2DDeviceToDeviceShell(hipMemcpy2D); } } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + SECTION("Host to Device") { Memcpy2DHostToDeviceShell(hipMemcpy2D); } - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - - // hipMemcpy2D Device to Host - HIP_CHECK(hipMemcpy2D(B_h, 
COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } + SECTION("Host to Host") { Memcpy2DHostToHostShell(hipMemcpy2D); } } -/** - * Test Description - * ------------------------ - * - This testcase performs the following scenarios of hipMemcpy2D API on same GPU. - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - The src and dst input pointers to hipMemCpy2D add an offset to the pointers - returned by the allocation functions. +TEST_CASE("Unit_hipMemcpy2D_Positive_Synchronization_Behavior") { + HIP_CHECK(hipDeviceSynchronize()); - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ + SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(hipMemcpy2D, true); } -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - 
HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + SECTION("Device to Host") { + Memcpy2DDtoHPageableSyncBehavior(hipMemcpy2D, true); + Memcpy2DDtoHPinnedSyncBehavior(hipMemcpy2D, true); } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h, - COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType), - ROWS, hipMemcpyHostToDevice)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B, - A_d+COLUMNS*sizeof(TestType), - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - - // hipMemcpy2D Device to Host - HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), - B_d+COLUMNS*sizeof(TestType), pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost)); - - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); + SECTION("Device to Device") { +#if HT_NVIDIA + Memcpy2DDtoDSyncBehavior(hipMemcpy2D, false); +#else + Memcpy2DDtoDSyncBehavior(hipMemcpy2D, true); +#endif } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232 + SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(hipMemcpy2D, true); } +#endif } -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2D API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - 3. 
Device context change where memory is allocated in GPU-0 - and API is trigerred from GPU-1 +TEST_CASE("Unit_hipMemcpy2D_Positive_Parameters") { + constexpr bool async = false; + Memcpy2DZeroWidthHeight(hipMemcpy2D); +} - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ +TEST_CASE("Unit_hipMemcpy2D_Negative_Parameters") { + constexpr size_t cols = 128; + constexpr size_t rows = 128; -TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - - // Change device - HIP_CHECK(hipSetDevice(1)); - - // Host to Device - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice)); - - // Device to Device - 
HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice)); - - // Device to Host - HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - } else { - SUCCEED("Machine does not seem to have P2P"); + constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2D(nullptr, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); - } -} -/** - * Test Description - * ------------------------ - * - This Testcase verifies the null size checks of hipMemcpy2D API - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_SizeCheck") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2D API where Source Pitch is zero") { - REQUIRE(hipMemcpy2D(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API 
where Destination Pitch is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - 0, NUM_W, NUM_H, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API where height is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - pitch_A, NUM_W, 0, - hipMemcpyDeviceToHost) == hipSuccess); - } - - SECTION("hipMemcpy2D API where width is zero") { - REQUIRE(hipMemcpy2D(A_h, width, A_d, - pitch_A, 0, NUM_H, - hipMemcpyDeviceToHost) == hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); -} - -/** - * Test Description - * ------------------------ - * - This Testcase verifies all the negative scenarios of hipMemcpy2D API - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_Negative") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2D API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2D(nullptr, width, A_d, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2D(nullptr, width, nullptr, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } - - SECTION("hipMemcpy2D API where width is greater than destination pitch") { - REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A, - COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost) != hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); -} - -static void 
hipMemcpy2D_Basic_Size_Test(size_t inc) { - constexpr int defaultProgramSize = 256 * 1024 * 1024; - constexpr int N = 2; - constexpr int value = 42; - int *in, *out, *dev; - size_t newSize = 0, inp = 0; - size_t size = sizeof(int) * N * inc; - - size_t free, total; - HIP_CHECK(hipMemGetInfo(&free, &total)); - - if ( free < 2 * size ) - newSize = ( free - defaultProgramSize ) / 2; - else - newSize = size; - - INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); - INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); - INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); - - HIP_CHECK(hipHostMalloc(&in, newSize)); - HIP_CHECK(hipHostMalloc(&out, newSize)); - HIP_CHECK(hipMalloc(&dev, newSize)); - - inp = newSize / (sizeof(int) * N); - for (size_t i=0; i < N; i++) { - in[i * inp] = value; - } - - size_t pitch = sizeof(int) * inp; - - HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int), - N, hipMemcpyHostToDevice)); - HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int), - N, hipMemcpyDeviceToHost)); - - for (size_t i=0; i < N; i++) { - REQUIRE(out[i * inp] == value); - } - - HIP_CHECK(hipFree(dev)); - HIP_CHECK(hipHostFree(in)); - HIP_CHECK(hipHostFree(out)); -} - -/** - * Test Description - * ------------------------ - * - This testcase performs multidevice size check on hipMemcpy2D API - 1. Verify hipMemcpy2D with 1 << 20 size - 2. 
Verify hipMemcpy2D with 1 << 21 size - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2D.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") { - CHECK_IMAGE_SUPPORT - size_t input = 1 << 20; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - - for (int i=0; i < numDevices; i++) { - HIP_CHECK(hipSetDevice(i)); - - SECTION("Verify hipMemcpy2D with 1 << 20 size") { - hipMemcpy2D_Basic_Size_Test(input); + SECTION("src == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, nullptr, spitch, width, height, kind), + hipErrorInvalidValue); } - SECTION("Verify hipMemcpy2D with 1 << 21 size") { - input <<= 1; - hipMemcpy2D_Basic_Size_Test(input); + + SECTION("dpitch < width") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, width - 1, src, spitch, width, height, kind), + hipErrorInvalidPitchValue); } + + SECTION("spitch < width") { + HIP_CHECK_ERROR(hipMemcpy2D(dst, dpitch, src, width - 1, width, height, kind), + hipErrorInvalidPitchValue); + } + + SECTION("dpitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR( + hipMemcpy2D(dst, static_cast(attr) + 1, src, spitch, width, height, kind), + hipErrorInvalidValue); + } + + SECTION("spitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR( + hipMemcpy2D(dst, dpitch, src, static_cast(attr) + 1, width, height, kind), + hipErrorInvalidValue); + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234 + SECTION("Invalid MemcpyKind") { + HIP_CHECK_ERROR( + hipMemcpy2D(dst, dpitch, src, spitch, width, height, static_cast(-1)), + hipErrorInvalidMemcpyDirection); + } +#endif + }; + + SECTION("Host to Device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + 
NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); + } + + SECTION("Device to Host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); + } + + SECTION("Device to Device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc index 1ca39bd6c9..4639993b9c 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync.cc @@ -1,555 +1,188 @@ /* -Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** - * @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync - * @{ - * @ingroup MemcpyTest - * `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, - * size_t spitch, size_t width, size_t height, - * hipMemcpyKind kind, hipStream_t stream = 0 )` - - * Copies data between host and device. 
- */ - -// Testcase Description: -// 1) Verifies the working of Memcpy2DAsync API negative scenarios by -// Pass NULL to destination pointer -// Pass NULL to Source pointer -// Pass width greater than spitch/dpitch -// 2) Verifies hipMemcpy2DAsync API by -// pass 0 to destionation pitch -// pass 0 to source pitch -// pass 0 to width -// pass 0 to height -// 3) Verifies working of Memcpy2DAsync API on host memory -// and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on same GPU -// 4) Verifies working of Memcpy2DAsync API on host memory -// and pinned host memory by -// performing D2H, D2D and H2D memory kind copies on peer GPU -// 5) Verifies working of Memcpy2DAsync API where memory is allocated -// in GPU-0 and stream is created on GPU-1 +#include "memcpy2d_tests_common.hh" #include -#include +#include +#include +#include -static constexpr auto NUM_W{16}; -static constexpr auto NUM_H{16}; -static constexpr auto COLUMNS{6}; -static constexpr auto ROWS{6}; +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Basic") { + using namespace std::placeholders; -/** - * Test Description - * ------------------------ - * - This performs the following scenarios of hipMemcpy2DAsync API on same GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + constexpr bool async = true; - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "B_d" using D2D copy - "B_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ + const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); + const StreamGuard stream_guard(stream_type); + const hipStream_t stream = stream_guard.stream(); -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - // 1 refers to pinned host memory - auto mem_type = GENERATE(0, 1); - HIP_CHECK(hipSetDevice(0)); - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, - *B_d{nullptr}; - size_t pitch_A, pitch_B; - size_t width{NUM_W * sizeof(TestType)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), - &pitch_B, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - SECTION("Calling Async apis with stream object created by user") { - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice, stream)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // 
hipMemcpy2DAsync Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - } - SECTION("Calling Async apis with hipStreamPerThread") { - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyHostToDevice, hipStreamPerThread)); - - // Performs D2D on same GPU device - HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToDevice, hipStreamPerThread)); - - // hipMemcpy2DAsync Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, - COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, hipStreamPerThread)); - HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + SECTION("Device to Host") { + Memcpy2DDeviceToHostShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); } - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipFree(B_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipStreamDestroy(stream)); -} - -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - hipStream_t stream; - - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); - - // Change device - HIP_CHECK(hipSetDevice(1)); - - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - - // Device to Device - HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, 
COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - HIP_CHECK(hipStreamDestroy(stream)); - } else { - SUCCEED("Machine does not seem to have P2P"); + SECTION("Device to Device") { + SECTION("Peer access disabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); - } -} - -/** - * Test Description - * ------------------------ - * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU - 1. H2D-D2D-D2H for Host Memory<-->Device Memory - 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory - Memory is allocated in GPU-0 and Stream is created in GPU-1 - - Input : "A_h" initialized based on data type - "A_h" --> "A_d" using H2D copy - "A_d" --> "X_d" using D2D copy - "X_d" --> "B_h" using D2H copy - Output: Validating A_h with B_h both should be equal for - the number of COLUMNS and ROWS copied - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "" - , int, float, double) { - CHECK_IMAGE_SUPPORT - auto mem_type = GENERATE(0, 1); - int numDevices = 0; - int canAccessPeer = 0; - TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - hipStream_t stream; - - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); - if (canAccessPeer) { - HIP_CHECK(hipSetDevice(0)); - - // Allocating memory - if (mem_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, NUM_W*NUM_H, false); - } - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - char *X_d{nullptr}; - size_t pitch_X; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), - &pitch_X, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - - // Change device - HIP_CHECK(hipSetDevice(1)); - HIP_CHECK(hipStreamCreate(&stream)); - - // Host to Device - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), - COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); - - // Device to Device - HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, - pitch_A, COLUMNS*sizeof(TestType), - ROWS, hipMemcpyDeviceToDevice, stream)); - - // 
Device to Host - HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, - pitch_X, COLUMNS*sizeof(TestType), ROWS, - hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - if (mem_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); - } - HIP_CHECK(hipFree(X_d)); - HIP_CHECK(hipStreamDestroy(stream)); - } else { - SUCCEED("Machine does not seem to have P2P"); - } - } else { - SUCCEED("skipped the testcase as no of devices is less than 2"); - } -} - -/** - * Test Description - * ------------------------ - * - This testcase verifies the null checks of hipMemcpy2DAsync API - 1. hipMemcpy2DAsync API where Source Pitch is zero - 2. hipMemcpy2DAsync API where Destination Pitch is zero - 3. hipMemcpy2DAsync API where height is zero - 4. 
hipMemcpy2DAsync API where width is zero - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2DAsync API where Source Pitch is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d, - pitch_A, NUM_W, NUM_H, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - 0, NUM_W, NUM_H, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where height is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, NUM_W, 0, - hipMemcpyDeviceToHost, stream) == hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where width is zero") { - REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, - pitch_A, 0, NUM_H, - hipMemcpyDeviceToHost, stream) == hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - free(A_h); -} - -/** - * Test Description - * ------------------------ - * - This testcase performs the negative scenarios of hipMemcpy2DAsync API - 1. hipMemcpy2DAsync API by Passing nullptr to destination - 2. hipMemcpy2DAsync API by Passing nullptr to source - 3. 
hipMemcpy2DAsync API where width is > destination pitch - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ - -TEST_CASE("Unit_hipMemcpy2DAsync_Negative") { - CHECK_IMAGE_SUPPORT - HIP_CHECK(hipSetDevice(0)); - int* A_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(int)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating memory - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, nullptr, NUM_W*NUM_H); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Initialize the data - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); - - SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") { - REQUIRE(hipMemcpy2DAsync(nullptr, width, A_d, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API by Passing nullptr to source") { - REQUIRE(hipMemcpy2DAsync(A_h, width, nullptr, - pitch_A, COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - SECTION("hipMemcpy2DAsync API where width is > destination pitch") { - REQUIRE(hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, - COLUMNS*sizeof(int), ROWS, - hipMemcpyDeviceToHost, stream) != hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamDestroy(stream)); - free(A_h); -} - -static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) { - constexpr int defaultProgramSize = 256 * 1024 * 1024; - constexpr int N = 2; - constexpr int value = 42; - int *in, *out, *dev; - size_t newSize = 0, inp = 0; - size_t size = sizeof(int) * N * inc; - - size_t free, total; - HIP_CHECK(hipMemGetInfo(&free, &total)); - - if ( free < 2 * size ) - newSize = ( free - defaultProgramSize ) / 2; - else - newSize = size; - - INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " 
Bytes."); - INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); - INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); - - HIP_CHECK(hipHostMalloc(&in, newSize)); - HIP_CHECK(hipHostMalloc(&out, newSize)); - HIP_CHECK(hipMalloc(&dev, newSize)); - - inp = newSize / (sizeof(int) * N); - for (size_t i=0; i < N; i++) { - in[i * inp] = value; - } - - size_t pitch = sizeof(int) * inp; - - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int), - N, hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int), - N, hipMemcpyDeviceToHost, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - - for (size_t i=0; i < N; i++) { - REQUIRE(out[i * inp] == value); - } - - HIP_CHECK(hipFree(dev)); - HIP_CHECK(hipHostFree(in)); - HIP_CHECK(hipHostFree(out)); - HIP_CHECK(hipStreamDestroy(stream)); -} - -/** - * Test Description - * ------------------------ - * - This testcase performs multidevice size check on hipMemcpy2DAsync API - 1. Verify hipMemcpy2DAsync with 1 << 20 size - 2. 
Verify hipMemcpy2DAsync with 1 << 21 size - * Test source - * ------------------------ - * - unit/memory/hipMemcpy2DAsync.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.0 - */ - -TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") { - CHECK_IMAGE_SUPPORT - size_t input = 1 << 20; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - - for (int i=0; i < numDevices; i++) { - HIP_CHECK(hipSetDevice(i)); - - SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") { - hipMemcpy2DAsync_Basic_Size_Test(input); - } - SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") { - input <<= 1; - hipMemcpy2DAsync_Basic_Size_Test(input); + SECTION("Peer access enabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); } } + + SECTION("Host to Device") { + Memcpy2DHostToDeviceShell( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), stream); + } + + SECTION("Host to Host") { + Memcpy2DHostToHostShell(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, stream), + stream); + } +} + +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Synchronization_Behavior") { + using namespace std::placeholders; + + HIP_CHECK(hipDeviceSynchronize()); + + SECTION("Host to Device") { + Memcpy2DHtoDSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Device to Pageable Host") { + Memcpy2DDtoHPageableSyncBehavior( + std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), true); + } +#endif + + SECTION("Device to Pinned Host") { + Memcpy2DDtoHPinnedSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); + } + + SECTION("Device to Device") { + Memcpy2DDtoDSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + false); + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Host to 
Host") { + Memcpy2DHtoHSyncBehavior(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr), + true); + } +#endif +} + +TEST_CASE("Unit_hipMemcpy2DAsync_Positive_Parameters") { + using namespace std::placeholders; + constexpr bool async = true; + Memcpy2DZeroWidthHeight(std::bind(hipMemcpy2DAsync, _1, _2, _3, _4, _5, _6, _7, nullptr)); +} + +TEST_CASE("Unit_hipMemcpy2DAsync_Negative_Parameters") { + constexpr size_t cols = 128; + constexpr size_t rows = 128; + + constexpr auto NegativeTests = [](void* dst, size_t dpitch, const void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(nullptr, dpitch, src, spitch, width, height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("src == nullptr") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, nullptr, spitch, width, height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("dpitch < width") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, width - 1, src, spitch, width, height, kind, nullptr), + hipErrorInvalidPitchValue); + } + SECTION("spitch < width") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, width - 1, width, height, kind, nullptr), + hipErrorInvalidPitchValue); + } + SECTION("dpitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, static_cast(attr) + 1, src, spitch, width, + height, kind, nullptr), + hipErrorInvalidValue); + } + SECTION("spitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, static_cast(attr) + 1, width, + height, kind, nullptr), + hipErrorInvalidValue); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-234 + SECTION("Invalid MemcpyKind") { + HIP_CHECK_ERROR(hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, + static_cast(-1), nullptr), + 
hipErrorInvalidMemcpyDirection); + } +#endif +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235 + SECTION("Invalid stream") { + StreamGuard stream_guard(Streams::created); + HIP_CHECK(hipStreamDestroy(stream_guard.stream())); + HIP_CHECK_ERROR( + hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream_guard.stream()), + hipErrorContextIsDestroyed); + } +#endif + }; + + SECTION("Host to device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); + } + + SECTION("Device to host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); + } + + SECTION("Host to host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); + } + + SECTION("Device to device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); + } } diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync_old.cc new file mode 100644 index 0000000000..1ca39bd6c9 --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2DAsync_old.cc @@ -0,0 +1,555 @@ +/* 
+Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemcpy2DAsync hipMemcpy2DAsync + * @{ + * @ingroup MemcpyTest + * `hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, + * size_t spitch, size_t width, size_t height, + * hipMemcpyKind kind, hipStream_t stream = 0 )` - + * Copies data between host and device. 
+ */ + +// Testcase Description: +// 1) Verifies the working of Memcpy2DAsync API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2DAsync API by +// pass 0 to destination pitch +// pass 0 to source pitch +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2DAsync API on host memory +// and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on same GPU +// 4) Verifies working of Memcpy2DAsync API on host memory +// and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on peer GPU +// 5) Verifies working of Memcpy2DAsync API where memory is allocated +// in GPU-0 and stream is created on GPU-1 + +#include +#include + +static constexpr auto NUM_W{16}; +static constexpr auto NUM_H{16}; +static constexpr auto COLUMNS{6}; +static constexpr auto ROWS{6}; + +/** + * Test Description + * ------------------------ + * - This performs the following scenarios of hipMemcpy2DAsync API on same GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_Host&PinnedMem", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + SECTION("Calling Async apis with stream object created by user") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, stream)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } 
+ SECTION("Calling Async apis with hipStreamPerThread") { + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice, hipStreamPerThread)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2DAsync(B_d, pitch_B, A_d, pitch_A, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToDevice, hipStreamPerThread)); + + // hipMemcpy2DAsync Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + } + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-Host&PinnedMem", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + hipStream_t stream; + + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); + + // Change device + HIP_CHECK(hipSetDevice(1)); + + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Device to Device + HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, 
COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + HIP_CHECK(hipStreamDestroy(stream)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2DAsync API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + Memory is allocated in GPU-0 and Stream is created in GPU-1 + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice-StreamOnDiffDevice", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + hipStream_t stream; + + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + + // Allocating memory + 
if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Change device + HIP_CHECK(hipSetDevice(1)); + HIP_CHECK(hipStreamCreate(&stream)); + + // Host to Device + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice, stream)); + + // Device to Device + HIP_CHECK(hipMemcpy2DAsync(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice, stream)); + + // Device to Host + HIP_CHECK(hipMemcpy2DAsync(B_h, COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + HIP_CHECK(hipStreamDestroy(stream)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This testcase verifies the null checks of hipMemcpy2DAsync API + 1. hipMemcpy2DAsync API where Source Pitch is zero + 2. hipMemcpy2DAsync API where Destination Pitch is zero + 3. hipMemcpy2DAsync API where height is zero + 4. 
hipMemcpy2DAsync API where width is zero + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_SizeCheck") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2DAsync API where Source Pitch is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where Destination Pitch is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + 0, NUM_W, NUM_H, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where height is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, NUM_W, 0, + hipMemcpyDeviceToHost, stream) == hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where width is zero") { + REQUIRE(hipMemcpy2DAsync(A_h, width, A_d, + pitch_A, 0, NUM_H, + hipMemcpyDeviceToHost, stream) == hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs the negative scenarios of hipMemcpy2DAsync API + 1. hipMemcpy2DAsync API by Passing nullptr to destination + 2. hipMemcpy2DAsync API by Passing nullptr to source + 3. 
hipMemcpy2DAsync API where width is > destination pitch + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_Negative") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2DAsync API by Passing nullptr to destination") { + REQUIRE(hipMemcpy2DAsync(nullptr, width, A_d, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API by Passing nullptr to source") { + REQUIRE(hipMemcpy2DAsync(A_h, width, nullptr, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + SECTION("hipMemcpy2DAsync API where width is > destination pitch") { + REQUIRE(hipMemcpy2DAsync(A_h, 10, A_d, pitch_A, + COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost, stream) != hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); +} + +static void hipMemcpy2DAsync_Basic_Size_Test(size_t inc) { + constexpr int defaultProgramSize = 256 * 1024 * 1024; + constexpr int N = 2; + constexpr int value = 42; + int *in, *out, *dev; + size_t newSize = 0, inp = 0; + size_t size = sizeof(int) * N * inc; + + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + + if ( free < 2 * size ) + newSize = ( free - defaultProgramSize ) / 2; + else + newSize = size; + + INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " 
Bytes."); + INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); + INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); + + HIP_CHECK(hipHostMalloc(&in, newSize)); + HIP_CHECK(hipHostMalloc(&out, newSize)); + HIP_CHECK(hipMalloc(&dev, newSize)); + + inp = newSize / (sizeof(int) * N); + for (size_t i=0; i < N; i++) { + in[i * inp] = value; + } + + size_t pitch = sizeof(int) * inp; + + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipMemcpy2DAsync(dev, pitch, in, pitch, sizeof(int), + N, hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipMemcpy2DAsync(out, pitch, dev, pitch, sizeof(int), + N, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + for (size_t i=0; i < N; i++) { + REQUIRE(out[i * inp] == value); + } + + HIP_CHECK(hipFree(dev)); + HIP_CHECK(hipHostFree(in)); + HIP_CHECK(hipHostFree(out)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs multidevice size check on hipMemcpy2DAsync API + 1. Verify hipMemcpy2DAsync with 1 << 20 size + 2. 
Verify hipMemcpy2DAsync with 1 << 21 size + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2DAsync_multiDevice_Basic_Size_Test") { + CHECK_IMAGE_SUPPORT + size_t input = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + + for (int i=0; i < numDevices; i++) { + HIP_CHECK(hipSetDevice(i)); + + SECTION("Verify hipMemcpy2DAsync with 1 << 20 size") { + hipMemcpy2DAsync_Basic_Size_Test(input); + } + SECTION("Verify hipMemcpy2DAsync with 1 << 21 size") { + input <<= 1; + hipMemcpy2DAsync_Basic_Size_Test(input); + } + } +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpy2D_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpy2D_old.cc new file mode 100644 index 0000000000..d7e21e42f2 --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemcpy2D_old.cc @@ -0,0 +1,496 @@ +/* +Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemcpy2D hipMemcpy2D + * @{ + * @ingroup MemcpyTest + * `hipMemcpy2D(void* dst, size_t dpitch, const void* src, + * size_t spitch, size_t width, size_t height, + * hipMemcpyKind kind)` - + * Copies data between host and device. + */ + +// Testcase Description: +// 1) Verifies the working of Memcpy2D API negative scenarios by +// Pass NULL to destination pointer +// Pass NULL to Source pointer +// Pass width greater than spitch/dpitch +// 2) Verifies hipMemcpy2D API by +// pass 0 to destination pitch +// pass 0 to source pitch +// pass 0 to width +// pass 0 to height +// 3) Verifies working of Memcpy2D API on host memory and pinned host memory by +// performing D2H, D2D and H2D memory kind copies on same GPU +// 4) Verifies working of Memcpy2D API for the following scenarios +// H2D-D2D-D2H on host and device memory +// H2D-D2D-D2H on pinned host and device memory +// H2D-D2D-D2H functionalities where memory is allocated in GPU-0 +// and API is triggered from GPU-1 + +#include +#include + +static constexpr auto NUM_W{16}; +static constexpr auto NUM_H{16}; +static constexpr auto COLUMNS{8}; +static constexpr auto ROWS{8}; + +/** + * Test Description + * ------------------------ + * - This testcase performs the following scenarios of hipMemcpy2D API on same GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. 
H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyHostToDevice)); + + // Performs D2D on same GPU device + HIP_CHECK(hipMemcpy2D(B_d, pitch_B, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // hipMemcpy2D Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), B_d, pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, 
nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } +} + +/** + * Test Description + * ------------------------ + * - This testcase performs the following scenarios of hipMemcpy2D API on same GPU. + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + The src and dst input pointers to hipMemCpy2D add an offset to the pointers + returned by the allocation functions. + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "B_d" using D2D copy + "B_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_H2D-D2D-D2H_WithOffset", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + // 1 refers to pinned host memory + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}, + *B_d{nullptr}; + size_t pitch_A, pitch_B; + size_t width{NUM_W * sizeof(TestType)}; + + // Allocating memory + if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&B_d), + &pitch_B, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d+COLUMNS*sizeof(TestType), pitch_A, A_h, + COLUMNS*sizeof(TestType), COLUMNS*sizeof(TestType), + ROWS, hipMemcpyHostToDevice)); + + // Performs D2D on same GPU device + 
HIP_CHECK(hipMemcpy2D(B_d+COLUMNS*sizeof(TestType), pitch_B, + A_d+COLUMNS*sizeof(TestType), + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // hipMemcpy2D Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), + B_d+COLUMNS*sizeof(TestType), pitch_B, + COLUMNS*sizeof(TestType), ROWS, + hipMemcpyDeviceToHost)); + + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipFree(B_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } +} + +/** + * Test Description + * ------------------------ + * - This testcases performs the following scenarios of hipMemcpy2D API on Peer GPU + 1. H2D-D2D-D2H for Host Memory<-->Device Memory + 2. H2D-D2D-D2H for Pinned Host Memory<-->Device Memory + 3. Device context change where memory is allocated in GPU-0 + and API is trigerred from GPU-1 + + Input : "A_h" initialized based on data type + "A_h" --> "A_d" using H2D copy + "A_d" --> "X_d" using D2D copy + "X_d" --> "B_h" using D2H copy + Output: Validating A_h with B_h both should be equal for + the number of COLUMNS and ROWS copied + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEMPLATE_TEST_CASE("Unit_hipMemcpy2D_multiDevice-D2D", "" + , int, float, double) { + CHECK_IMAGE_SUPPORT + auto mem_type = GENERATE(0, 1); + int numDevices = 0; + int canAccessPeer = 0; + TestType* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, 0, 1)); + if (canAccessPeer) { + HIP_CHECK(hipSetDevice(0)); + + // Allocating memory 
+ if (mem_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, NUM_W*NUM_H, false); + } + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + + char *X_d{nullptr}; + size_t pitch_X; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&X_d), + &pitch_X, width, NUM_H)); + + // Change device + HIP_CHECK(hipSetDevice(1)); + + // Host to Device + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, A_h, COLUMNS*sizeof(TestType), + COLUMNS*sizeof(TestType), ROWS, hipMemcpyHostToDevice)); + + // Device to Device + HIP_CHECK(hipMemcpy2D(X_d, pitch_X, A_d, + pitch_A, COLUMNS*sizeof(TestType), + ROWS, hipMemcpyDeviceToDevice)); + + // Device to Host + HIP_CHECK(hipMemcpy2D(B_h, COLUMNS*sizeof(TestType), X_d, + pitch_X, COLUMNS*sizeof(TestType), ROWS, hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, B_h, COLUMNS, ROWS) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + if (mem_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); + } + HIP_CHECK(hipFree(X_d)); + } else { + SUCCEED("Machine does not seem to have P2P"); + } + } else { + SUCCEED("skipped the testcase as no of devices is less than 2"); + } +} + +/** + * Test Description + * ------------------------ + * - This Testcase verifies the null size checks of hipMemcpy2D API + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_SizeCheck") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + + // Allocating memory + 
HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2D API where Destination Pitch is zero") { + REQUIRE(hipMemcpy2D(A_h, 0, A_d, + pitch_A, NUM_W, NUM_H, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where Source Pitch is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + 0, NUM_W, NUM_H, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where height is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + pitch_A, NUM_W, 0, + hipMemcpyDeviceToHost) == hipSuccess); + } + + SECTION("hipMemcpy2D API where width is zero") { + REQUIRE(hipMemcpy2D(A_h, width, A_d, + pitch_A, 0, NUM_H, + hipMemcpyDeviceToHost) == hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +/** + * Test Description + * ------------------------ + * - This Testcase verifies all the negative scenarios of hipMemcpy2D API + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_Negative") { + CHECK_IMAGE_SUPPORT + HIP_CHECK(hipSetDevice(0)); + int* A_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(int)}; + + // Allocating memory + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, nullptr, NUM_W*NUM_H); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Initialize the data + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, nullptr); + + SECTION("hipMemcpy2D API by Passing nullptr to destination") { + REQUIRE(hipMemcpy2D(nullptr, width, A_d, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API by Passing nullptr to source and destination") 
{ + REQUIRE(hipMemcpy2D(nullptr, width, nullptr, + pitch_A, COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + SECTION("hipMemcpy2D API where width is greater than destination pitch") { + REQUIRE(hipMemcpy2D(A_h, 10, A_d, pitch_A, + COLUMNS*sizeof(int), ROWS, + hipMemcpyDeviceToHost) != hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + free(A_h); +} + +static void hipMemcpy2D_Basic_Size_Test(size_t inc) { + constexpr int defaultProgramSize = 256 * 1024 * 1024; + constexpr int N = 2; + constexpr int value = 42; + int *in, *out, *dev; + size_t newSize = 0, inp = 0; + size_t size = sizeof(int) * N * inc; + + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + + if ( free < 2 * size ) + newSize = ( free > defaultProgramSize ? free - defaultProgramSize : free ) / 2; + else + newSize = size; + + INFO("Array size: " << size/1024.0/1024.0 << " MB or " << size << " Bytes."); + INFO("Free memory: " << free/1024.0/1024.0 << " MB or " << free << " Bytes"); + INFO("NewSize:" << newSize/1024.0/1024.0 << "MB or " << newSize << " Bytes"); + + HIP_CHECK(hipHostMalloc(&in, newSize)); + HIP_CHECK(hipHostMalloc(&out, newSize)); + HIP_CHECK(hipMalloc(&dev, newSize)); + + inp = newSize / (sizeof(int) * N); + for (size_t i=0; i < N; i++) { + in[i * inp] = value; + } + + size_t pitch = sizeof(int) * inp; + + HIP_CHECK(hipMemcpy2D(dev, pitch, in, pitch, sizeof(int), + N, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy2D(out, pitch, dev, pitch, sizeof(int), + N, hipMemcpyDeviceToHost)); + + for (size_t i=0; i < N; i++) { + REQUIRE(out[i * inp] == value); + } + + HIP_CHECK(hipFree(dev)); + HIP_CHECK(hipHostFree(in)); + HIP_CHECK(hipHostFree(out)); +} + +/** + * Test Description + * ------------------------ + * - This testcase performs multidevice size check on hipMemcpy2D API + 1. Verify hipMemcpy2D with 1 << 20 size + 2. 
Verify hipMemcpy2D with 1 << 21 size + * Test source + * ------------------------ + * - unit/memory/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipMemcpy2D_multiDevice_Basic_Size_Test") { + CHECK_IMAGE_SUPPORT + size_t input = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + + for (int i=0; i < numDevices; i++) { + HIP_CHECK(hipSetDevice(i)); + + SECTION("Verify hipMemcpy2D with 1 << 20 size") { + hipMemcpy2D_Basic_Size_Test(input); + } + SECTION("Verify hipMemcpy2D with 1 << 21 size") { + input <<= 1; + hipMemcpy2D_Basic_Size_Test(input); + } + } +} diff --git a/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh new file mode 100644 index 0000000000..990dd9dcdc --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh @@ -0,0 +1,325 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include + +template +void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyDeviceToHost, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + LinearAllocGuard2D device_alloc(cols, rows); + + const size_t host_pitch = GENERATE_REF(device_alloc.width(), device_alloc.width() + 64); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows); + + const dim3 threads_per_block(32, 32); + const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1); + Iota<<>>(device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width_logical(), device_alloc.height(), 1); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(memcpy_func(host_alloc.ptr(), host_pitch, device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemoryVerify(host_alloc.ptr(), host_pitch, device_alloc.width_logical(), + device_alloc.height(), 1, f); +} + +template +void Memcpy2DDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyDeviceToDevice, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + const auto device_count = HipTest::getDeviceCount(); + const auto src_device = GENERATE_COPY(range(0, device_count)); + const auto dst_device = GENERATE_COPY(range(0, device_count)); + const size_t src_cols_mult = GENERATE(1, 2); + + 
INFO("Src device: " << src_device << ", Dst device: " << dst_device); + + HIP_CHECK(hipSetDevice(src_device)); + if constexpr (enable_peer_access) { + if (src_device == dst_device) { + return; + } + int can_access_peer = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); + if (!can_access_peer) { + INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device); + REQUIRE(can_access_peer); + } + HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); + } + + LinearAllocGuard2D src_alloc(cols * src_cols_mult, rows); + HIP_CHECK(hipSetDevice(dst_device)); // dst_alloc must be allocated on dst_device + LinearAllocGuard2D dst_alloc(cols, rows); + HIP_CHECK(hipSetDevice(src_device)); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows); + + const dim3 threads_per_block(32, 32); + const dim3 blocks(cols / threads_per_block.x + 1, rows / threads_per_block.y + 1); + // Using dst_alloc width and height to set only the elements that will be copied over to + // dst_alloc + Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), dst_alloc.width_logical(), + dst_alloc.height(), 1); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost)); + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemoryVerify(host_alloc.ptr(), dst_alloc.width(), dst_alloc.width_logical(), + dst_alloc.height(), 1, f); +} + +template +void Memcpy2DHostToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyHostToDevice, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows 
= 128; + + LinearAllocGuard2D device_alloc(cols, rows); + + const size_t host_pitch = GENERATE_REF(device_alloc.pitch(), 2 * device_alloc.pitch()); + + LinearAllocGuard src_host_alloc(LinearAllocs::hipHostMalloc, host_pitch * rows); + LinearAllocGuard dst_host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows); + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemorySet(src_host_alloc.ptr(), host_pitch, device_alloc.width_logical(), + device_alloc.height(), 1, f); + + std::fill_n(dst_host_alloc.ptr(), device_alloc.width_logical() * rows, 0); + + HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), host_pitch, + device_alloc.width(), device_alloc.height(), kind)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width(), device_alloc.height(), + hipMemcpyDeviceToHost)); + + PitchedMemoryVerify(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.width_logical(), + device_alloc.height(), 1, f); +} + +template +void Memcpy2DHostToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + const auto kind = GENERATE(hipMemcpyHostToHost, hipMemcpyDefault); + + constexpr size_t cols = 127; + constexpr size_t rows = 128; + + const size_t src_pitch = GENERATE_REF(cols * sizeof(int), cols * sizeof(int) + 64); + + LinearAllocGuard src_host(LinearAllocs::hipHostMalloc, src_pitch * rows); + LinearAllocGuard dst_host(LinearAllocs::hipHostMalloc, cols * sizeof(int) * rows); + + constexpr auto f = [](size_t x, size_t y, size_t z) { return z * cols * rows + y * cols + x; }; + PitchedMemorySet(src_host.ptr(), src_pitch, cols, rows, 1, f); + + HIP_CHECK(memcpy_func(dst_host.ptr(), cols * sizeof(int), src_host.ptr(), src_pitch, + cols * sizeof(int), rows, kind)); + if constexpr (should_synchronize) 
{ + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + PitchedMemoryVerify(dst_host.ptr(), cols * sizeof(int), cols, rows, 1, f); +} + +// Synchronization behavior checks +template +void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream) { + LaunchDelayKernel(std::chrono::milliseconds{300}, kernel_stream); + HIP_CHECK(memcpy_func()); + if (should_sync) { + HIP_CHECK(hipStreamQuery(kernel_stream)); + } else { + HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady); + } +} + +template +void Memcpy2DHtoDSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + using LA = LinearAllocs; + const auto host_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + LinearAllocGuard host_alloc(host_alloc_type, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, device_alloc.ptr(), device_alloc.pitch(), + host_alloc.ptr(), device_alloc.width(), device_alloc.width(), + device_alloc.height(), hipMemcpyHostToDevice), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoHPageableSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard host_alloc(LinearAllocs::malloc, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(), + device_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height(), hipMemcpyDeviceToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoHPinnedSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, 32 * sizeof(int) * 32); + LinearAllocGuard2D device_alloc(32, 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, host_alloc.ptr(), device_alloc.width(), + device_alloc.ptr(), 
device_alloc.pitch(), device_alloc.width(), + device_alloc.height(), hipMemcpyDeviceToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DDtoDSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + LinearAllocGuard2D src_alloc(32, 32); + LinearAllocGuard2D dst_alloc(32, 32); + MemcpySyncBehaviorCheck( + std::bind(memcpy_func, dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice), + should_sync, kernel_stream); +} + +template +void Memcpy2DHtoHSyncBehavior(F memcpy_func, const bool should_sync, + const hipStream_t kernel_stream = nullptr) { + using LA = LinearAllocs; + const auto src_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + const auto dst_alloc_type = GENERATE(LA::malloc, LA::hipHostMalloc); + + LinearAllocGuard src_alloc(src_alloc_type, 32 * sizeof(int) * 32); + LinearAllocGuard dst_alloc(dst_alloc_type, 32 * sizeof(int) * 32); + MemcpySyncBehaviorCheck(std::bind(memcpy_func, dst_alloc.ptr(), 32 * sizeof(int), src_alloc.ptr(), + 32 * sizeof(int), 32 * sizeof(int), 32, hipMemcpyHostToHost), + should_sync, kernel_stream); +} + +template +void Memcpy2DZeroWidthHeight(F memcpy_func, const hipStream_t stream = nullptr) { + constexpr size_t cols = 63; + constexpr size_t rows = 64; + + const auto [width_mult, height_mult] = + GENERATE(std::make_pair(0, 1), std::make_pair(1, 0), std::make_pair(0, 0)); + + SECTION("Device to Host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.width() * rows); + std::fill_n(host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 42); + HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 1, device_alloc.width(), + device_alloc.height())); + + HIP_CHECK(memcpy_func(host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width() * width_mult, + 
device_alloc.height() * height_mult, hipMemcpyDeviceToHost)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + ArrayFindIfNot(host_alloc.ptr(), static_cast(42), + device_alloc.width_logical() * device_alloc.height()); + } + + SECTION("Device to Device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, dst_alloc.width() * rows); + HIP_CHECK( + hipMemset2D(src_alloc.ptr(), src_alloc.pitch(), 1, src_alloc.width(), src_alloc.height())); + HIP_CHECK( + hipMemset2D(dst_alloc.ptr(), dst_alloc.pitch(), 42, dst_alloc.width(), dst_alloc.height())); + HIP_CHECK(memcpy_func(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width() * width_mult, dst_alloc.height() * height_mult, + hipMemcpyDeviceToDevice)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipMemcpy2D(host_alloc.ptr(), dst_alloc.width(), dst_alloc.ptr(), dst_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToHost)); + ArrayFindIfNot(host_alloc.ptr(), static_cast(42), + dst_alloc.width_logical() * dst_alloc.height()); + } + + SECTION("Host to Device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard src_host_alloc(LinearAllocs::hipHostMalloc, + device_alloc.width() * rows); + LinearAllocGuard dst_host_alloc(LinearAllocs::hipHostMalloc, + device_alloc.width() * rows); + std::fill_n(src_host_alloc.ptr(), device_alloc.width_logical() * device_alloc.height(), 1); + HIP_CHECK(hipMemset2D(device_alloc.ptr(), device_alloc.pitch(), 42, device_alloc.width(), + device_alloc.height())); + HIP_CHECK(memcpy_func(device_alloc.ptr(), device_alloc.pitch(), src_host_alloc.ptr(), + device_alloc.width(), device_alloc.width() * width_mult, + device_alloc.height() * height_mult, hipMemcpyHostToDevice)); + if constexpr (should_synchronize) { + 
HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipMemcpy2D(dst_host_alloc.ptr(), device_alloc.width(), device_alloc.ptr(), + device_alloc.pitch(), device_alloc.width(), device_alloc.height(), + hipMemcpyDeviceToHost)); + ArrayFindIfNot(dst_host_alloc.ptr(), static_cast(42), + device_alloc.width_logical() * device_alloc.height()); + } + + SECTION("Host to Host") { + const auto alloc_size = cols * rows; + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, alloc_size); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, alloc_size); + std::fill_n(src_alloc.ptr(), alloc_size, 1); + std::fill_n(dst_alloc.ptr(), alloc_size, 42); + HIP_CHECK(memcpy_func(dst_alloc.ptr(), cols, src_alloc.ptr(), cols, cols * width_mult, + rows * height_mult, hipMemcpyHostToHost)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(stream)); + } + ArrayFindIfNot(dst_alloc.ptr(), static_cast(42), alloc_size); + } +} \ No newline at end of file From 25263b855335ecd325c45a76d8daa09c36136a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:33:58 +0100 Subject: [PATCH 09/19] EXSWHTEC-102 - Implement tests for hipMemcpyParam2D APIs #54 Change-Id: Ieac4d5000915b80f579c8e5f72d8d072bde63ab9 [ROCm/hip-tests commit: 9a3fd8ec41a11051ae5e1fd069d517cb68ca5993] --- .../catch/unit/memory/CMakeLists.txt | 2 + .../catch/unit/memory/hipMemcpyParam2D.cc | 462 +++++--------- .../unit/memory/hipMemcpyParam2DAsync.cc | 585 ++++++------------ .../unit/memory/hipMemcpyParam2DAsync_old.cc | 441 +++++++++++++ .../catch/unit/memory/hipMemcpyParam2D_old.cc | 337 ++++++++++ .../unit/memory/memcpy2d_tests_common.hh | 211 +++++++ 6 files changed, 1333 insertions(+), 705 deletions(-) create mode 100644 projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync_old.cc create mode 100644 projects/hip-tests/catch/unit/memory/hipMemcpyParam2D_old.cc diff --git 
a/projects/hip-tests/catch/unit/memory/CMakeLists.txt b/projects/hip-tests/catch/unit/memory/CMakeLists.txt index 2b4e0e6dd9..fda74f5b2e 100644 --- a/projects/hip-tests/catch/unit/memory/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/memory/CMakeLists.txt @@ -38,7 +38,9 @@ set(TEST_SRC hipMemcpy3DAsync.cc hipMemcpy3DAsync_old.cc hipMemcpyParam2D.cc + hipMemcpyParam2D_old.cc hipMemcpyParam2DAsync.cc + hipMemcpyParam2DAsync_old.cc hipMemcpy2D.cc hipMemcpy2D_old.cc hipMemcpy2DAsync.cc diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D.cc index 3a35a1a16e..f2b5b270ce 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D.cc @@ -1,337 +1,195 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* -This testfile verifies the following scenarios of hipMemcpyParam2D API -1. Negative Scenarios -2. Extent Validation Scenarios -3. D2D copy for different datatypes -4. H2D and D2H copy for different datatypes -*/ +#include "memcpy2d_tests_common.hh" #include -#include +#include +#include +#include -static constexpr size_t NUM_W{10}; -static constexpr size_t NUM_H{10}; -/* - * This testcase verifies D2D functionality of hipMemcpyParam2D API - * Input: Intializing "A_d" device variable with "C_h" host variable - * Output: "A_d" device variable to "E_d" device variable - * - * Validating the result by copying "E_d" to "A_h" and checking - * it with the initalized data "C_h". 
- * - */ -TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int, - double, long double) { - CHECK_IMAGE_SUPPORT +TEST_CASE("Unit_hipMemcpyParam2D_Positive_Basic") { + constexpr bool async = false; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - // Initialize and Allocating Memory - HIP_CHECK(hipSetDevice(0)); - TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Device to Host") { Memcpy2DDeviceToHostShell(MemcpyParam2DAdapter()); } +#endif - int peerAccess = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - if (!peerAccess) { - SUCCEED("Skipped the test as there is no peer access"); - } else { - HIP_CHECK(hipSetDevice(1)); - char *E_d; - size_t pitch_E; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), - &pitch_E, width, NUM_H)); - - // Initalizing A_d with C_h - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, - NUM_W * sizeof(TestType), NUM_H, hipMemcpyHostToDevice)); - - // Device to Device - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = E_d; - desc.dstDevice = hipDeviceptr_t(E_d); - desc.dstPitch = pitch_E; - desc.WidthInBytes = NUM_W * sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); - - // Copying E_d to A_h - HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, - NUM_W * sizeof(TestType), NUM_H, - hipMemcpyDeviceToHost)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, C_h, 
NUM_W, NUM_H) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, false); + SECTION("Device to Device") { + SECTION("Peer access disabled") { + Memcpy2DDeviceToDeviceShell(MemcpyParam2DAdapter()); } - } else { - SUCCEED("skipping the testcases as numDevices < 2"); - } -} - -/* - * This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API - * H2D case: - * Input: "C_h" host variable initialized with default data - * Output: "A_d" device variable - * - * D2H case: - * Input: "A_d" device variable from the previous output - * OutPut: "A_h" variable - * - * Validating the result by comparing "A_h" to "C_h" - */ -TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float, - int, double, long double) { - CHECK_IMAGE_SUPPORT - - // 1 refers to pinned host memory and 0 refers - // to unpinned memory - auto memory_type = GENERATE(0, 1); - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIP_CHECK(hipSetDevice(0)); - - // Initialize and Allocating Memory - TestType* A_h{nullptr}, *C_h{nullptr}, - *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Based on memory type (pinned/unpinned) allocating memory - if (memory_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); + SECTION("Peer access enabled") { + Memcpy2DDeviceToDeviceShell(MemcpyParam2DAdapter()); } - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - int peerAccess = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - if (!peerAccess) { - SUCCEED("Skipped the test as there is no peer access"); - } else { - // Host to Device - hip_Memcpy2D desc = {}; - 
desc.srcMemoryType = hipMemoryTypeHost; - desc.srcHost = C_h; - desc.srcDevice = hipDeviceptr_t(C_h); - desc.srcPitch = width; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = A_d; - desc.dstDevice = hipDeviceptr_t(A_d); - desc.dstPitch = pitch_A; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + } - // Device to Host - memset(&desc, 0x0, sizeof(hip_Memcpy2D)); - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + SECTION("Host to Device") { Memcpy2DHostToDeviceShell(MemcpyParam2DAdapter()); } - // Validating the result - REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Host to Host") { Memcpy2DHostToHostShell(MemcpyParam2DAdapter()); } +#endif +} - // DeAllocating the Memory - HIP_CHECK(hipFree(A_d)); - if (memory_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, false); - } +TEST_CASE("Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior") { + HIP_CHECK(hipDeviceSynchronize()); + + SECTION("Host to Device") { Memcpy2DHtoDSyncBehavior(MemcpyParam2DAdapter<>(), true); } + + SECTION("Device to Pageable Host") { + Memcpy2DDtoHPageableSyncBehavior(MemcpyParam2DAdapter<>(), true); + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Device to Pinned Host") { + Memcpy2DDtoHPinnedSyncBehavior(MemcpyParam2DAdapter<>(), true); + } +#endif + + SECTION("Device to Device") { +#if HT_NVIDIA + Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), 
false); +#else + Memcpy2DDtoDSyncBehavior(MemcpyParam2DAdapter<>(), true); +#endif + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-232 + SECTION("Host to Host") { Memcpy2DHtoHSyncBehavior(MemcpyParam2DAdapter<>(), true); } +#endif +} + +TEST_CASE("Unit_hipMemcpyParam2D_Positive_Parameters") { + constexpr bool async = false; + Memcpy2DZeroWidthHeight(MemcpyParam2DAdapter()); +} + +TEST_CASE("Unit_hipMemcpyParam2D_Positive_Array") { + constexpr bool async = false; + SECTION("Array from/to Host") { + MemcpyParam2DArrayHostShell(MemcpyParam2DAdapter()); + } + SECTION("Array from/to Device") { + MemcpyParam2DArrayDeviceShell(MemcpyParam2DAdapter()); + } +} + +TEST_CASE("Unit_hipMemcpyParam2D_Negative_Parameters") { + constexpr size_t cols = 128; + constexpr size_t rows = 128; + + constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(static_cast(nullptr), dpitch, src, spitch, + width, height, kind), + hipErrorInvalidValue); } - } else { - SUCCEED("skipping the testcases as numDevices < 2"); - } -} -/* - * This testcase verifies the extent validation scenarios - */ -TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") { - CHECK_IMAGE_SUPPORT - // Allocating memory and Initializing the data - HIP_CHECK(hipSetDevice(0)); - char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, - * A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(char)}; - constexpr auto memsetval{100}; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); - HipTest::initArrays(nullptr, nullptr, nullptr, - &B_h, nullptr, nullptr, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - HipTest::setDefaultData(NUM_W*NUM_H, B_h, nullptr, nullptr); - HIP_CHECK(hipMemset2D(A_d, pitch_A, 
memsetval, NUM_W, NUM_H)); + SECTION("src == nullptr") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, static_cast(nullptr), spitch, + width, height, kind), + hipErrorInvalidValue); + } - // Device to Host - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; + SECTION("dstPitch < WithInBytes") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, width - 1, src, spitch, width, height, kind), + hipErrorInvalidValue); + } - SECTION("Destination Pitch is 0") { - desc.dstPitch = 0; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + SECTION("srcPitch < WidthInBytes") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, width - 1, width, height, kind), + hipErrorInvalidValue); + } + + SECTION("dstPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, static_cast(attr) + 1, src, spitch, + width, height, kind), + hipErrorInvalidValue); + } + + SECTION("srcPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>()(dst, dpitch, src, static_cast(attr) + 1, + width, height, kind), + hipErrorInvalidValue); + } + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237 + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(spitch - width + 1, 0, 0))( + dst, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } + + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + HIP_CHECK_ERROR( + MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(dpitch - width + 1, 0, 0))( + dst, dpitch, src, spitch, 
width, height, kind), + hipErrorInvalidValue); + } + + SECTION("srcY out of bounds") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch, + width, height, kind), + hipErrorInvalidValue); + } + + SECTION("dstY out of bounds") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter<>(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))( + dst, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } +#endif + }; + + SECTION("Host to Device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); } - SECTION("Source Pitch is 0") { - desc.srcPitch = 0; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + SECTION("Device to Host") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); } - SECTION("Height is 0") { - desc.Height = 0; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); - REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); } - SECTION("Width is 0") { - desc.WidthInBytes = 0; - REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); - REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + SECTION("Device to Device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D 
dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } - - // DeAllocating the Memory - HIP_CHECK(hipFree(A_d)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); -} - -/* - * This testcase verifies the negative scenarios - */ -TEST_CASE("Unit_hipMemcpyParam2D_Negative") { - CHECK_IMAGE_SUPPORT - - HIP_CHECK(hipSetDevice(0)); - - // Allocating and Initializing the data - float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, - * A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(float)}; - constexpr auto memsetval{100}; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; - - SECTION("Null Pointer to Source Device Pointer") { - desc.srcDevice = hipDeviceptr_t(nullptr); - REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); - } - - SECTION("Null Pointer to Destination Device Pointer") { - memset(&desc, 0x0, sizeof(hip_Memcpy2D)); - desc.srcMemoryType = hipMemoryTypeHost; - desc.srcHost = A_h; - desc.srcDevice = hipDeviceptr_t(A_h); - desc.srcPitch = width; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = A_d; - desc.dstDevice = hipDeviceptr_t(nullptr); - desc.dstPitch = pitch_A; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); - } - - SECTION("Null Pointer to both Src & Dst Device 
Pointer") { - desc.srcDevice = hipDeviceptr_t(nullptr); - desc.dstDevice = hipDeviceptr_t(nullptr); - REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); - } - - SECTION("Width > src/dest pitches") { - desc.WidthInBytes = pitch_A+1; - REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); - } - - // DeAllocating the Memory - HIP_CHECK(hipFree(A_d)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); -} +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc index 763ef9185d..44ec9b01cb 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync.cc @@ -1,441 +1,220 @@ /* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* -This testfile verifies the following scenarios of hipMemcpyParam2DAsync API -1. Negative Scenarios -2. Extent Validation Scenarios -3. D2D copy for different datatypes -4. H2D and D2H copy for different datatypes -5. Device context change scenario where memory allocated in one GPU - stream created in another GPU -*/ +#include "memcpy2d_tests_common.hh" #include -#include +#include +#include +#include -static constexpr size_t NUM_W{10}; -static constexpr size_t NUM_H{10}; -/* - * This testcase verifies D2D functionality of hipMemcpyParam2DAsync API - * Where Memory is allocated in GPU-0 and stream is created in GPU-1 - * - * Input: Intializing "A_d" device variable with "C_h" host variable - * Output: "A_d" device variable to "E_d" device variable - * - * Validating the result by copying "E_d" to "A_h" and checking - * it with the initalized data "C_h". 
- * - */ -TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice", - "[hipMemcpyParam2DAsync]", char, float, int, double, long double) { - CHECK_IMAGE_SUPPORT +TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Basic") { + using namespace std::placeholders; - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - // Allocating and Initializing the data - HIP_CHECK(hipSetDevice(0)); - TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - int peerAccess = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - if (!peerAccess) { - SUCCEED("Skipped the test as there is no peer access"); - } else { - TestType *E_d{nullptr}; - size_t pitch_E; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), - &pitch_E, width, NUM_H)); + constexpr bool async = true; - // Initalizing A_d with C_h - HIP_CHECK(hipSetDevice(1)); - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); + const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); + const StreamGuard stream_guard(stream_type); + const hipStream_t stream = stream_guard.stream(); - HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width, - NUM_W*sizeof(TestType), NUM_H, - hipMemcpyHostToDevice, stream)); - HIP_CHECK(hipStreamSynchronize(stream)); - // Device to Device - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = E_d; - desc.dstDevice = hipDeviceptr_t(E_d); - desc.dstPitch = pitch_E; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - 
REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Copying the result E_d to A_h host variable - HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, - NUM_W*sizeof(TestType), NUM_H, - hipMemcpyDeviceToHost)); - HIP_CHECK(hipDeviceSynchronize()); - // Validating the result - REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(E_d)); - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamDestroy(stream)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, false); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Device to Host") { + Memcpy2DDeviceToHostShell( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, stream), stream); + } +#endif + SECTION("Device to Device") { + SECTION("Peer access disabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, stream), stream); } - } else { - SUCCEED("skipping the testcases as numDevices < 2"); - } -} - -/* - * This testcase verifies D2D functionality of hipMemcpyParam2DAsync API - * Input: Intializing "A_d" device variable with "C_h" host variable - * Output: "A_d" device variable to "E_d" device variable - * - * Validating the result by copying "E_d" to "A_h" and checking - * it with the initalized data "C_h". 
- * - */ -TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char, - int, float, double, long double) { - CHECK_IMAGE_SUPPORT - - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - // Allocating and Initializing the data - HIP_CHECK(hipSetDevice(0)); - TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - - int peerAccess = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - if (!peerAccess) { - SUCCEED("Skipped the test as there is no peer access"); - } else { - HIP_CHECK(hipSetDevice(1)); - TestType *E_d; - size_t pitch_E; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), - &pitch_E, width, NUM_H)); - - // Initializing A_d with C_h - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, - NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice)); - - // Device to Device - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = E_d; - desc.dstDevice = hipDeviceptr_t(E_d); - desc.dstPitch = pitch_E; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Copying the result E_d to A_h host variable - HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, - NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - 
HIP_CHECK(hipStreamDestroy(stream)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, false); + SECTION("Peer access enabled") { + Memcpy2DDeviceToDeviceShell( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, stream), stream); } - } else { - SUCCEED("skipping the testcases as numDevices < 2"); + } + SECTION("Host to Device") { + Memcpy2DHostToDeviceShell( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, stream), stream); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Host to Host") { + Memcpy2DHostToHostShell( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, stream), stream); + } +#endif +} + +TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Synchronization_Behavior") { + using namespace std::placeholders; + + constexpr bool async = true; + + HIP_CHECK(hipDeviceSynchronize()); + + SECTION("Host to Device") { + Memcpy2DHtoDSyncBehavior( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, nullptr), false); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Device to Pageable Host") { + Memcpy2DDtoHPageableSyncBehavior( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, nullptr), true); + } +#endif +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-236 + SECTION("Device to Pinned Host") { + Memcpy2DDtoHPinnedSyncBehavior( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, nullptr), false); + } +#endif + SECTION("Device to Device") { + Memcpy2DDtoDSyncBehavior( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, nullptr), false); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-233 + SECTION("Host to Host") { + Memcpy2DHtoHSyncBehavior( + std::bind(MemcpyParam2DAdapter(), _1, _2, _3, _4, _5, _6, _7, nullptr), true); + } +#endif +} + +TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Parameters") { + constexpr bool async = true; + Memcpy2DZeroWidthHeight(MemcpyParam2DAdapter()); +} 
+ +TEST_CASE("Unit_hipMemcpyParam2DAsync_Positive_Array") { + constexpr bool async = true; + SECTION("Array from/to Host") { + MemcpyParam2DArrayHostShell(MemcpyParam2DAdapter()); + } + SECTION("Array from/to Device") { + MemcpyParam2DArrayDeviceShell(MemcpyParam2DAdapter()); } } -/* - * This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API - * H2D case: - * Input: "C_h" host variable initialized with default data - * Output: "A_d" device variable - * - * D2H case: - * Input: "A_d" device variable from the previous output - * OutPut: "A_h" variable - * - * Validating the result by comparing "A_h" to "C_h" - */ -TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]", - char, int, float, double, long double) { - CHECK_IMAGE_SUPPORT +TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative_Parameters") { + constexpr bool async = true; - // 1 refers to pinned host memory and 0 refers - // to unpinned memory - auto memory_type = GENERATE(0, 1); - int numDevices = 0; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - // Allocating and Initializing the data - HIP_CHECK(hipSetDevice(0)); - TestType* A_h{nullptr}, *C_h{nullptr}, - *A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(TestType)}; - hipStream_t stream; + constexpr size_t cols = 128; + constexpr size_t rows = 128; - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - - // Based on memory type (pinned/unpinned) allocating memory - if (memory_type) { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, true); - } else { - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); + constexpr auto NegativeTests = [](void* dst, size_t dpitch, void* src, size_t spitch, + size_t width, size_t height, hipMemcpyKind kind) { + SECTION("dst == nullptr") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter()(static_cast(nullptr), dpitch, src, + spitch, width, 
height, kind), + hipErrorInvalidValue); } - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - int peerAccess = 0; - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - if (!peerAccess) { - SUCCEED("Skipped the test as there is no peer access"); - } else { - // Host to Device - hip_Memcpy2D desc = {}; - HIP_CHECK(hipStreamCreate(&stream)); - desc.srcMemoryType = hipMemoryTypeHost; - desc.srcHost = C_h; - desc.srcDevice = hipDeviceptr_t(C_h); - desc.srcPitch = width; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = A_d; - desc.dstDevice = hipDeviceptr_t(A_d); - desc.dstPitch = pitch_A; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Device to Host - memset(&desc, 0x0, sizeof(hip_Memcpy2D)); - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W*sizeof(TestType); - desc.Height = NUM_H; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - - // Validating the result - REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamDestroy(stream)); - if (memory_type) { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, true); - } else { - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, nullptr, C_h, false); - } + SECTION("src == nullptr") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter()(dst, dpitch, static_cast(nullptr), + spitch, width, height, kind), + hipErrorInvalidValue); } - } else { - SUCCEED("skipping the testcases as numDevices < 2"); - } -} -/* - * This testcase verifies the extent validation scenarios - */ 
-TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") { - CHECK_IMAGE_SUPPORT + SECTION("dstPitch < WidthInBytes") { + HIP_CHECK_ERROR( + MemcpyParam2DAdapter()(dst, width - 1, src, spitch, width, height, kind), + hipErrorInvalidValue); + } + SECTION("srcPitch < WidthInBytes") { + HIP_CHECK_ERROR( + MemcpyParam2DAdapter()(dst, dpitch, src, width - 1, width, height, kind), + hipErrorInvalidValue); + } + SECTION("dpitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(MemcpyParam2DAdapter()(dst, static_cast(attr) + 1, src, spitch, + width, height, kind), + hipErrorInvalidValue); + } + SECTION("spitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + HIP_CHECK_ERROR(MemcpyParam2DAdapter()(dst, dpitch, src, static_cast(attr) + 1, + width, height, kind), + hipErrorInvalidValue); + } +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-237 + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter(make_hipExtent(spitch - width + 1, 0, 0))( + dst, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter(make_hipExtent(0, 0, 0), + make_hipExtent(dpitch - width + 1, 0, 0))( + dst, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } + SECTION("srcY out of bounds") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter(make_hipExtent(0, 1, 0))(dst, dpitch, src, spitch, + width, height, kind), + hipErrorInvalidValue); + } + SECTION("dstY out of bounds") { + HIP_CHECK_ERROR(MemcpyParam2DAdapter(make_hipExtent(0, 0, 0), make_hipExtent(0, 1, 0))( + dst, dpitch, src, spitch, width, height, kind), + hipErrorInvalidValue); + } +#endif +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-235 + SECTION("Invalid stream") { + StreamGuard stream_guard(Streams::created); + 
HIP_CHECK(hipStreamDestroy(stream_guard.stream())); + HIP_CHECK_ERROR(MemcpyParam2DAdapter()(dst, dpitch, src, spitch, width, height, kind, + stream_guard.stream()), + hipErrorContextIsDestroyed); + } +#endif + }; - HIP_CHECK(hipSetDevice(0)); - char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, - * A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(char)}; - constexpr auto memsetval{100}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating and Initializing the data - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, nullptr, &C_h, - width*NUM_H, false); - HipTest::initArrays(nullptr, nullptr, nullptr, - &B_h, nullptr, nullptr, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); - HipTest::setDefaultData(NUM_W*NUM_H, B_h, nullptr, nullptr); - HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - - // Device to Host - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; - - SECTION("Destination Pitch is 0") { - desc.dstPitch = 0; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + SECTION("Host to device") { + LinearAllocGuard2D device_alloc(cols, rows); + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(device_alloc.ptr(), device_alloc.pitch(), host_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyHostToDevice); } - SECTION("Source Pitch is 0") { - desc.srcPitch = 0; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + SECTION("Device to host") { + LinearAllocGuard2D device_alloc(cols, rows); + 
LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, device_alloc.pitch() * rows); + NegativeTests(host_alloc.ptr(), device_alloc.pitch(), device_alloc.ptr(), device_alloc.pitch(), + device_alloc.width(), device_alloc.height(), hipMemcpyDeviceToHost); } - SECTION("Height is 0") { - desc.Height = 0; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + SECTION("Host to host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, cols * rows * sizeof(int)); + NegativeTests(dst_alloc.ptr(), cols * sizeof(int), src_alloc.ptr(), cols * sizeof(int), + cols * sizeof(int), rows, hipMemcpyHostToHost); } - SECTION("Width is 0") { - desc.Height = 0; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); - HIP_CHECK(hipStreamSynchronize(stream)); - REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + SECTION("Device to device") { + LinearAllocGuard2D src_alloc(cols, rows); + LinearAllocGuard2D dst_alloc(cols, rows); + NegativeTests(dst_alloc.ptr(), dst_alloc.pitch(), src_alloc.ptr(), src_alloc.pitch(), + dst_alloc.width(), dst_alloc.height(), hipMemcpyDeviceToDevice); } - - // DeAllocating the Memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamDestroy(stream)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); -} - -/* - * This testcase verifies the negative scenarios - */ -TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") { - CHECK_IMAGE_SUPPORT - - HIP_CHECK(hipSetDevice(0)); - float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, - * A_d{nullptr}; - size_t pitch_A; - size_t width{NUM_W * sizeof(float)}; - constexpr auto memsetval{100}; - hipStream_t stream; - HIP_CHECK(hipStreamCreate(&stream)); - - // Allocating and Initializing the data - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), - &pitch_A, width, NUM_H)); - 
HipTest::initArrays(nullptr, nullptr, nullptr, - &A_h, &B_h, &C_h, - width*NUM_H, false); - HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); - HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); - - // Device to Host - hip_Memcpy2D desc = {}; - desc.srcMemoryType = hipMemoryTypeDevice; - desc.srcHost = A_d; - desc.srcDevice = hipDeviceptr_t(A_d); - desc.srcPitch = pitch_A; - desc.dstMemoryType = hipMemoryTypeHost; - desc.dstHost = A_h; - desc.dstDevice = hipDeviceptr_t(A_h); - desc.dstPitch = width; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; - - SECTION("Null Pointer to Source Device Pointer") { - desc.srcDevice = hipDeviceptr_t(nullptr); - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); - } - - SECTION("Null Pointer to Destination Device Pointer") { - memset(&desc, 0x0, sizeof(hip_Memcpy2D)); - desc.srcMemoryType = hipMemoryTypeHost; - desc.srcHost = A_h; - desc.srcDevice = hipDeviceptr_t(A_h); - desc.srcPitch = width; - desc.dstMemoryType = hipMemoryTypeDevice; - desc.dstHost = A_d; - desc.dstDevice = hipDeviceptr_t(nullptr); - desc.dstPitch = pitch_A; - desc.WidthInBytes = NUM_W; - desc.Height = NUM_H; - - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); - } - - SECTION("Null Pointer to both Src & Dst Device Pointer") { - desc.srcDevice = hipDeviceptr_t(nullptr); - desc.dstDevice = hipDeviceptr_t(nullptr); - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); - } - - SECTION("Width > src/dest pitches") { - desc.WidthInBytes = pitch_A+1; - REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); - } - - // DeAllocating the memory - HIP_CHECK(hipFree(A_d)); - HIP_CHECK(hipStreamSynchronize(stream)); - HIP_CHECK(hipStreamDestroy(stream)); - HipTest::freeArrays(nullptr, nullptr, nullptr, - A_h, B_h, C_h, false); -} +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync_old.cc new file mode 
100644 index 0000000000..763ef9185d --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2DAsync_old.cc @@ -0,0 +1,441 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testfile verifies the following scenarios of hipMemcpyParam2DAsync API +1. Negative Scenarios +2. Extent Validation Scenarios +3. D2D copy for different datatypes +4. H2D and D2H copy for different datatypes +5.
Device context change scenario where memory allocated in one GPU + stream created in another GPU +*/ + +#include +#include + +static constexpr size_t NUM_W{10}; +static constexpr size_t NUM_H{10}; +/* + * This testcase verifies D2D functionality of hipMemcpyParam2DAsync API + * Where Memory is allocated in GPU-0 and stream is created in GPU-1 + * + * Input: Intializing "A_d" device variable with "C_h" host variable + * Output: "A_d" device variable to "E_d" device variable + * + * Validating the result by copying "E_d" to "A_h" and checking + * it with the initalized data "C_h". + * + */ +TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice", + "[hipMemcpyParam2DAsync]", char, float, int, double, long double) { + CHECK_IMAGE_SUPPORT + + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + // Allocating and Initializing the data + HIP_CHECK(hipSetDevice(0)); + TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + int peerAccess = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + if (!peerAccess) { + SUCCEED("Skipped the test as there is no peer access"); + } else { + TestType *E_d{nullptr}; + size_t pitch_E; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), + &pitch_E, width, NUM_H)); + + // Initalizing A_d with C_h + HIP_CHECK(hipSetDevice(1)); + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width, + NUM_W*sizeof(TestType), NUM_H, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Device to Device + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = 
hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = E_d; + desc.dstDevice = hipDeviceptr_t(E_d); + desc.dstPitch = pitch_E; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Copying the result E_d to A_h host variable + HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, + NUM_W*sizeof(TestType), NUM_H, + hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + // Validating the result + REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(E_d)); + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamDestroy(stream)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + } + } else { + SUCCEED("skipping the testcases as numDevices < 2"); + } +} + +/* + * This testcase verifies D2D functionality of hipMemcpyParam2DAsync API + * Input: Intializing "A_d" device variable with "C_h" host variable + * Output: "A_d" device variable to "E_d" device variable + * + * Validating the result by copying "E_d" to "A_h" and checking + * it with the initalized data "C_h". 
+ * + */ +TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-D2D", "[hipMemcpyParam2DAsync]", char, + int, float, double, long double) { + CHECK_IMAGE_SUPPORT + + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + // Allocating and Initializing the data + HIP_CHECK(hipSetDevice(0)); + TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + + int peerAccess = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + if (!peerAccess) { + SUCCEED("Skipped the test as there is no peer access"); + } else { + HIP_CHECK(hipSetDevice(1)); + TestType *E_d; + size_t pitch_E; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), + &pitch_E, width, NUM_H)); + + // Initializing A_d with C_h + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, + NUM_W*sizeof(TestType), NUM_H, hipMemcpyHostToDevice)); + + // Device to Device + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = E_d; + desc.dstDevice = hipDeviceptr_t(E_d); + desc.dstPitch = pitch_E; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Copying the result E_d to A_h host variable + HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, + NUM_W*sizeof(TestType), NUM_H, hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + 
HIP_CHECK(hipStreamDestroy(stream)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + } + } else { + SUCCEED("skipping the testcases as numDevices < 2"); + } +} + +/* + * This testcase verifies H2D & D2H functionality of hipMemcpyParam2DAsync API + * H2D case: + * Input: "C_h" host variable initialized with default data + * Output: "A_d" device variable + * + * D2H case: + * Input: "A_d" device variable from the previous output + * OutPut: "A_h" variable + * + * Validating the result by comparing "A_h" to "C_h" + */ +TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-H2D-D2H", "[hipMemcpyParam2DAsync]", + char, int, float, double, long double) { + CHECK_IMAGE_SUPPORT + + // 1 refers to pinned host memory and 0 refers + // to unpinned memory + auto memory_type = GENERATE(0, 1); + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + // Allocating and Initializing the data + HIP_CHECK(hipSetDevice(0)); + TestType* A_h{nullptr}, *C_h{nullptr}, + *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + hipStream_t stream; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Based on memory type (pinned/unpinned) allocating memory + if (memory_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + } + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + int peerAccess = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + if (!peerAccess) { + SUCCEED("Skipped the test as there is no peer access"); + } else { + // Host to Device + hip_Memcpy2D desc = {}; + HIP_CHECK(hipStreamCreate(&stream)); + desc.srcMemoryType = hipMemoryTypeHost; + desc.srcHost = C_h; + desc.srcDevice = hipDeviceptr_t(C_h); + desc.srcPitch = width; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost 
= A_d; + desc.dstDevice = hipDeviceptr_t(A_d); + desc.dstPitch = pitch_A; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Device to Host + memset(&desc, 0x0, sizeof(hip_Memcpy2D)); + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamDestroy(stream)); + if (memory_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + } + } + } else { + SUCCEED("skipping the testcases as numDevices < 2"); + } +} +/* + * This testcase verifies the extent validation scenarios + */ +TEST_CASE("Unit_hipMemcpyParam2DAsync_ExtentValidation") { + CHECK_IMAGE_SUPPORT + + HIP_CHECK(hipSetDevice(0)); + char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, + * A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(char)}; + constexpr auto memsetval{100}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating and Initializing the data + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + HipTest::initArrays(nullptr, nullptr, nullptr, + &B_h, nullptr, nullptr, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + 
HipTest::setDefaultData(NUM_W*NUM_H, B_h, nullptr, nullptr); + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + + // Device to Host + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W; + desc.Height = NUM_H; + + SECTION("Destination Pitch is 0") { + desc.dstPitch = 0; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + } + + SECTION("Source Pitch is 0") { + desc.srcPitch = 0; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + } + + SECTION("Height is 0") { + desc.Height = 0; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + } + + SECTION("Width is 0") { + desc.WidthInBytes = 0; // zero width, not Height: A_h is then compared against the untouched reference B_h + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) == hipSuccess); + HIP_CHECK(hipStreamSynchronize(stream)); + REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + } + + // DeAllocating the Memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamDestroy(stream)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); +} + +/* + * This testcase verifies the negative scenarios + */ +TEST_CASE("Unit_hipMemcpyParam2DAsync_Negative") { + CHECK_IMAGE_SUPPORT + + HIP_CHECK(hipSetDevice(0)); + float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, + * A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(float)}; + constexpr auto memsetval{100}; + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + + // Allocating and Initializing the data + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H,
A_h, B_h, C_h); + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + + // Device to Host + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W; + desc.Height = NUM_H; + + SECTION("Null Pointer to Source Device Pointer") { + desc.srcDevice = hipDeviceptr_t(nullptr); + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); + } + + SECTION("Null Pointer to Destination Device Pointer") { + memset(&desc, 0x0, sizeof(hip_Memcpy2D)); + desc.srcMemoryType = hipMemoryTypeHost; + desc.srcHost = A_h; + desc.srcDevice = hipDeviceptr_t(A_h); + desc.srcPitch = width; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = A_d; + desc.dstDevice = hipDeviceptr_t(nullptr); + desc.dstPitch = pitch_A; + desc.WidthInBytes = NUM_W; + desc.Height = NUM_H; + + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); + } + + SECTION("Null Pointer to both Src & Dst Device Pointer") { + desc.srcDevice = hipDeviceptr_t(nullptr); + desc.dstDevice = hipDeviceptr_t(nullptr); + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); + } + + SECTION("Width > src/dest pitches") { + desc.WidthInBytes = pitch_A+1; + REQUIRE(hipMemcpyParam2DAsync(&desc, stream) != hipSuccess); + } + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); +} diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D_old.cc new file mode 100644 index 0000000000..3a35a1a16e --- /dev/null +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyParam2D_old.cc @@ -0,0 +1,337 @@ +/* +Copyright (c) 2021 Advanced 
Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testfile verifies the following scenarios of hipMemcpyParam2D API +1. Negative Scenarios +2. Extent Validation Scenarios +3. D2D copy for different datatypes +4. H2D and D2H copy for different datatypes +*/ + +#include +#include + +static constexpr size_t NUM_W{10}; +static constexpr size_t NUM_H{10}; +/* + * This testcase verifies D2D functionality of hipMemcpyParam2D API + * Input: Initializing "A_d" device variable with "C_h" host variable + * Output: "A_d" device variable to "E_d" device variable + * + * Validating the result by copying "E_d" to "A_h" and checking + * it with the initialized data "C_h".
+ * + */ +TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-D2D", "[hipMemcpyParam2D]", char, float, int, + double, long double) { + CHECK_IMAGE_SUPPORT + + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + // Initialize and Allocating Memory + HIP_CHECK(hipSetDevice(0)); + TestType* A_h{nullptr}, *C_h{nullptr}, *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + + int peerAccess = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + if (!peerAccess) { + SUCCEED("Skipped the test as there is no peer access"); + } else { + HIP_CHECK(hipSetDevice(1)); + char *E_d; + size_t pitch_E; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&E_d), + &pitch_E, width, NUM_H)); + + // Initalizing A_d with C_h + HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, + NUM_W * sizeof(TestType), NUM_H, hipMemcpyHostToDevice)); + + // Device to Device + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = E_d; + desc.dstDevice = hipDeviceptr_t(E_d); + desc.dstPitch = pitch_E; + desc.WidthInBytes = NUM_W * sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + + // Copying E_d to A_h + HIP_CHECK(hipMemcpy2D(A_h, width, E_d, pitch_E, + NUM_W * sizeof(TestType), NUM_H, + hipMemcpyDeviceToHost)); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); + + // DeAllocating the memory + HIP_CHECK(hipFree(A_d)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + } + } else { + SUCCEED("skipping the testcases as numDevices < 2"); + } +} 
+ +/* + * This testcase verifies H2D & D2H functionality of hipMemcpyParam2D API + * H2D case: + * Input: "C_h" host variable initialized with default data + * Output: "A_d" device variable + * + * D2H case: + * Input: "A_d" device variable from the previous output + * OutPut: "A_h" variable + * + * Validating the result by comparing "A_h" to "C_h" + */ +TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2D_multiDevice-H2D-D2H", "[hipMemcpyParam2D]", char, float, + int, double, long double) { + CHECK_IMAGE_SUPPORT + + // 1 refers to pinned host memory and 0 refers + // to unpinned memory + auto memory_type = GENERATE(0, 1); + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + HIP_CHECK(hipSetDevice(0)); + + // Initialize and Allocating Memory + TestType* A_h{nullptr}, *C_h{nullptr}, + *A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(TestType)}; + + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + + // Based on memory type (pinned/unpinned) allocating memory + if (memory_type) { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, true); + } else { + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + } + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + int peerAccess = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + if (!peerAccess) { + SUCCEED("Skipped the test as there is no peer access"); + } else { + // Host to Device + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeHost; + desc.srcHost = C_h; + desc.srcDevice = hipDeviceptr_t(C_h); + desc.srcPitch = width; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = A_d; + desc.dstDevice = hipDeviceptr_t(A_d); + desc.dstPitch = pitch_A; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + + // Device to Host + memset(&desc, 0x0, sizeof(hip_Memcpy2D)); + 
desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W*sizeof(TestType); + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + + // Validating the result + REQUIRE(HipTest::checkArray(A_h, C_h, NUM_W, NUM_H) == true); + + // DeAllocating the Memory + HIP_CHECK(hipFree(A_d)); + if (memory_type) { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, true); + } else { + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, nullptr, C_h, false); + } + } + } else { + SUCCEED("skipping the testcases as numDevices < 2"); + } +} +/* + * This testcase verifies the extent validation scenarios + */ +TEST_CASE("Unit_hipMemcpyParam2D_ExtentValidation") { + CHECK_IMAGE_SUPPORT + + // Allocating memory and Initializing the data + HIP_CHECK(hipSetDevice(0)); + char* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, + * A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(char)}; + constexpr auto memsetval{100}; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, + width*NUM_H, false); + HipTest::initArrays(nullptr, nullptr, nullptr, + &B_h, nullptr, nullptr, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, nullptr, C_h); + HipTest::setDefaultData(NUM_W*NUM_H, B_h, nullptr, nullptr); + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + + // Device to Host + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W; + desc.Height = 
NUM_H; + + SECTION("Destination Pitch is 0") { + desc.dstPitch = 0; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + } + + SECTION("Source Pitch is 0") { + desc.srcPitch = 0; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + } + + SECTION("Height is 0") { + desc.Height = 0; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + } + + SECTION("Width is 0") { + desc.WidthInBytes = 0; + REQUIRE(hipMemcpyParam2D(&desc) == hipSuccess); + REQUIRE(HipTest::checkArray(A_h, B_h, NUM_W, NUM_H) == true); + } + + // DeAllocating the Memory + HIP_CHECK(hipFree(A_d)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); +} + +/* + * This testcase verifies the negative scenarios + */ +TEST_CASE("Unit_hipMemcpyParam2D_Negative") { + CHECK_IMAGE_SUPPORT + + HIP_CHECK(hipSetDevice(0)); + + // Allocating and Initializing the data + float* A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}, + * A_d{nullptr}; + size_t pitch_A; + size_t width{NUM_W * sizeof(float)}; + constexpr auto memsetval{100}; + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), + &pitch_A, width, NUM_H)); + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, &B_h, &C_h, + width*NUM_H, false); + HipTest::setDefaultData(NUM_W*NUM_H, A_h, B_h, C_h); + HIP_CHECK(hipMemset2D(A_d, pitch_A, memsetval, NUM_W, NUM_H)); + + hip_Memcpy2D desc = {}; + desc.srcMemoryType = hipMemoryTypeDevice; + desc.srcHost = A_d; + desc.srcDevice = hipDeviceptr_t(A_d); + desc.srcPitch = pitch_A; + desc.dstMemoryType = hipMemoryTypeHost; + desc.dstHost = A_h; + desc.dstDevice = hipDeviceptr_t(A_h); + desc.dstPitch = width; + desc.WidthInBytes = NUM_W; + desc.Height = NUM_H; + + SECTION("Null Pointer to Source Device Pointer") { + desc.srcDevice = hipDeviceptr_t(nullptr); + REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); + } + + SECTION("Null Pointer to Destination Device Pointer") { + memset(&desc, 0x0, sizeof(hip_Memcpy2D)); + desc.srcMemoryType = 
hipMemoryTypeHost; + desc.srcHost = A_h; + desc.srcDevice = hipDeviceptr_t(A_h); + desc.srcPitch = width; + desc.dstMemoryType = hipMemoryTypeDevice; + desc.dstHost = A_d; + desc.dstDevice = hipDeviceptr_t(nullptr); + desc.dstPitch = pitch_A; + desc.WidthInBytes = NUM_W; + desc.Height = NUM_H; + REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); + } + + SECTION("Null Pointer to both Src & Dst Device Pointer") { + desc.srcDevice = hipDeviceptr_t(nullptr); + desc.dstDevice = hipDeviceptr_t(nullptr); + REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); + } + + SECTION("Width > src/dest pitches") { + desc.WidthInBytes = pitch_A+1; + REQUIRE(hipMemcpyParam2D(&desc) != hipSuccess); + } + + // DeAllocating the Memory + HIP_CHECK(hipFree(A_d)); + HipTest::freeArrays(nullptr, nullptr, nullptr, + A_h, B_h, C_h, false); +} diff --git a/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh index 990dd9dcdc..9f489a2e23 100644 --- a/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh +++ b/projects/hip-tests/catch/unit/memory/memcpy2d_tests_common.hh @@ -22,10 +22,13 @@ THE SOFTWARE. 
#pragma once +#include + #include #include #include #include +#include template void Memcpy2DDeviceToHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { @@ -322,4 +325,212 @@ void Memcpy2DZeroWidthHeight(F memcpy_func, const hipStream_t stream = nullptr) } ArrayFindIfNot(dst_alloc.ptr(), static_cast(42), alloc_size); } +} + +constexpr auto MemTypeHost() { +#if HT_AMD + return hipMemoryTypeHost; +#else + return CU_MEMORYTYPE_HOST; +#endif +} + +constexpr auto MemTypeDevice() { +#if HT_AMD + return hipMemoryTypeDevice; +#else + return CU_MEMORYTYPE_DEVICE; +#endif +} + +constexpr auto MemTypeArray() { +#if HT_AMD + return hipMemoryTypeArray; +#else + return CU_MEMORYTYPE_ARRAY; +#endif +} + +constexpr auto MemTypeUnified() { +#if HT_AMD + return hipMemoryTypeUnified; +#else + return CU_MEMORYTYPE_UNIFIED; +#endif +} + +using PtrVariant = std::variant; + +template +constexpr auto MemcpyParam2DAdapter(const hipExtent src_offset = {0, 0, 0}, + const hipExtent dst_offset = {0, 0, 0}) { + return [=](PtrVariant dst, size_t dpitch, PtrVariant src, size_t spitch, size_t width, + size_t height, hipMemcpyKind kind, hipStream_t stream = nullptr) { + hip_Memcpy2D parms = {}; + + if (std::holds_alternative(dst)) { + parms.dstMemoryType = MemTypeArray(); + parms.dstArray = std::get(dst); + } else { + parms.dstPitch = dpitch; + auto ptr = std::get(dst); + switch (kind) { + case hipMemcpyDeviceToHost: + case hipMemcpyHostToHost: + parms.dstMemoryType = MemTypeHost(); + parms.dstHost = ptr; + break; + case hipMemcpyDeviceToDevice: + case hipMemcpyHostToDevice: + parms.dstMemoryType = MemTypeDevice(); + parms.dstDevice = reinterpret_cast(ptr); + break; + case hipMemcpyDefault: + parms.dstMemoryType = MemTypeUnified(); + parms.dstDevice = reinterpret_cast(ptr); + break; + default: + assert(false); + } + } + + if (std::holds_alternative(src)) { + parms.srcMemoryType = MemTypeArray(); + parms.srcArray = std::get(src); + } else { + parms.srcPitch = spitch; + auto ptr = 
std::get(src); + switch (kind) { + case hipMemcpyDeviceToHost: + case hipMemcpyDeviceToDevice: + parms.srcMemoryType = MemTypeDevice(); + parms.srcDevice = reinterpret_cast(ptr); + break; + case hipMemcpyHostToDevice: + case hipMemcpyHostToHost: + parms.srcMemoryType = MemTypeHost(); + parms.srcHost = ptr; + break; + case hipMemcpyDefault: + parms.srcMemoryType = MemTypeUnified(); + parms.srcDevice = reinterpret_cast(ptr); + break; + default: + assert(false); + } + } + + parms.WidthInBytes = width; + parms.Height = height; + parms.srcXInBytes = src_offset.width; + parms.srcY = src_offset.height; + parms.dstXInBytes = dst_offset.width; + parms.dstY = dst_offset.height; + + if constexpr (async) { + return hipMemcpyParam2DAsync(&parms, stream); + } else { + return hipMemcpyParam2D(&parms); + } + }; +} + +template +void MemcpyParam2DArrayHostShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + constexpr hipExtent extent{127 * sizeof(int), 128, 1}; + + LinearAllocGuard src_host(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_host(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + + DrvArrayAllocGuard src_array(extent); + DrvArrayAllocGuard dst_array(extent); + + const auto f = [](size_t x, size_t y, size_t z) { + constexpr auto width_logical = extent.width / sizeof(int); + return z * width_logical * extent.height + y * width_logical + x; + }; + PitchedMemorySet(src_host.ptr(), extent.width, extent.width / sizeof(int), extent.height, + extent.depth, f); + + // Host -> Array + HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_host.ptr(), extent.width, extent.width, + extent.height, hipMemcpyHostToDevice, kernel_stream)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + // Array -> Array + HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height, + hipMemcpyDeviceToDevice, kernel_stream)); + if constexpr 
(should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + // Array -> Host + HIP_CHECK(memcpy_func(dst_host.ptr(), extent.width, dst_array.ptr(), 0, extent.width, + extent.height, hipMemcpyDeviceToHost, kernel_stream)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + PitchedMemoryVerify(dst_host.ptr(), extent.width, extent.width / sizeof(int), extent.height, + extent.depth, f); +} + +template +void MemcpyParam2DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream = nullptr) { + constexpr hipExtent extent{127 * sizeof(int), 128, 1}; + + LinearAllocGuard host_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + + DrvArrayAllocGuard src_array(extent); + DrvArrayAllocGuard dst_array(extent); + + LinearAllocGuard3D src_device(extent); + LinearAllocGuard3D dst_device(extent); + + const dim3 threads_per_block(32, 32); + const dim3 blocks(src_device.width_logical() / threads_per_block.x + 1, + src_device.height() / threads_per_block.y + 1, src_device.depth()); + Iota<<>>(src_device.ptr(), src_device.pitch(), + src_device.width_logical(), src_device.height(), + src_device.depth()); + HIP_CHECK(hipGetLastError()); + + // Device -> Array + HIP_CHECK(memcpy_func(src_array.ptr(), 0, src_device.ptr(), src_device.pitch(), extent.width, + extent.height, hipMemcpyDeviceToDevice, kernel_stream)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + // Array -> Array + HIP_CHECK(memcpy_func(dst_array.ptr(), 0, src_array.ptr(), 0, extent.width, extent.height, + hipMemcpyDeviceToDevice, kernel_stream)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + // Array -> Device + HIP_CHECK(memcpy_func(dst_device.ptr(), dst_device.pitch(), dst_array.ptr(), 0, extent.width, + extent.height, hipMemcpyDeviceToDevice, kernel_stream)); + if constexpr (should_synchronize) { + 
HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + HIP_CHECK(memcpy_func(host_alloc.ptr(), extent.width, dst_device.ptr(), dst_device.pitch(), + extent.width, extent.height, hipMemcpyDeviceToHost, kernel_stream)); + if constexpr (should_synchronize) { + HIP_CHECK(hipStreamSynchronize(kernel_stream)); + } + + const auto f = [](size_t x, size_t y, size_t z) { + constexpr auto width_logical = extent.width / sizeof(int); + return z * width_logical * extent.height + y * width_logical + x; + }; + PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height, + extent.depth, f); } \ No newline at end of file From 4194470cd67f2aa130c46124783c21c062722e46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:34:33 +0100 Subject: [PATCH 10/19] EXSWHTEC-217 - Implement new and update existing tests for the hipGraph*MemcpyNode family of APIs #48 Change-Id: Iae7ac9c855ba6e3288257e99e49f8b16cebb1bac [ROCm/hip-tests commit: cefbaed5cf253ff47d4df5a6c673395483615e25] --- .../catch/hipTestMain/config/config_amd_linux | 1 + .../hipTestMain/config/config_amd_windows | 1 + .../catch/include/memcpy1d_tests_common.hh | 6 +- .../catch/include/memcpy3d_tests_common.hh | 1 + .../hip-tests/catch/unit/graph/CMakeLists.txt | 4 + .../catch/unit/graph/hipGraphAddMemcpyNode.cc | 775 ++++++------------ .../unit/graph/hipGraphAddMemcpyNode_old.cc | 576 +++++++++++++ .../graph/hipGraphExecMemcpyNodeSetParams.cc | 470 +++++------ .../hipGraphExecMemcpyNodeSetParams_old.cc | 263 ++++++ .../unit/graph/hipGraphMemcpyNodeGetParams.cc | 264 ++---- .../graph/hipGraphMemcpyNodeGetParams_old.cc | 236 ++++++ .../unit/graph/hipGraphMemcpyNodeSetParams.cc | 432 +++++----- .../graph/hipGraphMemcpyNodeSetParams_old.cc | 219 +++++ 13 files changed, 2090 insertions(+), 1158 deletions(-) create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode_old.cc create mode 
100644 projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams_old.cc create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams_old.cc create mode 100644 projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams_old.cc diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index d24f21a772..8d89762cf3 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -127,6 +127,7 @@ "Unit_deviceAllocation_InOneThread_AccessInAllThreads", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", + "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipMemAddressReserve_Negative", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 9a780a8730..623b25dfba 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -219,6 +219,7 @@ "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipMemAddressReserve_AlignmentTest", + "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", "Unit_hipMemCreate_MapNonContiguousChunks", "Unit_hipMemMap_MapPartialPhysicalMem", diff --git a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh index d357d992cb..ec8f991fa7 100644 --- a/projects/hip-tests/catch/include/memcpy1d_tests_common.hh +++ b/projects/hip-tests/catch/include/memcpy1d_tests_common.hh @@ -141,6 +141,7 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t 
kernel_stream = int can_access_peer = 0; HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); if (!can_access_peer) { + INFO("Peer access cannot be enabled between devices " << src_device << " " << dst_device); return; } HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); @@ -168,8 +169,8 @@ void MemcpyDeviceToDeviceShell(F memcpy_func, const hipStream_t kernel_stream = HIP_CHECK( hipMemcpy(result.host_ptr(), dst_allocation.ptr(), allocation_size, hipMemcpyDeviceToHost)); if constexpr (enable_peer_access) { - // If we've gotten this far, EnablePeerAccess must have succeeded, so we only need to check this - // condition + // If we've gotten this far, EnablePeerAccess must have succeeded, so we + // only need to check this condition HIP_CHECK(hipDeviceDisablePeerAccess(dst_device)); } @@ -237,7 +238,6 @@ void MemcpySyncBehaviorCheck(F memcpy_func, const bool should_sync, LaunchDelayKernel(std::chrono::milliseconds{100}, kernel_stream); HIP_CHECK(memcpy_func()); if (should_sync) { - HIP_CHECK(hipStreamSynchronize(kernel_stream)); HIP_CHECK(hipStreamQuery(kernel_stream)); } else { HIP_CHECK_ERROR(hipStreamQuery(kernel_stream), hipErrorNotReady); diff --git a/projects/hip-tests/catch/include/memcpy3d_tests_common.hh b/projects/hip-tests/catch/include/memcpy3d_tests_common.hh index 4978a264ac..84d0fc517b 100644 --- a/projects/hip-tests/catch/include/memcpy3d_tests_common.hh +++ b/projects/hip-tests/catch/include/memcpy3d_tests_common.hh @@ -23,6 +23,7 @@ THE SOFTWARE. 
#pragma once #pragma clang diagnostic ignored "-Wmissing-field-initializers" #pragma clang diagnostic ignored "-Wunused-lambda-capture" + #include #include diff --git a/projects/hip-tests/catch/unit/graph/CMakeLists.txt b/projects/hip-tests/catch/unit/graph/CMakeLists.txt index 2d0a3c50ff..cef5d2f5b7 100644 --- a/projects/hip-tests/catch/unit/graph/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/graph/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_SRC hipGraph.cc hipSimpleGraphWithKernel.cc hipGraphAddMemcpyNode.cc + hipGraphAddMemcpyNode_old.cc hipGraphClone.cc hipGraphInstantiateWithFlags.cc hipGraphAddHostNode.cc @@ -72,6 +73,7 @@ set(TEST_SRC hipGraphEventRecordNodeSetEvent.cc hipGraphEventWaitNodeGetEvent.cc hipGraphExecMemcpyNodeSetParams.cc + hipGraphExecMemcpyNodeSetParams_old.cc hipStreamBeginCapture.cc hipGraphAddMemcpyNode1D_old.cc hipGraphAddMemcpyNode1D.cc @@ -101,7 +103,9 @@ set(TEST_SRC hipGraphAddMemsetNode.cc hipGraphAddKernelNode.cc hipGraphMemcpyNodeGetParams.cc + hipGraphMemcpyNodeGetParams_old.cc hipGraphMemcpyNodeSetParams.cc + hipGraphMemcpyNodeSetParams_old.cc hipGraphKernelNodeGetParams.cc hipGraphKernelNodeSetParams.cc hipGraphExecKernelNodeSetParams.cc diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode.cc index 6b8e34bed8..57b5d3f0b6 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode.cc @@ -1,576 +1,287 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : Negative -1) Pass pGraphNode as nullptr and check if api returns error. -2) When graph is un-initialized argument(skipping graph creation), - api should return error code. -3) Passing pDependencies as nullptr, api should return success. -4) When numDependencies is max(size_t) and pDependencies is not valid ptr, - api expected to return error code. -5) When pDependencies is nullptr, but numDependencies is non-zero, - api expected to return error. -6) When pCopyParams is nullptr, api expected to return error code. -7) API expects atleast one memcpy src pointer to be set. 
- When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both - are nullptr, api expected to return error code. -8) API expects atleast one memcpy dst pointer to be set. - When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both - are nullptr, api expected to return error code. -9) Passing different element size for hipMemcpy3DParms::srcArray and - hipMemcpy3DParms::dstArray is expected to return error code. - -Testcase Scenarios : Functional -1) Add memcpy node to graph and verify memcpy operation is success for all - memcpy kinds(H2D, D2H and D2D). - Memcpy nodes are added and assigned to default device. -2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and - verify the results. -3) Add memcpy node to graph and verify memcpy operation is success for all - memcpy kinds(H2D, D2H and D2D). - Memcpy nodes are added and assigned to Peer device. -4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and - verify the results. -5) Create two host pointers, copy the data between them by the api - hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost. - Validate the output. -*/ +#include #include -#include -#include -#include +#include +#include -#define ZSIZE 32 -#define YSIZE 32 -#define XSIZE 32 +#include "graph_tests_common.hh" -/* Test verifies hipGraphAddMemcpyNode API Negative scenarios. +/** + * @addtogroup hipGraphAddMemcpyNode hipGraphAddMemcpyNode + * @{ + * @ingroup GraphTest + * `hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const + * hipGraphNode_t *pDependencies, size_t numDependencies, const hipMemcpy3DParms + * *pCopyParams)` - Creates a memcpy node and adds it to a graph */ -TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") { - CHECK_IMAGE_SUPPORT +/** + * Test Description + * ------------------------ + * - Verify basic API behavior. 
A Memcpy node is created with parameters set according to the + * test run, after which the graph is run and the memcpy results are verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. + * Test source + * ------------------------ + * - unit/graph/hipGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphAddMemcpyNode_Positive_Basic") { + constexpr bool async = false; - constexpr int width{10}, height{10}, depth{10}; - hipArray_t devArray1; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparams; - uint32_t size = width * height * depth * sizeof(int); - hipGraph_t graph; - hipGraphNode_t memcpyNode; - hipStream_t streamForGraph; - hipError_t ret; + SECTION("Device to host") { Memcpy3DDeviceToHostShell(Memcpy3DWrapper); } - int *hData = reinterpret_cast(malloc(size)); - int *hOutputData = reinterpret_cast(malloc(size)); + SECTION("Device to host with default kind") { + Memcpy3DDeviceToHostShell(Memcpy3DWrapper); + } - REQUIRE(hData != nullptr); - REQUIRE(hOutputData != nullptr); - memset(hData, 0, size); - memset(hOutputData, 0, size); + SECTION("Host to device") { Memcpy3DHostToDeviceShell(Memcpy3DWrapper); } - HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphCreate(&graph, 0)); + SECTION("Host to device with default kind") { + Memcpy3DHostToDeviceShell(Memcpy3DWrapper); + } - // Initialize host buffer - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } + SECTION("Host to host") { Memcpy3DHostToHostShell(Memcpy3DWrapper); } + + SECTION("Host to host with default kind") { + Memcpy3DHostToHostShell(Memcpy3DWrapper); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { 
+ Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); } } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, - make_hipExtent(width, height, depth), hipArrayDefault)); - - // Host to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width , height, depth); - myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - - SECTION("Pass pGraphNode as nullptr") { - ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("When graph is nullptr") { - ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Passing pDependencies as nullptr") { - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); - REQUIRE(hipSuccess == ret); - } - SECTION("When numDependencies is max and pDependencies is not valid ptr") { - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, - nullptr, INT_MAX, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("When pDependencies is nullptr, but numDependencies is non-zero") { - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass pCopyParams as nullptr") { - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("API expects atleast one memcpy src pointer to be set") { - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = 
make_hipExtent(width , height, depth); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("API expects atleast one memcpy dst pointer to be set") { - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width , height, depth); - myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparams.kind = hipMemcpyHostToDevice; - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Passing different element size for hipMemcpy3DParms::srcArray" - "and hipMemcpy3DParms::dstArray") { - myparams.srcArray = devArray1; - hipArray_t devArray2; - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, - make_hipExtent(width+1, height+1, depth+1), hipArrayDefault)); - myparams.dstArray = devArray2; - ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); - REQUIRE(hipErrorInvalidValue == ret); - HIP_CHECK(hipFreeArray(devArray2)); - } - - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray1)); - free(hData); - free(hOutputData); -} - -static void validateMemcpyNode3DArray(bool peerAccess = false) { - constexpr int width{10}, height{10}, depth{10}; - hipArray_t devArray1, devArray2; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparams; - uint32_t size = width * height * depth * sizeof(int); - hipGraph_t graph; - hipGraphNode_t memcpyNode; - std::vector dependencies; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipSetDevice(0)); - int *hData = reinterpret_cast(malloc(size)); - int *hOutputData = reinterpret_cast(malloc(size)); - - REQUIRE(hData != nullptr); - 
REQUIRE(hOutputData != nullptr); - memset(hData, 0, size); - memset(hOutputData, 0, size); - - HIP_CHECK(hipStreamCreate(&streamForGraph)); - - // Initialize host buffer - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); } } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, - make_hipExtent(width, height, depth), hipArrayDefault)); - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, - make_hipExtent(width, height, depth), hipArrayDefault)); - HIP_CHECK(hipGraphCreate(&graph, 0)); + SECTION("Array from/to Host") { Memcpy3DArrayHostShell(Memcpy3DWrapper); } - // For peer access test, Memory is allocated on device(0) - // while memcpy nodes are allocated and assigned to peer device(1) - if (peerAccess) { - HIP_CHECK(hipSetDevice(1)); - } - - // Host to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width , height, depth); - myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); - dependencies.push_back(memcpyNode); - - // Device to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.srcArray = devArray1; - myparams.dstArray = devArray2; - myparams.extent = make_hipExtent(width, height, depth); - myparams.kind = 
hipMemcpyDeviceToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - dependencies.clear(); - dependencies.push_back(memcpyNode); - - // Device to host - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int), - width, height); - myparams.srcArray = devArray2; - myparams.extent = make_hipExtent(width, height, depth); - myparams.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Check result - HipTest::checkArray(hData, hOutputData, width, height, depth); - - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray1)); - HIP_CHECK(hipFreeArray(devArray2)); - free(hData); - free(hOutputData); -} - -static void validateMemcpyNode2DArray(bool peerAccess = false) { - int harray2D[YSIZE][XSIZE]{}; - int harray2Dres[YSIZE][XSIZE]{}; - constexpr int width{XSIZE}, height{YSIZE}; - hipArray_t devArray1, devArray2; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparams; - hipGraph_t graph; - hipGraphNode_t memcpyNode; - std::vector dependencies; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreate(&streamForGraph)); - // Initialize 2D object - for (int i = 0; i < YSIZE; i++) { - for (int j = 0; j < XSIZE; j++) { - harray2D[i][j] = i + j + 1; - } - } - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, 
formatKind); - // Allocate 2D device array by passing depth(0) - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, - make_hipExtent(width, height, 0), hipArrayDefault)); - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, - make_hipExtent(width, height, 0), hipArrayDefault)); - HIP_CHECK(hipGraphCreate(&graph, 0)); - - // For peer access test, Memory is allocated on device(0) - // while memcpy nodes are allocated and assigned to peer device(1) - if (peerAccess) { - HIP_CHECK(hipSetDevice(1)); - } - - // Host to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, height, 1); - myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int), - width, height); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); - dependencies.push_back(memcpyNode); - - // Device to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.srcArray = devArray1; - myparams.dstArray = devArray2; - myparams.extent = make_hipExtent(width, height, 1); - myparams.kind = hipMemcpyDeviceToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - dependencies.clear(); - dependencies.push_back(memcpyNode); - - // Device to host - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, height, 1); - myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int), - width, height); - myparams.srcArray = devArray2; - myparams.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - - // 
Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Validate result - for (int i = 0; i < YSIZE; i++) { - for (int j = 0; j < XSIZE; j++) { - if (harray2D[i][j] != harray2Dres[i][j]) { - INFO("harray2D: " << harray2D[i][j] << "harray2Dres: " - << harray2Dres[i][j] << " mismatch at (i,j) : " << i << j); - REQUIRE(false); - } - } - } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray1)); - HIP_CHECK(hipFreeArray(devArray2)); -} - -static void validateMemcpyNode1DArray(bool peerAccess = false) { - int harray1D[XSIZE]{}; - int harray1Dres[XSIZE]{}; - constexpr int width{XSIZE}; - hipArray_t devArray1, devArray2; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparams; - hipGraph_t graph; - hipGraphNode_t memcpyNode; - std::vector dependencies; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipSetDevice(0)); - HIP_CHECK(hipStreamCreate(&streamForGraph)); - // Initialize 1D object - for (int i = 0; i < XSIZE; i++) { - harray1D[i] = i + 1; - } - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - // Allocate 1D device array by passing depth(0), height(0) - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, - make_hipExtent(width, 0, 0), hipArrayDefault)); - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, - make_hipExtent(width, 0, 0), hipArrayDefault)); - HIP_CHECK(hipGraphCreate(&graph, 0)); - - // For peer access test, Memory is allocated on device(0) - // while memcpy nodes are allocated and assigned to peer device(1) - if (peerAccess) { - HIP_CHECK(hipSetDevice(1)); - } - - // Host to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 
0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, 1, 1); - myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int), - width, 1); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); - dependencies.push_back(memcpyNode); - - // Device to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.srcArray = devArray1; - myparams.dstArray = devArray2; - myparams.extent = make_hipExtent(width, 1, 1); - myparams.kind = hipMemcpyDeviceToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - dependencies.clear(); - dependencies.push_back(memcpyNode); - - // Device to host - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, 1, 1); - myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int), - width, 1); - myparams.srcArray = devArray2; - myparams.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Validate result - for (int i = 0; i < XSIZE; i++) { - if (harray1D[i] != harray1Dres[i]) { - INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i] - << " mismatch at : " << i); - REQUIRE(false); - } - } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray1)); - HIP_CHECK(hipFreeArray(devArray2)); 
+#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220 + SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell(Memcpy3DWrapper); } +#endif } /** - * Basic Functional Tests adds memcpy nodes of types H2D, D2D and D2H to graph - * and verifies execution sequence by launching graph on default device. - * Tests also verify memcpy node addition with 1D, 2D and 3D objects. + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# kind is an invalid enum value + * -# count is zero + * -# count is larger than dst allocation size + * -# count is larger than src allocation size + * Test source + * ------------------------ + * - unit/graph/hipGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 */ -TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") { - CHECK_IMAGE_SUPPORT +TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative_Parameters") { + using namespace std::placeholders; - SECTION("Memcpy with 3D array on default device") { - validateMemcpyNode3DArray(); + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + GraphAddNodeCommonNegativeTests(std::bind(hipGraphAddMemcpyNode, _1, _2, _3, _4, ¶ms), + graph); + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto params = 
GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("dst_ptr.pitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidPitchValue); + } + + SECTION("src_ptr.pitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidPitchValue); + } + + SECTION("dst_ptr.pitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("src_ptr.pitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("extent.width + dst_pos.x > dst_ptr.pitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - 
extent.width + 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("extent.width + src_pos.x > src_ptr.pitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("dst_pos.y out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("src_pos.y out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("dst_pos.z out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.z = 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("src_pos.z out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.z = 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidValue); + } + + SECTION("Invalid MemcpyKind") { + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, + static_cast(-1)); + HIP_CHECK_ERROR(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms), + hipErrorInvalidMemcpyDirection); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + 
LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice); } - SECTION("Memcpy with 2D array on default device") { - validateMemcpyNode2DArray(); + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost); } - SECTION("Memcpy with 1D array on default device") { - validateMemcpyNode1DArray(); + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice); } } - -/** - * Peer access tests adds and assigns memcpy nodes of types H2D, D2D and D2H - * to peer device. Memory allocations happen on device(0) and memcpy operations - * are performed from device(1). 
- * Tests also verify memcpy node addition with 1D, 2D and 3D objects. - */ -TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") { - CHECK_IMAGE_SUPPORT - - int numDevices{}, peerAccess{}; - HIP_CHECK(hipGetDeviceCount(&numDevices)); - if (numDevices > 1) { - HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); - } - - if (!peerAccess) { - WARN("Skipping test as peer device access is not found!"); - return; - } - - SECTION("Memcpy with 3D array on peer device") { - validateMemcpyNode3DArray(true); - } - - SECTION("Memcpy with 2D array on peer device") { - validateMemcpyNode2DArray(true); - } - - SECTION("Memcpy with 1D array on peer device") { - validateMemcpyNode1DArray(true); - } -} -/* -* Create two host pointers, copy the data between them by the api -* hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost. -* Validate the output. -*/ -TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") { - constexpr size_t size = 1024; - size_t numW = size * sizeof(int); - // Host Vectors - std::vector A_h(numW); - std::vector B_h(numW); - // Initialization - std::iota(A_h.begin(), A_h.end(), 0); - std::fill_n(B_h.begin(), size, 0); - - hipGraph_t graph; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - hipGraphNode_t memcpyH2H; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipStreamCreate(&streamForGraph)); - - hipMemcpy3DParms myparms{}; - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1); - myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1); - myparms.extent = make_hipExtent(numW, 1, 1); - myparms.kind = hipMemcpyHostToHost; - - // Host to Host - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr, - 0, &myparms)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - 
HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - - // Validation - REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0); -} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode_old.cc new file mode 100644 index 0000000000..6b8e34bed8 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphAddMemcpyNode_old.cc @@ -0,0 +1,576 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : Negative +1) Pass pGraphNode as nullptr and check if api returns error. +2) When graph is un-initialized argument(skipping graph creation), + api should return error code. +3) Passing pDependencies as nullptr, api should return success. 
+4) When numDependencies is max(size_t) and pDependencies is not valid ptr, + api expected to return error code. +5) When pDependencies is nullptr, but numDependencies is non-zero, + api expected to return error. +6) When pCopyParams is nullptr, api expected to return error code. +7) API expects atleast one memcpy src pointer to be set. + When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both + are nullptr, api expected to return error code. +8) API expects atleast one memcpy dst pointer to be set. + When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both + are nullptr, api expected to return error code. +9) Passing different element size for hipMemcpy3DParms::srcArray and + hipMemcpy3DParms::dstArray is expected to return error code. + +Testcase Scenarios : Functional +1) Add memcpy node to graph and verify memcpy operation is success for all + memcpy kinds(H2D, D2H and D2D). + Memcpy nodes are added and assigned to default device. +2) Perform memcpy operation for 1D, 2D and 3D arrays on default device and + verify the results. +3) Add memcpy node to graph and verify memcpy operation is success for all + memcpy kinds(H2D, D2H and D2D). + Memcpy nodes are added and assigned to Peer device. +4) Perform memcpy operation for 1D, 2D and 3D arrays on Peer device and + verify the results. +5) Create two host pointers, copy the data between them by the api + hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost. + Validate the output. +*/ + +#include +#include +#include +#include + +#define ZSIZE 32 +#define YSIZE 32 +#define XSIZE 32 + +/* Test verifies hipGraphAddMemcpyNode API Negative scenarios. 
+ */ + +TEST_CASE("Unit_hipGraphAddMemcpyNode_Negative") { + CHECK_IMAGE_SUPPORT + + constexpr int width{10}, height{10}, depth{10}; + hipArray_t devArray1; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparams; + uint32_t size = width * height * depth * sizeof(int); + hipGraph_t graph; + hipGraphNode_t memcpyNode; + hipStream_t streamForGraph; + hipError_t ret; + + int *hData = reinterpret_cast(malloc(size)); + int *hOutputData = reinterpret_cast(malloc(size)); + + REQUIRE(hData != nullptr); + REQUIRE(hOutputData != nullptr); + memset(hData, 0, size); + memset(hOutputData, 0, size); + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Initialize host buffer + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, + make_hipExtent(width, height, depth), hipArrayDefault)); + + // Host to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width , height, depth); + myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparams.dstArray = devArray1; + myparams.kind = hipMemcpyHostToDevice; + + SECTION("Pass pGraphNode as nullptr") { + ret = hipGraphAddMemcpyNode(nullptr, graph, nullptr, 0, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("When graph is nullptr") { + ret = hipGraphAddMemcpyNode(&memcpyNode, nullptr, nullptr, 0, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Passing pDependencies as nullptr") { + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); + REQUIRE(hipSuccess == ret); + } 
+ SECTION("When numDependencies is max and pDependencies is not valid ptr") { + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, + nullptr, INT_MAX, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("When pDependencies is nullptr, but numDependencies is non-zero") { + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 11, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass pCopyParams as nullptr") { + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, nullptr); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("API expects atleast one memcpy src pointer to be set") { + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width , height, depth); + myparams.dstArray = devArray1; + myparams.kind = hipMemcpyHostToDevice; + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("API expects atleast one memcpy dst pointer to be set") { + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width , height, depth); + myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparams.kind = hipMemcpyHostToDevice; + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Passing different element size for hipMemcpy3DParms::srcArray" + "and hipMemcpy3DParms::dstArray") { + myparams.srcArray = devArray1; + hipArray_t devArray2; + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, + make_hipExtent(width+1, height+1, depth+1), hipArrayDefault)); + myparams.dstArray = devArray2; + ret = hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams); + REQUIRE(hipErrorInvalidValue == ret); + HIP_CHECK(hipFreeArray(devArray2)); + } 
+ + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray1)); + free(hData); + free(hOutputData); +} + +static void validateMemcpyNode3DArray(bool peerAccess = false) { + constexpr int width{10}, height{10}, depth{10}; + hipArray_t devArray1, devArray2; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparams; + uint32_t size = width * height * depth * sizeof(int); + hipGraph_t graph; + hipGraphNode_t memcpyNode; + std::vector dependencies; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipSetDevice(0)); + int *hData = reinterpret_cast(malloc(size)); + int *hOutputData = reinterpret_cast(malloc(size)); + + REQUIRE(hData != nullptr); + REQUIRE(hOutputData != nullptr); + memset(hData, 0, size); + memset(hOutputData, 0, size); + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + // Initialize host buffer + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, + make_hipExtent(width, height, depth), hipArrayDefault)); + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, + make_hipExtent(width, height, depth), hipArrayDefault)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // For peer access test, Memory is allocated on device(0) + // while memcpy nodes are allocated and assigned to peer device(1) + if (peerAccess) { + HIP_CHECK(hipSetDevice(1)); + } + + // Host to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width , height, depth); + myparams.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparams.dstArray 
= devArray1; + myparams.kind = hipMemcpyHostToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); + dependencies.push_back(memcpyNode); + + // Device to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.srcArray = devArray1; + myparams.dstArray = devArray2; + myparams.extent = make_hipExtent(width, height, depth); + myparams.kind = hipMemcpyDeviceToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + dependencies.clear(); + dependencies.push_back(memcpyNode); + + // Device to host + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int), + width, height); + myparams.srcArray = devArray2; + myparams.extent = make_hipExtent(width, height, depth); + myparams.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Check result + HipTest::checkArray(hData, hOutputData, width, height, depth); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray1)); + HIP_CHECK(hipFreeArray(devArray2)); + free(hData); + free(hOutputData); +} + +static void validateMemcpyNode2DArray(bool peerAccess = false) { + int harray2D[YSIZE][XSIZE]{}; + int harray2Dres[YSIZE][XSIZE]{}; + constexpr int width{XSIZE}, height{YSIZE}; + hipArray_t devArray1, devArray2; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; 
+ hipMemcpy3DParms myparams; + hipGraph_t graph; + hipGraphNode_t memcpyNode; + std::vector dependencies; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Initialize 2D object + for (int i = 0; i < YSIZE; i++) { + for (int j = 0; j < XSIZE; j++) { + harray2D[i][j] = i + j + 1; + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + // Allocate 2D device array by passing depth(0) + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, + make_hipExtent(width, height, 0), hipArrayDefault)); + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, + make_hipExtent(width, height, 0), hipArrayDefault)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // For peer access test, Memory is allocated on device(0) + // while memcpy nodes are allocated and assigned to peer device(1) + if (peerAccess) { + HIP_CHECK(hipSetDevice(1)); + } + + // Host to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, height, 1); + myparams.srcPtr = make_hipPitchedPtr(harray2D, width * sizeof(int), + width, height); + myparams.dstArray = devArray1; + myparams.kind = hipMemcpyHostToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); + dependencies.push_back(memcpyNode); + + // Device to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.srcArray = devArray1; + myparams.dstArray = devArray2; + myparams.extent = make_hipExtent(width, height, 1); + myparams.kind = hipMemcpyDeviceToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + dependencies.clear(); + dependencies.push_back(memcpyNode); + + // Device to host + 
memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, height, 1); + myparams.dstPtr = make_hipPitchedPtr(harray2Dres, width * sizeof(int), + width, height); + myparams.srcArray = devArray2; + myparams.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate result + for (int i = 0; i < YSIZE; i++) { + for (int j = 0; j < XSIZE; j++) { + if (harray2D[i][j] != harray2Dres[i][j]) { + INFO("harray2D: " << harray2D[i][j] << "harray2Dres: " + << harray2Dres[i][j] << " mismatch at (i,j) : " << i << j); + REQUIRE(false); + } + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray1)); + HIP_CHECK(hipFreeArray(devArray2)); +} + +static void validateMemcpyNode1DArray(bool peerAccess = false) { + int harray1D[XSIZE]{}; + int harray1Dres[XSIZE]{}; + constexpr int width{XSIZE}; + hipArray_t devArray1, devArray2; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparams; + hipGraph_t graph; + hipGraphNode_t memcpyNode; + std::vector dependencies; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Initialize 1D object + for (int i = 0; i < XSIZE; i++) { + harray1D[i] = i + 1; + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + // Allocate 1D device array by passing depth(0), height(0) + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, + 
make_hipExtent(width, 0, 0), hipArrayDefault)); + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, + make_hipExtent(width, 0, 0), hipArrayDefault)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // For peer access test, Memory is allocated on device(0) + // while memcpy nodes are allocated and assigned to peer device(1) + if (peerAccess) { + HIP_CHECK(hipSetDevice(1)); + } + + // Host to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, 1, 1); + myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int), + width, 1); + myparams.dstArray = devArray1; + myparams.kind = hipMemcpyHostToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); + dependencies.push_back(memcpyNode); + + // Device to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.srcArray = devArray1; + myparams.dstArray = devArray2; + myparams.extent = make_hipExtent(width, 1, 1); + myparams.kind = hipMemcpyDeviceToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + dependencies.clear(); + dependencies.push_back(memcpyNode); + + // Device to host + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, 1, 1); + myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int), + width, 1); + myparams.srcArray = devArray2; + myparams.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, 
streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate result + for (int i = 0; i < XSIZE; i++) { + if (harray1D[i] != harray1Dres[i]) { + INFO("harray1D: " << harray1D[i] << " harray1Dres: " << harray1Dres[i] + << " mismatch at : " << i); + REQUIRE(false); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray1)); + HIP_CHECK(hipFreeArray(devArray2)); +} + +/** + * Basic Functional Tests adds memcpy nodes of types H2D, D2D and D2H to graph + * and verifies execution sequence by launching graph on default device. + * Tests also verify memcpy node addition with 1D, 2D and 3D objects. + */ +TEST_CASE("Unit_hipGraphAddMemcpyNode_BasicFunctional") { + CHECK_IMAGE_SUPPORT + + SECTION("Memcpy with 3D array on default device") { + validateMemcpyNode3DArray(); + } + + SECTION("Memcpy with 2D array on default device") { + validateMemcpyNode2DArray(); + } + + SECTION("Memcpy with 1D array on default device") { + validateMemcpyNode1DArray(); + } +} + +/** + * Peer access tests adds and assigns memcpy nodes of types H2D, D2D and D2H + * to peer device. Memory allocations happen on device(0) and memcpy operations + * are performed from device(1). + * Tests also verify memcpy node addition with 1D, 2D and 3D objects. 
+ */ +TEST_CASE("Unit_hipGraphAddMemcpyNode_PeerAccessFunctional") { + CHECK_IMAGE_SUPPORT + + int numDevices{}, peerAccess{}; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices > 1) { + HIP_CHECK(hipDeviceCanAccessPeer(&peerAccess, 1, 0)); + } + + if (!peerAccess) { + WARN("Skipping test as peer device access is not found!"); + return; + } + + SECTION("Memcpy with 3D array on peer device") { + validateMemcpyNode3DArray(true); + } + + SECTION("Memcpy with 2D array on peer device") { + validateMemcpyNode2DArray(true); + } + + SECTION("Memcpy with 1D array on peer device") { + validateMemcpyNode1DArray(true); + } +} +/* +* Create two host pointers, copy the data between them by the api +* hipGraphAddMemcpyNode with data transfer kind hipMemcpyHostToHost. +* Validate the output. +*/ +TEST_CASE("Unit_hipGraphAddMemcpyNode_HostToHost") { + constexpr size_t size = 1024; + size_t numW = size * sizeof(int); + // Host Vectors + std::vector A_h(numW); + std::vector B_h(numW); + // Initialization + std::iota(A_h.begin(), A_h.end(), 0); + std::fill_n(B_h.begin(), size, 0); + + hipGraph_t graph; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + hipGraphNode_t memcpyH2H; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipStreamCreate(&streamForGraph)); + + hipMemcpy3DParms myparms{}; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.srcPtr = make_hipPitchedPtr(A_h.data(), numW, numW, 1); + myparms.dstPtr = make_hipPitchedPtr(B_h.data(), numW, numW, 1); + myparms.extent = make_hipExtent(numW, 1, 1); + myparms.kind = hipMemcpyHostToHost; + + // Host to Host + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyH2H, graph, nullptr, + 0, &myparms)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + 
HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + + // Validation + REQUIRE(memcmp(A_h.data(), B_h.data(), numW) == 0); +} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams.cc index 3fe07bae58..7520d88d03 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,247 +20,248 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : -Functional- -1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect. -Negative- -1) Pass hGraphExec as nullptr and verify api returns error code. -2) Pass node as nullptr and verify api returns error code. -3) Pass pNodeParams as nullptr and verify api returns error code. -4) Pass pNodeParams as empty structure object and verify api returns error code. 
-5) API expects atleast one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both are nullptr, api expected to return error code. -6) API expects atleast one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both are nullptr, api expected to return error code. -7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code. -8) Pass node of different graph and verify api returns error code. -*/ +#include #include -#include +#include +#include +#include -/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios. +#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphExecMemcpyNodeSetParams hipGraphExecMemcpyNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipMemcpy3DParms + * *pNodeParams)` - Sets the parameters for a memcpy node in the given graphExec */ -TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") { - CHECK_IMAGE_SUPPORT - constexpr int width{10}, height{10}, depth{10}; - hipArray_t devArray, devArray2; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparms; - hipError_t ret; - int* hData; - uint32_t size = width * height * depth * sizeof(int); - hData = reinterpret_cast(malloc(size)); - REQUIRE(hData != nullptr); - memset(hData, 0, size); - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and the setting them to the correct values in the executable graph. The + * executable graph is run and the results of the memcpy verified. 
The test is run for all possible + * memcpy directions, with both the corresponding memcpy kind and hipMemcpyDefault, as well as half + * page and full page allocation sizes. Test source + * ------------------------ + * - unit/graph/hipGraphExecMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Positive_Basic") { + constexpr auto f = [](void* dst, void* src, size_t count, hipMemcpyKind direction) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + const auto offset_src = reinterpret_cast(src) + 1; + const auto offset_dst = reinterpret_cast(dst) + 1; + auto params = + GetMemcpy3DParms(make_hipPitchedPtr(offset_dst, 0, count - 1, 0), make_hipPos(0, 0, 0), + make_hipPitchedPtr(offset_src, 0, count - 1, 0), make_hipPos(0, 0, 0), + make_hipExtent(count - 1, 1, 1), direction); + HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms)); + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, count, 0), make_hipPos(0, 0, 0), + make_hipPitchedPtr(src, 0, count, 0), make_hipPos(0, 0, 0), + make_hipExtent(count, 1, 1), direction); + HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; + }; + +#if HT_NVIDIA + MemcpyWithDirectionCommonTests(f); +#else + using namespace std::placeholders; + + SECTION("Device to host") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToHost)); + } + + SECTION("Host to device") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyHostToDevice)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") 
{ + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDeviceToDevice)); } } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, - height, depth), hipArrayDefault)); - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1, - height+1, depth+1), hipArrayDefault)); - memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.extent = make_hipExtent(width , height, depth); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparms.dstArray = devArray; - myparms.kind = hipMemcpyHostToDevice; - hipGraph_t graph; - hipGraphNode_t memcpyNode; - hipGraphExec_t graphExec; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); - - // Instantiate the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); - SECTION("Pass hGraphExec as nullptr") { - ret = hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass node as nullptr") { - ret = hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass pNodeParams as nullptr") { - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass pNodeParams as empty structure object") { - hipMemcpy3DParms temp{}; - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("API expects atleast one memcpy src pointer to be set") { - hipMemcpy3DParms temp; - memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); - temp.srcPos = 
make_hipPos(0, 0, 0); - temp.dstPos = make_hipPos(0, 0, 0); - temp.extent = make_hipExtent(width , height, depth); - temp.dstArray = devArray; - temp.kind = hipMemcpyHostToDevice; - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("API expects atleast one memcpy dst pointer to be set") { - hipMemcpy3DParms temp; - memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); - temp.srcPos = make_hipPos(0, 0, 0); - temp.dstPos = make_hipPos(0, 0, 0); - temp.extent = make_hipExtent(width , height, depth); - temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - temp.kind = hipMemcpyHostToDevice; - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Passing different element size for hipMemcpy3DParms::srcArray" - "and hipMemcpy3DParms::dstArray") { - hipMemcpy3DParms temp; - memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); - temp.srcPos = make_hipPos(0, 0, 0); - temp.dstPos = make_hipPos(0, 0, 0); - temp.extent = make_hipExtent(width , height, depth); - temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - temp.kind = hipMemcpyHostToDevice; - temp.srcArray = devArray; - temp.dstArray = devArray2; - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Check with other graph node") { - hipGraph_t graph1; - hipGraphNode_t memcpyNode1; - HIP_CHECK(hipGraphCreate(&graph1, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms)); - ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - HIP_CHECK(hipGraphDestroy(graph1)); - } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipFreeArray(devArray)); - HIP_CHECK(hipFreeArray(devArray2)); - free(hData); -} - -/* Test verifies 
hipGraphExecMemcpyNodeSetParams API Functional scenarios. - */ -TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") { - CHECK_IMAGE_SUPPORT - - constexpr int XSIZE = 1024; - int harray1D[XSIZE]{}; - int harray1Dres[XSIZE]{}; - constexpr int width{XSIZE}; - hipArray_t devArray1, devArray2; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparams; - hipGraph_t graph; - hipGraphNode_t memcpyNode; - std::vector dependencies; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipStreamCreate(&streamForGraph)); - // Initialize 1D object - for (int i = 0; i < XSIZE; i++) { - harray1D[i] = i + 1; - } - - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - // Allocate 1D device array by passing depth(0), height(0) - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, - make_hipExtent(width, 0, 0), hipArrayDefault)); - HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, - make_hipExtent(width, 0, 0), hipArrayDefault)); - HIP_CHECK(hipGraphCreate(&graph, 0)); - - // Host to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, 1, 1); - myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int), - width, 1); - myparams.dstArray = devArray1; - myparams.kind = hipMemcpyHostToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); - dependencies.push_back(memcpyNode); - - // Device to Device - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.srcArray = devArray1; - myparams.dstArray = devArray2; - myparams.extent = make_hipExtent(width, 1, 1); - myparams.kind = hipMemcpyDeviceToDevice; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - 
dependencies.clear(); - dependencies.push_back(memcpyNode); - - // Device to host - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, 1, 1); - myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int), - width, 1); - myparams.srcArray = devArray2; - myparams.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparams)); - - // Instantiate the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - - int harray1Dupdate[XSIZE]{}; - hipArray_t devArray3; - HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc, - make_hipExtent(width, 0, 0), hipArrayDefault)); - - // D2H updated with different pointer harray1Dres -> harray1Dupdate - memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); - myparams.srcPos = make_hipPos(0, 0, 0); - myparams.dstPos = make_hipPos(0, 0, 0); - myparams.extent = make_hipExtent(width, 1, 1); - myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int), - width, 1); - myparams.srcArray = devArray2; - myparams.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams)); - - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Validate result - for (int i = 0; i < XSIZE; i++) { - if (harray1D[i] != harray1Dupdate[i]) { - INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " << - harray1Dupdate[i] << " mismatch at : " << i); - REQUIRE(false); + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + SECTION("Peer access disabled") { + MemcpyDeviceToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); } } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - 
HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray1)); - HIP_CHECK(hipFreeArray(devArray2)); + +// Disabled on AMD due to defect - EXSWHTEC-209 +#if 0 + SECTION("Host to host") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyHostToHost)); + } + + SECTION("Host to host with default kind") { + MemcpyHostToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } +#endif + +// Disabled on AMD due to defect - EXSWHTEC-210 +#if 0 + SECTION("Device to host with default kind") { + MemcpyDeviceToHostShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } + + SECTION("Host to device with default kind") { + MemcpyHostToDeviceShell(std::bind(f, _1, _2, _3, hipMemcpyDefault)); + } +#endif + +#endif } + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# pGraphExec is nullptr + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# kind is an invalid enum value + * -# count is zero + * -# count is larger than dst allocation size + * -# count is larger than src allocation size + * Test source + * ------------------------ + * - unit/graph/hipGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + int src[2] = {}, dst[2] = {}; + + auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, sizeof(dst), 0), make_hipPos(0, 0, 0), + make_hipPitchedPtr(src, 0, sizeof(src), 0), make_hipPos(0, 0, 0), + make_hipExtent(sizeof(dst), 1, 1), hipMemcpyDefault); + + hipGraphNode_t node = 
nullptr; + HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(nullptr, node, ¶ms), hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, nullptr, ¶ms), + hipErrorInvalidValue); + } + + auto f = [&](void* dst, void* src, size_t count, hipMemcpyKind kind) { + auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, count, 0), make_hipPos(0, 0, 0), + make_hipPitchedPtr(src, 0, count, 0), make_hipPos(0, 0, 0), + make_hipExtent(sizeof(dst), 1, 1), kind); + return hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms); + }; + MemcpyWithDirectionCommonNegativeTests(f, dst, src, sizeof(dst), hipMemcpyDefault); + + SECTION("count == 0") { + HIP_CHECK_ERROR( + hipGraphExecMemcpyNodeSetParams1D(graph_exec, node, dst, src, 0, hipMemcpyDefault), + hipErrorInvalidValue); + } + + SECTION("count larger than dst allocation size") { + LinearAllocGuard dev_dst(LinearAllocs::hipMalloc, sizeof(int)); + params.dstPtr = make_hipPitchedPtr(dev_dst.ptr(), 0, sizeof(int), 0); + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms), + hipErrorInvalidValue); + } + + SECTION("count larger than src allocation size") { + LinearAllocGuard dev_src(LinearAllocs::hipMalloc, sizeof(int)); + params.dstPtr = make_hipPitchedPtr(dev_src.ptr(), 0, sizeof(int), 0); + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms), + hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/** + * Test Description + * ------------------------ + * - Verify that memcpy direction cannot be altered in an executable graph. The test is run for + * all memcpy directions with appropriate memory allocations. 
+ * Test source + * ------------------------ + * - unit/graph/hipGraphExecMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative_Changing_Memcpy_Direction") { + int host; + LinearAllocGuard dev(LinearAllocs::hipMalloc, sizeof(int)); + + const auto [dir, src, dst] = + GENERATE_REF(std::make_tuple(hipMemcpyHostToHost, &host, &host), + std::make_tuple(hipMemcpyHostToDevice, &host, dev.ptr()), + std::make_tuple(hipMemcpyDeviceToHost, dev.ptr(), &host), + std::make_tuple(hipMemcpyDeviceToDevice, dev.ptr(), dev.ptr())); + + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + auto params = GetMemcpy3DParms(make_hipPitchedPtr(dst, 0, sizeof(int), 0), make_hipPos(0, 0, 0), + make_hipPitchedPtr(src, 0, sizeof(int), 0), make_hipPos(0, 0, 0), + make_hipExtent(sizeof(int), 1, 1), dir); + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, ¶ms)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + const auto set_dir = GENERATE(hipMemcpyHostToHost, hipMemcpyHostToDevice, hipMemcpyDeviceToHost, + hipMemcpyDeviceToDevice, hipMemcpyDefault); + if (dir == set_dir) { + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + return; + } + + params.kind = set_dir; + HIP_CHECK_ERROR(hipGraphExecMemcpyNodeSetParams(graph_exec, node, ¶ms), hipErrorInvalidValue); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams_old.cc new file mode 100644 index 0000000000..3fe07bae58 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphExecMemcpyNodeSetParams_old.cc @@ -0,0 +1,263 @@ +/* +Copyright (c) 2022 Advanced 
Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : +Functional- +1) Instantiate a graph with memcpy node, obtain executable graph and update the hipMemcpy3DParms node params with set. Make sure they are taking effect. +Negative- +1) Pass hGraphExec as nullptr and verify api returns error code. +2) Pass node as nullptr and verify api returns error code. +3) Pass pNodeParams as nullptr and verify api returns error code. +4) Pass pNodeParams as empty structure object and verify api returns error code. +5) API expects atleast one memcpy src pointer to be set. When hipMemcpy3DParms::srcArray and hipMemcpy3DParms::srcPtr.ptr both are nullptr, api expected to return error code. +6) API expects atleast one memcpy dst pointer to be set. When hipMemcpy3DParms::dstArray and hipMemcpy3DParms::dstPtr.ptr both are nullptr, api expected to return error code. 
+7) Passing different element size for hipMemcpy3DParms::srcArray and hipMemcpy3DParms::dstArray is expected to return error code. +8) Pass node of different graph and verify api returns error code. +*/ + +#include +#include + +/* Test verifies hipGraphExecMemcpyNodeSetParams API Negative scenarios. + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Negative") { + CHECK_IMAGE_SUPPORT + + constexpr int width{10}, height{10}, depth{10}; + hipArray_t devArray, devArray2; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparms; + hipError_t ret; + int* hData; + uint32_t size = width * height * depth * sizeof(int); + hData = reinterpret_cast(malloc(size)); + REQUIRE(hData != nullptr); + memset(hData, 0, size); + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, + height, depth), hipArrayDefault)); + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, make_hipExtent(width+1, + height+1, depth+1), hipArrayDefault)); + memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.extent = make_hipExtent(width , height, depth); + myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparms.dstArray = devArray; + myparms.kind = hipMemcpyHostToDevice; + + hipGraph_t graph; + hipGraphNode_t memcpyNode; + hipGraphExec_t graphExec; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); + + // Instantiate the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, NULL, NULL, 0)); + SECTION("Pass hGraphExec as nullptr") { + ret = 
hipGraphExecMemcpyNodeSetParams(nullptr, memcpyNode, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass node as nullptr") { + ret = hipGraphExecMemcpyNodeSetParams(graphExec, nullptr, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass pNodeParams as nullptr") { + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, nullptr); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass pNodeParams as empty structure object") { + hipMemcpy3DParms temp{}; + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("API expects atleast one memcpy src pointer to be set") { + hipMemcpy3DParms temp; + memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); + temp.srcPos = make_hipPos(0, 0, 0); + temp.dstPos = make_hipPos(0, 0, 0); + temp.extent = make_hipExtent(width , height, depth); + temp.dstArray = devArray; + temp.kind = hipMemcpyHostToDevice; + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("API expects atleast one memcpy dst pointer to be set") { + hipMemcpy3DParms temp; + memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); + temp.srcPos = make_hipPos(0, 0, 0); + temp.dstPos = make_hipPos(0, 0, 0); + temp.extent = make_hipExtent(width , height, depth); + temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + temp.kind = hipMemcpyHostToDevice; + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Passing different element size for hipMemcpy3DParms::srcArray" + "and hipMemcpy3DParms::dstArray") { + hipMemcpy3DParms temp; + memset(&temp, 0x0, sizeof(hipMemcpy3DParms)); + temp.srcPos = make_hipPos(0, 0, 0); + temp.dstPos = make_hipPos(0, 0, 0); + temp.extent = make_hipExtent(width , height, depth); + temp.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + temp.kind = 
hipMemcpyHostToDevice; + temp.srcArray = devArray; + temp.dstArray = devArray2; + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &temp); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Check with other graph node") { + hipGraph_t graph1; + hipGraphNode_t memcpyNode1; + HIP_CHECK(hipGraphCreate(&graph1, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode1, graph1, NULL, 0, &myparms)); + ret = hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode1, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + HIP_CHECK(hipGraphDestroy(graph1)); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipFreeArray(devArray)); + HIP_CHECK(hipFreeArray(devArray2)); + free(hData); +} + +/* Test verifies hipGraphExecMemcpyNodeSetParams API Functional scenarios. + */ +TEST_CASE("Unit_hipGraphExecMemcpyNodeSetParams_Functional") { + CHECK_IMAGE_SUPPORT + + constexpr int XSIZE = 1024; + int harray1D[XSIZE]{}; + int harray1Dres[XSIZE]{}; + constexpr int width{XSIZE}; + hipArray_t devArray1, devArray2; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparams; + hipGraph_t graph; + hipGraphNode_t memcpyNode; + std::vector dependencies; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + // Initialize 1D object + for (int i = 0; i < XSIZE; i++) { + harray1D[i] = i + 1; + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + // Allocate 1D device array by passing depth(0), height(0) + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc, + make_hipExtent(width, 0, 0), hipArrayDefault)); + HIP_CHECK(hipMalloc3DArray(&devArray2, &channelDesc, + make_hipExtent(width, 0, 0), hipArrayDefault)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Host to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + 
myparams.extent = make_hipExtent(width, 1, 1); + myparams.srcPtr = make_hipPitchedPtr(harray1D, width * sizeof(int), + width, 1); + myparams.dstArray = devArray1; + myparams.kind = hipMemcpyHostToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nullptr, 0, &myparams)); + dependencies.push_back(memcpyNode); + + // Device to Device + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.srcArray = devArray1; + myparams.dstArray = devArray2; + myparams.extent = make_hipExtent(width, 1, 1); + myparams.kind = hipMemcpyDeviceToDevice; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + dependencies.clear(); + dependencies.push_back(memcpyNode); + + // Device to host + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, 1, 1); + myparams.dstPtr = make_hipPitchedPtr(harray1Dres, width * sizeof(int), + width, 1); + myparams.srcArray = devArray2; + myparams.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparams)); + + // Instantiate the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + + int harray1Dupdate[XSIZE]{}; + hipArray_t devArray3; + HIP_CHECK(hipMalloc3DArray(&devArray3, &channelDesc, + make_hipExtent(width, 0, 0), hipArrayDefault)); + + // D2H updated with different pointer harray1Dres -> harray1Dupdate + memset(&myparams, 0x0, sizeof(hipMemcpy3DParms)); + myparams.srcPos = make_hipPos(0, 0, 0); + myparams.dstPos = make_hipPos(0, 0, 0); + myparams.extent = make_hipExtent(width, 1, 1); + myparams.dstPtr = make_hipPitchedPtr(harray1Dupdate, width * sizeof(int), + width, 1); + myparams.srcArray = devArray2; + myparams.kind = hipMemcpyDeviceToHost; + + 
HIP_CHECK(hipGraphExecMemcpyNodeSetParams(graphExec, memcpyNode, &myparams)); + + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Validate result + for (int i = 0; i < XSIZE; i++) { + if (harray1D[i] != harray1Dupdate[i]) { + INFO("harray1D: " << harray1D[i] << " harray1Dupdate: " << + harray1Dupdate[i] << " mismatch at : " << i); + REQUIRE(false); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray1)); + HIP_CHECK(hipFreeArray(devArray2)); +} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams.cc index 34a9ff7d48..752aae29c5 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -17,220 +20,69 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/** -Testcase Scenarios : -Negative - -1) Pass node as nullptr and verify api returns error code. -2) Pass un-initialize node and verify api returns error code. -3) Pass pNodeParams as nullptr and verify api returns error code. -Functional - -1) Create a graph, add Memcpy node to graph with desired node params. - Verify api fetches the node params mentioned while adding Memcpy node. -2) Set Memcpy node params with hipGraphMemcpyNodeSetParams, - now get the params and verify both are same. -*/ - +#include #include -#include +#include -#define SIZE 10 -#define UPDATESIZE 8 - -/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios. - */ -TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") { - CHECK_IMAGE_SUPPORT - - constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; - hipArray_t devArray; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparms; - int* hData; - uint32_t size = width * height * depth * sizeof(int); - hData = reinterpret_cast(malloc(size)); - REQUIRE(hData != nullptr); - memset(hData, 0, size); - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } - } - } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, - height, depth), hipArrayDefault)); - memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.extent = make_hipExtent(width , height, depth); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparms.dstArray = devArray; - myparms.kind = hipMemcpyHostToDevice; - - hipGraph_t graph; - hipError_t ret; - hipGraphNode_t memcpyNode; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, 
&myparms)); - - SECTION("Pass node as nullptr") { - ret = hipGraphMemcpyNodeGetParams(nullptr, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass un-initilize node") { - hipGraphNode_t memcpyNode_uninit{}; - ret = hipGraphMemcpyNodeGetParams(memcpyNode_uninit, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass GetNodeParams as nullptr") { - ret = hipGraphMemcpyNodeGetParams(memcpyNode, nullptr); - REQUIRE(hipErrorInvalidValue == ret); - } - HIP_CHECK(hipFreeArray(devArray)); - free(hData); - HIP_CHECK(hipGraphDestroy(graph)); -} - -/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios. +/** + * @addtogroup hipGraphMemcpyNodeGetParams hipGraphMemcpyNodeGetParams + * @{ + * @ingroup GraphTest + * `hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams)` - + * Gets a memcpy node's parameters + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipGraphMemcpyNodeSetParams_Positive_Basic */ -static bool compareHipPos(hipPos hPos1, hipPos hPos2) { - if ((hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) && (hPos1.z == hPos2.z)) - return true; - else - return false; -} -static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) { - if ((hExt1.width == hExt2.width) && (hExt1.height == hExt2.height) && - (hExt1.depth == hExt2.depth)) - return true; - else - return false; -} -static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) { - if ((reinterpret_cast(hpPtr1.ptr) == - reinterpret_cast(hpPtr2.ptr)) - && (hpPtr1.pitch == hpPtr2.pitch) - #if HT_AMD - && (hpPtr1.xsize == hpPtr2.xsize) - /* xsize check below is disabled on nvidia as xsize value - * is not being updated properly due to issue with CUDA api */ - #endif - && (hpPtr1.ysize == hpPtr2.ysize)) - return true; - else - return false; -} +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# pNodeParams is nullptr + * 
-# node is destroyed + * Test source + * ------------------------ + * - unit/graph/hipGraphMemcpyNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative_Parameters") { + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; -static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1, - hipMemcpy3DParms *mNode2) { - if (mNode1->srcArray != mNode2->srcArray) - return false; - if (!compareHipPos(mNode1->srcPos, mNode2->srcPos)) - return false; - if (!compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr)) - return false; - if (mNode1->dstArray != mNode2->dstArray) - return false; - if (!compareHipPos(mNode1->dstPos, mNode2->dstPos)) - return false; - if (!compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr)) - return false; - if (!compareHipExtent(mNode1->extent, mNode2->extent)) - return false; - if (mNode1->kind != mNode2->kind) - return false; - return true; -} + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); -TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") { - CHECK_IMAGE_SUPPORT + hipMemcpy3DParms params = {}; + params.srcPtr = src_alloc.pitched_ptr(); + params.srcPos = make_hipPos(0, 0, 0); + params.dstPtr = dst_alloc.pitched_ptr(); + params.dstPos = make_hipPos(0, 0, 0); + params.extent = extent; + params.kind = hipMemcpyDeviceToDevice; - constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; - hipArray_t devArray; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparms; - int* hData; - uint32_t size = width * height * depth * sizeof(int); - hData = reinterpret_cast(malloc(size)); - REQUIRE(hData != nullptr); - memset(hData, 0, size); - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } - } - } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 
0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, - height, depth), hipArrayDefault)); - memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.extent = make_hipExtent(width , height, depth); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparms.dstArray = devArray; - myparms.kind = hipMemcpyHostToDevice; + hipGraph_t graph = nullptr; + hipGraphNode_t node = nullptr; - hipGraph_t graph; - hipGraphNode_t memcpyNode; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); - - SECTION("Get Memcpy Param and verify.") { - hipMemcpy3DParms m3DGetParams; - REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode, - &m3DGetParams)); - // Validating the result - REQUIRE(true == memcpyNodeCompare(&myparms, &m3DGetParams)); + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(nullptr, &params), hipErrorInvalidValue); } - SECTION("Set memcpy params and Get param and verify.") { - hipMemcpy3DParms myparms1, m3DGetParams1; - constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE}; - hipArray_t devArray1; - hipChannelFormatKind formatKind1 = hipChannelFormatKindSigned; - int* hData1; - uint32_t size1 = width1 * height1 * depth1 * sizeof(int); - hData1 = reinterpret_cast(malloc(size1)); - REQUIRE(hData1 != nullptr); - memset(hData1, 0, size1); - for (int i = 0; i < depth1; i++) { - for (int j = 0; j < height1; j++) { - for (int k = 0; k < width1; k++) { - hData1[i*width1*height1 + j*width1 + k] = i*width1*height1 + - j*width1 + k; - } - } - } - hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind1); - HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc1, - make_hipExtent(width1, height1, depth1), hipArrayDefault)); - memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); - 
myparms1.srcPos = make_hipPos(0, 0, 0); - myparms1.dstPos = make_hipPos(0, 0, 0); - myparms1.extent = make_hipExtent(width1 , height1, depth1); - myparms1.srcPtr = make_hipPitchedPtr(hData1, width1 * sizeof(int), - width1, height1); - myparms1.dstArray = devArray1; - myparms1.kind = hipMemcpyHostToDevice; - - REQUIRE(hipSuccess == hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); - REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode, - &m3DGetParams1)); - REQUIRE(true == memcpyNodeCompare(&myparms1, &m3DGetParams1)); - - HIP_CHECK(hipFreeArray(devArray1)); - free(hData1); + SECTION("pNodeParams == nullptr") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params)); + HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, nullptr), hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(graph)); } - HIP_CHECK(hipFreeArray(devArray)); - free(hData); - HIP_CHECK(hipGraphDestroy(graph)); -} + +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-208 + SECTION("Node is destroyed") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&node, graph, nullptr, 0, &params)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK_ERROR(hipGraphMemcpyNodeGetParams(node, &params), hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams_old.cc new file mode 100644 index 0000000000..34a9ff7d48 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeGetParams_old.cc @@ -0,0 +1,236 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : +Negative - +1) Pass node as nullptr and verify api returns error code. +2) Pass un-initialize node and verify api returns error code. +3) Pass pNodeParams as nullptr and verify api returns error code. +Functional - +1) Create a graph, add Memcpy node to graph with desired node params. + Verify api fetches the node params mentioned while adding Memcpy node. +2) Set Memcpy node params with hipGraphMemcpyNodeSetParams, + now get the params and verify both are same. +*/ + +#include +#include + +#define SIZE 10 +#define UPDATESIZE 8 + +/* Test verifies hipGraphMemcpyNodeGetParams API Negative scenarios. 
+ */ +TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Negative") { + CHECK_IMAGE_SUPPORT + + constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; + hipArray_t devArray; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparms; + int* hData; + uint32_t size = width * height * depth * sizeof(int); + hData = reinterpret_cast(malloc(size)); + REQUIRE(hData != nullptr); + memset(hData, 0, size); + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, + height, depth), hipArrayDefault)); + memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.extent = make_hipExtent(width , height, depth); + myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparms.dstArray = devArray; + myparms.kind = hipMemcpyHostToDevice; + + hipGraph_t graph; + hipError_t ret; + hipGraphNode_t memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); + + SECTION("Pass node as nullptr") { + ret = hipGraphMemcpyNodeGetParams(nullptr, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass un-initilize node") { + hipGraphNode_t memcpyNode_uninit{}; + ret = hipGraphMemcpyNodeGetParams(memcpyNode_uninit, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass GetNodeParams as nullptr") { + ret = hipGraphMemcpyNodeGetParams(memcpyNode, nullptr); + REQUIRE(hipErrorInvalidValue == ret); + } + HIP_CHECK(hipFreeArray(devArray)); + free(hData); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/* Test verifies hipGraphMemcpyNodeGetParams API Functional scenarios. 
+ */ + +static bool compareHipPos(hipPos hPos1, hipPos hPos2) { + if ((hPos1.x == hPos2.x) && (hPos1.y == hPos2.y) && (hPos1.z == hPos2.z)) + return true; + else + return false; +} +static bool compareHipExtent(hipExtent hExt1, hipExtent hExt2) { + if ((hExt1.width == hExt2.width) && (hExt1.height == hExt2.height) && + (hExt1.depth == hExt2.depth)) + return true; + else + return false; +} +static bool compareHipPitchedPtr(hipPitchedPtr hpPtr1, hipPitchedPtr hpPtr2) { + if ((reinterpret_cast(hpPtr1.ptr) == + reinterpret_cast(hpPtr2.ptr)) + && (hpPtr1.pitch == hpPtr2.pitch) + #if HT_AMD + && (hpPtr1.xsize == hpPtr2.xsize) + /* xsize check below is disabled on nvidia as xsize value + * is not being updated properly due to issue with CUDA api */ + #endif + && (hpPtr1.ysize == hpPtr2.ysize)) + return true; + else + return false; +} + +static bool memcpyNodeCompare(hipMemcpy3DParms *mNode1, + hipMemcpy3DParms *mNode2) { + if (mNode1->srcArray != mNode2->srcArray) + return false; + if (!compareHipPos(mNode1->srcPos, mNode2->srcPos)) + return false; + if (!compareHipPitchedPtr(mNode1->srcPtr, mNode2->srcPtr)) + return false; + if (mNode1->dstArray != mNode2->dstArray) + return false; + if (!compareHipPos(mNode1->dstPos, mNode2->dstPos)) + return false; + if (!compareHipPitchedPtr(mNode1->dstPtr, mNode2->dstPtr)) + return false; + if (!compareHipExtent(mNode1->extent, mNode2->extent)) + return false; + if (mNode1->kind != mNode2->kind) + return false; + return true; +} + +TEST_CASE("Unit_hipGraphMemcpyNodeGetParams_Functional") { + CHECK_IMAGE_SUPPORT + + constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; + hipArray_t devArray; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparms; + int* hData; + uint32_t size = width * height * depth * sizeof(int); + hData = reinterpret_cast(malloc(size)); + REQUIRE(hData != nullptr); + memset(hData, 0, size); + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; 
k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, + height, depth), hipArrayDefault)); + memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.extent = make_hipExtent(width , height, depth); + myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparms.dstArray = devArray; + myparms.kind = hipMemcpyHostToDevice; + + hipGraph_t graph; + hipGraphNode_t memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); + + SECTION("Get Memcpy Param and verify.") { + hipMemcpy3DParms m3DGetParams; + REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode, + &m3DGetParams)); + // Validating the result + REQUIRE(true == memcpyNodeCompare(&myparms, &m3DGetParams)); + } + + SECTION("Set memcpy params and Get param and verify.") { + hipMemcpy3DParms myparms1, m3DGetParams1; + constexpr int width1{UPDATESIZE}, height1{UPDATESIZE}, depth1{UPDATESIZE}; + hipArray_t devArray1; + hipChannelFormatKind formatKind1 = hipChannelFormatKindSigned; + int* hData1; + uint32_t size1 = width1 * height1 * depth1 * sizeof(int); + hData1 = reinterpret_cast(malloc(size1)); + REQUIRE(hData1 != nullptr); + memset(hData1, 0, size1); + for (int i = 0; i < depth1; i++) { + for (int j = 0; j < height1; j++) { + for (int k = 0; k < width1; k++) { + hData1[i*width1*height1 + j*width1 + k] = i*width1*height1 + + j*width1 + k; + } + } + } + hipChannelFormatDesc channelDesc1 = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind1); + HIP_CHECK(hipMalloc3DArray(&devArray1, &channelDesc1, + make_hipExtent(width1, height1, depth1), hipArrayDefault)); + memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); + 
myparms1.srcPos = make_hipPos(0, 0, 0); + myparms1.dstPos = make_hipPos(0, 0, 0); + myparms1.extent = make_hipExtent(width1 , height1, depth1); + myparms1.srcPtr = make_hipPitchedPtr(hData1, width1 * sizeof(int), + width1, height1); + myparms1.dstArray = devArray1; + myparms1.kind = hipMemcpyHostToDevice; + + REQUIRE(hipSuccess == hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); + REQUIRE(hipSuccess == hipGraphMemcpyNodeGetParams(memcpyNode, + &m3DGetParams1)); + REQUIRE(true == memcpyNodeCompare(&myparms1, &m3DGetParams1)); + + HIP_CHECK(hipFreeArray(devArray1)); + free(hData1); + } + HIP_CHECK(hipFreeArray(devArray)); + free(hData); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams.cc index 2942729960..d56596164f 100644 --- a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams.cc +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -17,203 +20,264 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/** -Testcase Scenarios : -Negative - -1) Pass node as nullptr and verify api returns error code. -2) Pass un-initialize node and verify api returns error code. -3) Pass pNodeParams as nullptr and verify api returns error code. -Functional - -1) Add Memcpy node to graph, update the Memcpy node params with set and - launch the graph and check updated params are taking effect. -2) Add Memcpy node to graph, launch graph, then update the Memcpy node params - with set and launch the graph and check updated params are taking effect. -*/ +#include #include -#include +#include +#include -#define SIZE 10 +#include "graph_tests_common.hh" -/* Test verifies hipGraphMemcpyNodeSetParams API Negative scenarios. +/** + * @addtogroup hipGraphMemcpyNodeSetParams hipGraphMemcpyNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphMemcpyNodeSetParams (hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams)` - Sets a + * memcpy node's parameters */ -TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") { - CHECK_IMAGE_SUPPORT - constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; - hipArray_t devArray; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparms; - int* hData; - uint32_t size = width * height * depth * sizeof(int); - hData = reinterpret_cast(malloc(size)); - REQUIRE(hData != nullptr); - memset(hData, 0, size); - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values after which the graph is + * executed and the results of the memcpy verified. 
+ * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. + * Test source + * ------------------------ + * - unit/graph/hipGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Positive_Basic") { + constexpr bool async = false; + + SECTION("Device to host") { + Memcpy3DDeviceToHostShell(Memcpy3DWrapper); + } + + SECTION("Device to host with default kind") { + Memcpy3DDeviceToHostShell(Memcpy3DWrapper); + } + + SECTION("Host to device") { + Memcpy3DHostToDeviceShell(Memcpy3DWrapper); + } + + SECTION("Host to device with default kind") { + Memcpy3DHostToDeviceShell(Memcpy3DWrapper); + } + + SECTION("Host to host") { Memcpy3DHostToHostShell(Memcpy3DWrapper); } + + SECTION("Host to host with default kind") { + Memcpy3DHostToHostShell(Memcpy3DWrapper); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); } } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, - height, depth), hipArrayDefault)); - memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.extent = make_hipExtent(width , height, depth); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparms.dstArray = devArray; - myparms.kind = hipMemcpyHostToDevice; - hipGraph_t graph; - hipError_t ret; - hipGraphNode_t memcpyNode; - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); - - SECTION("Pass node as nullptr") { - ret = 
hipGraphMemcpyNodeSetParams(nullptr, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass un-initialize node") { - hipGraphNode_t memcpyNode_uninit{}; - ret = hipGraphMemcpyNodeSetParams(memcpyNode_uninit, &myparms); - REQUIRE(hipErrorInvalidValue == ret); - } - SECTION("Pass SetNodeParams as nullptr") { - ret = hipGraphMemcpyNodeSetParams(memcpyNode, nullptr); - REQUIRE(hipErrorInvalidValue == ret); - } - HIP_CHECK(hipFreeArray(devArray)); - free(hData); - HIP_CHECK(hipGraphDestroy(graph)); -} - -/* Test verifies hipGraphMemcpyNodeSetParams API Functional scenarios. - */ -TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") { - CHECK_IMAGE_SUPPORT - - constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; - hipArray_t devArray; - hipChannelFormatKind formatKind = hipChannelFormatKindSigned; - hipMemcpy3DParms myparms, myparms1; - uint32_t size = width * height * depth * sizeof(int); - - int *hData = reinterpret_cast(malloc(size)); - REQUIRE(hData != nullptr); - memset(hData, 0, size); - int *hDataTemp = reinterpret_cast(malloc(size)); - REQUIRE(hDataTemp != nullptr); - memset(hDataTemp, 0, size); - int *hOutputData = reinterpret_cast(malloc(size)); - REQUIRE(hOutputData != nullptr); - memset(hOutputData, 0, size); - int *hOutputData1 = reinterpret_cast(malloc(size)); - REQUIRE(hOutputData1 != nullptr); - memset(hOutputData1, 0, size); - - for (int i = 0; i < depth; i++) { - for (int j = 0; j < height; j++) { - for (int k = 0; k < width; k++) { - hData[i*width*height + j*width + k] = i*width*height + j*width + k; - } + SECTION("Device to device with default kind") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell(Memcpy3DWrapper); } } - hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, - 0, 0, 0, formatKind); - HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, - height, depth), hipArrayDefault)); 
- memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); - // Host to Device - myparms.srcPos = make_hipPos(0, 0, 0); - myparms.dstPos = make_hipPos(0, 0, 0); - myparms.extent = make_hipExtent(width , height, depth); - myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), - width, height); - myparms.dstArray = devArray; - myparms.kind = hipMemcpyHostToDevice; - - hipGraph_t graph; - hipGraphNode_t memcpyNode; - std::vector dependencies; - hipStream_t streamForGraph; - hipGraphExec_t graphExec; - - HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphCreate(&graph, 0)); - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); - dependencies.push_back(memcpyNode); - - // Device to host - memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); - myparms1.srcPos = make_hipPos(0, 0, 0); - myparms1.dstPos = make_hipPos(0, 0, 0); - myparms1.dstPtr = make_hipPitchedPtr(hDataTemp, width * sizeof(int), - width, height); - myparms1.srcArray = devArray; - myparms1.extent = make_hipExtent(width, height, depth); - myparms1.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparms1)); - - SECTION("Update the memcpyNode and check") { - // Device to host with updated host ptr hDataTemp -> hOutputData - memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); - myparms1.srcPos = make_hipPos(0, 0, 0); - myparms1.dstPos = make_hipPos(0, 0, 0); - myparms1.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int), - width, height); - myparms1.srcArray = devArray; - myparms1.extent = make_hipExtent(width, height, depth); - myparms1.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Check result - 
HipTest::checkArray(hData, hOutputData, width, height, depth); + SECTION("Array from/to Host") { + Memcpy3DArrayHostShell(Memcpy3DWrapper); } - SECTION("Update the memcpyNode again and check") { - // Device to host with updated host ptr hOutputData -> hOutputData1 - memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); - myparms1.srcPos = make_hipPos(0, 0, 0); - myparms1.dstPos = make_hipPos(0, 0, 0); - myparms1.dstPtr = make_hipPitchedPtr(hOutputData1, width * sizeof(int), - width, height); - myparms1.srcArray = devArray; - myparms1.extent = make_hipExtent(width, height, depth); - myparms1.kind = hipMemcpyDeviceToHost; - - HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), - dependencies.size(), &myparms1)); - HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); - - // Instantiate and launch the graph - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); - HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); - HIP_CHECK(hipStreamSynchronize(streamForGraph)); - - // Check result - HipTest::checkArray(hData, hOutputData1, width, height, depth); +#if HT_NVIDIA // Disabled on AMD due to defect - EXSWHTEC-220 + SECTION("Array from/to Device") { + Memcpy3DArrayDeviceShell(Memcpy3DWrapper); } - HIP_CHECK(hipGraphExecDestroy(graphExec)); - HIP_CHECK(hipGraphDestroy(graph)); - HIP_CHECK(hipStreamDestroy(streamForGraph)); - HIP_CHECK(hipFreeArray(devArray)); - free(hData); - free(hDataTemp); - free(hOutputData); - free(hOutputData1); +#endif } + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# dst_ptr.ptr is nullptr + * -# src_ptr.ptr is nullptr + * -# dst_ptr.pitch is smaller than extent.width + * -# src_ptr.pitch is smaller than extent.width + * -# dst_ptr.pitch is set to hipDeviceAttributeMaxPitch + * -# src_ptr.pitch is set to hipDeviceAttributeMaxPitch + * -# extent.width + dst_pos.x is larger than dst_ptr.pitch + * -# extent.width + src_pos.x is larger than src_ptr.pitch + * -# dst_pos.y or dst_pos.z is out of bounds + * -# src_pos.y or src_pos.z is 
out of bounds + * -# kind is an invalid enum value + * Test source + * ------------------------ + * - unit/graph/hipGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative_Parameters") { + using namespace std::placeholders; + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + SECTION("node == nullptr") { + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(nullptr, &params), hipErrorInvalidValue); + } + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("dst_ptr.pitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidPitchValue); + } + + SECTION("src_ptr.pitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), 
hipErrorInvalidPitchValue); + } + + SECTION("dst_ptr.pitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto params = GetMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("src_ptr.pitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("extent.width + dst_pos.x > dst_ptr.pitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - extent.width + 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("extent.width + src_pos.x > src_ptr.pitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("dst_pos.y out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("src_pos.y out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("dst_pos.z out of bounds") { + hipPos 
invalid_pos = dst_pos; + invalid_pos.z = 1; + auto params = GetMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("src_pos.z out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.z = 1; + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidValue); + } + + SECTION("Invalid MemcpyKind") { + auto params = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, + static_cast(-1)); + HIP_CHECK_ERROR(hipGraphMemcpyNodeSetParams(node, &params), hipErrorInvalidMemcpyDirection); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice); + } + + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 
0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice); + } +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams_old.cc b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams_old.cc new file mode 100644 index 0000000000..2942729960 --- /dev/null +++ b/projects/hip-tests/catch/unit/graph/hipGraphMemcpyNodeSetParams_old.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenarios : +Negative - +1) Pass node as nullptr and verify api returns error code. 
+2) Pass un-initialize node and verify api returns error code. +3) Pass pNodeParams as nullptr and verify api returns error code. +Functional - +1) Add Memcpy node to graph, update the Memcpy node params with set and + launch the graph and check updated params are taking effect. +2) Add Memcpy node to graph, launch graph, then update the Memcpy node params + with set and launch the graph and check updated params are taking effect. +*/ + +#include +#include + +#define SIZE 10 + +/* Test verifies hipGraphMemcpyNodeSetParams API Negative scenarios. + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Negative") { + CHECK_IMAGE_SUPPORT + + constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; + hipArray_t devArray; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparms; + int* hData; + uint32_t size = width * height * depth * sizeof(int); + hData = reinterpret_cast(malloc(size)); + REQUIRE(hData != nullptr); + memset(hData, 0, size); + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, + height, depth), hipArrayDefault)); + memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.extent = make_hipExtent(width , height, depth); + myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparms.dstArray = devArray; + myparms.kind = hipMemcpyHostToDevice; + + hipGraph_t graph; + hipError_t ret; + hipGraphNode_t memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); + + SECTION("Pass node as nullptr") { + ret = hipGraphMemcpyNodeSetParams(nullptr, 
&myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass un-initialize node") { + hipGraphNode_t memcpyNode_uninit{}; + ret = hipGraphMemcpyNodeSetParams(memcpyNode_uninit, &myparms); + REQUIRE(hipErrorInvalidValue == ret); + } + SECTION("Pass SetNodeParams as nullptr") { + ret = hipGraphMemcpyNodeSetParams(memcpyNode, nullptr); + REQUIRE(hipErrorInvalidValue == ret); + } + HIP_CHECK(hipFreeArray(devArray)); + free(hData); + HIP_CHECK(hipGraphDestroy(graph)); +} + +/* Test verifies hipGraphMemcpyNodeSetParams API Functional scenarios. + */ +TEST_CASE("Unit_hipGraphMemcpyNodeSetParams_Functional") { + CHECK_IMAGE_SUPPORT + + constexpr int width{SIZE}, height{SIZE}, depth{SIZE}; + hipArray_t devArray; + hipChannelFormatKind formatKind = hipChannelFormatKindSigned; + hipMemcpy3DParms myparms, myparms1; + uint32_t size = width * height * depth * sizeof(int); + + int *hData = reinterpret_cast(malloc(size)); + REQUIRE(hData != nullptr); + memset(hData, 0, size); + int *hDataTemp = reinterpret_cast(malloc(size)); + REQUIRE(hDataTemp != nullptr); + memset(hDataTemp, 0, size); + int *hOutputData = reinterpret_cast(malloc(size)); + REQUIRE(hOutputData != nullptr); + memset(hOutputData, 0, size); + int *hOutputData1 = reinterpret_cast(malloc(size)); + REQUIRE(hOutputData1 != nullptr); + memset(hOutputData1, 0, size); + + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + hData[i*width*height + j*width + k] = i*width*height + j*width + k; + } + } + } + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(sizeof(int)*8, + 0, 0, 0, formatKind); + HIP_CHECK(hipMalloc3DArray(&devArray, &channelDesc, make_hipExtent(width, + height, depth), hipArrayDefault)); + memset(&myparms, 0x0, sizeof(hipMemcpy3DParms)); + + // Host to Device + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.extent = make_hipExtent(width , height, depth); + myparms.srcPtr = 
make_hipPitchedPtr(hData, width * sizeof(int), + width, height); + myparms.dstArray = devArray; + myparms.kind = hipMemcpyHostToDevice; + + hipGraph_t graph; + hipGraphNode_t memcpyNode; + std::vector dependencies; + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, NULL, 0, &myparms)); + dependencies.push_back(memcpyNode); + + // Device to host + memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); + myparms1.srcPos = make_hipPos(0, 0, 0); + myparms1.dstPos = make_hipPos(0, 0, 0); + myparms1.dstPtr = make_hipPitchedPtr(hDataTemp, width * sizeof(int), + width, height); + myparms1.srcArray = devArray; + myparms1.extent = make_hipExtent(width, height, depth); + myparms1.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparms1)); + + SECTION("Update the memcpyNode and check") { + // Device to host with updated host ptr hDataTemp -> hOutputData + memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); + myparms1.srcPos = make_hipPos(0, 0, 0); + myparms1.dstPos = make_hipPos(0, 0, 0); + myparms1.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(int), + width, height); + myparms1.srcArray = devArray; + myparms1.extent = make_hipExtent(width, height, depth); + myparms1.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Check result + HipTest::checkArray(hData, hOutputData, width, height, depth); + } + + SECTION("Update the memcpyNode again and check") { + // Device to host with updated host ptr hOutputData -> hOutputData1 + memset(&myparms1, 0x0, sizeof(hipMemcpy3DParms)); + 
myparms1.srcPos = make_hipPos(0, 0, 0); + myparms1.dstPos = make_hipPos(0, 0, 0); + myparms1.dstPtr = make_hipPitchedPtr(hOutputData1, width * sizeof(int), + width, height); + myparms1.srcArray = devArray; + myparms1.extent = make_hipExtent(width, height, depth); + myparms1.kind = hipMemcpyDeviceToHost; + + HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, dependencies.data(), + dependencies.size(), &myparms1)); + HIP_CHECK(hipGraphMemcpyNodeSetParams(memcpyNode, &myparms1)); + + // Instantiate and launch the graph + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Check result + HipTest::checkArray(hData, hOutputData1, width, height, depth); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFreeArray(devArray)); + free(hData); + free(hDataTemp); + free(hOutputData); + free(hOutputData1); +} From 18e922a48a33786096e8cfd02db738082c6ecba6 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 29 Dec 2023 16:30:00 +0000 Subject: [PATCH 11/19] EXSWHTEC-260 - Implement new and extend existing tests for thread_block_tile #152 Change-Id: I797d895f582d01efa9a2657b0fbd3dc69049f80b [ROCm/hip-tests commit: 9ab359fa172ff2233ba97c9a81a3e4d5cc61b771] --- .../catch/unit/cooperativeGrps/CMakeLists.txt | 2 + .../unit/cooperativeGrps/thread_block_tile.cc | 553 ++++++++++++++++++ 2 files changed, 555 insertions(+) create mode 100644 projects/hip-tests/catch/unit/cooperativeGrps/thread_block_tile.cc diff --git a/projects/hip-tests/catch/unit/cooperativeGrps/CMakeLists.txt b/projects/hip-tests/catch/unit/cooperativeGrps/CMakeLists.txt index 8bf1ce439b..1a5f234e3b 100644 --- a/projects/hip-tests/catch/unit/cooperativeGrps/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/cooperativeGrps/CMakeLists.txt @@ -1,5 +1,7 @@ # Common Tests - Test independent 
of all platforms set(TEST_SRC + thread_block.cc + thread_block_tile.cc hipCGThreadBlockType_old.cc hipCGMultiGridGroupType_old.cc hipCGGridGroupType_old.cc diff --git a/projects/hip-tests/catch/unit/cooperativeGrps/thread_block_tile.cc b/projects/hip-tests/catch/unit/cooperativeGrps/thread_block_tile.cc new file mode 100644 index 0000000000..599d7b6da6 --- /dev/null +++ b/projects/hip-tests/catch/unit/cooperativeGrps/thread_block_tile.cc @@ -0,0 +1,553 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "cooperative_groups_common.hh" + +#include +#include + +#include +#include +#include +#include +#include +#include + + +/** + * @addtogroup thread_block_tile thread_block_tile + * @{ + * @ingroup DeviceLanguageTest + * Contains unit tests for all thread_block_tile APIs and dynamic block partitioning + */ + +namespace cg = cooperative_groups; + +template +__global__ void thread_block_partition_size_getter(unsigned int* sizes) { + const auto group = cg::this_thread_block(); + if constexpr (dynamic) { + sizes[thread_rank_in_grid()] = cg::tiled_partition(group, tile_size).size(); + } else { + cg::thread_block_tile tiled_partition = cg::tiled_partition(group); + sizes[thread_rank_in_grid()] = tiled_partition.size(); + } +} + +template +__global__ void thread_block_partition_thread_rank_getter(unsigned int* thread_ranks) { + const auto group = cg::this_thread_block(); + if constexpr (dynamic) { + thread_ranks[thread_rank_in_grid()] = cg::tiled_partition(group, tile_size).thread_rank(); + } else { + cg::thread_block_tile tiled_partition = cg::tiled_partition(group); + thread_ranks[thread_rank_in_grid()] = tiled_partition.thread_rank(); + } +} + +template void BlockPartitionGettersBasicTestImpl() { + DYNAMIC_SECTION("Tile size: " << tile_size) { + auto blocks = GenerateBlockDimensions(); + auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(unsigned int); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + thread_block_partition_size_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + 
HIP_CHECK(hipDeviceSynchronize()); + thread_block_partition_thread_rank_getter + <<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, [&grid](unsigned int i) { + if constexpr (!dynamic) { + return tile_size; + } + + const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size; + const auto rank_in_block = grid.thread_rank_in_block(i).value(); + + const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_; + return tile_size - tail * (rank_in_block >= (partitions_in_block - 1) * tile_size); + }); + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, [&grid](unsigned int i) { + return grid.thread_rank_in_block(i).value() % tile_size; + }); + } +} + +template void BlockPartitionGettersBasicTest() { + static_cast((BlockPartitionGettersBasicTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes + * the return values of size and thread_rank member functions to an output array that is validated + * on the host side. + * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Thread_Block_Tile_Getters_Positive_Basic") { + BlockPartitionGettersBasicTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockPartitionGettersBasicTest(); +#endif +} + +/** + * Test Description + * ------------------------ + * - Creates tiled partitions for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} via the + * dynamic tiled partition api and writes the return values of size and thread_rank member functions + * to an output array that is validated on host. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Thread_Block_Tile_Dynamic_Getters_Positive_Basic") { + BlockPartitionGettersBasicTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockPartitionGettersBasicTest(); +#endif +} + + +template +__global__ void block_tile_shfl_up(T* const out, const unsigned int delta) { + const cg::thread_block_tile partition = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(partition.thread_rank()); + out[thread_rank_in_grid()] = partition.shfl_up(var, delta); +} + +template void BlockTileShflUpTestImpl() { + DYNAMIC_SECTION("Tile size: " << tile_size) { + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + auto delta = GENERATE(range(static_cast(0), tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + + block_tile_shfl_up<<>>(arr_dev.ptr(), delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + ArrayAllOf(arr.ptr(), grid.thread_count_, [delta, &grid](unsigned int i) -> std::optional { + const int rank_in_partition = grid.thread_rank_in_block(i).value() % tile_size; + const int target = rank_in_partition - delta; + return target < 0 ? 
rank_in_partition : target; + }); + } +} + +template void BlockTileShflUpTest() { + static_cast((BlockTileShflUpTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle up behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of shfl_up. + * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Up_Positive_Basic", "", int, unsigned int, long, + unsigned long, long long, unsigned long long, float, double) { + BlockTileShflUpTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileShflUpTest(); +#endif +} + + +template +__global__ void block_tile_shfl_down(T* const out, const unsigned int delta) { + const cg::thread_block_tile partition = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(partition.thread_rank()); + out[thread_rank_in_grid()] = partition.shfl_down(var, delta); +} + +template void BlockTileShflDownTestImpl() { + DYNAMIC_SECTION("Tile size: " << tile_size) { + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + auto delta = GENERATE(range(static_cast(0), tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + + block_tile_shfl_down<<>>(arr_dev.ptr(), delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + 
HIP_CHECK(hipDeviceSynchronize()); + + ArrayAllOf(arr.ptr(), grid.thread_count_, [delta, &grid](unsigned int i) -> std::optional { + const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size; + const auto rank_in_block = grid.thread_rank_in_block(i).value(); + const auto rank_in_group = rank_in_block % tile_size; + const auto target = rank_in_group + delta; + if (rank_in_block < (partitions_in_block - 1) * tile_size) { + return target < tile_size ? target : rank_in_group; + } else { + // If the number of threads in a block is not an integer multiple of tile_size, the + // final(tail end) tile will contain inactive threads. + // Shuffling from an inactive thread returns an undefined value, accordingly threads that + // shuffle from one must be skipped + const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_; + return target < tile_size - tail ? std::optional(target) : std::nullopt; + } + }); + } +} + +template void BlockTileShflDownTest() { + static_cast((BlockTileShflDownTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle down behavior of thread block tiles of all valid sizes{2, 4, 8, 16, + * 32, 64(if AMD)} for delta values of [0, tile size). The test is run for all overloads of + * shfl_down. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Down_Positive_Basic", "", int, unsigned int, long, + unsigned long, long long, unsigned long long, float, double) { + BlockTileShflDownTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileShflDownTest(); +#endif +} + + +template +__global__ void block_tile_shfl_xor(T* const out, const unsigned mask) { + const cg::thread_block_tile partition = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(partition.thread_rank()); + out[thread_rank_in_grid()] = partition.shfl_xor(var, mask); +} + +template void BlockTileShflXORTestImpl() { + DYNAMIC_SECTION("Tile size: " << tile_size) { + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const auto mask = GENERATE(range(static_cast(0), tile_size)); + INFO("Mask: 0x" << std::hex << mask); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + + block_tile_shfl_xor<<>>(arr_dev.ptr(), mask); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto f = [mask, &grid](unsigned int i) -> std::optional { + const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size; + const auto rank_in_block = grid.thread_rank_in_block(i).value(); + const int rank_in_partition = rank_in_block % tile_size; + const auto target = rank_in_partition ^ mask; + if 
(rank_in_block < (partitions_in_block - 1) * tile_size) { + return target; + } + const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_; + return target < tile_size - tail ? std::optional(target) : std::nullopt; + }; + ArrayAllOf(arr.ptr(), grid.thread_count_, f); + } +} + +template void BlockTileShflXORTest() { + static_cast((BlockTileShflXORTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle xor behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for mask values of [0, tile size). The test is run for all overloads of shfl_xor. + * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_XOR_Positive_Basic", "", int, unsigned int, long, + unsigned long, long long, unsigned long long, float, double) { + BlockTileShflXORTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileShflXORTest(); +#endif +} + +template +__global__ void block_tile_shfl(T* const out, uint8_t* target_lanes) { + const cg::thread_block_tile partition = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(partition.thread_rank()); + out[thread_rank_in_grid()] = partition.shfl(var, target_lanes[partition.thread_rank()]); +} + +static inline std::mt19937& GetRandomGenerator() { + static std::mt19937 mt(11); + return mt; +} + +template static inline T GenerateRandomInteger(const T min, const T max) { + std::uniform_int_distribution dist(min, max); + return dist(GetRandomGenerator()); +} + +template void BlockTileShflTestImpl() { + DYNAMIC_SECTION("Tile size: " << tile_size) { + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << 
", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + + LinearAllocGuard target_lanes_dev(LinearAllocs::hipMalloc, + tile_size * sizeof(uint8_t)); + LinearAllocGuard target_lanes(LinearAllocs::hipHostMalloc, + tile_size * sizeof(uint8_t)); + std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size, + [] { return GenerateRandomInteger(0, static_cast(2 * tile_size)); }); + + HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t), + hipMemcpyHostToDevice)); + block_tile_shfl<<>>(arr_dev.ptr(), target_lanes_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto f = [&target_lanes, &grid](unsigned int i) -> std::optional { + const auto partitions_in_block = (grid.threads_in_block_count_ + tile_size - 1) / tile_size; + const auto rank_in_block = grid.thread_rank_in_block(i).value(); + const int rank_in_partition = rank_in_block % tile_size; + const auto target = target_lanes.ptr()[rank_in_partition] % tile_size; + if (rank_in_block < (partitions_in_block - 1) * tile_size) { + return target; + } + const auto tail = partitions_in_block * tile_size - grid.threads_in_block_count_; + return target < tile_size - tail ? std::optional(target) : std::nullopt; + }; + ArrayAllOf(arr.ptr(), grid.thread_count_, f); + } +} + +template void BlockTileShflTest() { + static_cast((BlockTileShflTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle behavior of thread block tiles of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for generated shuffle target lanes. The test is run for all overloads of shfl. 
Test + * source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Shfl_Positive_Basic", "", int, unsigned int, long, + unsigned long, long long, unsigned long long, float, double) { + BlockTileShflTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileShflTest(); +#endif +} + + +template +__global__ void block_tile_sync_check(T* global_data, unsigned int* wait_modifiers) { + extern __shared__ uint8_t shared_data[]; + T* const data = use_global ? global_data : reinterpret_cast(shared_data); + const auto tid = cg::this_grid().thread_rank(); + const auto block = cg::this_thread_block(); + const cg::thread_block_tile partition = + cg::tiled_partition(cg::this_thread_block()); + + const auto data_idx = [&block](unsigned int i) { return use_global ? i : (i % block.size()); }; + + const auto partitions_in_block = (block.size() + partition.size() - 1) / partition.size(); + const auto partition_rank = block.thread_rank() / partition.size(); + const auto tail = partitions_in_block * partition.size() - block.size(); + const auto window_size = partition.size() - tail * (partition_rank == partitions_in_block - 1); + + const auto block_base_idx = tid / block.size() * block.size(); + const auto tile_base_idx = block_base_idx + partition_rank * partition.size(); + + const auto wait_modifier = wait_modifiers[tid]; + busy_wait(wait_modifier); + data[data_idx(tid)] = partition.thread_rank(); + partition.sync(); + bool valid = true; + for (auto i = 0; i < window_size; ++i) { + const auto expected = (partition.thread_rank() + i) % window_size; + + if (!(valid &= (data[data_idx(tile_base_idx + expected)] == expected))) { + break; + } + } + partition.sync(); + data[data_idx(tid)] = valid; + if constexpr (!use_global) { + global_data[tid] = data[data_idx(tid)]; + } +} + +template void BlockTileSyncTestImpl() { + DYNAMIC_SECTION("Tile 
size: " << tile_size) { + const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations)); + INFO("Run number: " << randomized_run_count + 1); + auto blocks = GenerateBlockDimensions(); + auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + const auto alloc_size_per_block = alloc_size / grid.block_count_; + int max_shared_mem_per_block = 0; + HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block, + hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + if (!global_memory && (max_shared_mem_per_block < alloc_size_per_block)) { + return; + } + + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + LinearAllocGuard wait_modifiers_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard wait_modifiers(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + if (randomized_run_count != 0) { + std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_, + [] { return GenerateRandomInteger(0u, 1500u); }); + } else { + std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u); + } + + const auto shared_memory_size = global_memory ? 
0u : alloc_size_per_block; + HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(), + grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice)); + + block_tile_sync_check + <<>>(arr_dev.ptr(), wait_modifiers_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE( + std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; })); + } +} + +template void BlockTileSyncTest() { + static_cast((BlockTileSyncTestImpl(), ...)); +} + +/** + * Test Description + * ------------------------ + * - Launches a kernel wherein blocks are divided into tiled partitions(size of 2, 4, 8, 16, 32, + * 64 if AMD) and every thread writes its intra-tile rank into an array slot determined by its + * grid-wide linear index. The array is either in global or dynamic shared memory based on a compile + * time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the write each + * thread executes a busy wait loop for a random amount of clock cycles, the amount being read from + * an input array. After the write a tile-wide sync is performed and each thread validates that it + * can read the expected values that other threads within the same tile have written to their + * respective array slots. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block_tile.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Tile_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) { + SECTION("Global memory") { + BlockTileSyncTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileSyncTest(); +#endif + } + SECTION("Shared memory") { + BlockTileSyncTest(); +#if HT_AMD && (__GFX8__ || __GFX9__) + BlockTileSyncTest(); +#endif + } +} From a2e616488ed9272dde3e99e80d2abbc44e12f06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:24:26 +0100 Subject: [PATCH 12/19] EXSWHTEC-252 - Implement Kernel Launch Performance Tests #139 Change-Id: Ib27db722a31ac0cd7ad1942722c6eba62087defb [ROCm/hip-tests commit: 2be686091614da09f728b686fa4542e77a15f7f5] --- .../catch/performance/CMakeLists.txt | 3 + .../performance/kernelLaunch/CMakeLists.txt | 37 +++++ .../kernelLaunch/hipExtLaunchKernel.cc | 120 ++++++++++++++++ .../hipLaunchCooperativeKernel.cc | 130 ++++++++++++++++++ .../kernelLaunch/hipLaunchKernel.cc | 118 ++++++++++++++++ .../kernelLaunch/kernel_launch_common.cc | 39 ++++++ .../kernelLaunch/kernel_launch_common.hh | 116 ++++++++++++++++ .../kernelLaunch/triple_chevron.cc | 105 ++++++++++++++ 8 files changed, 668 insertions(+) create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/CMakeLists.txt create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/hipExtLaunchKernel.cc create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/hipLaunchCooperativeKernel.cc create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/hipLaunchKernel.cc create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.cc create mode 100644 projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.hh create mode 100644 
projects/hip-tests/catch/performance/kernelLaunch/triple_chevron.cc diff --git a/projects/hip-tests/catch/performance/CMakeLists.txt b/projects/hip-tests/catch/performance/CMakeLists.txt index 0c6962c596..2778dab03d 100644 --- a/projects/hip-tests/catch/performance/CMakeLists.txt +++ b/projects/hip-tests/catch/performance/CMakeLists.txt @@ -18,6 +18,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +add_subdirectory(memset) +add_subdirectory(memcpy) +add_subdirectory(kernelLaunch) add_subdirectory(stream) add_subdirectory(event) add_subdirectory(example) diff --git a/projects/hip-tests/catch/performance/kernelLaunch/CMakeLists.txt b/projects/hip-tests/catch/performance/kernelLaunch/CMakeLists.txt new file mode 100644 index 0000000000..a768fc4fd3 --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + kernel_launch_common.cc + triple_chevron.cc + hipLaunchKernel.cc + hipLaunchCooperativeKernel.cc +) + +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC ${TEST_SRC} + hipExtLaunchKernel.cc + ) +endif() + +hip_add_exe_to_target(NAME KernelLaunchPerformance + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++17) diff --git a/projects/hip-tests/catch/performance/kernelLaunch/hipExtLaunchKernel.cc b/projects/hip-tests/catch/performance/kernelLaunch/hipExtLaunchKernel.cc new file mode 100644 index 0000000000..f40bcf2d57 --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/hipExtLaunchKernel.cc @@ -0,0 +1,120 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "kernel_launch_common.hh" + +#include + +/** + * @addtogroup kernelLaunch kernel launch + * @{ + * @ingroup PerformanceTest + * Contains performance tests for kernel launch overhead benchmarking. + */ + +template +class ExtLaunchKernelBenchmark + : public KernelLaunchBenchmark, timer_type> { + public: + constexpr void LaunchKernel() { + if constexpr (kernel_type == KernelType::kNull) { + error_ = hipExtLaunchKernel(reinterpret_cast(NullKernel), 1, 1, nullptr, 0, nullptr, + events_[0], events_[1], 0u); + } else if constexpr (kernel_type == KernelType::kSmall) { + error_ = hipExtLaunchKernel(reinterpret_cast(KernelWithSmallArgs), 1, 1, + small_kernel_args_, 0, nullptr, events_[0], events_[1], 0u); + } else if constexpr (kernel_type == KernelType::kMedium) { + error_ = hipExtLaunchKernel(reinterpret_cast(KernelWithMediumArgs), 1, 1, + medium_kernel_args_, 0, nullptr, events_[0], events_[1], 0u); + } else if constexpr (kernel_type == KernelType::kLarge) { + error_ = hipExtLaunchKernel(reinterpret_cast(KernelWithLargeArgs), 1, 1, + large_kernel_args_, 0, nullptr, events_[0], events_[1], 0u); + } else + ; + } + + hipError_t GetError() { return error_; } + + private: + EventsGuard events_{2}; + hipError_t error_; + + char* out_ = nullptr; + void* small_kernel_args_[2] = {&small_kernel_args, &out_}; + void* medium_kernel_args_[2] = {&medium_kernel_args, &out_}; + void* large_kernel_args_[2] = {&large_kernel_args, &out_}; +}; + +template static void RunBenchmark(bool sync) { + ExtLaunchKernelBenchmark benchmark; + benchmark.AddSectionName(GetSynchronizationSectionName(sync)); + benchmark.AddSectionName(GetKernelTypeSectionName()); + benchmark.AddSectionName(GetTimerTypeSectionName()); + 
benchmark.Run(sync); + HIP_CHECK(benchmark.GetError()); +} + +/** + * Test Description + * ------------------------ + * - Calls an empty kernel using hipExtLaunchKernel: + * -# With different timing methods: + * - CPU-based + * - Event-based + * -# With different synchronization behavior: + * - Using a stream synchronization between each iteration + * - Without any synchronization between iterations + * -# With different kernel argument sizes + * Test source + * ------------------------ + * - performance/kernelLaunch/hipExtLaunchKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipExtLaunchKernel") { + bool sync = GENERATE(true, false); + + SECTION("null kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("small kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("medium kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("large kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } +} diff --git a/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchCooperativeKernel.cc b/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchCooperativeKernel.cc new file mode 100644 index 0000000000..2881422ddf --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchCooperativeKernel.cc @@ -0,0 +1,130 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "kernel_launch_common.hh" + +#include +#include + +/** + * @addtogroup kernelLaunch kernel launch + * @{ + * @ingroup PerformanceTest + * Contains performance tests for kernel launch overhead benchmarking. 
+ */ + +template +class LaunchCooperativeKernelBenchmark + : public KernelLaunchBenchmark, + timer_type> { + public: + constexpr void LaunchKernel() { + if constexpr (kernel_type == KernelType::kNull) { + error_ = hipLaunchCooperativeKernel(reinterpret_cast(NullKernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kSmall) { + error_ = + hipLaunchCooperativeKernel(reinterpret_cast(KernelWithSmallArgs), dim3{1, 1, 1}, + dim3{1, 1, 1}, small_kernel_args_, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kMedium) { + error_ = + hipLaunchCooperativeKernel(reinterpret_cast(KernelWithMediumArgs), dim3{1, 1, 1}, + dim3{1, 1, 1}, medium_kernel_args_, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kLarge) { + error_ = + hipLaunchCooperativeKernel(reinterpret_cast(KernelWithLargeArgs), dim3{1, 1, 1}, + dim3{1, 1, 1}, large_kernel_args_, 0, nullptr); + } else + ; + } + + hipError_t GetError() { return error_; } + + private: + hipError_t error_; + + char* out_ = nullptr; + void* small_kernel_args_[2] = {&small_kernel_args, &out_}; + void* medium_kernel_args_[2] = {&medium_kernel_args, &out_}; + void* large_kernel_args_[2] = {&large_kernel_args, &out_}; +}; + +template static void RunBenchmark(bool sync) { + LaunchCooperativeKernelBenchmark benchmark; + benchmark.AddSectionName(GetSynchronizationSectionName(sync)); + benchmark.AddSectionName(GetKernelTypeSectionName()); + benchmark.AddSectionName(GetTimerTypeSectionName()); + benchmark.Run(sync); + HIP_CHECK(benchmark.GetError()); +} + +/** + * Test Description + * ------------------------ + * - Calls an empty kernel using hipLaunchCooperativeKernel: + * -# With different timing methods: + * - CPU-based + * - Event-based + * -# With different synchronization behavior: + * - Using a stream synchronization between each iteration + * - Without any synchronization between iterations + * -# With different kernel argument sizes + * Test source + * 
------------------------ + * - performance/kernelLaunch/hipLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - Device supports CooperativeLaunch + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipLaunchCooperativeKernel") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + bool sync = GENERATE(true, false); + + SECTION("null kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("small kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("medium kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("large kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } +} diff --git a/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchKernel.cc b/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchKernel.cc new file mode 100644 index 0000000000..db874d292e --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/hipLaunchKernel.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "kernel_launch_common.hh" + +#include + +/** + * @addtogroup kernelLaunch kernel launch + * @{ + * @ingroup PerformanceTest + * Contains performance tests for kernel launch overhead benchmarking. + */ + +template +class LaunchKernelBenchmark + : public KernelLaunchBenchmark, timer_type> { + public: + constexpr void LaunchKernel() { + if constexpr (kernel_type == KernelType::kNull) { + error_ = hipLaunchKernel(reinterpret_cast(NullKernel), 1, 1, nullptr, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kSmall) { + error_ = hipLaunchKernel(reinterpret_cast(KernelWithSmallArgs), 1, 1, + small_kernel_args_, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kMedium) { + error_ = hipLaunchKernel(reinterpret_cast(KernelWithMediumArgs), 1, 1, + medium_kernel_args_, 0, nullptr); + } else if constexpr (kernel_type == KernelType::kLarge) { + error_ = hipLaunchKernel(reinterpret_cast(KernelWithLargeArgs), 1, 1, + large_kernel_args_, 0, nullptr); + } else + ; + } + + hipError_t GetError() { return error_; } + + private: + hipError_t error_; + + char* out_ = nullptr; + void* small_kernel_args_[2] = {&small_kernel_args, &out_}; + void* medium_kernel_args_[2] = {&medium_kernel_args, &out_}; + void* large_kernel_args_[2] = {&large_kernel_args, &out_}; +}; + +template static void RunBenchmark(bool sync) { + LaunchKernelBenchmark benchmark; + benchmark.AddSectionName(GetSynchronizationSectionName(sync)); + benchmark.AddSectionName(GetKernelTypeSectionName()); + 
benchmark.AddSectionName(GetTimerTypeSectionName()); + benchmark.Run(sync); + HIP_CHECK(benchmark.GetError()); +} + +/** + * Test Description + * ------------------------ + * - Calls an empty kernel using hipLaunchKernel: + * -# With different timing methods: + * - CPU-based + * - Event-based + * -# With different synchronization behavior: + * - Using a stream synchronization between each iteration + * - Without any synchronization between iterations + * -# With different kernel argument sizes + * Test source + * ------------------------ + * - performance/kernelLaunch/hipLaunchKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipLaunchKernel") { + bool sync = GENERATE(true, false); + + SECTION("null kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("small kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("medium kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("large kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } +} diff --git a/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.cc b/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.cc new file mode 100644 index 0000000000..28b6c29b82 --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.cc @@ -0,0 +1,39 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "kernel_launch_common.hh" + +#define DO_NOT_OPTIMIZE_AWAY \ + unsigned i = blockIdx.x * blockDim.x + threadIdx.x; \ + if (out) *out = args.args[i]; + +__global__ void NullKernel() {} + +__global__ void KernelWithSmallArgs(SmallKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; } + +__global__ void KernelWithMediumArgs(MediumKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; } + +__global__ void KernelWithLargeArgs(LargeKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; } + +SmallKernelArgs small_kernel_args; +MediumKernelArgs medium_kernel_args; +LargeKernelArgs large_kernel_args; diff --git a/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.hh b/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.hh new file mode 100644 index 0000000000..1f58205e70 --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/kernel_launch_common.hh @@ -0,0 +1,116 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include + +struct SmallKernelArgs { + char args[16]; +}; + +struct MediumKernelArgs { + char args[256]; +}; + +struct LargeKernelArgs { + char args[4096]; +}; + +extern SmallKernelArgs small_kernel_args; +extern MediumKernelArgs medium_kernel_args; +extern LargeKernelArgs large_kernel_args; + +__global__ void NullKernel(); + +__global__ void KernelWithSmallArgs(SmallKernelArgs, char*); + +__global__ void KernelWithMediumArgs(MediumKernelArgs, char*); + +__global__ void KernelWithLargeArgs(LargeKernelArgs, char*); + +enum class KernelType { kNull = 0, kSmall, kMedium, kLarge }; + +template +class KernelLaunchBenchmark : public Benchmark> { + public: + void operator()(bool sync = true) { + auto& derived = static_cast(*this); + + if (sync) { + TIMED_SECTION(timer_type) { derived.LaunchKernel(); } + } else { + if (this->current() != this->kWarmup) // if not warmup + RunWithoutSynchronization(); + } + } + + private: + void RunWithoutSynchronization() { + auto iterations = this->iterations(); + auto warmups = this->warmups(); + + // manually handle iterations here to avoid synchronization after each iteration + this->Configure(1, 0); + + this->RegisterModifier([iterations](float time) { return time / iterations; }); + + auto& derived = static_cast(*this); + + for (size_t i = 0u; i < warmups; ++i) { + derived.LaunchKernel(); + } + + TIMED_SECTION(timer_type) { + for (size_t i = 0u; i < iterations; ++i) { + derived.LaunchKernel(); + } + } + } +}; + +static std::string GetSynchronizationSectionName(bool sync) { + return sync ? 
"with synchronization" : "without synchronization"; +} + +template std::string GetKernelTypeSectionName() { + if constexpr (kernel_type == KernelType::kNull) { + return "null kernel"; + } else if constexpr (kernel_type == KernelType::kSmall) { + return "small kernel"; + } else if constexpr (kernel_type == KernelType::kMedium) { + return "medium kernel"; + } else if constexpr (kernel_type == KernelType::kLarge) { + return "large kernel"; + } else { + return "unknown kernel type"; + } +} + +template std::string GetTimerTypeSectionName() { + if constexpr (timer_type == kTimerTypeEvent) { + return "event based"; + } else { + return "cpu based"; + } +} diff --git a/projects/hip-tests/catch/performance/kernelLaunch/triple_chevron.cc b/projects/hip-tests/catch/performance/kernelLaunch/triple_chevron.cc new file mode 100644 index 0000000000..b6abe45027 --- /dev/null +++ b/projects/hip-tests/catch/performance/kernelLaunch/triple_chevron.cc @@ -0,0 +1,105 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "kernel_launch_common.hh" + +#include + +/** + * @addtogroup kernelLaunch kernel launch + * @{ + * @ingroup PerformanceTest + * Contains performance tests for kernel launch overhead benchmarking. + */ + +template +class TripleChevronBenchmark + : public KernelLaunchBenchmark, timer_type> { + public: + constexpr void LaunchKernel() { + if constexpr (kernel_type == KernelType::kNull) { + NullKernel<<<1, 1>>>(); + } else if constexpr (kernel_type == KernelType::kSmall) { + KernelWithSmallArgs<<<1, 1>>>(small_kernel_args, nullptr); + } else if constexpr (kernel_type == KernelType::kMedium) { + KernelWithMediumArgs<<<1, 1>>>(medium_kernel_args, nullptr); + } else if constexpr (kernel_type == KernelType::kLarge) { + KernelWithLargeArgs<<<1, 1>>>(large_kernel_args, nullptr); + } else + ; + } +}; + +template static void RunBenchmark(bool sync) { + TripleChevronBenchmark benchmark; + benchmark.AddSectionName(GetSynchronizationSectionName(sync)); + benchmark.AddSectionName(GetKernelTypeSectionName()); + benchmark.AddSectionName(GetTimerTypeSectionName()); + benchmark.Run(sync); + HIP_CHECK(hipGetLastError()); +} + +/** + * Test Description + * ------------------------ + * - Calls an empty kernel using triple chevron annotation: + * -# With different timing methods: + * - CPU-based + * - Event-based + * -# With different synchronization behavior: + * - Using a stream synchronization between each iteration + * - Without any synchronization between iterations + * -# With different kernel argument sizes + * Test source + * ------------------------ + * - performance/kernelLaunch/triple_chevron.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ 
+TEST_CASE("Performance_Triple_Chevron") { + bool sync = GENERATE(true, false); + + SECTION("null kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("small kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("medium kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } + + SECTION("large kernel") { + SECTION("cpu-based timing") { RunBenchmark(sync); } + + SECTION("event-based timing") { RunBenchmark(sync); } + } +} From db334cac973f407eb9a2a3db071f737633504f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:26:22 +0100 Subject: [PATCH 13/19] EXSWHTEC-248 - Implement Performance Tests for Memset APIs #99 Change-Id: I6b4a0758299b0fd6d70c75508a4a0d67c62187ff [ROCm/hip-tests commit: 95a954e63ab4520f5ca1ecce3d66d528ac6e356e] --- .../catch/include/hip_test_defgroups.hh | 7 ++ .../catch/performance/memset/CMakeLists.txt | 39 +++++++++ .../catch/performance/memset/hipMemset.cc | 79 ++++++++++++++++++ .../catch/performance/memset/hipMemset2D.cc | 71 ++++++++++++++++ .../performance/memset/hipMemset2DAsync.cc | 74 +++++++++++++++++ .../catch/performance/memset/hipMemset3D.cc | 72 ++++++++++++++++ .../performance/memset/hipMemset3DAsync.cc | 74 +++++++++++++++++ .../performance/memset/hipMemsetAsync.cc | 81 ++++++++++++++++++ .../catch/performance/memset/hipMemsetD16.cc | 80 ++++++++++++++++++ .../performance/memset/hipMemsetD16Async.cc | 82 +++++++++++++++++++ .../catch/performance/memset/hipMemsetD32.cc | 80 ++++++++++++++++++ .../performance/memset/hipMemsetD32Async.cc | 82 +++++++++++++++++++ .../catch/performance/memset/hipMemsetD8.cc | 80 ++++++++++++++++++ .../performance/memset/hipMemsetD8Async.cc | 82 +++++++++++++++++++ 
14 files changed, 983 insertions(+) create mode 100644 projects/hip-tests/catch/performance/memset/CMakeLists.txt create mode 100644 projects/hip-tests/catch/performance/memset/hipMemset.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemset2D.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemset2DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemset3D.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemset3DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetAsync.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD16.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD16Async.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD32.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD32Async.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD8.cc create mode 100644 projects/hip-tests/catch/performance/memset/hipMemsetD8Async.cc diff --git a/projects/hip-tests/catch/include/hip_test_defgroups.hh b/projects/hip-tests/catch/include/hip_test_defgroups.hh index fdf289e8c5..ff26989966 100644 --- a/projects/hip-tests/catch/include/hip_test_defgroups.hh +++ b/projects/hip-tests/catch/include/hip_test_defgroups.hh @@ -173,6 +173,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup PerformanceTest Performance tests + * @{ + * This section describes performance tests for the target API groups and use-cases. + * @} + */ + /** * @defgroup TextureTest Texture Management * @{ diff --git a/projects/hip-tests/catch/performance/memset/CMakeLists.txt b/projects/hip-tests/catch/performance/memset/CMakeLists.txt new file mode 100644 index 0000000000..f55683298b --- /dev/null +++ b/projects/hip-tests/catch/performance/memset/CMakeLists.txt @@ -0,0 +1,39 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + hipMemset.cc + hipMemsetAsync.cc + hipMemsetD8.cc + hipMemsetD8Async.cc + hipMemsetD16.cc + hipMemsetD16Async.cc + hipMemsetD32.cc + hipMemsetD32Async.cc + hipMemset2D.cc + hipMemset2DAsync.cc + hipMemset3D.cc + hipMemset3DAsync.cc +) + +hip_add_exe_to_target(NAME MemsetPerformance + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++17) \ No newline at end of file diff --git a/projects/hip-tests/catch/performance/memset/hipMemset.cc b/projects/hip-tests/catch/performance/memset/hipMemset.cc new file mode 100644 index 0000000000..0063eeca51 --- /dev/null +++ b/projects/hip-tests/catch/performance/memset/hipMemset.cc @@ -0,0 +1,79 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup memset memset + * @{ + * @ingroup PerformanceTest + * Contains performance tests for all memset HIP APIs. 
+ */ + +class MemsetBenchmark : public Benchmark { + public: + MemsetBenchmark(LinearAllocs allocation_type, size_t size) + : dst_(allocation_type, size), size_(size) {} + + void operator()() { + TIMED_SECTION(kTimerTypeEvent) { HIP_CHECK(hipMemset(dst_.ptr(), 17, size_)); } + } + + private: + LinearAllocGuard dst_; + const size_t size_; +}; + +static void RunBenchmark(LinearAllocs allocation_type, size_t size) { + MemsetBenchmark benchmark(allocation_type, size); + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(allocation_type)); + benchmark.Run(); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemset`: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - device + * - host + * - managed + * Test source + * ------------------------ + * - performance/memset/hipMemset.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemset") { + const auto size = GENERATE(4_KB, 4_MB, 16_MB); + const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc, + LinearAllocs::hipMallocManaged); + RunBenchmark(allocation_type, size); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/performance/memset/hipMemset2D.cc b/projects/hip-tests/catch/performance/memset/hipMemset2D.cc new file mode 100644 index 0000000000..8384be095d --- /dev/null +++ b/projects/hip-tests/catch/performance/memset/hipMemset2D.cc @@ -0,0 +1,71 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemset2D on dst_ in a TIMED_SECTION.
+class Memset2DBenchmark : public Benchmark {
+ public:
+  Memset2DBenchmark(size_t width, size_t height) : dst_(width, height) {}
+
+  void operator()() {
+    TIMED_SECTION(kTimerTypeEvent) {
+      HIP_CHECK(hipMemset2D(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height()));
+    }
+  }
+
+ private:
+  LinearAllocGuard2D dst_;  // supplies the pointer, pitch, width and height used by the call
+};
+// Builds the benchmark, adds "(width, height)" as its section name, then runs it.
+static void RunBenchmark(size_t width, size_t height) {
+  Memset2DBenchmark benchmark(width, height);
+  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemset2D`:
+ *    -# Allocation size
+ *      - Small: 4 KB x 32 B
+ *      - Medium: 4 MB x 32 B
+ *      - Large: 16 MB x 32 B
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemset2D.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemset2D") {
+  const auto width = GENERATE(4_KB, 4_MB, 16_MB);
+  RunBenchmark(width, 32);  // height is fixed at 32 for every generated width
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemset2DAsync.cc b/projects/hip-tests/catch/performance/memset/hipMemset2DAsync.cc
new file mode 100644
index 0000000000..4f6c68d418
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemset2DAsync.cc
@@ -0,0 +1,74 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemset2DAsync on stream_ in a TIMED_SECTION_STREAM.
+class Memset2DAsyncBenchmark : public Benchmark {
+ public:
+  Memset2DAsyncBenchmark(size_t width, size_t height)
+      : dst_(width, height), stream_(Streams::created) {}
+  // Takes no arguments: pitch, width and height are all read back from dst_.
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemset2DAsync(dst_.ptr(), dst_.pitch(), 17, dst_.width(), dst_.height(),
+                                 stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard2D dst_;  // supplies the pointer, pitch, width and height used by the call
+  StreamGuard stream_;      // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds "(width, height)" as its section name, then runs it.
+static void RunBenchmark(size_t width, size_t height) {
+  Memset2DAsyncBenchmark benchmark(width, height);
+  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")");
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemset2DAsync`:
+ *    -# Allocation size
+ *      - Small: 4 KB x 32 B
+ *      - Medium: 4 MB x 32 B
+ *      - Large: 16 MB x 32 B
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemset2DAsync.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemset2DAsync") {
+  const auto width = GENERATE(4_KB, 4_MB, 16_MB);
+  RunBenchmark(width, 32);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemset3D.cc b/projects/hip-tests/catch/performance/memset/hipMemset3D.cc
new file mode 100644
index 0000000000..69acea51c6
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemset3D.cc
@@ -0,0 +1,72 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemset3D on dst_ in a TIMED_SECTION.
+class Memset3DBenchmark : public Benchmark {
+ public:
+  Memset3DBenchmark(size_t width, size_t height, size_t depth) : dst_(width, height, depth) {}
+
+  void operator()() {
+    TIMED_SECTION(kTimerTypeEvent) {
+      HIP_CHECK(hipMemset3D(dst_.pitched_ptr(), 17, dst_.extent()));
+    }
+  }
+
+ private:
+  LinearAllocGuard3D dst_;  // supplies the pitched pointer and extent used by the call
+};
+// Builds the benchmark, adds "(width, height, depth)" as its section name, then runs it.
+static void RunBenchmark(size_t width, size_t height, size_t depth) {
+  Memset3DBenchmark benchmark(width, height, depth);
+  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
+                           std::to_string(depth) + ")");
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemset3D`:
+ *    -# Allocation size
+ *      - Small: 4 KB x 16 B x 4 B
+ *      - Medium: 4 MB x 16 B x 4 B
+ *      - Large: 16 MB x 16 B x 4 B
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemset3D.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemset3D") {
+  const auto width = GENERATE(4_KB, 4_MB, 16_MB);
+  RunBenchmark(width, 16, 4);  // height 16 and depth 4 are fixed for every generated width
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemset3DAsync.cc b/projects/hip-tests/catch/performance/memset/hipMemset3DAsync.cc
new file mode 100644
index 0000000000..7efb344f9f
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemset3DAsync.cc
@@ -0,0 +1,74 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemset3DAsync on stream_ in a TIMED_SECTION_STREAM.
+class Memset3DAsyncBenchmark : public Benchmark {
+ public:
+  Memset3DAsyncBenchmark(size_t width, size_t height, size_t depth)
+      : dst_(width, height, depth), stream_(Streams::created) {}
+
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemset3DAsync(dst_.pitched_ptr(), 17, dst_.extent(), stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard3D dst_;  // supplies the pitched pointer and extent used by the call
+  StreamGuard stream_;      // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds "(width, height, depth)" as its section name, then runs it.
+static void RunBenchmark(size_t width, size_t height, size_t depth) {
+  Memset3DAsyncBenchmark benchmark(width, height, depth);
+  benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ", " +
+                           std::to_string(depth) + ")");
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemset3DAsync`:
+ *    -# Allocation size
+ *      - Small: 4 KB x 16 B x 4 B
+ *      - Medium: 4 MB x 16 B x 4 B
+ *      - Large: 16 MB x 16 B x 4 B
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemset3DAsync.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemset3DAsync") {
+  const auto width = GENERATE(4_KB, 4_MB, 16_MB);
+  RunBenchmark(width, 16, 4);  // height 16 and depth 4 are fixed for every generated width
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetAsync.cc b/projects/hip-tests/catch/performance/memset/hipMemsetAsync.cc
new file mode 100644
index 0000000000..ec82b27887
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetAsync.cc
@@ -0,0 +1,81 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetAsync on stream_ in a TIMED_SECTION_STREAM.
+class MemsetAsyncBenchmark : public Benchmark {
+ public:
+  MemsetAsyncBenchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}
+
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemsetAsync(dst_.ptr(), 17, size_, stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation, created once in the constructor
+  const size_t size_;     // value passed both to the allocation and to hipMemsetAsync
+  StreamGuard stream_;    // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetAsyncBenchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetAsync`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetAsync.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetAsync") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD16.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD16.cc
new file mode 100644
index 0000000000..1f0e50cc6a
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD16.cc
@@ -0,0 +1,80 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD16 on dst_ in a TIMED_SECTION.
+class MemsetD16Benchmark : public Benchmark {
+ public:
+  MemsetD16Benchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), count_(size / sizeof(unsigned short)) {}
+  // hipMemsetD16 counts 16-bit elements, not bytes; passing the byte size would overrun dst_.
+  void operator()() {
+    TIMED_SECTION(kTimerTypeEvent) {
+      HIP_CHECK(hipMemsetD16(reinterpret_cast(dst_.ptr()), 311, count_));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation of `size` bytes
+  const size_t count_;    // number of 16-bit elements that fit in dst_
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD16Benchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD16`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD16.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD16") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD16Async.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD16Async.cc
new file mode 100644
index 0000000000..24aa103218
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD16Async.cc
@@ -0,0 +1,82 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD16Async on stream_ in a TIMED_SECTION_STREAM.
+class MemsetD16AsyncBenchmark : public Benchmark {
+ public:
+  MemsetD16AsyncBenchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), count_(size / sizeof(unsigned short)), stream_(Streams::created) {}
+  // hipMemsetD16Async counts 16-bit elements, not bytes; the byte size would overrun dst_.
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemsetD16Async(reinterpret_cast(dst_.ptr()), 311, count_,
+                                  stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation of `size` bytes
+  const size_t count_;    // number of 16-bit elements that fit in dst_
+  StreamGuard stream_;    // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD16AsyncBenchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD16Async`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD16Async.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD16Async") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD32.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD32.cc
new file mode 100644
index 0000000000..c64573a54e
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD32.cc
@@ -0,0 +1,80 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD32 on dst_ in a TIMED_SECTION.
+class MemsetD32Benchmark : public Benchmark {
+ public:
+  MemsetD32Benchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), count_(size / sizeof(int)) {}
+  // hipMemsetD32 counts 32-bit elements, not bytes; passing the byte size would overrun dst_.
+  void operator()() {
+    TIMED_SECTION(kTimerTypeEvent) {
+      HIP_CHECK(hipMemsetD32(reinterpret_cast(dst_.ptr()), 123'456, count_));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation of `size` bytes
+  const size_t count_;    // number of 32-bit elements that fit in dst_
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD32Benchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD32`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD32.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD32") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD32Async.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD32Async.cc
new file mode 100644
index 0000000000..d755bee5b0
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD32Async.cc
@@ -0,0 +1,82 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD32Async on stream_ in a TIMED_SECTION_STREAM.
+class MemsetD32AsyncBenchmark : public Benchmark {
+ public:
+  MemsetD32AsyncBenchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), count_(size / sizeof(int)), stream_(Streams::created) {}
+  // hipMemsetD32Async counts 32-bit elements, not bytes; the byte size would overrun dst_.
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemsetD32Async(reinterpret_cast(dst_.ptr()), 123'456, count_,
+                                  stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation of `size` bytes
+  const size_t count_;    // number of 32-bit elements that fit in dst_
+  StreamGuard stream_;    // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD32AsyncBenchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD32Async`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD32Async.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD32Async") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD8.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD8.cc
new file mode 100644
index 0000000000..b05a30d11a
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD8.cc
@@ -0,0 +1,80 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD8 on dst_ in a TIMED_SECTION.
+class MemsetD8Benchmark : public Benchmark {
+ public:
+  MemsetD8Benchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), size_(size) {}
+
+  void operator()() {
+    TIMED_SECTION(kTimerTypeEvent) {
+      HIP_CHECK(hipMemsetD8(reinterpret_cast(dst_.ptr()), 17, size_));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation, created once in the constructor
+  const size_t size_;     // value passed both to the allocation and to hipMemsetD8
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD8Benchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD8`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD8.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD8") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}
diff --git a/projects/hip-tests/catch/performance/memset/hipMemsetD8Async.cc b/projects/hip-tests/catch/performance/memset/hipMemsetD8Async.cc
new file mode 100644
index 0000000000..73d734530b
--- /dev/null
+++ b/projects/hip-tests/catch/performance/memset/hipMemsetD8Async.cc
@@ -0,0 +1,82 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+#include
+
+/**
+ * @addtogroup memset memset
+ * @{
+ * @ingroup PerformanceTest
+ */
+// Benchmark functor: operator() wraps one hipMemsetD8Async on stream_ in a TIMED_SECTION_STREAM.
+class MemsetD8AsyncBenchmark : public Benchmark {
+ public:
+  MemsetD8AsyncBenchmark(LinearAllocs allocation_type, size_t size)
+      : dst_(allocation_type, size), size_(size), stream_(Streams::created) {}
+
+  void operator()() {
+    TIMED_SECTION_STREAM(kTimerTypeEvent, stream_.stream()) {
+      HIP_CHECK(hipMemsetD8Async(reinterpret_cast(dst_.ptr()), 17, size_,
+                                 stream_.stream()));
+    }
+  }
+
+ private:
+  LinearAllocGuard dst_;  // destination allocation, created once in the constructor
+  const size_t size_;     // value passed both to the allocation and to hipMemsetD8Async
+  StreamGuard stream_;    // stream used for both the memset and the timed section
+};
+// Builds the benchmark, adds the size and allocation type as section names, then runs it.
+static void RunBenchmark(LinearAllocs allocation_type, size_t size) {
+  MemsetD8AsyncBenchmark benchmark(allocation_type, size);
+  benchmark.AddSectionName(std::to_string(size));
+  benchmark.AddSectionName(GetAllocationSectionName(allocation_type));
+  benchmark.Run();
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Executes `hipMemsetD8Async`:
+ *    -# Allocation size
+ *      - Small: 4 KB
+ *      - Medium: 4 MB
+ *      - Large: 16 MB
+ *    -# Allocation type
+ *      - device
+ *      - host
+ *      - managed
+ * Test source
+ * ------------------------
+ *  - performance/memset/hipMemsetD8Async.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Performance_hipMemsetD8Async") {
+  const auto size = GENERATE(4_KB, 4_MB, 16_MB);
+  const auto allocation_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipHostMalloc,
+                                        LinearAllocs::hipMallocManaged);
+  RunBenchmark(allocation_type, size);
+}

From b66fc2b672e116779958048369741f6c8afb5a27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com>
Date: Thu, 28 Dec 2023 19:32:13 +0100
Subject: [PATCH 14/19] EXSWHTEC-114 - Implement tests for hipExtLaunchKernel APIs #60

Change-Id: I9152c31c2c07ddbfb48865cd68a42557e763be28

[ROCm/hip-tests commit: 3f65d7a4104d7a181a79d0f37a9c70abf0421fc8]
---
 .../catch/hipTestMain/config/config_amd_linux 
| 4 + .../hipTestMain/config/config_amd_windows | 4 + .../unit/executionControl/CMakeLists.txt | 7 + .../executionControl/hipExtLaunchKernel.cc | 176 ++++++++++++++++++ .../hipExtLaunchMultiKernelMultiDevice.cc | 144 ++++++++++++++ 5 files changed, 335 insertions(+) create mode 100644 projects/hip-tests/catch/unit/executionControl/hipExtLaunchKernel.cc create mode 100644 projects/hip-tests/catch/unit/executionControl/hipExtLaunchMultiKernelMultiDevice.cc diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 8d89762cf3..5cf8241d8d 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -53,6 +53,10 @@ "Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported", "Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported", "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported", + "NOTE: The following test is disabled due to defect - EXSWHTEC-243", + "Unit_hipExtLaunchKernel_Negative_Parameters", + "NOTE: The following test is disabled due to defect - EXSWHTEC-244", + "Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters", "Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters", "Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters", "Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 623b25dfba..44a6dd0e0c 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -218,6 +218,10 @@ "Unit_hipVectorTypes_test_on_device", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", + 
"NOTE: The following test is disabled due to defect - EXSWHTEC-243", + "Unit_hipExtLaunchKernel_Negative_Parameters", + "NOTE: The following test is disabled due to defect - EXSWHTEC-244", + "Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", diff --git a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt index 4c28438009..293da7cb99 100644 --- a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt @@ -5,6 +5,13 @@ set(TEST_SRC hipFuncSetAttribute.cc ) +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC ${TEST_SRC} + hipExtLaunchKernel.cc + hipExtLaunchMultiKernelMultiDevice.cc + ) +endif() + hip_add_exe_to_target(NAME ExecutionControlTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests diff --git a/projects/hip-tests/catch/unit/executionControl/hipExtLaunchKernel.cc b/projects/hip-tests/catch/unit/executionControl/hipExtLaunchKernel.cc new file mode 100644 index 0000000000..8b85507de5 --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipExtLaunchKernel.cc @@ -0,0 +1,176 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") { + SECTION("Kernel with no arguments") { + HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr, nullptr, nullptr, 0u)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel_42), dim3{1, 1, 1}, dim3{1, 1, 1}, + kernel_args, 0, nullptr, nullptr, nullptr, 0u)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") { /* Each section launches with one block dimension set to the maximum the device reports for that dimension. */ + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, + nullptr, 0, nullptr, nullptr, nullptr, 0u)); + } + + SECTION("blockDim.y == maxBlockDimY") { /* The limit goes in the y component; placing it in x would only repeat the x-dimension check. */ + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, + nullptr, 0, nullptr, nullptr, nullptr, 0u)); + } + + SECTION("blockDim.z ==
maxBlockDimZ") { /* The limit goes in the z component; placing it in x would only repeat the x-dimension check. */ + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, + nullptr, 0, nullptr, nullptr, nullptr, 0u)); + } +} + +TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") { + SECTION("f == nullptr") { + HIP_CHECK_ERROR(hipExtLaunchKernel(nullptr, dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, 0, nullptr, + nullptr, nullptr, 0u), + hipErrorInvalidDeviceFunction); + } + + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{0, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 0, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 0}, + dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{0, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 0, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 0}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidConfiguration); + } + +
SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int dim = std::ceil(std::cbrt(max)); + HIP_CHECK_ERROR( + hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim}, + nullptr, 0, nullptr, nullptr, nullptr, 0u), + hipErrorInvalidConfiguration); + } + + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u), + hipErrorOutOfMemory); + } + + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, stream, nullptr, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("Invalid startEvent") { + hipEvent_t event = nullptr; + HIP_CHECK(hipEventCreate(&event)); + HIP_CHECK(hipEventDestroy(event)); + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr, event, nullptr, 0u), + hipErrorInvalidValue); + } + + SECTION("Invalid endEvent") { + hipEvent_t event 
= nullptr; + HIP_CHECK(hipEventCreate(&event)); + HIP_CHECK(hipEventDestroy(event)); + HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr, nullptr, event, 0u), + hipErrorInvalidValue); + } +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/executionControl/hipExtLaunchMultiKernelMultiDevice.cc b/projects/hip-tests/catch/unit/executionControl/hipExtLaunchMultiKernelMultiDevice.cc new file mode 100644 index 0000000000..97b1420b9c --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipExtLaunchMultiKernelMultiDevice.cc @@ -0,0 +1,144 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Positive_Basic") { /* Launches the empty kernel once per device, each on its own stream, then synchronizes and destroys every stream. */ + const auto device_count = HipTest::getDeviceCount(); + + std::vector params_list(device_count); + + int device = 0; + for (auto& params : params_list) { + params.func = reinterpret_cast(kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + HIP_CHECK(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u)); + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamSynchronize(params.stream)); + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} + +TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters") { /* Builds one valid launch entry per device; each section then passes one invalid argument or mismatched entry and expects hipErrorInvalidValue. */ + const auto device_count = HipTest::getDeviceCount(); + + std::vector params_list(device_count); + + int device = 0; + for (auto& params : params_list) { + params.func = reinterpret_cast(kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + SECTION("launchParamsList == nullptr") { + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(nullptr, device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices == 0") { + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), 0, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices > device count") { + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count + 1, 0u), + hipErrorInvalidValue); + } + + SECTION("invalid flags") { + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 999), + hipErrorInvalidValue); + } + + if (device_count > 1) { + SECTION("launchParamsList.func
doesn't match across all devices") { + params_list[1].func = reinterpret_cast(kernel2); + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.gridDim doesn't match across all kernels") { + params_list[1].gridDim = dim3{2, 2, 2}; + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.blockDim doesn't match across all kernels") { + params_list[1].blockDim = dim3{2, 2, 2}; + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.sharedMem doesn't match across all kernels") { + params_list[1].sharedMem = 1024; + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} + +TEST_CASE("Unit_hipExtLaunchMultiKernelMultiDevice_Negative_MultiKernelSameDevice") { /* Both launch entries use streams created while device 0 is current; the launch is expected to fail with hipErrorInvalidValue. */ + HIP_CHECK(hipSetDevice(0)); + + std::vector params_list(2); + + for (auto& params : params_list) { + params.func = reinterpret_cast(kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + HIP_CHECK_ERROR(hipExtLaunchMultiKernelMultiDevice(params_list.data(), 2, 0u), + hipErrorInvalidValue); + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} \ No newline at end of file From f3a4512a78fa29582a8393bf9641325b9b2b8c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:31:47 +0100 Subject: [PATCH 15/19] EXSWHTEC-115 - Implement tests for hipLaunchCooperativeKernel APIs #59 Change-Id:
I5bda5ee3787a73aeeab5c25c05278e7aa2f8bfa2 [ROCm/hip-tests commit: 380ace735f1a373a90690a410de8164f6d962e5d] --- .../catch/hipTestMain/config/config_amd_linux | 5 - .../hipTestMain/config/config_amd_windows | 5 - .../unit/executionControl/CMakeLists.txt | 5 +- .../execution_control_common.cc | 12 +- .../execution_control_common.hh | 8 +- .../hipLaunchCooperativeKernel.cc | 188 ++++++++++++++++++ .../hipLaunchCooperativeKernelMultiDevice.cc | 159 +++++++++++++++ 7 files changed, 369 insertions(+), 13 deletions(-) create mode 100644 projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel.cc create mode 100644 projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernelMultiDevice.cc diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index 5cf8241d8d..f479b39e15 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -48,11 +48,6 @@ "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout", "Unit_hipFuncSetAttribute_Positive_Parameters", "Unit_hipFuncSetAttribute_Negative_Parameters", - "NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240", - "Unit_hipFuncSetCacheConfig_Negative_Not_Supported", - "Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported", - "Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported", - "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported", "NOTE: The following test is disabled due to defect - EXSWHTEC-243", "Unit_hipExtLaunchKernel_Negative_Parameters", "NOTE: The following test is disabled due to defect - EXSWHTEC-244", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 44a6dd0e0c..097f165983 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ 
b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -115,11 +115,6 @@ "Unit_hipEventCreateWithFlags_DefaultFlg_NonCohHstMem", "Unit_hipEventCreateWithFlags_DisableSystemFence_CohHstMem", "Unit_hipEventCreateWithFlags_DefaultFlg_CohHstMem", - "NOTE: The following 4 tests are disabled due to defect - EXSWHTEC-240", - "Unit_hipFuncSetCacheConfig_Negative_Not_Supported", - "Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported", - "Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported", - "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported", "Unit_hipOccupancyMaxActiveBlocksPerMultiprocessor_Negative_Parameters", "Unit_hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags_Negative_Parameters", "Unit_hipModuleOccupancyMaxPotentialBlockSizeWithFlags_Negative_Parameters", diff --git a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt index 293da7cb99..a27f9dc4f1 100644 --- a/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/executionControl/CMakeLists.txt @@ -3,6 +3,9 @@ set(TEST_SRC hipFuncSetCacheConfig.cc hipFuncSetSharedMemConfig.cc hipFuncSetAttribute.cc + hipFuncGetAttributes.cc + hipLaunchCooperativeKernel.cc + hipLaunchCooperativeKernelMultiDevice.cc ) if(HIP_PLATFORM MATCHES "amd") @@ -15,4 +18,4 @@ endif() hip_add_exe_to_target(NAME ExecutionControlTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++17) \ No newline at end of file + COMPILE_OPTIONS -std=c++17) diff --git a/projects/hip-tests/catch/unit/executionControl/execution_control_common.cc b/projects/hip-tests/catch/unit/executionControl/execution_control_common.cc index f9a2cc1675..e4c04de1f3 100644 --- a/projects/hip-tests/catch/unit/executionControl/execution_control_common.cc +++ b/projects/hip-tests/catch/unit/executionControl/execution_control_common.cc @@ -23,5 +23,15 @@ THE 
SOFTWARE. #include "execution_control_common.hh" #include +#include -__global__ void kernel() {} \ No newline at end of file +__global__ void kernel() {} + +__global__ void kernel2() {} + +__global__ void kernel_42(int* val) { *val = 42; } + +__global__ void coop_kernel() { + cooperative_groups::grid_group grid = cooperative_groups::this_grid(); + grid.sync(); +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/executionControl/execution_control_common.hh b/projects/hip-tests/catch/unit/executionControl/execution_control_common.hh index e66fcf28f3..425c5309ed 100644 --- a/projects/hip-tests/catch/unit/executionControl/execution_control_common.hh +++ b/projects/hip-tests/catch/unit/executionControl/execution_control_common.hh @@ -22,4 +22,10 @@ THE SOFTWARE. #pragma once -__global__ void kernel(); \ No newline at end of file +__global__ void kernel(); + +__global__ void kernel2(); + +__global__ void kernel_42(int* val); + +__global__ void coop_kernel(); \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel.cc b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel.cc new file mode 100644 index 0000000000..eb7eb2293f --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernel.cc @@ -0,0 +1,188 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Basic") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + SECTION("Cooperative kernel with no arguments") { + HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(coop_kernel), dim3{2, 2, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel_42), dim3{1, 1, 1}, + dim3{1, 1, 1}, kernel_args, 0, nullptr)); + + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{x, 1, 1}, nullptr, 0, nullptr)); + 
} + + SECTION("blockDim.y == maxBlockDimY") { /* The limit goes in the y component; placing it in x would only repeat the x-dimension check. */ + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, y, 1}, nullptr, 0, nullptr)); + } + + SECTION("blockDim.z == maxBlockDimZ") { /* The limit goes in the z component; placing it in x would only repeat the x-dimension check. */ + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, z}, nullptr, 0, nullptr)); + } +} + +TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + SECTION("f == nullptr") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(static_cast(nullptr), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidDeviceFunction); + } + + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{0, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 0, 1}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 0}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{0, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 0, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1},
+ dim3{1, 1, 0}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{x, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, y, 1}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, z}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int dim = std::ceil(std::cbrt(max)); + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{dim, dim, dim}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION( + "gridDim.x * gridDim.y * gridDim.z > maxActiveBlocksPerMultiprocessor * " + "multiProcessorCount") { + int max_blocks; + HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, + reinterpret_cast(kernel), 1, 0)); + const unsigned int multiproc_count = + GetDeviceAttribute(0, hipDeviceAttributeMultiprocessorCount); + const unsigned int dim = std::ceil(std::cbrt(max_blocks * multiproc_count)); + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{dim, dim, dim}, + dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorCooperativeLaunchTooLarge); + } + + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const 
unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, max, nullptr), + hipErrorCooperativeLaunchTooLarge); + } + + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{1, 1, 1}, nullptr, 0, stream), + hipErrorContextIsDestroyed); + } +} \ No newline at end of file diff --git a/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernelMultiDevice.cc b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernelMultiDevice.cc new file mode 100644 index 0000000000..c6b8503203 --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipLaunchCooperativeKernelMultiDevice.cc @@ -0,0 +1,159 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Positive_Basic") { /* Launches coop_kernel once per device, each on its own stream, then synchronizes and destroys every stream. */ + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + const auto device_count = HipTest::getDeviceCount(); + + std::vector params_list(device_count); + + int device = 0; + for (auto& params : params_list) { + params.func = reinterpret_cast(coop_kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + HIP_CHECK(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u)); + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamSynchronize(params.stream)); + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} + +TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_Parameters") { /* Builds one valid launch entry per device; each section then passes one invalid argument or mismatched entry and expects hipErrorInvalidValue. */ + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + const auto device_count = HipTest::getDeviceCount(); + + std::vector params_list(device_count); + + int device = 0; + for (auto& params : params_list) { + params.func = reinterpret_cast(coop_kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + SECTION("launchParamsList == nullptr") { +
HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(nullptr, device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices == 0") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), 0, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices > device count") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count + 1, 0u), + hipErrorInvalidValue); + } + + SECTION("invalid flags") { + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 999), + hipErrorInvalidValue); + } + + if (device_count > 1) { + SECTION("launchParamsList.func doesn't match across all devices") { + params_list[1].func = reinterpret_cast(kernel); + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.gridDim doesn't match across all kernels") { + params_list[1].gridDim = dim3{2, 2, 2}; + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.blockDim doesn't match across all kernels") { + params_list[1].blockDim = dim3{2, 2, 2}; + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.sharedMem doesn't match across all kernels") { + params_list[1].sharedMem = 1024; + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + } + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} + +TEST_CASE("Unit_hipLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice") { /* Both launch entries use streams created while device 0 is current; the launch is expected to fail with hipErrorInvalidValue. */ + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + HIP_CHECK(hipSetDevice(0)); + + std::vector params_list(2); + + for (auto&
params : params_list) { + params.func = reinterpret_cast(coop_kernel); + params.gridDim = dim3{1, 1, 1}; + params.blockDim = dim3{1, 1, 1}; + params.args = nullptr; + params.sharedMem = 0; + HIP_CHECK(hipStreamCreate(&params.stream)); + } + + HIP_CHECK_ERROR(hipLaunchCooperativeKernelMultiDevice(params_list.data(), 2, 0u), + hipErrorInvalidValue); + + for (const auto& params : params_list) { + HIP_CHECK(hipStreamDestroy(params.stream)); + } +} \ No newline at end of file From 9d48901e139954ffeccb9cec488b3fcf7505104a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:26:42 +0100 Subject: [PATCH 16/19] EXSWHTEC-108 - Implement tests for hipFuncGetAttributes #58 Change-Id: I7aa979434724aa3d39343a6c69f67de83e61de4c [ROCm/hip-tests commit: 053c58ab2da663221aaad6265ce2dfdd23cd6695] --- .../catch/hipTestMain/config/config_amd_linux | 4 + .../hipTestMain/config/config_amd_windows | 5 ++ projects/hip-tests/catch/include/utils.hh | 6 ++ .../executionControl/hipFuncGetAttributes.cc | 73 +++++++++++++++++++ 4 files changed, 88 insertions(+) create mode 100644 projects/hip-tests/catch/unit/executionControl/hipFuncGetAttributes.cc diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux index f479b39e15..2f89c545c7 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_linux +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_linux @@ -48,6 +48,10 @@ "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout", "Unit_hipFuncSetAttribute_Positive_Parameters", "Unit_hipFuncSetAttribute_Negative_Parameters", + "NOTE: The following test is disabled due to defect - EXSWHTEC-241", + "Unit_hipFuncGetAttributes_Negative_Parameters", + "NOTE: The following test is disabled due to defect - EXSWHTEC-242", + "Unit_hipFuncGetAttributes_Positive_Basic", "NOTE: The following test is disabled
due to defect - EXSWHTEC-243", "Unit_hipExtLaunchKernel_Negative_Parameters", "NOTE: The following test is disabled due to defect - EXSWHTEC-244", diff --git a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows index 097f165983..df0f98d53c 100644 --- a/projects/hip-tests/catch/hipTestMain/config/config_amd_windows +++ b/projects/hip-tests/catch/hipTestMain/config/config_amd_windows @@ -213,10 +213,15 @@ "Unit_hipVectorTypes_test_on_device", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", + "NOTE: The following test is disabled due to defect - EXSWHTEC-241", + "Unit_hipFuncGetAttributes_Negative_Parameters", + "NOTE: The following test is disabled due to defect - EXSWHTEC-242", + "Unit_hipFuncGetAttributes_Positive_Basic", "NOTE: The following test is disabled due to defect - EXSWHTEC-243", "Unit_hipExtLaunchKernel_Negative_Parameters", "NOTE: The following test is disabled due to defect - EXSWHTEC-244", "Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters", + "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", diff --git a/projects/hip-tests/catch/include/utils.hh b/projects/hip-tests/catch/include/utils.hh index 5efd6a5125..f025768c14 100644 --- a/projects/hip-tests/catch/include/utils.hh +++ b/projects/hip-tests/catch/include/utils.hh @@ -169,3 +169,9 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes) }; return (... 
&& DeviceAttributeSupport(device, attributes)); } + +inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) { + int value = 0; + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); + return value; +} diff --git a/projects/hip-tests/catch/unit/executionControl/hipFuncGetAttributes.cc b/projects/hip-tests/catch/unit/executionControl/hipFuncGetAttributes.cc new file mode 100644 index 0000000000..e97f44300e --- /dev/null +++ b/projects/hip-tests/catch/unit/executionControl/hipFuncGetAttributes.cc @@ -0,0 +1,73 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +constexpr size_t kConstSizeBytes = 128; +__constant__ char const_data[kConstSizeBytes]; + +__global__ void attribute_test_kernel() {} + +TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { + hipFuncAttributes attr; + HIP_CHECK(hipFuncGetAttributes(&attr, reinterpret_cast(attribute_test_kernel))); + + SECTION("binaryVersion") { +#if HT_NVIDIA + const auto major = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMajor); + const auto minor = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMinor); + REQUIRE(attr.binaryVersion == major * 10 + minor); +#elif HT_AMD + REQUIRE(attr.binaryVersion > 0); +#endif + } + + SECTION("cacheModeCA") { REQUIRE((attr.cacheModeCA == 0 || attr.cacheModeCA == 1)); } + + SECTION("constSizeBytes") { REQUIRE(attr.constSizeBytes == kConstSizeBytes); } + + SECTION("maxThreadsPerBlock") { + REQUIRE(attr.maxThreadsPerBlock == GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock)); + } + + SECTION("numRegs") { REQUIRE(attr.numRegs >= 0); } + + SECTION("ptxVersion") { REQUIRE(attr.ptxVersion > 0); } + + SECTION("sharedSizeBytes") { + REQUIRE(attr.sharedSizeBytes <= + GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock)); + } +} + +TEST_CASE("Unit_hipFuncGetAttributes_Negative_Parameters") { + SECTION("attr == nullptr") { + HIP_CHECK_ERROR(hipFuncGetAttributes(nullptr, reinterpret_cast(attribute_test_kernel)), + hipErrorInvalidValue); + } + SECTION("func == nullptr") { + hipFuncAttributes attr; + HIP_CHECK_ERROR(hipFuncGetAttributes(&attr, nullptr), hipErrorInvalidDeviceFunction); + } +} \ No newline at end of file From 20ee07a6d7fcb0aa45c5a1d0ad73efdb72ecf3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:25:01 +0100 Subject: [PATCH 17/19] EXSWHTEC-107 - Implement tests for Execution Control setter APIs #57 Change-Id: I5d7e6471237b4a5726e09e4de06dafe2aa78cd28 
[ROCm/hip-tests commit: 4507ee6531027edd6916a7a4b2b3585308a0fe16] --- .../hipTestMain/config/config_nvidia_linux_common.json | 9 +++++++++ .../hipTestMain/config/config_nvidia_windows_common.json | 9 +++++++++ projects/hip-tests/catch/unit/CMakeLists.txt | 1 + 3 files changed, 19 insertions(+) create mode 100644 projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux_common.json create mode 100644 projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows_common.json diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux_common.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux_common.json new file mode 100644 index 0000000000..8247b49e3a --- /dev/null +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_linux_common.json @@ -0,0 +1,9 @@ +{ + "DisabledTests": + [ + "Unit_hipFuncSetCacheConfig_Negative_Not_Supported", + "Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported", + "Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported", + "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported" + ] +} diff --git a/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows_common.json b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows_common.json new file mode 100644 index 0000000000..8247b49e3a --- /dev/null +++ b/projects/hip-tests/catch/hipTestMain/config/config_nvidia_windows_common.json @@ -0,0 +1,9 @@ +{ + "DisabledTests": + [ + "Unit_hipFuncSetCacheConfig_Negative_Not_Supported", + "Unit_hipFuncSetSharedMemConfig_Negative_Not_Supported", + "Unit_hipFuncSetAttribute_Positive_MaxDynamicSharedMemorySize_Not_Supported", + "Unit_hipFuncSetAttribute_Positive_PreferredSharedMemoryCarveout_Not_Supported" + ] +} diff --git a/projects/hip-tests/catch/unit/CMakeLists.txt b/projects/hip-tests/catch/unit/CMakeLists.txt index ab39835390..bfb390924a 100644 --- a/projects/hip-tests/catch/unit/CMakeLists.txt +++ 
b/projects/hip-tests/catch/unit/CMakeLists.txt @@ -47,6 +47,7 @@ add_subdirectory(atomics) add_subdirectory(complex) add_subdirectory(p2p) add_subdirectory(gcc) +add_subdirectory(vector_types) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) From 48c932afad3145b6fc60f8977dac510cf53f4e68 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 29 Dec 2023 16:02:50 +0000 Subject: [PATCH 18/19] EXSWHTEC-259 - Implement new and extend existing tests for thread_block #151 Change-Id: I4d85af8e5f805a1c8ff7b7efd3b3400f38343ea9 [ROCm/hip-tests commit: 4fa177dd2bc490ec7d31fba785791331081ec8c9] --- projects/hip-tests/catch/hipTestMain/main.cc | 3 + .../catch/include/hip_test_defgroups.hh | 23 -- .../catch/performance/CMakeLists.txt | 1 - projects/hip-tests/catch/unit/CMakeLists.txt | 1 - .../cooperative_groups_common.hh | 10 +- .../unit/cooperativeGrps/thread_block.cc | 350 ++++++++++++++++++ 6 files changed, 362 insertions(+), 26 deletions(-) create mode 100644 projects/hip-tests/catch/unit/cooperativeGrps/thread_block.cc diff --git a/projects/hip-tests/catch/hipTestMain/main.cc b/projects/hip-tests/catch/hipTestMain/main.cc index 109b0593fc..5c7112788c 100644 --- a/projects/hip-tests/catch/hipTestMain/main.cc +++ b/projects/hip-tests/catch/hipTestMain/main.cc @@ -30,6 +30,9 @@ int main(int argc, char** argv) { | Opt(cmd_options.progress) ["-P"]["--progress"] ("Show progress bar when running performance tests") + | Opt(cmd_options.cg_iterations, "cg_iterations") + ["-E"]["--cg-iterations"] + ("Number of iterations used for cooperative groups sync tests (default: 5)") | Opt(cmd_options.cg_extended_run, "cg_extened_run") ["-E"]["--cg-extended-run"] ("TODO: Description goes here") diff --git a/projects/hip-tests/catch/include/hip_test_defgroups.hh b/projects/hip-tests/catch/include/hip_test_defgroups.hh index ff26989966..3b276b6897 100644 --- a/projects/hip-tests/catch/include/hip_test_defgroups.hh +++ b/projects/hip-tests/catch/include/hip_test_defgroups.hh @@ -144,21 
+144,6 @@ THE SOFTWARE. * @} */ - /** - * @defgroup StreamOTest Ordered Memory Allocator - * @{ - * This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime - * API. - * @} - */ - -/** - * @defgroup StreamOTest Ordered Memory Allocator - * @{ - * This section describes the tests for Stream Ordered Memory Allocator functions of HIP runtime - * API. - */ - /** * @defgroup StreamOTest Ordered Memory Allocator * @{ @@ -173,13 +158,6 @@ THE SOFTWARE. * @} */ -/** - * @defgroup PerformanceTest Performance tests - * @{ - * This section describes performance tests for the target API groups and use-cases. - * @} - */ - /** * @defgroup TextureTest Texture Management * @{ @@ -209,7 +187,6 @@ THE SOFTWARE. */ /** - * @defgroup ComplexTest Complex type * @{ * This section describes tests for the Complex type functions. * @} diff --git a/projects/hip-tests/catch/performance/CMakeLists.txt b/projects/hip-tests/catch/performance/CMakeLists.txt index 2778dab03d..c9242ecebc 100644 --- a/projects/hip-tests/catch/performance/CMakeLists.txt +++ b/projects/hip-tests/catch/performance/CMakeLists.txt @@ -19,7 +19,6 @@ # THE SOFTWARE. 
add_subdirectory(memset) -add_subdirectory(memcpy) add_subdirectory(kernelLaunch) add_subdirectory(stream) add_subdirectory(event) diff --git a/projects/hip-tests/catch/unit/CMakeLists.txt b/projects/hip-tests/catch/unit/CMakeLists.txt index bfb390924a..ab39835390 100644 --- a/projects/hip-tests/catch/unit/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/CMakeLists.txt @@ -47,7 +47,6 @@ add_subdirectory(atomics) add_subdirectory(complex) add_subdirectory(p2p) add_subdirectory(gcc) -add_subdirectory(vector_types) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) diff --git a/projects/hip-tests/catch/unit/cooperativeGrps/cooperative_groups_common.hh b/projects/hip-tests/catch/unit/cooperativeGrps/cooperative_groups_common.hh index 25922c5eb2..20d0d4aa44 100644 --- a/projects/hip-tests/catch/unit/cooperativeGrps/cooperative_groups_common.hh +++ b/projects/hip-tests/catch/unit/cooperativeGrps/cooperative_groups_common.hh @@ -31,6 +31,14 @@ constexpr size_t kWarpSize = 64; constexpr int kMaxGPUs = 8; } // namespace +constexpr int MaxGPUs = 8; + +inline bool operator==(const dim3& l, const dim3& r) { + return l.x == r.x && l.y == r.y && l.z == r.z; +} + +inline bool operator!=(const dim3& l, const dim3& r) { return !(l == r); } + __device__ inline unsigned int thread_rank_in_grid() { const auto block_size = blockDim.x * blockDim.y * blockDim.z; const auto block_rank_in_grid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; @@ -67,4 +75,4 @@ template bool CheckDimensions(unsigned int device, T kernel, dim3 bloc } return true; -} \ No newline at end of file +} diff --git a/projects/hip-tests/catch/unit/cooperativeGrps/thread_block.cc b/projects/hip-tests/catch/unit/cooperativeGrps/thread_block.cc new file mode 100644 index 0000000000..c85f7974cd --- /dev/null +++ b/projects/hip-tests/catch/unit/cooperativeGrps/thread_block.cc @@ -0,0 +1,350 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "cooperative_groups_common.hh" + +#include +#include +#include +#include + +#include + +/** + * @addtogroup thread_block thread_block + * @{ + * @ingroup DeviceLanguageTest + * Contains unit tests for all thread_block APIs + */ + +namespace cg = cooperative_groups; + +template +static __global__ void thread_block_size_getter(unsigned int* sizes) { + const BaseType group = cg::this_thread_block(); + sizes[thread_rank_in_grid()] = group.size(); +} + +template +static __global__ void thread_block_thread_rank_getter(unsigned int* thread_ranks) { + const BaseType group = cg::this_thread_block(); + thread_ranks[thread_rank_in_grid()] = group.thread_rank(); +} + +static __global__ void thread_block_group_indices_getter(dim3* group_indices) { + group_indices[thread_rank_in_grid()] = cg::this_thread_block().group_index(); +} + +static __global__ void thread_block_thread_indices_getter(dim3* thread_indices) { + thread_indices[thread_rank_in_grid()] = cg::this_thread_block().thread_index(); +} + +static __global__ void thread_block_non_member_size_getter(unsigned int* sizes) { + sizes[thread_rank_in_grid()] = cg::group_size(cg::this_thread_block()); +} + +static __global__ void thread_block_non_member_thread_rank_getter(unsigned int* thread_ranks) { + thread_ranks[thread_rank_in_grid()] = cg::thread_rank(cg::this_thread_block()); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels that write the return values of size, thread_rank, group_index, and + * thread_index member functions to an output array that is validated on the host side. The kernels + * are run sequentially, reusing the output array, to avoid running out of device memory for large + * kernel launches. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Thread_Block_Getters_Positive_Basic") { + const auto blocks = GenerateBlockDimensions(); + const auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const CPUGrid grid(blocks, threads); + + { + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + + thread_block_size_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + thread_block_thread_rank_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + // Validate thread_block.size() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [size = grid.threads_in_block_count_](uint32_t) { return size; }); + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Validate thread_block.thread_rank() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [&grid](uint32_t i) { return grid.thread_rank_in_block(i).value(); }); + } + + { + LinearAllocGuard dim3_arr_dev(LinearAllocs::hipMalloc, grid.thread_count_ * sizeof(dim3)); + LinearAllocGuard dim3_arr(LinearAllocs::hipHostMalloc, grid.thread_count_ * sizeof(dim3)); + + thread_block_group_indices_getter<<>>(dim3_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(dim3_arr.ptr(), dim3_arr_dev.ptr(), + grid.thread_count_ * sizeof(*dim3_arr.ptr()), 
hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + thread_block_thread_indices_getter<<>>(dim3_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + // Validate thread_block.group_index() values + ArrayAllOf(dim3_arr.ptr(), grid.thread_count_, + [&grid](uint32_t i) { return grid.block_idx(i).value(); }); + + HIP_CHECK(hipMemcpy(dim3_arr.ptr(), dim3_arr_dev.ptr(), + grid.thread_count_ * sizeof(*dim3_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Validate thread_block.thread_index() values + ArrayAllOf(dim3_arr.ptr(), grid.thread_count_, + [&grid](uint32_t i) { return grid.thread_idx(i).value(); }); + } +} + +/** + * Test Description + * ------------------------ + * - Launches kernels that write the return values of size and thread_rank member functions to an + * output array that is validated on the host side, while treating the thread block as a thread + * group. The kernels are run sequentially, reusing the output array, to avoid running out of device + * memory for large kernel launches. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Thread_Block_Getters_Via_Base_Type_Positive_Basic") { + const auto blocks = GenerateBlockDimensions(); + const auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + + const CPUGrid grid(blocks, threads); + + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + + thread_block_size_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + thread_block_thread_rank_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + // Validate thread_block.size() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [size = grid.threads_in_block_count_](uint32_t) { return size; }); + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Validate thread_block.thread_rank() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [&grid](uint32_t i) { return grid.thread_rank_in_block(i).value(); }); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels that write the return values of size and thread_rank non-member functions + * to an output array that is validated on the host side. The kernels are run sequentially, reusing + * the output array, to avoid running out of device memory for large kernel launches. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Thread_Block_Getters_Via_Non_Member_Functions_Positive_Basic") { + const auto blocks = GenerateBlockDimensions(); + const auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + + const CPUGrid grid(blocks, threads); + + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + + thread_block_non_member_size_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + thread_block_non_member_thread_rank_getter<<>>(uint_arr_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + // Validate thread_block.size() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [size = grid.threads_in_block_count_](uint32_t) { return size; }); + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), + grid.thread_count_ * sizeof(*uint_arr.ptr()), hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // Validate thread_block.thread_rank() values + ArrayAllOf(uint_arr.ptr(), grid.thread_count_, + [&grid](uint32_t i) { return grid.thread_rank_in_block(i).value(); }); +} + + +template +__global__ void thread_block_sync_check(T* global_data, unsigned int* wait_modifiers, + unsigned int* read_offsets) { + extern __shared__ uint8_t shared_data[]; + T* const data = use_global ? 
global_data : reinterpret_cast(shared_data); + const auto block = cg::this_thread_block(); + constexpr T divisor = 255; + const auto tid = block.thread_rank(); + const auto wait_modifier = wait_modifiers[tid]; + const auto read_offset = read_offsets[tid]; + busy_wait(wait_modifier); + data[tid] = tid % divisor; + block.sync(); + bool valid = true; + for (auto i = 0; i < block.size(); ++i) { + const auto offset = block.size() + read_offset; + const auto expected = (tid + offset + i) % block.size(); + if (!(valid &= (data[expected] == expected % divisor))) { + break; + } + } + block.sync(); + data[tid] = valid; + if constexpr (!use_global) { + global_data[tid] = data[tid]; + } +} + +static inline std::mt19937& GetRandomGenerator() { + // With a static seed the tests will remain consistent between runs, yet it relieves the problem + // of predetermining a set of modifiers by hand. The sets of modifiers could actually be + // determined at compile time if std::random objects could operate in a constexpr context. 
+ static std::mt19937 mt(17); + return mt; +} + +template static inline T GenerateRandomInteger(const T min, const T max) { + std::uniform_int_distribution dist(min, max); + return dist(GetRandomGenerator()); +} + +template void ThreadBlockSyncTest() { + const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations)); + INFO("Run number: " << randomized_run_count + 1); + const auto blocks = dim3(1, 1, 1); + const auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + int max_shared_mem_per_block = 0; + HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block, + hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + if (!global_memory && max_shared_mem_per_block < alloc_size) { + return; + } + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + + LinearAllocGuard wait_modifiers_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard wait_modifiers(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_, + [&] { return GenerateRandomInteger(0u, 1500u); }); + + LinearAllocGuard read_offsets_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + std::vector read_offsets(grid.thread_count_, 0u); + if (randomized_run_count != 0) { + std::generate(read_offsets.begin(), read_offsets.end(), + [&] { return GenerateRandomInteger(0u, grid.thread_count_); }); + } + + const auto shared_memory_size = global_memory ? 
0u : alloc_size; + HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(), + grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(read_offsets_dev.ptr(), read_offsets.data(), + grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice)); + + thread_block_sync_check<<>>( + arr_dev.ptr(), wait_modifiers_dev.ptr(), read_offsets_dev.ptr()); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(std::all_of(arr.ptr(), arr.ptr() + grid.thread_count_, [](unsigned int e) { return e; })); +} + +/** + * Test Description + * ------------------------ + * - Launches a kernel wherein every thread writes its grid-wide linear index into an array. The + * array is either in global or dynamic shared memory based on a compile time switch, and the test + * is run for arrays of 1, 2, and 4 byte elements. Before the write each thread executes a busy wait + * loop for a random amount of clock cycles, the amount being read from an input array. After the + * write a block-wide sync is performed and each thread validates that it can read the expected + * values that other threads have written to their respective array slots. Each thread begins the + * validation from a given offset from its own index. For the first run of the test, all the offsets + * are zero, so memory reads should be coalesced as adjacent threads read from adjacent memory + * locations. On subsequent runs the offsets are randomized for each thread, leading to + * non-coalesced reads and cache thrashing. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/thread_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Thread_Block_Sync_Positive_Basic", "", uint8_t, uint16_t, uint32_t) { + SECTION("Global memory") { ThreadBlockSyncTest(); } + SECTION("Shared memory") { ThreadBlockSyncTest(); } +} From d1bebf302f02c76cab6063353e92ef3e17ee6244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:30:45 +0100 Subject: [PATCH 19/19] EXSWHTEC-249 - Implement Performance Tests for Memcpy APIs #119 Change-Id: Ib04fe4dd3efce92d7c7bfc8f0c75abd8e9dfe7be [ROCm/hip-tests commit: e3bac85a3cb9c026d6d61d889f17ac05c89fe355] --- .../catch/include/hip_test_defgroups.hh | 5 +- .../catch/performance/CMakeLists.txt | 1 + .../catch/performance/memcpy/CMakeLists.txt | 52 +++++ .../catch/performance/memcpy/hipMemcpy.cc | 190 +++++++++++++++++ .../catch/performance/memcpy/hipMemcpy2D.cc | 183 +++++++++++++++++ .../performance/memcpy/hipMemcpy2DAsync.cc | 188 +++++++++++++++++ .../memcpy/hipMemcpy2DFromArray.cc | 127 ++++++++++++ .../memcpy/hipMemcpy2DFromArrayAsync.cc | 133 ++++++++++++ .../performance/memcpy/hipMemcpy2DToArray.cc | 127 ++++++++++++ .../memcpy/hipMemcpy2DToArrayAsync.cc | 133 ++++++++++++ .../catch/performance/memcpy/hipMemcpy3D.cc | 189 +++++++++++++++++ .../performance/memcpy/hipMemcpy3DAsync.cc | 192 +++++++++++++++++ .../performance/memcpy/hipMemcpyAsync.cc | 192 +++++++++++++++++ .../catch/performance/memcpy/hipMemcpyAtoH.cc | 69 +++++++ .../catch/performance/memcpy/hipMemcpyDtoD.cc | 103 ++++++++++ .../performance/memcpy/hipMemcpyDtoDAsync.cc | 106 ++++++++++ .../catch/performance/memcpy/hipMemcpyDtoH.cc | 72 +++++++ .../performance/memcpy/hipMemcpyDtoHAsync.cc | 75 +++++++ .../performance/memcpy/hipMemcpyFromSymbol.cc | 116 +++++++++++ .../memcpy/hipMemcpyFromSymbolAsync.cc | 122 +++++++++++ 
.../catch/performance/memcpy/hipMemcpyHtoA.cc | 69 +++++++ .../catch/performance/memcpy/hipMemcpyHtoD.cc | 70 +++++++ .../performance/memcpy/hipMemcpyHtoDAsync.cc | 74 +++++++ .../performance/memcpy/hipMemcpyParam2D.cc | 188 +++++++++++++++++ .../memcpy/hipMemcpyParam2DAsync.cc | 193 ++++++++++++++++++ .../performance/memcpy/hipMemcpyToSymbol.cc | 109 ++++++++++ .../memcpy/hipMemcpyToSymbolAsync.cc | 116 +++++++++++ .../performance/memcpy/hipMemcpyWithStream.cc | 192 +++++++++++++++++ .../memcpy/memcpy_performance_common.hh | 117 +++++++++++ 29 files changed, 3501 insertions(+), 2 deletions(-) create mode 100644 projects/hip-tests/catch/performance/memcpy/CMakeLists.txt create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc create mode 100644 
projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc create mode 100644 projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh diff --git a/projects/hip-tests/catch/include/hip_test_defgroups.hh b/projects/hip-tests/catch/include/hip_test_defgroups.hh index 3b276b6897..680dfa8a04 100644 --- a/projects/hip-tests/catch/include/hip_test_defgroups.hh +++ b/projects/hip-tests/catch/include/hip_test_defgroups.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2021 - 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -186,7 +186,8 @@ THE SOFTWARE. * @} */ - /** +/** + * @defgroup ComplexTest Complex type * @{ * This section describes tests for the Complex type functions. 
* @} diff --git a/projects/hip-tests/catch/performance/CMakeLists.txt b/projects/hip-tests/catch/performance/CMakeLists.txt index c9242ecebc..2778dab03d 100644 --- a/projects/hip-tests/catch/performance/CMakeLists.txt +++ b/projects/hip-tests/catch/performance/CMakeLists.txt @@ -19,6 +19,7 @@ # THE SOFTWARE. add_subdirectory(memset) +add_subdirectory(memcpy) add_subdirectory(kernelLaunch) add_subdirectory(stream) add_subdirectory(event) diff --git a/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt b/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt new file mode 100644 index 0000000000..e3ed71d9b5 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/CMakeLists.txt @@ -0,0 +1,52 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +set(TEST_SRC + hipMemcpy.cc + hipMemcpyAsync.cc + hipMemcpyWithStream.cc + hipMemcpyAtoH.cc + hipMemcpyHtoA.cc + hipMemcpyDtoD.cc + hipMemcpyDtoDAsync.cc + hipMemcpyDtoH.cc + hipMemcpyDtoHAsync.cc + hipMemcpyHtoD.cc + hipMemcpyHtoDAsync.cc + hipMemcpyToSymbol.cc + hipMemcpyToSymbolAsync.cc + hipMemcpyFromSymbol.cc + hipMemcpyFromSymbolAsync.cc + hipMemcpy2D.cc + hipMemcpy2DAsync.cc + hipMemcpy2DToArray.cc + hipMemcpy2DToArrayAsync.cc + hipMemcpy2DFromArray.cc + hipMemcpy2DFromArrayAsync.cc + hipMemcpyParam2D.cc + hipMemcpyParam2DAsync.cc + hipMemcpy3D.cc + hipMemcpy3DAsync.cc +) + +hip_add_exe_to_target(NAME MemcpyPerformance + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++17) diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc new file mode 100644 index 0000000000..d9fd8cf6ba --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy.cc @@ -0,0 +1,190 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + * Contains performance tests for all memcpy HIP APIs. + */ + +class MemcpyBenchmark : public Benchmark { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy(dst, src, size, kind)); + } + } +}; + +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard src_allocation(src_allocation_type, size); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(src_allocation_type, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned 
and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, 
LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, 
allocation_size, hipMemcpyDeviceToDevice); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc new file mode 100644 index 0000000000..e4f0302d88 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2D.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2D(dst, dst_pitch, src, src_pitch, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy2DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToHost); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + hipMemcpyHostToDevice); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), src_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = 
std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, 
hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc new file mode 100644 index 0000000000..a93a4aadcf --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DAsync.cc @@ -0,0 +1,188 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DAsync(dst, dst_pitch, src, src_pitch, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy2DAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToHost, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D device_allocation(width, height); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, + device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + hipMemcpyHostToDevice, stream); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * 
sizeof(int), src_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, hipMemcpyHostToHost, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D dst_allocation(width, height); + + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * 
------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs.
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc new file mode 100644 index 0000000000..15c61f6e43 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArray.cc @@ -0,0 +1,127 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DFromArrayBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2DFromArray(dst, dst_pitch, src, 0, 0, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DFromArrayBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), width * sizeof(int), array_allocation.ptr(), + width * sizeof(int), height, hipMemcpyDeviceToHost); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + array_allocation.ptr(), device_allocation.width(), + device_allocation.height(), hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test 
requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToHost") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArray` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArray.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArray_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc new file mode 100644 index 0000000000..9a56d16e74 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DFromArrayAsync.cc @@ -0,0 +1,133 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DFromArrayAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, size_t dst_pitch, hipArray_const_t src, size_t width, size_t height, + hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DFromArrayAsync(dst, dst_pitch, src, 0, 0, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DFromArrayAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), width * sizeof(int), + array_allocation.ptr(), width * sizeof(int), + height, hipMemcpyDeviceToHost, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + array_allocation.ptr(), device_allocation.width(), + device_allocation.height(), hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes 
`hipMemcpy2DFromArrayAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DFromArrayAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DFromArrayAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DFromArrayAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc new file mode 100644 index 0000000000..5aed8c5a20 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArray.cc @@ -0,0 +1,127 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DToArrayBenchmark : public Benchmark { + public: + void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy2DToArray(dst, 0, 0, src, src_pitch, width, height, kind)); + } + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DToArrayBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyHostToDevice) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, hipMemcpyHostToDevice); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), hipMemcpyDeviceToDevice); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * 
------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_HostToDevice") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArray` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArray.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArray_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc new file mode 100644 index 0000000000..c418f1b039 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy2DToArrayAsync.cc @@ -0,0 +1,133 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy2DToArrayAsyncBenchmark : public Benchmark { + public: + void operator()(hipArray* dst, const void* src, size_t src_pitch, size_t width, + size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy2DToArrayAsync(dst, 0, 0, src, src_pitch, width, height, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + Memcpy2DToArrayAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyHostToDevice) { + size_t allocation_size = width * height * sizeof(int); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), + width * sizeof(int), width * sizeof(int), height, + hipMemcpyHostToDevice, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D device_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + ArrayAllocGuard array_allocation(make_hipExtent(width, height, 0), hipArrayDefault); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(array_allocation.ptr(), device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + hipMemcpyDeviceToDevice, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` 
from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy2DToArrayAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 8 KB x 32 B + * - Large: 16 KB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy2DToArrayAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy2DToArrayAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 8_KB, 16_KB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc new file mode 100644 index 0000000000..312043b186 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3D.cc @@ -0,0 +1,189 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy3DBenchmark : public Benchmark { + public: + void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr, + const hipExtent extent, hipMemcpyKind kind) { + hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0), + src_ptr, make_hipPos(0, 0, 0), + extent, kind); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpy3D(&params)); + } + } +}; + +static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy3DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height) + + ", " + std::to_string(extent.depth) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height()), + device_allocation.pitched_ptr(), device_allocation.extent(), kind); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(device_allocation.pitched_ptr(), + make_hipPitchedPtr(host_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height()), + device_allocation.extent(), kind); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, extent.width * +
extent.height * extent.depth); + benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height), + make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height), + extent, kind); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard3D src_allocation(extent); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard3D dst_allocation(extent); + + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(), + dst_allocation.extent(), kind); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - 
Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc new file mode 100644 index 0000000000..e1733e11c1 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpy3DAsync.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class Memcpy3DAsyncBenchmark : public Benchmark { + public: + void operator()(const hipPitchedPtr& dst_ptr, const hipPitchedPtr& src_ptr, + const hipExtent extent, hipMemcpyKind kind, const hipStream_t& stream) { + hipMemcpy3DParms params = CreateMemcpy3DParam(dst_ptr, make_hipPos(0, 0, 0), + src_ptr, make_hipPos(0, 0, 0), + extent, kind); + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpy3DAsync(&params, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(const hipExtent extent, hipMemcpyKind kind, bool enable_peer_access=false) { + Memcpy3DAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(extent.width) + ", " + std::to_string(extent.height) + + ", " + std::to_string(extent.depth) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(make_hipPitchedPtr(host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height()), + device_allocation.pitched_ptr(), device_allocation.extent(), kind, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard host_allocation(LinearAllocs::hipHostMalloc, device_allocation.pitch() * + device_allocation.height() * device_allocation.depth()); + benchmark.Run(device_allocation.pitched_ptr(), + make_hipPitchedPtr(host_allocation.ptr(), + device_allocation.pitch(), + device_allocation.width(), + device_allocation.height()), + device_allocation.extent(), kind, stream); + } else if (kind == hipMemcpyHostToHost) { +
LinearAllocGuard3D device_allocation(extent); + LinearAllocGuard src_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + LinearAllocGuard dst_allocation(LinearAllocs::hipHostMalloc, extent.width * + extent.height * extent.depth); + benchmark.Run(make_hipPitchedPtr(dst_allocation.ptr(), extent.width, extent.width, extent.height), + make_hipPitchedPtr(src_allocation.ptr(), extent.width, extent.width, extent.height), + extent, kind, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard3D src_allocation(extent); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard3D dst_allocation(extent); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.pitched_ptr(), src_allocation.pitched_ptr(), + dst_allocation.extent(), kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_HostToDevice") { + const 
auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpy3DAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 16 B x 4 B + * - Medium: 4 MB x 16 B x 4 B + * - Large: 16 MB x 16 B x 4 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpy3DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpy3DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(make_hipExtent(width, 16, 4), hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc new file mode 100644 index 0000000000..b04bbd1b11 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAsync.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyAsync(dst, src, size, kind, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + const StreamGuard stream_guard{Streams::created}; + const hipStream_t stream = stream_guard.stream(); + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard src_allocation(src_allocation_type, size); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(src_allocation_type, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(dst_allocation_type, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - 
performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const 
auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc new file mode 100644 index 0000000000..f30f1b39fb --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyAtoH.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyAtoHBenchmark : public Benchmark { + public: + void operator()(void* dst, hipArray* src_array, size_t allocation_size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyAtoH(dst, src_array, 0, allocation_size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) { + MemcpyAtoHBenchmark benchmark; + benchmark.AddSectionName(std::to_string(width)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + size_t allocation_size = width * sizeof(int); + LinearAllocGuard host_allocation(host_allocation_type, allocation_size); + ArrayAllocGuard array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault); + benchmark.Run(host_allocation.ptr(), array_allocation.ptr(), allocation_size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyAtoH` from Device array to Host: + * -# Allocation size + * - Small: 512 B + * - Medium: 1024 B + * - Large: 4096 B + * -# Allocation type + * - Host: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyAtoH.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyAtoH") { + const auto allocation_size = GENERATE(512, 1024, 4096); + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc new file mode 100644 index 0000000000..fc300d1755 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoD.cc @@ -0,0 +1,103 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoDBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyDtoD(dst, src, size)); + } + } +}; + +static void RunBenchmark(size_t size, bool enable_peer_access=false) { + MemcpyDtoDBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + + benchmark.Run(reinterpret_cast(dst_allocation.ptr()), + reinterpret_cast(src_allocation.ptr()), size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoD` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoD.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessEnabled") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. 
Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoD` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoD.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoD_PeerAccessDisabled") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc new file mode 100644 index 0000000000..c7b9a86e38 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoDAsync.cc @@ -0,0 +1,106 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoDAsyncBenchmark : public Benchmark { + public: + void operator()(hipDeviceptr_t& dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyDtoDAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(size_t size, bool enable_peer_access=false) { + MemcpyDtoDAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(reinterpret_cast(dst_allocation.ptr()), + reinterpret_cast(src_allocation.ptr()), + size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoDAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ 
+TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessEnabled") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size, true); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoDAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoDAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoDAsync_PeerAccessDisabled") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc new file mode 100644 index 0000000000..9f5c0503d2 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoH.cc @@ -0,0 +1,72 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoHBenchmark : public Benchmark { + public: + void operator()(void* dst, const hipDeviceptr_t& src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyDtoH(dst, src, size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyDtoHBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(host_allocation.ptr(), + reinterpret_cast(device_allocation.ptr()), + size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoH` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoH.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoH") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, 
LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc new file mode 100644 index 0000000000..e07b8c2125 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyDtoHAsync.cc @@ -0,0 +1,75 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyDtoHAsyncBenchmark : public Benchmark { + public: + void operator()(void* dst, const hipDeviceptr_t& src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyDtoHAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyDtoHAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + LinearAllocGuard device_allocation(device_allocation_type, size); + LinearAllocGuard host_allocation(host_allocation_type, size); + benchmark.Run(host_allocation.ptr(), + reinterpret_cast(device_allocation.ptr()), + size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyDtoHAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyDtoHAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyDtoHAsync") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc 
b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc new file mode 100644 index 0000000000..62adee7597 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbol.cc @@ -0,0 +1,116 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyFromSymbolBenchmark : public Benchmark { + public: + void operator()(const void* source, void* result, size_t size, size_t offset) { + HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset)); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyFromSymbol(result, HIP_SYMBOL(devSymbol), size, offset)); + } + } +}; + +static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) { + MemcpyFromSymbolBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + benchmark.Run(source, result, size, offset); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. + * - Utilizes singular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_SingularValue") { + int set{42}; + int result{0}; + RunBenchmark(&set, &result); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. 
+ * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + RunBenchmark(array, result, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbol` from Device to Host. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbol_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc new file mode 100644 index 0000000000..c10a66e48a --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyFromSymbolAsync.cc @@ -0,0 +1,122 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" +#pragma clang diagnostic ignored "-Wvla-extension" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +class MemcpyFromSymbolAsyncBenchmark : public Benchmark { + public: + void operator()(const void* source, void* result, size_t size, size_t offset, const hipStream_t& stream) { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset, + hipMemcpyHostToDevice, stream)); + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyFromSymbolAsync(result, HIP_SYMBOL(devSymbol), size, offset, + hipMemcpyDeviceToHost, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +static void RunBenchmark(const void* source, void* result, size_t size=1, size_t offset=0) { + MemcpyFromSymbolAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + benchmark.Run(source, result, size, offset, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. + * - Utilizes singular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_SingularValue") { + int set{42}; + int result{0}; + RunBenchmark(&set, &result); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. 
+ * - Utilizes array integers: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + RunBenchmark(array, result, sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyFromSymbolAsync` from Device to Host. + * - Utilizes array integers with offsets: + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 512 KB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyFromSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyFromSymbolAsync_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 512_KB); + int array[size]; + std::fill_n(array, size, 42); + int result[size]; + std::fill_n(result, size, 0); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array + offset, result + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc new file mode 100644 index 0000000000..0f89845fb2 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoA.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +class MemcpyHtoABenchmark : public Benchmark { + public: + void operator()(hipArray* dst_array, const void* src, size_t allocation_size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyHtoA(dst_array, 0, src, allocation_size)); + } + } +}; + +static void RunBenchmark(LinearAllocs host_allocation_type, size_t width) { + MemcpyHtoABenchmark benchmark; + benchmark.AddSectionName(std::to_string(width)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + size_t allocation_size = width * sizeof(int); + ArrayAllocGuard array_allocation(make_hipExtent(width, 0, 0), hipArrayDefault); + LinearAllocGuard host_allocation(host_allocation_type, allocation_size); + benchmark.Run(array_allocation.ptr(), host_allocation.ptr(), allocation_size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoA` from Host to Device array: + * -# Allocation size + * - Small: 512 B + * - Medium: 1024 B + * - Large: 4096 B + * -# Allocation type + * - Host: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoA.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoA") { + const auto allocation_size = GENERATE(512, 1024, 4096); + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc new file mode 100644 index 0000000000..48989e51f9 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoD.cc @@ -0,0 +1,70 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +/** + * Benchmark functor: times a single `hipMemcpyHtoD` call inside a + * `kTimerTypeCpu` timed section. + */ +class MemcpyHtoDBenchmark : public Benchmark<MemcpyHtoDBenchmark> { + public: + void operator()(hipDeviceptr_t& dst, void* src, size_t size) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyHtoD(dst, src, size)); + } + } +}; + +/** + * Allocates `size` bytes on the device and on the host (per the requested allocation + * types) and runs the benchmark; sections are named after `size` and the host type. + * NOTE(review): the `<int>` guard arguments are restored on the assumption that they + * were stripped during extraction - confirm against the guard declarations. + */ +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyHtoDBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + LinearAllocGuard<int> device_allocation(device_allocation_type, size); + LinearAllocGuard<int> host_allocation(host_allocation_type, size); + benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()), host_allocation.ptr(), size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoD` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoD.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoD") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc new file mode 100644 index 0000000000..f9a468eb8f --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyHtoDAsync.cc @@ -0,0 +1,74 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc.
All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +/** + * Benchmark functor: times a single `hipMemcpyHtoDAsync` call on `stream` inside a + * `kTimerTypeEvent` stream-timed section, then synchronizes the stream. + */ +class MemcpyHtoDAsyncBenchmark : public Benchmark<MemcpyHtoDAsyncBenchmark> { + public: + void operator()(hipDeviceptr_t& dst, void* src, size_t size, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyHtoDAsync(dst, src, size, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +/** + * Creates a stream, allocates `size` bytes on the device and on the host (per the + * requested allocation types) and runs the benchmark on that stream. + * NOTE(review): the `<int>` guard arguments are restored on the assumption that they + * were stripped during extraction - confirm against the guard declarations. + */ +static void RunBenchmark(LinearAllocs host_allocation_type, LinearAllocs device_allocation_type, size_t size) { + MemcpyHtoDAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(host_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + LinearAllocGuard<int> device_allocation(device_allocation_type, size); + LinearAllocGuard<int> host_allocation(host_allocation_type, size); + benchmark.Run(reinterpret_cast<hipDeviceptr_t>(device_allocation.ptr()), + host_allocation.ptr(), size, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyHtoDAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyHtoDAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyHtoDAsync") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto device_allocation_type = LinearAllocs::hipMalloc; + const auto host_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(host_allocation_type, device_allocation_type, allocation_size); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc
b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc new file mode 100644 index 0000000000..ca41c74818 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2D.cc @@ -0,0 +1,188 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +/** + * Benchmark functor: builds a `hip_Memcpy2D` descriptor from the arguments (outside the + * timed region) and times a single `hipMemcpyParam2D` call inside a `kTimerTypeCpu` section. + */ +class MemcpyParam2DBenchmark : public Benchmark<MemcpyParam2DBenchmark> { + public: + void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch, + size_t width, size_t height, hipMemcpyKind kind) { + hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch, + width, height, kind); + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyParam2D(&params)); + } + } +}; + +/** + * Allocates source/destination buffers matching `kind` and runs the benchmark. + * Any `kind` other than DeviceToHost, HostToDevice or HostToHost is treated as + * DeviceToDevice, with the devices chosen by `GetDeviceIds(enable_peer_access)`. + * NOTE(review): the `<int>` guard arguments are restored on the assumption that they + * were stripped during extraction - confirm against the guard declarations. + */ +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + MemcpyParam2DBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D<int> device_allocation(width, height); + LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), kind); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D<int> device_allocation(width, height); + LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), kind); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width * sizeof(int), + src_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, kind); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int
dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D<int> src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D<int> dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + kind); + } +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_HostToHost") { + const auto
width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2D` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2D.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2D_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs. Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc new file mode 100644 index 0000000000..b83009cc01 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyParam2DAsync.cc @@ -0,0 +1,193 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +/** + * Benchmark functor: builds a `hip_Memcpy2D` descriptor (outside the timed region), times a + * single `hipMemcpyParam2DAsync` call on `stream` inside a `kTimerTypeEvent` stream-timed + * section (matching the other async memcpy benchmarks), then synchronizes the stream. + * Named differently from the synchronous `MemcpyParam2DBenchmark` so the two class + * definitions cannot clash if both translation units are linked into one binary. + */ +class MemcpyParam2DAsyncBenchmark : public Benchmark<MemcpyParam2DAsyncBenchmark> { + public: + void operator()(void* dst, size_t dst_pitch, void* src, size_t src_pitch, + size_t width, size_t height, hipMemcpyKind kind, const hipStream_t& stream) { + hip_Memcpy2D params = CreateMemcpy2DParam(dst, dst_pitch, src, src_pitch, + width, height, kind); + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyParam2DAsync(&params, stream)); + } + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +/** + * Creates a stream, allocates source/destination buffers matching `kind` and runs the + * benchmark. Any `kind` other than DeviceToHost, HostToDevice or HostToHost is treated as + * DeviceToDevice, with the devices chosen by `GetDeviceIds(enable_peer_access)`. + * NOTE(review): the `<int>` guard arguments are restored on the assumption that they + * were stripped during extraction - confirm against the guard declarations. + */ +static void RunBenchmark(size_t width, size_t height, hipMemcpyKind kind, + bool enable_peer_access=false) { + MemcpyParam2DAsyncBenchmark benchmark; + benchmark.AddSectionName("(" + std::to_string(width) + ", " + std::to_string(height) + ")"); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind == hipMemcpyDeviceToHost) { + LinearAllocGuard2D<int> device_allocation(width, height); + LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(host_allocation.ptr(), device_allocation.width(), + device_allocation.ptr(), device_allocation.pitch(), + device_allocation.width(), device_allocation.height(), + kind, stream); + } else if (kind == hipMemcpyHostToDevice) { + LinearAllocGuard2D<int> device_allocation(width, height); + LinearAllocGuard<int> host_allocation(LinearAllocs::hipHostMalloc, device_allocation.width() * height); + benchmark.Run(device_allocation.ptr(), device_allocation.pitch(), + host_allocation.ptr(), device_allocation.width(), + device_allocation.width(), device_allocation.height(), + kind, stream); + } else if (kind == hipMemcpyHostToHost) { + LinearAllocGuard<int> src_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + LinearAllocGuard<int> dst_allocation(LinearAllocs::hipHostMalloc, width * sizeof(int) * height); + benchmark.Run(dst_allocation.ptr(), width *
sizeof(int), + src_allocation.ptr(), width * sizeof(int), + width * sizeof(int), height, kind, stream); + } else { + // hipMemcpyDeviceToDevice + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard2D<int> src_allocation(width, height); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard2D<int> dst_allocation(width, height); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), dst_allocation.pitch(), + src_allocation.ptr(), src_allocation.pitch(), + dst_allocation.width(), dst_allocation.height(), + kind, stream); + } +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Host to Device: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToDevice") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToDevice); +} + +#if HT_NVIDIA +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Host to Host: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * -
Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_HostToHost") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyHostToHost); +} +#endif + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_DisablePeerAccess") { + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyParam2DAsync` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB x 32 B + * - Medium: 4 MB x 32 B + * - Large: 16 MB x 32 B + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyParam2DAsync.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyParam2DAsync_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs.
Skipping."); + return; + } + const auto width = GENERATE(4_KB, 4_MB, 16_MB); + RunBenchmark(width, 32, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc new file mode 100644 index 0000000000..ba8c8456cf --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbol.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#include "memcpy_performance_common.hh" +#include <vector> +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +/** + * Benchmark functor: times a single `hipMemcpyToSymbol` call into `devSymbol` + * inside a `kTimerTypeCpu` timed section. `size` and `offset` are in bytes. + */ +class MemcpyToSymbolBenchmark : public Benchmark<MemcpyToSymbolBenchmark> { + public: + void operator()(const void* source, size_t size, size_t offset) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(devSymbol), source, size, offset)); + } + } +}; + +/** + * Runs the benchmark copying `size` bytes from `source` to `devSymbol` at byte `offset`; + * sections are named after `size` and `offset`. + */ +static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) { + MemcpyToSymbolBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + benchmark.Run(source, size, offset); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device. + * - Utilizes singular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_SingularValue") { + int set{42}; + RunBenchmark(&set, sizeof(set)); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device. + * - Utilizes integer arrays (sizes are element counts): + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + std::vector<int> array(size, 42); + + RunBenchmark(array.data(), sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbol` from Host to Device.
+ * - Utilizes integer arrays with offsets (sizes are element counts): + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbol.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbol_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + std::vector<int> array(size, 42); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array.data() + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc new file mode 100644 index 0000000000..bdeb73a4a5 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyToSymbolAsync.cc @@ -0,0 +1,116 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "memcpy_performance_common.hh" +#include <vector> + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +__device__ int devSymbol[1_MB]; + +/** + * Benchmark functor: times a single `hipMemcpyToSymbolAsync` call into `devSymbol` on + * `stream` inside a `kTimerTypeEvent` stream-timed section, then synchronizes the stream. + * `size` and `offset` are in bytes. + */ +class MemcpyToSymbolAsyncBenchmark : public Benchmark<MemcpyToSymbolAsyncBenchmark> { + public: + void operator()(const void* source, size_t size, size_t offset, const hipStream_t& stream) { + TIMED_SECTION_STREAM(kTimerTypeEvent, stream) { + HIP_CHECK(hipMemcpyToSymbolAsync(HIP_SYMBOL(devSymbol), source, size, offset, + hipMemcpyHostToDevice, stream)); + } + + HIP_CHECK(hipStreamSynchronize(stream)); + } +}; + +/** + * Creates a stream and runs the benchmark copying `size` bytes from `source` to + * `devSymbol` at byte `offset`; sections are named after `size` and `offset`. + */ +static void RunBenchmark(const void* source, size_t size=1, size_t offset=0) { + MemcpyToSymbolAsyncBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(std::to_string(offset)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + benchmark.Run(source, size, offset, stream); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device. + * - Utilizes singular integer values. + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_SingularValue") { + int set{42}; + RunBenchmark(&set, sizeof(set)); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device.
+ * - Utilizes integer arrays (sizes are element counts): + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_ArrayValue") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + std::vector<int> array(size, 42); + + RunBenchmark(array.data(), sizeof(int) * size); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyToSymbolAsync` from Host to Device. + * - Utilizes integer arrays with offsets (sizes are element counts): + * - Small: 1 KB + * - Medium: 4 KB + * - Large: 1 MB + * - Offset: 0 and size/2 + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyToSymbolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyToSymbolAsync_WithOffset") { + size_t size = GENERATE(1_KB, 4_KB, 1_MB); + std::vector<int> array(size, 42); + + size_t offset = GENERATE_REF(0, size / 2); + RunBenchmark(array.data() + offset, sizeof(int) * (size - offset), offset * sizeof(int)); +} diff --git a/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc b/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc new file mode 100644 index 0000000000..3ec12ef4f6 --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/hipMemcpyWithStream.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "memcpy_performance_common.hh" + +/** + * @addtogroup memcpy memcpy + * @{ + * @ingroup PerformanceTest + */ + +/** + * Benchmark functor: times a single `hipMemcpyWithStream` call inside a + * `kTimerTypeCpu` timed section. + */ +class MemcpyWithStreamBenchmark : public Benchmark<MemcpyWithStreamBenchmark> { + public: + void operator()(void* dst, const void* src, size_t size, hipMemcpyKind kind, hipStream_t stream) { + TIMED_SECTION(kTimerTypeCpu) { + HIP_CHECK(hipMemcpyWithStream(dst, src, size, kind, stream)); + } + } +}; + +/** + * Creates a stream, allocates `size`-byte source/destination buffers and runs the benchmark. + * For `hipMemcpyDeviceToDevice` both buffers use `LinearAllocs::hipMalloc` (the allocation + * type arguments only name the sections) on the devices from `GetDeviceIds(enable_peer_access)`. + * NOTE(review): the `<int>` guard arguments are restored on the assumption that they + * were stripped during extraction - confirm against the guard declarations. + */ +static void RunBenchmark(LinearAllocs dst_allocation_type, LinearAllocs src_allocation_type, + size_t size, hipMemcpyKind kind, bool enable_peer_access=false) { + MemcpyWithStreamBenchmark benchmark; + benchmark.AddSectionName(std::to_string(size)); + benchmark.AddSectionName(GetAllocationSectionName(src_allocation_type)); + benchmark.AddSectionName(GetAllocationSectionName(dst_allocation_type)); + + const StreamGuard stream_guard(Streams::created); + const hipStream_t stream = stream_guard.stream(); + + if (kind != hipMemcpyDeviceToDevice) { + LinearAllocGuard<int> src_allocation(src_allocation_type, size); + LinearAllocGuard<int> dst_allocation(dst_allocation_type, size); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } else { + int src_device = std::get<0>(GetDeviceIds(enable_peer_access)); + int dst_device = std::get<1>(GetDeviceIds(enable_peer_access)); + + LinearAllocGuard<int> src_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(dst_device)); + LinearAllocGuard<int> dst_allocation(LinearAllocs::hipMalloc, size); + HIP_CHECK(hipSetDevice(src_device)); + benchmark.Run(dst_allocation.ptr(), src_allocation.ptr(), size, kind, stream); + } +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Device to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test
requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Host to Device: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_HostToDevice") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Host to Host: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: host pinned and pageable + * - Destination: host pinned and pageable + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_HostToHost") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + const auto
dst_allocation_type = GENERATE(LinearAllocs::malloc, LinearAllocs::hipHostMalloc); + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyHostToHost); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Device to Device with peer access disabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_DisablePeerAccess") { + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice); +} + +/** + * Test Description + * ------------------------ + * - Executes `hipMemcpyWithStream` from Device to Device with peer access enabled: + * -# Allocation size + * - Small: 4 KB + * - Medium: 4 MB + * - Large: 16 MB + * -# Allocation type + * - Source: device malloc + * - Destination: device malloc + * Test source + * ------------------------ + * - performance/memcpy/hipMemcpyWithStream.cc + * Test requirements + * ------------------------ + * - Multi-device + * - Device supports Peer-to-Peer access + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Performance_hipMemcpyWithStream_DeviceToDevice_EnablePeerAccess") { + if (HipTest::getDeviceCount() < 2) { + HipTest::HIP_SKIP_TEST("This test requires 2 GPUs.
Skipping."); + return; + } + const auto allocation_size = GENERATE(4_KB, 4_MB, 16_MB); + const auto src_allocation_type = LinearAllocs::hipMalloc; + const auto dst_allocation_type = LinearAllocs::hipMalloc; + RunBenchmark(dst_allocation_type, src_allocation_type, allocation_size, hipMemcpyDeviceToDevice, true); +} diff --git a/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh b/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh new file mode 100644 index 0000000000..6ab6e26bae --- /dev/null +++ b/projects/hip-tests/catch/performance/memcpy/memcpy_performance_common.hh @@ -0,0 +1,117 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/
+
+#pragma once
+
+#include
+
+/**
+ * Builds a zero-offset `hip_Memcpy2D` descriptor for a 2D copy.
+ *
+ * The destination side is described as host memory for hipMemcpyDeviceToHost and
+ * hipMemcpyHostToHost and as device memory for hipMemcpyDeviceToDevice and
+ * hipMemcpyHostToDevice. The source side is described as host memory for hipMemcpyHostToDevice
+ * and hipMemcpyHostToHost and as device memory for hipMemcpyDeviceToDevice and
+ * hipMemcpyDeviceToHost. Any other kind fails the running test through `REQUIRE(false)`.
+ *
+ * @param dst destination pointer, stored in dstHost or dstDevice depending on kind
+ * @param dpitch stored in dstPitch
+ * @param src source pointer, stored in srcHost or srcDevice depending on kind
+ * @param spitch stored in srcPitch
+ * @param width stored in WidthInBytes
+ * @param height stored in Height
+ * @param kind copy direction used to pick the memory type of each side
+ * @return the populated descriptor; all X/Y offsets are zero
+ */
+static hip_Memcpy2D CreateMemcpy2DParam(void* dst, size_t dpitch, void* src, size_t spitch,
+                                        size_t width, size_t height, hipMemcpyKind kind) {
+  hip_Memcpy2D params = {};
+  const hipExtent src_offset = {};
+  const hipExtent dst_offset = {};
+
+  // Destination side: host memory when the copy ends on the host, device memory otherwise.
+  params.dstPitch = dpitch;
+  switch (kind) {
+    case hipMemcpyDeviceToHost:
+    case hipMemcpyHostToHost:
+    #if HT_AMD
+      params.dstMemoryType = hipMemoryTypeHost;
+    #else
+      params.dstMemoryType = CU_MEMORYTYPE_HOST;
+    #endif
+      params.dstHost = dst;
+      break;
+    case hipMemcpyDeviceToDevice:
+    case hipMemcpyHostToDevice:
+    #if HT_AMD
+      params.dstMemoryType = hipMemoryTypeDevice;
+    #else
+      params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
+    #endif
+      params.dstDevice = reinterpret_cast<hipDeviceptr_t>(dst);
+      break;
+    default:
+      REQUIRE(false);
+  }
+
+  // Source side: host memory when the copy starts on the host, device memory otherwise. The
+  // case labels deliberately differ from the destination switch above, and the source pitch
+  // comes from spitch rather than dpitch.
+  params.srcPitch = spitch;
+  switch (kind) {
+    case hipMemcpyHostToDevice:
+    case hipMemcpyHostToHost:
+    #if HT_AMD
+      params.srcMemoryType = hipMemoryTypeHost;
+    #else
+      params.srcMemoryType = CU_MEMORYTYPE_HOST;
+    #endif
+      params.srcHost = src;
+      break;
+    case hipMemcpyDeviceToDevice:
+    case hipMemcpyDeviceToHost:
+    #if HT_AMD
+      params.srcMemoryType = hipMemoryTypeDevice;
+    #else
+      params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
+    #endif
+      params.srcDevice = reinterpret_cast<hipDeviceptr_t>(src);
+      break;
+    default:
+      REQUIRE(false);
+  }
+
+  params.WidthInBytes = width;
+  params.Height = height;
+  params.srcXInBytes = src_offset.width;
+  params.srcY = src_offset.height;
+  params.dstXInBytes = dst_offset.width;
+  params.dstY = dst_offset.height;
+
+  return params;
+}
+
+/**
+ * Bundles the given pointers, positions, extent and kind into a `hipMemcpy3DParms`.
+ *
+ * Fields that are not assigned here keep their value-initialized state.
+ *
+ * @param dst_ptr stored in dstPtr
+ * @param dst_pos stored in dstPos
+ * @param src_ptr stored in srcPtr
+ * @param src_pos stored in srcPos
+ * @param extent stored in extent
+ * @param kind stored in kind
+ * @return the populated parameter structure
+ */
+static hipMemcpy3DParms CreateMemcpy3DParam(hipPitchedPtr dst_ptr, hipPos dst_pos,
+                                            hipPitchedPtr src_ptr, hipPos src_pos,
+                                            hipExtent extent, hipMemcpyKind kind) {
+  hipMemcpy3DParms params = {};
+  params.dstPtr = dst_ptr;
+  params.dstPos = dst_pos;
+  params.srcPtr = src_ptr;
+  params.srcPos = src_pos;
+  params.extent = extent;
+  params.kind = kind;
+  return params;
+}
+
+/**
+ * Selects the source and destination device ids for a device-to-device copy.
+ *
+ * With enable_peer_access set, the pair is {0, 1}: the function requires that
+ * hipDeviceCanAccessPeer reports access from device 0 to device 1 (the test fails otherwise) and
+ * then calls hipDeviceEnablePeerAccess for device 1. NOTE(review): the enable call applies to
+ * whichever device is current at that point - presumably device 0; confirm against callers.
+ * Without enable_peer_access the pair is {0, 0}, i.e. source and destination are the same device.
+ *
+ * @param enable_peer_access whether to pick two distinct devices and enable peer access
+ * @return {source device id, destination device id}
+ */
+static std::tuple<int, int> GetDeviceIds(bool enable_peer_access) {
+  int src_device = 0;
+  int dst_device = 1;
+
+  if (enable_peer_access) {
+    int can_access_peer = 0;
+    HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device));
+    if (!can_access_peer) {
+      INFO("Peer access cannot be enabled between devices " << src_device << " and " << dst_device);
+      REQUIRE(can_access_peer);
+    }
+    HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0));
+  } else {
+    dst_device = 0;
+  }
+
+  return {src_device, dst_device};
+}