From ff14fb30bf8bd074b4dbc77c164dcc17ff3fd320 Mon Sep 17 00:00:00 2001 From: "Gollamandala, Srinivasarao" Date: Tue, 6 May 2025 08:14:52 +0530 Subject: [PATCH] SWDEV-513197-[catch2][dtest]PerfTest-Improve launch performance for Device Heap kernels (#159) [ROCm/hip-tests commit: 327edf98b357e0b1830179e878d423d917f5b3f9] --- .../catch/perftests/memory/CMakeLists.txt | 1 + .../memory/hipPerfDeviceHeapMemory.cc | 96 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 projects/hip-tests/catch/perftests/memory/hipPerfDeviceHeapMemory.cc diff --git a/projects/hip-tests/catch/perftests/memory/CMakeLists.txt b/projects/hip-tests/catch/perftests/memory/CMakeLists.txt index 2a8bb2cac9..053d2f0efc 100644 --- a/projects/hip-tests/catch/perftests/memory/CMakeLists.txt +++ b/projects/hip-tests/catch/perftests/memory/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_SRC hipPerfBufferCopySpeedP2P.cc hipPerfBufferCopySpeedAll2All.cc hipPerfMempool.cc + hipPerfDeviceHeapMemory.cc ) if(HIP_PLATFORM MATCHES "amd") diff --git a/projects/hip-tests/catch/perftests/memory/hipPerfDeviceHeapMemory.cc b/projects/hip-tests/catch/perftests/memory/hipPerfDeviceHeapMemory.cc new file mode 100644 index 0000000000..2b8e7dfa08 --- /dev/null +++ b/projects/hip-tests/catch/perftests/memory/hipPerfDeviceHeapMemory.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include +#include +#include +__global__ void mallocTest() { /* Kernel body: malloc 123 bytes, zero them with memset, then free them. */ + size_t size = 123; + char* ptr = (char*)malloc(size); /* NOTE(review): ptr goes to memset without a null check; confirm malloc cannot fail here. */ + memset(ptr, 0, size); + free(ptr); +} +__global__ void mallocTest_1() /* Same malloc/memset/free sequence as mallocTest, with a 1024-byte request held through an int pointer. */ +{ + size_t size = 1024; + int* ptr = (int*)malloc(size); /* NOTE(review): ptr goes to memset without a null check; confirm malloc cannot fail here. */ + memset(ptr, 0, size); + free(ptr); +} +/** + * The tests in this file are added to see the performance improvement with the + * Improve launch perf for Device Heap kernels task : SWDEV-513197 + */ +/** + * @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL + * @{ + * @ingroup PerformanceTest + */ +/** + * Test Description + * ------------------------ + * - This test case tests the following scenario : + * - 1) Create kernel call + * - 2) In the kernel allocate device heap memory + * - 3) If any kernel uses device heap, the launch needs to be preceded by an + init kernel; save on the extra barrier packet launch/flush between the + init heap kernel and user kernel + * - 4) Capture above Kernel Latency. + * - 5) Kernel Latency has to be improved with the feature + * 'Improve launch perf for Device Heap kernels. Task : SWDEV-513197' + * - 6) Second Kernel Latency will be less compared to first + * launch latency due to the absence of init kernel launch. 
+ * Test source + * ------------------------ + * - catch/perftests/memory/hipPerfDeviceHeapMemory.cc + * + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.5 + */ +TEST_CASE("Unit_Perf_Device_Heap_Memory_Allocation") { + HIP_CHECK(hipDeviceSetLimit(hipLimitMallocHeapSize, 128*1024*1024)); + hipEvent_t event; + HIP_CHECK(hipEventCreate(&event)); + REQUIRE(event != nullptr); + hipStream_t stream{nullptr}; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(stream != nullptr); + HIP_CHECK(hipEventRecord(event, stream)); + HIP_CHECK(hipEventSynchronize(event)); + HIP_CHECK(hipStreamSynchronize(stream)); + // First Kernel Launch + auto start = std::chrono::system_clock::now(); + mallocTest<<<1, 5, 0, stream>>>(); + auto end = std::chrono::system_clock::now(); + // Second Kernel Launch + mallocTest_1<<<1, 5, 0, stream>>>(); + HIP_CHECK(hipDeviceSynchronize()); + auto end_1 = std::chrono::system_clock::now(); + auto time = std::chrono::duration_cast(end - start).count(); + auto time_1 = std::chrono::duration_cast(end_1 - end).count(); + REQUIRE(time > time_1); + HIP_CHECK(hipEventDestroy(event)); + HIP_CHECK(hipStreamDestroy(stream)); + std::cout<<"First Kernel Latency: "<