From 456001c3085ebe22207fa391493faa52077cdfe8 Mon Sep 17 00:00:00 2001 From: Rupam Chetia Date: Mon, 6 May 2024 16:16:55 +0530 Subject: [PATCH] SWDEV-311271 - [catch2][dtest] Adding test for mempool and stream ordered memory APIs Change-Id: Iddeb111e4b512bfc7422abc8e784b0a8e8fb133d --- catch/hipTestMain/config/config_amd_linux | 4 + catch/hipTestMain/config/config_amd_windows | 4 + catch/unit/memory/CMakeLists.txt | 2 + catch/unit/memory/hipDeviceGetMemPool.cc | 181 +++++ catch/unit/memory/hipDeviceSetMemPool.cc | 195 +++++ catch/unit/memory/hipMallocAsync.cc | 551 ++++++++++++- catch/unit/memory/hipMallocFromPoolAsync.cc | 746 +++++++++++++++++- catch/unit/memory/hipMemPoolCreate.cc | 87 +- catch/unit/memory/hipMemPoolDestroy.cc | 11 +- catch/unit/memory/hipMemPoolSetGetAccess.cc | 373 ++++++++- .../unit/memory/hipMemPoolSetGetAttribute.cc | 494 +++++++++++- catch/unit/memory/hipMemPoolTrimTo.cc | 164 +++- catch/unit/memory/mempool_common.hh | 166 +++- 13 files changed, 2851 insertions(+), 127 deletions(-) create mode 100644 catch/unit/memory/hipDeviceGetMemPool.cc create mode 100644 catch/unit/memory/hipDeviceSetMemPool.cc diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 97a2c2a0a8..7b3d444c4d 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -9,6 +9,10 @@ ], "DisabledTests": [ #if defined COMMON + "Unit_hipMallocFromPoolAsync_MThread_MaxThresh", + "Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool", + "Unit_hipMemPoolTrimTo_Multithreaded", + "Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU", "Unit_hipStreamPerThread_DeviceReset_1", "Unit_hipDeviceGetSharedMemConfig_Positive_Basic", "Unit_hipDeviceGetSharedMemConfig_Positive_Threaded", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index a0320e1ef2..a2a12ae969 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -8,6 +8,10 @@ ], "DisabledTests": [ #if defined COMMON + "Unit_hipMallocFromPoolAsync_MThread_MaxThresh", + "Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool", + "Unit_hipMemPoolTrimTo_Multithreaded", + "Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU", "Unit_hipMalloc_CoherentTst", "Unit_hipTextureMipmapObj2D_Check", "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode", diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index db202f258d..5acb41357b 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -114,6 +114,8 @@ set(TEST_SRC hipMemcpyFromSymbol.cc hipPtrGetAttribute.cc hipMemPoolApi.cc + hipDeviceGetMemPool.cc + hipDeviceSetMemPool.cc hipMemPoolSetGetAccess.cc hipMemPoolSetGetAttribute.cc hipMemPoolCreate.cc diff --git a/catch/unit/memory/hipDeviceGetMemPool.cc b/catch/unit/memory/hipDeviceGetMemPool.cc new file mode 100644 index 0000000000..64906a7a08 --- /dev/null +++ b/catch/unit/memory/hipDeviceGetMemPool.cc @@ -0,0 +1,181 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipDeviceGetMemPool hipDeviceGetMemPool + * @{ + * @ingroup MemoryTest + * `hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, + * int device)` - + * Gets the current memory pool for the specified device. + */ + +#include "mempool_common.hh" // NOLINT + +#define THREADS_PER_BLOCK 512 +static constexpr auto NUM_ELM {1024 * 1024}; + +/** + * Common function to allocate memory using hipMallocAsync API through a stream, + * launch kernel and perform vectorADD and validate results. Free memory using + * hipFreeAsync. +*/ +static bool checkMallocAsync() { + streamMemAllocTest testObj(NUM_ELM); + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + // Create host buffer with test data. + testObj.createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on stream. + testObj.allocFromDefMempool(stream); + testObj.transferToMempool(stream); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + // Free Buffer Asynchronously on stream. + testObj.freeDevBuf(stream); + HIP_CHECK(hipStreamSynchronize(stream)); + // verify and validate + REQUIRE(true == testObj.validateResult()); + // Destroy resources + HIP_CHECK(hipStreamDestroy(stream)); + testObj.freeHostBuf(); + return true; +} +/** + * Test Description + * ------------------------ + * - Test case to perform basic scenario, get device mem pool + * and default mem pool and validate both are same. + * ------------------------ + * - catch\unit\memory\hipDeviceGetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceGetMemPool_Basic") { + checkMempoolSupported(0) + hipMemPool_t mem_pool_device = nullptr, mem_pool_default = nullptr; + SECTION("Check current mempool is default mempool") { + // assign default mem pool to device + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0)); + // assign device mem pool to device + HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, 0)); + // validate both are same + REQUIRE(mem_pool_device == mem_pool_default); + } + SECTION("Allocating a mempool does not impact default mempool ctx") { + hipMemPoolProps PoolProps{}; + PoolProps.allocType = hipMemAllocationTypePinned; + PoolProps.location.id = 0; + PoolProps.location.type = hipMemLocationTypeDevice; + // assign default mem pool to device + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0)); + // create explicit mem pool + hipMemPool_t user_mempool; + HIP_CHECK(hipMemPoolCreate(&user_mempool, &PoolProps)); + // assign device mem pool to device + HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, 0)); + // validate both are same + REQUIRE(mem_pool_device == mem_pool_default); + HIP_CHECK(hipMemPoolDestroy(user_mempool)); + } +} + +/** + * Test Description + * ------------------------ + * - Test case to check functional scenario, Get the current mempool using + * hipDeviceGetMempool. Set attribute hipMemPoolAttrReleaseThreshold to + * UINT64_MAX. call checkMallocAsync(). + * ------------------------ + * - catch\unit\memory\hipDeviceGetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceGetMemPool_Functional") { + hipMemPool_t mem_pool = nullptr; + checkMempoolSupported(0) + // assign current mem pool to device + HIP_CHECK(hipDeviceGetMemPool(&mem_pool, 0)); + // set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX + hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold; + std::uint64_t value = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value)); + // call checkMallocAsync() and validate + REQUIRE(true == checkMallocAsync()); +} + +/** + * Test Description + * ------------------------ + * - Test case to verify multi device, get number of devices available + * and verify device mem pool and default mem pool are same. + * ------------------------ + * - catch\unit\memory\hipDeviceGetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + + for (int i = 0; i < num_devices; i++) { + checkMempoolSupported(i) + HIP_CHECK(hipSetDevice(i)); + hipMemPool_t mem_pool_device = nullptr, mem_pool_default = nullptr; + // assign default mem pool to device + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, i)); + // assign device mem pool to device + HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, i)); + // validate both are same + REQUIRE(mem_pool_device == mem_pool_default); + } +} + +/** + * Test Description + * ------------------------ + * - Test case to check functional scenario, Get the current mempool using + * hipDeviceGetDefaultMemPool. Set attribute hipMemPoolAttrReleaseThreshold + * to UINT64_MAX. call checkMallocAsync(). + * ------------------------ + * - catch\unit\memory\hipDeviceGetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceGetDefaultMemPool_Functional") { + hipMemPool_t mem_pool = nullptr; + checkMempoolSupported(0) + // assign current mem pool to device + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, 0)); + // set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX + hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold; + std::uint64_t value = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value)); + // call checkMallocAsync() and validate + REQUIRE(true == checkMallocAsync()); +} diff --git a/catch/unit/memory/hipDeviceSetMemPool.cc b/catch/unit/memory/hipDeviceSetMemPool.cc new file mode 100644 index 0000000000..8f4273e379 --- /dev/null +++ b/catch/unit/memory/hipDeviceSetMemPool.cc @@ -0,0 +1,195 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipDeviceSetMemPool hipDeviceSetMemPool + * @{ + * @ingroup MemoryTest + * `hipError_t hipDeviceSetMemPool(int device, + * hipMemPool_t mem_pool)` - + * Sets the current memory pool for the specified device. + */ + +#include "mempool_common.hh" // NOLINT + +#define THREADS_PER_BLOCK 512 +static constexpr auto NUM_ELM {1024 * 1024}; + +/** + * Common function to allocate memory using hipMallocAsync API through a stream, + * launch kernel and perform vectorADD and validate results. Free memory using + * hipFreeAsync. +*/ +static bool checkMallocAsync() { + streamMemAllocTest testObj(NUM_ELM); + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + // Create host buffer with test data. + testObj.createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on stream. + testObj.allocFromDefMempool(stream); + testObj.transferToMempool(stream); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + // Free Buffer Asynchronously on stream. + testObj.freeDevBuf(stream); + HIP_CHECK(hipStreamSynchronize(stream)); + // verify and validate + REQUIRE(true == testObj.validateResult()); + // Destroy resources + HIP_CHECK(hipStreamDestroy(stream)); + testObj.freeHostBuf(); + return true; +} + +/** + * Test Description + * ------------------------ + * - Test case to verify Basic scenario, create an explicit mem pool + * and validate current pool is same as created mem pool. + * ------------------------ + * - catch\unit\memory\hipDeviceSetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceSetMemPool_Basic") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + for (int dev = 0; dev < num_devices; dev++) { + checkMempoolSupported(dev) + hipMemPool_t mem_pool_device = nullptr, curr_mem_pool = nullptr; + // create explicit mem pool + hipMemPoolProps prop{}; + prop.allocType = hipMemAllocationTypePinned; + prop.location.id = dev; + prop.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool_device, &prop)); + HIP_CHECK(hipDeviceSetMemPool(dev, mem_pool_device)); + // get current mem pool + HIP_CHECK(hipDeviceGetMemPool(&curr_mem_pool, dev)); + // validate both memory are same. + REQUIRE(curr_mem_pool == mem_pool_device); + // free mem pool + HIP_CHECK(hipMemPoolDestroy(mem_pool_device)); + } +} + +/** + * Test Description + * ------------------------ + * - Create a mempool and set it as the current mempool of the + * device. Validate that destroying the current mempool of a device + * sets the default mempool of that device as the current mempool + * for that device. + * ------------------------ + * - catch\unit\memory\hipDeviceSetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + for (int dev = 0; dev < num_devices; dev++) { + checkMempoolSupported(dev) + HIP_CHECK(hipSetDevice(dev)); + hipMemPool_t mem_pool_device, curr_mem_pool, def_mem_pool; + hipMemPoolProps prop{}; + prop.allocType = hipMemAllocationTypePinned; + prop.location.id = dev; + prop.location.type = hipMemLocationTypeDevice; + // Create explicit mempool + HIP_CHECK(hipMemPoolCreate(&mem_pool_device, &prop)); + // Set mempool + HIP_CHECK(hipDeviceSetMemPool(dev, mem_pool_device)); + // Destroy mem pool + HIP_CHECK(hipMemPoolDestroy(mem_pool_device)); + // Get current mem pool + HIP_CHECK(hipDeviceGetMemPool(&curr_mem_pool, dev)); + // Get default mempool + HIP_CHECK(hipDeviceGetDefaultMemPool(&def_mem_pool, dev)); + // validate the mempool is the default mempool + REQUIRE(curr_mem_pool == def_mem_pool); + } +} + +/** + * Test Description + * ------------------------ + * - Create explicit memory pool on default GPU. Set this as the current mempool + * call checkMallocAsync() and destroy the mem pool. + * ------------------------ + * - catch\unit\memory\hipDeviceSetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceSetMemPool_functional") { + checkMempoolSupported(0) + hipMemPool_t mem_pool = nullptr; + // create explicit mem pool + hipMemPoolProps PoolProps{}; + PoolProps.allocType = hipMemAllocationTypePinned; + PoolProps.location.id = 0; + PoolProps.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &PoolProps)); + HIP_CHECK(hipDeviceSetMemPool(0, mem_pool)); + // call checkMallocAsync function + REQUIRE(true == checkMallocAsync()); + // destroy the mem pool. + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + +/** + * Test Description + * ------------------------ + * - Create explicit memory pool on default GPU. Set this as the current mempool + * Set attribute hipMemPoolAttrReleaseThreshold to UINT64_MAX. call checkMallocAsync() + * and destroy the mem pool. + * ------------------------ + * - catch\unit\memory\hipDeviceSetMemPool.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipDeviceSetMemPool_functionalAttribute") { + checkMempoolSupported(0) + hipMemPool_t mem_pool = nullptr; + // create explicit mem pool + hipMemPoolProps PoolProps{}; + PoolProps.allocType = hipMemAllocationTypePinned; + PoolProps.location.id = 0; + PoolProps.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &PoolProps)); + HIP_CHECK(hipDeviceSetMemPool(0, mem_pool)); + // set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX + hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold; + std::uint64_t value = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value)); + // call checkMallocAsync function + REQUIRE(true == checkMallocAsync()); + // destroy the mem pool. + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} diff --git a/catch/unit/memory/hipMallocAsync.cc b/catch/unit/memory/hipMallocAsync.cc index c7ff4ab1ec..39990b83d5 100644 --- a/catch/unit/memory/hipMallocAsync.cc +++ b/catch/unit/memory/hipMallocAsync.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -23,6 +23,10 @@ #pragma clang diagnostic ignored "-Wunused-parameter" +static bool thread_results[NUMBER_OF_THREADS]; +static constexpr auto NUM_ELM {1024 * 1024}; +static constexpr int streamPerAsic = 2; + /** * @addtogroup hipMallocAsync hipMallocAsync * @{ @@ -41,7 +45,7 @@ * - /unit/memory/hipMallocAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocAsync_Basic_OneAlloc") { MallocMemPoolAsync_OneAlloc( @@ -61,7 +65,7 @@ TEST_CASE("Unit_hipMallocAsync_Basic_OneAlloc") { * - /unit/memory/hipMallocAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocAsync_Basic_TwoAllocs") { MallocMemPoolAsync_TwoAllocs( @@ -80,7 +84,7 @@ TEST_CASE("Unit_hipMallocAsync_Basic_TwoAllocs") { * - /unit/memory/hipMallocAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocAsync_Basic_Reuse") { MallocMemPoolAsync_Reuse([](void** dev_ptr, size_t size, hipMemPool_t mem_pool, @@ -102,18 +106,12 @@ TEST_CASE("Unit_hipMallocAsync_Basic_Reuse") { * - /unit/memory/hipMallocAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocAsync_Negative_Parameters") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0) int* p = nullptr; size_t max_size = std::numeric_limits::max(); @@ -137,6 +135,535 @@ TEST_CASE("Unit_hipMallocAsync_Negative_Parameters") { } } +/** + * Common function to allocate memory using hipMallocAsync API through a stream, + * launch kernel and perform vectorADD and validate results. Free memory using + * hipFreeAsync. +*/ +static bool checkMallocAsync(hipStream_t stream) { + streamMemAllocTest testObj(NUM_ELM); + // Create host buffer with test data. + testObj.createHostBufferWithData(); + // Allocate device memory. + testObj.allocFromDefMempool(stream); + // Transfer data to it asyncronously on stream. + testObj.transferToMempool(stream); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + // Free Buffer Asynchronously on stream. + testObj.freeDevBuf(stream); + HIP_CHECK(hipStreamSynchronize(stream)); + // verify and validate + REQUIRE(true == testObj.validateResult()); + // Destroy resources + testObj.freeHostBuf(); + return true; +} +/** + * Test Description + * ------------------------ + * - Test case to perform basic scenario. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_basic") { + checkMempoolSupported(0) + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(true == checkMallocAsync(stream)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - Test case to perform multi stream, allocate memory using + * hipMallocAsync API for a stream1 and stream2, launch kernel and + * perform vectorADD, synchronize stream1 and stream2 and validate + * results. Free memory using hipFreeAsync. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_Multistream_Concurrent") { + checkMempoolSupported(0) + streamMemAllocTest testObj1(NUM_ELM), testObj2(NUM_ELM); + // create multiple streams + hipStream_t stream1, stream2; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + // Create host buffer with test data. + testObj1.createHostBufferWithData(); + testObj2.createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on streams. + testObj1.allocFromDefMempool(stream1); + testObj2.allocFromDefMempool(stream2); + testObj1.transferToMempool(stream1); + testObj2.transferToMempool(stream2); + // Execute kernel and transfer result back to host asynchronously on streams. + testObj1.runKernel(stream1); + testObj2.runKernel(stream2); + testObj1.transferFromMempool(stream1); + testObj2.transferFromMempool(stream2); + // Free Buffer Asynchronously on streams. + testObj1.freeDevBuf(stream1); + testObj2.freeDevBuf(stream2); + // synchronize both stream1 and stream2 + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamSynchronize(stream2)); + // verify and validate + REQUIRE(true == testObj1.validateResult()); + REQUIRE(true == testObj2.validateResult()); + // Destroy resources + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + testObj1.freeHostBuf(); + testObj2.freeHostBuf(); +} + +/** + * Test Description + * ------------------------ + * - Allocate memory using hipMallocAsync API through stream1 and record event1, + * allocate event1 to stream2 and put stream2 to wait, launch kernel through + * stream2 and perform vectorADD and validate results. Free memory using hipFreeAsync. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_StreamEvent_CrissCross") { + checkMempoolSupported(0) + streamMemAllocTest testObj1(NUM_ELM), testObj2(NUM_ELM); + // create two streams. + hipStream_t stream1, stream2; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipStreamCreate(&stream2)); + // create an event + hipEvent_t event1 = nullptr, event2 = nullptr; + HIP_CHECK(hipEventCreate(&event1)); + HIP_CHECK(hipEventCreate(&event2)); + // Create host buffer with test data. + testObj1.createHostBufferWithData(); + testObj2.createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on streams. + testObj1.allocFromDefMempool(stream1); + testObj2.allocFromDefMempool(stream2); + testObj1.transferToMempool(stream1); + testObj2.transferToMempool(stream2); + // create event record + HIP_CHECK(hipEventRecord(event1, stream1)); + HIP_CHECK(hipEventRecord(event2, stream2)); + HIP_CHECK(hipStreamWaitEvent(stream2, event1, 0)); + HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0)); + // Execute kernel and transfer result back to host asynchronously on streams. + testObj1.runKernel(stream2); + testObj2.runKernel(stream1); + testObj1.transferFromMempool(stream2); + testObj2.transferFromMempool(stream1); + // Free Buffer Asynchronously on streams. + testObj1.freeDevBuf(stream2); + testObj2.freeDevBuf(stream1); + // Wait for stream2. + HIP_CHECK(hipStreamSynchronize(stream2)); + HIP_CHECK(hipStreamSynchronize(stream1)); + // verify and validate + REQUIRE(true == testObj1.validateResult()); + REQUIRE(true == testObj2.validateResult()); + // Destroy resources + HIP_CHECK(hipStreamDestroy(stream1)); + HIP_CHECK(hipStreamDestroy(stream2)); + HIP_CHECK(hipEventDestroy(event1)); + HIP_CHECK(hipEventDestroy(event2)); + testObj1.freeHostBuf(); + testObj2.freeHostBuf(); +} + +/** + * Test Description + * ------------------------ + * - Test case to perform multi device scenario, get number of devices available + * and call checkMallocAsync function for each device available. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_Multidevice") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + for (int i = 0; i < num_devices; i++) { + checkMempoolSupported(i) + HIP_CHECK(hipSetDevice(i)); + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(true == checkMallocAsync(stream)); + HIP_CHECK(hipStreamDestroy(stream)); + } +} + +/** + * Test Description + * ------------------------ + * - Queue the following commands hipMallocAsync, transfer data + * to it asynchrously, launch Kernel, transfer results back to host + * asynchronously and free buffer async in streams across all GPUs. + * The execution in of the queued commands must happen concurrently. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +#if HT_AMD +static void threadQAsyncCommands(streamMemAllocTest* testObj, + hipStream_t strm) { + // Create host buffer with test data. + testObj->createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on stream. + testObj->allocFromDefMempool(strm); + testObj->transferToMempool(strm); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj->runKernel(strm); + testObj->transferFromMempool(strm); + // Free Buffer Asynchronously on stream. + testObj->freeDevBuf(strm); +} + +TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + checkIfMultiDev(num_devices) + hipStream_t *stream_buf = new hipStream_t[num_devices]; + std::vector tesObjBuf; + // Allocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + checkMempoolSupported(idx) + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamCreate(&stream_buf[idx])); + streamMemAllocTest *testObj = new streamMemAllocTest(NUM_ELM); + tesObjBuf.push_back(testObj); + } + // Queue commands in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]); + test.join(); + } + // Wait for the streams + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamSynchronize(stream_buf[idx])); + // verify and validate + REQUIRE(true == tesObjBuf[idx]->validateResult()); + } + // Deallocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + // Destroy resources + tesObjBuf[idx]->freeHostBuf(); + HIP_CHECK(hipStreamDestroy(stream_buf[idx])); + delete tesObjBuf[idx]; + } + delete[] stream_buf; +} + +/** + * Test Description + * ------------------------ + * - Queue the following commands hipMallocAsync, transfer data + * to it asynchrously, launch Kernel, transfer results back to host + * asynchronously and free buffer async in streams across all GPUs + * using multiple streams per GPU. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream") { + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + checkIfMultiDev(num_devices) + // 2 stream per ASIC + hipStream_t *stream_buf = new hipStream_t[streamPerAsic*num_devices]; + std::vector tesObjBuf; + // Allocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + checkMempoolSupported(idx) + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx + 1])); + streamMemAllocTest *testObj1 = new streamMemAllocTest(NUM_ELM); + tesObjBuf.push_back(testObj1); + streamMemAllocTest *testObj2 = new streamMemAllocTest(NUM_ELM); + tesObjBuf.push_back(testObj2); + } + // Queue commands in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx], + stream_buf[streamPerAsic*idx]); + std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1], + stream_buf[streamPerAsic*idx + 1]); + test1.join(); + test2.join(); + } + // Wait for the streams + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx + 1])); + // verify and validate + REQUIRE(true == tesObjBuf[streamPerAsic*idx]->validateResult()); + REQUIRE(true == tesObjBuf[streamPerAsic*idx + 1]->validateResult()); + } + // Deallocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + // Destroy resources + tesObjBuf[streamPerAsic*idx]->freeHostBuf(); + tesObjBuf[streamPerAsic*idx + 1]->freeHostBuf(); + HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx + 1])); + delete tesObjBuf[streamPerAsic*idx]; + delete tesObjBuf[streamPerAsic*idx + 1]; + } + delete[] stream_buf; +} +#endif +/** + * Test Description + * ------------------------ + * - Assign device memory using hipMalloc, launch kernel and perform + * vector square and validate. Free memory using hipFreeAsync API. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_ByUsinghipMalloc") { + checkMempoolSupported(0) + size_t byte_size = NUM_ELM * sizeof(float); + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + float *A_h, *C_h; + float *A_d, *C_d; + // assign memory to host pointers + A_h = reinterpret_cast(malloc(byte_size)); + C_h = reinterpret_cast(malloc(byte_size)); + // set data to host + for (int i = 0; i < NUM_ELM; i++) { + A_h[i] = 7.0f; + C_h[i] = 0; + } + // assign memory to device pointers + HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), byte_size)); + HIP_CHECK(hipMalloc(reinterpret_cast(&C_d), byte_size)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice, stream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(NUM_ELM / THREADS_PER_BLOCK), + dim3(THREADS_PER_BLOCK), 0, stream, + static_cast(A_d), C_d, NUM_ELM); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(C_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // verify and validate + for (int i = 0; i < NUM_ELM; i++) { + REQUIRE(C_h[i] == (A_h[i] * A_h[i])); + } + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); + free(C_h); +} + +/** + * Test Description + * ------------------------ + * - Assign device memory using hipMallocAsync, launch kernel and perform + * vector square and validate. Free memory using hipFree API. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_ByUsinghipFree") { + size_t byte_size = NUM_ELM * sizeof(float); + checkMempoolSupported(0) + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + float *A_h, *C_h; + float *A_d, *C_d; + // assign memory to host pointers + A_h = reinterpret_cast(malloc(byte_size)); + C_h = reinterpret_cast(malloc(byte_size)); + // set data to host + for (int i = 0; i < NUM_ELM; i++) { + A_h[i] = 5.0f; + C_h[i] = 0; + } + // assign memory to device pointers + HIP_CHECK(hipMallocAsync(reinterpret_cast(&A_d), byte_size, stream)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&C_d), byte_size, stream)); + HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice, stream)); + hipLaunchKernelGGL(HipTest::vector_square, dim3(NUM_ELM / THREADS_PER_BLOCK), + dim3(THREADS_PER_BLOCK), 0, stream, + static_cast(A_d), C_d, NUM_ELM); + HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipFree(reinterpret_cast(A_d))); + HIP_CHECK(hipFree(reinterpret_cast(C_d))); + // verify and validate + for (int i = 0; i < NUM_ELM; i++) { + REQUIRE(C_h[i] == (A_h[i] * A_h[i])); + } + HIP_CHECK(hipStreamDestroy(stream)); + free(A_h); + free(C_h); +} + +/** + * Test Description + * ------------------------ + * - Test case to check hipMallocAsync allocation and usage in multiple + * threads. Each thread will use a local stream. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +static void threadTestLocalStream(int threadNum) { + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + thread_results[threadNum] = checkMallocAsync(stream); + HIP_CHECK(hipStreamDestroy(stream)); +} + +static bool testhipMallocAsyncMThreadLocalStrm() { + std::vector tests; + // Spawn the test threads + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + thread_results[idx] = false; + tests.push_back(std::thread(threadTestLocalStream, idx)); + } + // Wait for all threads to complete + for (std::thread &t : tests) { + t.join(); + } + // Wait for thread + bool status = true; + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + status = status & thread_results[idx]; + } + return status; +} + +TEST_CASE("Unit_hipMallocAsync_MThread_ThreadLocalStream") { + checkMempoolSupported(0) + REQUIRE(true == testhipMallocAsyncMThreadLocalStrm()); +} + +/** + * Test Description + * ------------------------ + * - Test case to check hipMallocAsync allocation and usage in multiple + * threads. Threads will use a common shared stream. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +static void threadTestCommonStream(int threadNum, hipStream_t stream) { + thread_results[threadNum] = checkMallocAsync(stream); +} + +static bool testhipMallocAsyncMThreadLocalStrm(hipStream_t stream) { + std::vector tests; + // Spawn the test threads + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + thread_results[idx] = false; + tests.push_back(std::thread(threadTestCommonStream, idx, stream)); + } + // Wait for all threads to complete + for (std::thread &t : tests) { + t.join(); + } + // Wait for thread + bool status = true; + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + status = status & thread_results[idx]; + } + return status; +} + +TEST_CASE("Unit_hipMallocAsync_MThread_ThreadSharedStream") { + checkMempoolSupported(0) + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(true == testhipMallocAsyncMThreadLocalStrm(stream)); + HIP_CHECK(hipStreamDestroy(stream)); +} +/** + * Test Description + * ------------------------ + * - Test case to check MallocAsync functionality on user created stream, + * null stream and hipstreamperthread concurrently. launch kernel and wait + * for all streams to complete and validate results. + * ------------------------ + * - catch\unit\memory\hipMallocAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocAsync_DefaultStreams_Concurrent") { + checkMempoolSupported(0) + streamMemAllocTest testObj[3] = {streamMemAllocTest(NUM_ELM), + streamMemAllocTest(NUM_ELM), + streamMemAllocTest(NUM_ELM)}; + // create multiple streams + hipStream_t stream[3]; + HIP_CHECK(hipStreamCreate(&stream[0])); + stream[1] = 0; // Null stream + stream[2] = hipStreamPerThread; + // Queue operations on the 3 streams + for (int idx = 0; idx < 3; idx++) { + // Create host buffer with test data. + testObj[idx].createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on stream. + testObj[idx].allocFromDefMempool(stream[idx]); + testObj[idx].transferToMempool(stream[idx]); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj[idx].runKernel(stream[idx]); + testObj[idx].transferFromMempool(stream[idx]); + // Free Buffer Asynchronously on stream. + testObj[idx].freeDevBuf(stream[idx]); + } + // Wait for the 3 streams + for (int idx = 0; idx < 3; idx++) { + HIP_CHECK(hipStreamSynchronize(stream[idx])); + // verify and validate + REQUIRE(true == testObj[idx].validateResult()); + // Destroy resources + testObj[idx].freeHostBuf(); + } + HIP_CHECK(hipStreamDestroy(stream[0])); +} + /** * End doxygen group StreamOTest. * @} diff --git a/catch/unit/memory/hipMallocFromPoolAsync.cc b/catch/unit/memory/hipMallocFromPoolAsync.cc index a309df4729..d12c7391da 100644 --- a/catch/unit/memory/hipMallocFromPoolAsync.cc +++ b/catch/unit/memory/hipMallocFromPoolAsync.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -21,6 +21,10 @@ #include +static bool thread_results[NUMBER_OF_THREADS]; +static constexpr int streamPerAsic = 2; +static hipMemPool_t mem_pool_common; + /** * @addtogroup hipMallocFromPoolAsync hipMallocFromPoolAsync * @{ @@ -40,7 +44,7 @@ * - /unit/memory/hipMallocFromPoolAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_OneAlloc") { MallocMemPoolAsync_OneAlloc( @@ -61,7 +65,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_OneAlloc") { * - /unit/memory/hipMallocFromPoolAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_TwoAllocs") { MallocMemPoolAsync_TwoAllocs( @@ -80,7 +84,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_TwoAllocs") { * - /unit/memory/hipMallocFromPoolAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_Reuse") { MallocMemPoolAsync_Reuse( @@ -104,18 +108,13 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_Reuse") { * - /unit/memory/hipMallocFromPoolAsync.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMallocFromPoolAsync_Negative_Parameters") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0); void* p = nullptr; size_t max_size = std::numeric_limits::max(); @@ -148,6 +147,731 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Negative_Parameters") { } } +/** + * Local function to test mempool allocation, usage and freeing on + * multiple user created Streams with inter Stream synchonization. + */ +static bool checkMempoolMultStreamSync(int N) { + streamMemAllocTest testObj(N); + // create multiple streams + hipStream_t streamMemCreate, streamMemAccess, streamMemDestroy; + HIP_CHECK(hipStreamCreate(&streamMemCreate)); + HIP_CHECK(hipStreamCreate(&streamMemAccess)); + HIP_CHECK(hipStreamCreate(&streamMemDestroy)); + // Create host buffer with test data + testObj.createHostBufferWithData(); + // Create mempool in current device = 0 + testObj.createMempool(hipMemPoolAttrReleaseThreshold, testdefault, 0); + hipEvent_t Event1, Event2; + HIP_CHECK(hipEventCreate(&Event1)); + HIP_CHECK(hipEventCreate(&Event2)); + // Allocate memory and initialize it on streamMemCreate + testObj.allocFromMempool(streamMemCreate); + testObj.transferToMempool(streamMemCreate); + HIP_CHECK(hipEventRecord(Event1, streamMemCreate)); + // Launch Kernel on streamMemAccess + HIP_CHECK(hipStreamWaitEvent(streamMemAccess, Event1, 0)); + testObj.runKernel(streamMemAccess); + testObj.transferFromMempool(streamMemAccess); + HIP_CHECK(hipEventRecord(Event2, streamMemAccess)); + // Launch Kernel on streamMemAccess + HIP_CHECK(hipStreamWaitEvent(streamMemDestroy, Event2, 0)); + testObj.freeDevBuf(streamMemDestroy); + HIP_CHECK(hipStreamSynchronize(streamMemDestroy)); + // Validate test result and clean all host buffers and mempool + bool results = false; + results = testObj.validateResult(); + testObj.freeMempool(); + testObj.freeHostBuf(); + HIP_CHECK(hipEventDestroy(Event2)); + HIP_CHECK(hipEventDestroy(Event1)); + HIP_CHECK(hipStreamDestroy(streamMemDestroy)); + HIP_CHECK(hipStreamDestroy(streamMemAccess)); + HIP_CHECK(hipStreamDestroy(streamMemCreate)); + return results; +} + +/** + * Local function to test mempool functionality on a user created + * stream, null stream and hipStreamPerThread concurrently. Wait + * for all the streams to complete and validate result. + */ +static bool checkMempoolMultStreamConcurrentExec(int N, + bool useDefStrm = true) { + streamMemAllocTest testObj[3] = {streamMemAllocTest(N), + streamMemAllocTest(N), + streamMemAllocTest(N)}; + // create multiple streams + hipStream_t testStreams[3]; + HIP_CHECK(hipStreamCreate(&testStreams[0])); + if (useDefStrm) { + testStreams[1] = 0; // null stream + testStreams[2] = hipStreamPerThread; + } else { + HIP_CHECK(hipStreamCreate(&testStreams[1])); + HIP_CHECK(hipStreamCreate(&testStreams[2])); + } + // Create common mempool + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props)); + bool results = true; + for (int idx = 0; idx < 3; idx++) { + // Create mempool in current device = 0 + testObj[idx].useCommonMempool(mem_pool_common); + // Create host buffer with test data + testObj[idx].createHostBufferWithData(); + // Allocate memory and initialize it on testStreams[idx] + testObj[idx].allocFromMempool(testStreams[idx]); + testObj[idx].transferToMempool(testStreams[idx]); + // Launch Kernel on testStreams[idx] + testObj[idx].runKernel(testStreams[idx]); + testObj[idx].transferFromMempool(testStreams[idx]); + testObj[idx].freeDevBuf(testStreams[idx]); + } + for (int idx = 0; idx < 3; idx++) { + HIP_CHECK(hipStreamSynchronize(testStreams[idx])); + // Validate test result and clean all host buffers and mempool + results &= testObj[idx].validateResult(); + testObj[idx].freeHostBuf(); + } + HIP_CHECK(hipStreamDestroy(testStreams[0])); + if (!useDefStrm) { + HIP_CHECK(hipStreamDestroy(testStreams[1])); + HIP_CHECK(hipStreamDestroy(testStreams[2])); + } + // Destroy common mempool + HIP_CHECK(hipMemPoolDestroy(mem_pool_common)); + return results; +} + +/** + * Local function to test hipMemPoolAttrReleaseThreshold. + */ +static bool checkMaximumAndDefaultThreshold(hipStream_t stream, int N, + enum eTestValue testtype, int dev = 0) { + streamMemAllocTest testObj(N); + // Create host buffer with test data + testObj.createHostBufferWithData(); + // Create mempool in current device = dev + testObj.createMempool(hipMemPoolAttrReleaseThreshold, testtype, dev); + bool results = true; + for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) { + // Allocate memory and initialize it on stream + testObj.allocFromMempool(stream); + testObj.transferToMempool(stream); + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + // validate + testObj.freeDevBuf(stream); + HIP_CHECK(hipStreamSynchronize(stream)); + results = testObj.validateResult(); + if (!results) { + break; + } + } + testObj.freeMempool(); + testObj.freeHostBuf(); + return results; +} + +/** + * Test Description + * ------------------------ + * - Create explicit mempool1 on default GPU and set attribute + * hipMemPoolAttrReleaseThreshold to UINT64_MAX. Create another explicit + * mempool2 on default GPU with default attribute. + * LOOP for 10 times: {Allocate A_d1, B_d1, C_d1 from pool1, memcpy data to + * (A_d1, B_d1). Launch kernel to perform C_d1(x)=A_d1(x)+B_d1(x), verify + * result and free the memory.} After loop free the pool. + * LOOP for 10 times: {Allocate A_d2, B_d2, C_d2 from pool2, memcpy data to + * (A_d2, B_d2). Launch kernel to perform C_d2(x)=A_d2(x)+B_d2(x), verify + * result and free the memory.} After loop free the pool. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold") { + checkMempoolSupported(0) + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + constexpr int N = 1 << 20; + REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N, + testdefault)); + REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N, + testMaximum)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - Validate hipMallocFromPoolAsync functionality on null stream. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_NullStream") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkMaximumAndDefaultThreshold(0, N, + testdefault)); + REQUIRE(true == checkMaximumAndDefaultThreshold(0, N, + testMaximum)); +} + +/** + * Test Description + * ------------------------ + * - Validate hipMallocFromPoolAsync functionality on hipStreamPerThread. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_hipStreamPerThread") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, N, + testdefault)); + REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, N, + testMaximum)); +} + +/** + * Test Description + * ------------------------ + * - Check Release Threshold for multiple device. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu") { + constexpr int N = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + HIP_CHECK(hipSetDevice(dev)); + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N, + testdefault, dev)); + REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N, + testMaximum, dev)); + HIP_CHECK(hipStreamDestroy(stream)); + } +} + +/** + * Local Thread Functions + */ +static void threadQAsyncCommands(streamMemAllocTest* testObj, + hipStream_t strm) { + // Create host buffer with test data. + testObj->createHostBufferWithData(); + // Allocate device memory and transfer data to it asyncronously on stream. + testObj->allocFromMempool(strm); + testObj->transferToMempool(strm); + // Execute kernel and transfer result back to host asynchronously on stream. + testObj->runKernel(strm); + testObj->transferFromMempool(strm); + // Free Buffer Asynchronously on stream. + testObj->freeDevBuf(strm); +} + +static void thread_Test1(hipStream_t stream, int N, + enum eTestValue testtype, int threadNum) { + thread_results[threadNum] = + checkMaximumAndDefaultThreshold(stream, N, testtype, 0); +} + +static bool test_hipMallocFromPoolAsync_MThread( + enum eTestValue testtype) { + // create a stream + constexpr int N = 1 << 20; + std::vector tests; + hipStream_t stream[NUMBER_OF_THREADS]; + // Initialize and create streams + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + thread_results[idx] = false; + HIP_CHECK(hipStreamCreate(&stream[idx])); + } + // Spawn the test threads + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + tests.push_back(std::thread(thread_Test1, stream[idx], + N, testtype, idx)); + } + // Wait for all threads to complete + for (std::thread &t : tests) { + t.join(); + } + // Wait for thread and destroy stream + bool status = true; + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + status = status & thread_results[idx]; + HIP_CHECK(hipStreamDestroy(stream[idx])); + } + return status; +} + +static void thread_Test2(hipMemPool_t mempool, hipStream_t stream, + int N, int threadNum) { + streamMemAllocTest testObj(N); + // Create host buffer with test data + testObj.createHostBufferWithData(); + // Use the common mempool + testObj.useCommonMempool(mempool); + bool results = true; + for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) { + // Allocate memory and initialize it on stream + testObj.allocFromMempool(stream); + testObj.transferToMempool(stream); + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + testObj.freeDevBuf(stream); + // verify and validate + HIP_CHECK(hipStreamSynchronize(stream)); + results = testObj.validateResult(); + if (!results) { + break; + } + } + testObj.freeHostBuf(); + thread_results[threadNum] = results; +} + +static bool test_hipMallocFromPoolAsync_MThread_CommonMpool( + enum eTestValue testtype, bool bUseDefault = false) { + // create a stream + constexpr int N = 1 << 20; + std::vector tests; + hipStream_t stream[NUMBER_OF_THREADS]; + // Create common mempool + if (bUseDefault) { + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_common, 0)); + } else { + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props)); + } + if (testtype == testMaximum) { + uint64_t setThreshold = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool_common, + hipMemPoolAttrReleaseThreshold, &setThreshold)); + } + // Initialize and create streams + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + thread_results[idx] = false; + HIP_CHECK(hipStreamCreate(&stream[idx])); + } + // Spawn the test threads + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + tests.push_back(std::thread(thread_Test2, mem_pool_common, + stream[idx], N, idx)); + } + // Wait for all threads to complete + for (std::thread &t : tests) { + t.join(); + } + // Wait for thread and destroy stream + bool status = true; + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + status = status & thread_results[idx]; + HIP_CHECK(hipStreamDestroy(stream[idx])); + } + // Destroy common mempool + if (!bUseDefault) { + HIP_CHECK(hipMemPoolDestroy(mem_pool_common)); + } + return status; +} + +/** + * Local function to test hipMemPoolReuseFollowEventDependencies. + */ +static bool checkReuseFollowEventDepFlag(int N, enum eTestValue testtype) { + streamMemAllocTest testObj(N); + // Create host buffer with test data + testObj.createHostBufferWithData(); + // Create mempool in current device = 0 + testObj.createMempool(hipMemPoolReuseFollowEventDependencies, + testtype, 0); + hipStream_t testStream1, testStream2; + HIP_CHECK(hipStreamCreate(&testStream1)); + HIP_CHECK(hipStreamCreate(&testStream2)); + bool results = true; + for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) { + hipEvent_t Event1; + HIP_CHECK(hipEventCreate(&Event1)); + // Allocate memory and initialize it on testStream1 + testObj.allocFromMempool(testStream1); + testObj.transferToMempool(testStream1); + testObj.runKernel(testStream1); + testObj.transferFromMempool(testStream1); + testObj.freeDevBuf(testStream1); + HIP_CHECK(hipEventRecord(Event1, testStream1)); + HIP_CHECK(hipStreamWaitEvent(testStream2, Event1, 0)); + // Allocate memory and initialize it on testStream2 + testObj.allocFromMempool(testStream2); + testObj.transferToMempool(testStream2); + testObj.runKernel(testStream2); + testObj.transferFromMempool(testStream2); + testObj.freeDevBuf(testStream2); + // validate + HIP_CHECK(hipStreamSynchronize(testStream2)); + HIP_CHECK(hipEventDestroy(Event1)); + results = testObj.validateResult(); + if (!results) { + break; + } + } + testObj.freeMempool(); + testObj.freeHostBuf(); + HIP_CHECK(hipStreamDestroy(testStream2)); + HIP_CHECK(hipStreamDestroy(testStream1)); + return results; +} + +/** + * Local function to test hipMemPoolReuseAllowOpportunistic and + * hipMemPoolReuseAllowInternalDependencies. + */ +static bool checkReuseAllowOtherFlags(int N, hipMemPoolAttr attr, + enum eTestValue testtype) { + streamMemAllocTest testObj(N); + // Create host buffer with test data + testObj.createHostBufferWithData(); + // Create mempool in current device = 0 + testObj.createMempool(attr, testtype, 0); + hipStream_t testStream1, testStream2; + HIP_CHECK(hipStreamCreate(&testStream1)); + HIP_CHECK(hipStreamCreate(&testStream2)); + bool results = true; + for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) { + // Allocate memory and initialize it on testStream1 + testObj.allocFromMempool(testStream1); + testObj.transferToMempool(testStream1); + testObj.runKernel(testStream1); + testObj.transferFromMempool(testStream1); + testObj.freeDevBuf(testStream1); + // Allocate memory and initialize it on testStream2 + testObj.allocFromMempool(testStream2); + testObj.transferToMempool(testStream2); + testObj.runKernel(testStream2); + testObj.transferFromMempool(testStream2); + testObj.freeDevBuf(testStream2); + // validate + HIP_CHECK(hipStreamSynchronize(testStream2)); + results = testObj.validateResult(); + if (!results) { + break; + } + } + testObj.freeMempool(); + testObj.freeHostBuf(); + HIP_CHECK(hipStreamDestroy(testStream2)); + HIP_CHECK(hipStreamDestroy(testStream1)); + return results; +} + +/** + * Test Description + * ------------------------ + * - Queue the following commands hipMallocFromPoolAsync, transfer data to it + * asynchrously, launch Kernel, transfer results back to host asynchronously and + * free buffer async in streams across all GPUs. The execution in of the queued + * commands must happen concurrently. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +#if HT_AMD +TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") { + auto testType = GENERATE(testdefault, testMaximum); + constexpr int N = 1 << 20; + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + checkIfMultiDev(num_devices) + hipStream_t *stream_buf = new hipStream_t[num_devices]; + std::vector tesObjBuf; + // Allocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + checkMempoolSupported(idx) + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamCreate(&stream_buf[idx])); + streamMemAllocTest *testObj = new streamMemAllocTest(N); + testObj->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx); + tesObjBuf.push_back(testObj); + } + // Queue commands in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]); + test.join(); + } + // Wait for the streams + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamSynchronize(stream_buf[idx])); + // verify and validate + REQUIRE(true == tesObjBuf[idx]->validateResult()); + } + // Deallocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + // Destroy resources + tesObjBuf[idx]->freeMempool(); + tesObjBuf[idx]->freeHostBuf(); + HIP_CHECK(hipStreamDestroy(stream_buf[idx])); + delete tesObjBuf[idx]; + } + delete[] stream_buf; +} + +/** + * Test Description + * ------------------------ + * - Queue the following commands hipMallocFromPoolAsync, transfer data to it + * asynchrously, launch Kernel, transfer results back to host asynchronously and + * free buffer async in streams across all GPUs using multiple streams per GPU. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream") { + int num_devices; + auto testType = GENERATE(testdefault, testMaximum); + constexpr int N = 1 << 20; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + checkIfMultiDev(num_devices) + // 2 stream per ASIC + hipStream_t *stream_buf = new hipStream_t[streamPerAsic*num_devices]; + std::vector tesObjBuf; + // Allocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + checkMempoolSupported(idx) + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx + 1])); + streamMemAllocTest *testObj1 = new streamMemAllocTest(N); + testObj1->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx); + tesObjBuf.push_back(testObj1); + streamMemAllocTest *testObj2 = new streamMemAllocTest(N); + testObj2->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx); + tesObjBuf.push_back(testObj2); + } + // Queue commands in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx], + stream_buf[streamPerAsic*idx]); + std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1], + stream_buf[streamPerAsic*idx + 1]); + test1.join(); + test2.join(); + } + // Wait for the streams + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx + 1])); + // verify and validate + REQUIRE(true == tesObjBuf[streamPerAsic*idx]->validateResult()); + REQUIRE(true == tesObjBuf[streamPerAsic*idx + 1]->validateResult()); + } + // Deallocate resources in each device + for (int idx = 0; idx < num_devices; idx++) { + HIP_CHECK(hipSetDevice(idx)); + // Destroy resources + tesObjBuf[streamPerAsic*idx]->freeMempool(); + tesObjBuf[streamPerAsic*idx]->freeHostBuf(); + tesObjBuf[streamPerAsic*idx + 1]->freeMempool(); + tesObjBuf[streamPerAsic*idx + 1]->freeHostBuf(); + HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx])); + HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx + 1])); + delete tesObjBuf[streamPerAsic*idx]; + delete tesObjBuf[streamPerAsic*idx + 1]; + } + delete[] stream_buf; +} +#endif +/** + * Test Description + * ------------------------ + * - Validate memory pool creation, allocation of memory from the + * memory pool and usage in multithreaded environment. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_DefaultThresh") { + checkMempoolSupported(0) + REQUIRE(true == test_hipMallocFromPoolAsync_MThread(testdefault)); +} + +TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_MaxThresh") { + checkMempoolSupported(0) + REQUIRE(true == test_hipMallocFromPoolAsync_MThread(testMaximum)); +} + +/** + * Test Description + * ------------------------ + * - Validate memory pool creation in main thread and its usage - + * device memory allocation, data transfer to and from device and + * kernel launch from multiple threads. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool") { + checkMempoolSupported(0) + REQUIRE(true == test_hipMallocFromPoolAsync_MThread_CommonMpool( + testdefault, true)); +} + +TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_MaxThresh") { + checkMempoolSupported(0) + REQUIRE(true == test_hipMallocFromPoolAsync_MThread_CommonMpool( + testMaximum, false)); +} + +/** + * Test Description + * ------------------------ + * - Multiple stream scenario: Create explicit memory pool. Create 3 streams. + * Allocate device memory and initialize on 1st stream, Invoke kernel to + * perform operation on 2nd stream and Free the device memory on 3rd stream. + * Synchronize between stream1, stream2 and stream3 using events. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_Sync") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkMempoolMultStreamSync(N)); +} + +/** + * Test Description + * ------------------------ + * - Multiple stream concurrent execution scenario: Create common memory pool. + * Execute mempool functionality on a user created stream, null stream and + * hipStreamPerThread concurrently. Wait for all the streams to complete and + * validate result. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_DefaultStreams") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkMempoolMultStreamConcurrentExec(N, true)); +} + +/** + * Test Description + * ------------------------ + * - Multiple stream concurrent execution scenario: Create common memory pool. + * Execute mempool functionality on multiple user created streams concurrently. + * Wait for all the streams to complete and validate result. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_UserStreams") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkMempoolMultStreamConcurrentExec(N, false)); +} + +/** + * Test Description + * ------------------------ + * - Test to validate mempool functionality when enabling and disabling + * hipMemPoolReuseFollowEventDependencies attribute. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseFollowEventDependencies") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkReuseFollowEventDepFlag(N, testDisabled)); + REQUIRE(true == checkReuseFollowEventDepFlag(N, testEnabled)); +} + +/** + * Test Description + * ------------------------ + * - Test to validate mempool functionality when enabling and disabling + * hipMemPoolReuseAllowOpportunistic attribute. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowOpportunistic") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkReuseAllowOtherFlags(N, + hipMemPoolReuseAllowOpportunistic, testDisabled)); + REQUIRE(true == checkReuseAllowOtherFlags(N, + hipMemPoolReuseAllowOpportunistic, testEnabled)); +} + +/** + * Test Description + * ------------------------ + * - Test to validate mempool functionality when enabling and disabling + * hipMemPoolReuseAllowInternalDependencies attribute. + * ------------------------ + * - catch\unit\memory\hipMallocFromPoolAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowInternalDependencies") { + checkMempoolSupported(0) + constexpr int N = 1 << 20; + REQUIRE(true == checkReuseAllowOtherFlags(N, + hipMemPoolReuseAllowInternalDependencies, testDisabled)); + REQUIRE(true == checkReuseAllowOtherFlags(N, + hipMemPoolReuseAllowInternalDependencies, testEnabled)); +} + /** * End doxygen group StreamOTest. * @} diff --git a/catch/unit/memory/hipMemPoolCreate.cc b/catch/unit/memory/hipMemPoolCreate.cc index 2c4e5c7708..8f50c161e2 100644 --- a/catch/unit/memory/hipMemPoolCreate.cc +++ b/catch/unit/memory/hipMemPoolCreate.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -18,6 +18,7 @@ */ #include +#include "mempool_common.hh" /** * @addtogroup hipMemPoolCreate hipMemPoolCreate @@ -42,15 +43,10 @@ * - /unit/memory/hipMemPoolCreate.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolCreate_Negative_Parameter") { - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0) int num_dev = 0; HIP_CHECK(hipGetDeviceCount(&num_dev)); @@ -93,12 +89,7 @@ TEST_CASE("Unit_hipMemPoolCreate_Negative_Parameter") { } TEST_CASE("Unit_hipMemPoolCreate_With_maxSize") { - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0) hipMemPoolProps pool_props; memset(&pool_props, 0, sizeof(pool_props)); pool_props.allocType = hipMemAllocationTypePinned; @@ -126,12 +117,7 @@ TEST_CASE("Unit_hipMemPoolCreate_With_maxSize") { } TEST_CASE("Unit_hipMemPoolCreate_Without_maxSize") { - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0) hipMemPoolProps pool_props; memset(&pool_props, 0, sizeof(pool_props)); pool_props.allocType = hipMemAllocationTypePinned; @@ -151,6 +137,67 @@ TEST_CASE("Unit_hipMemPoolCreate_Without_maxSize") { HIP_CHECK(hipStreamDestroy(stream)); } +static __global__ void setKer(int *devptr) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + devptr[tid] = tid; +} +/** + * Test Description + * ------------------------ + * - hipMemPoolCreate functionality tests + * Create mempool for current device and other devices, if they exist, and + * destroy them. + * ------------------------ + * - catch\unit\memory\hipMemPoolCreate.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolCreate_DeviceTest") { + checkMempoolSupported(0) + int num_devices = 0; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + checkIfMultiDev(num_devices) + // Scenario1 + SECTION("Simple Device Test") { + for (int dev = 0; dev < num_devices; dev++) { + hipMemPool_t mem_pool; + hipMemPoolProps prop{}; + prop.allocType = hipMemAllocationTypePinned; + prop.location.id = dev; + prop.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &prop)); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + } + } + // Scenario2 + SECTION("Accessibility Test") { + // Allocate a memory pool in current device + constexpr int N = 1 << 12; + constexpr int numThreadsPerBlk = 64; + hipMemPool_t mem_pool; + hipMemPoolProps prop{}; + prop.allocType = hipMemAllocationTypePinned; + prop.location.id = 0; + prop.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &prop)); + // Try allocating from mempool in other device context + for (int dev = 1; dev < num_devices; dev++) { + int *A_d; + HIP_CHECK(hipSetDevice(dev)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d), + N*sizeof(int), mem_pool, 0)); + HIP_CHECK(hipStreamSynchronize(0)); + HIP_CHECK(hipSetDevice(0)); + // Launch kernel to access A_d and free it on dev 0 context + setKer<<>>(A_d); + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), 0)); + HIP_CHECK(hipStreamSynchronize(0)); + } + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + } +} + /** * End doxygen group StreamOTest. * @} diff --git a/catch/unit/memory/hipMemPoolDestroy.cc b/catch/unit/memory/hipMemPoolDestroy.cc index 121e01534a..695120bde4 100644 --- a/catch/unit/memory/hipMemPoolDestroy.cc +++ b/catch/unit/memory/hipMemPoolDestroy.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -40,15 +40,10 @@ * - /unit/memory/hipMemPoolDestroy.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolDestroy_Negative_Parameter") { - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(0) hipMemPool_t mem_pool = nullptr; diff --git a/catch/unit/memory/hipMemPoolSetGetAccess.cc b/catch/unit/memory/hipMemPoolSetGetAccess.cc index 1727b5b8f6..116c297a6b 100644 --- a/catch/unit/memory/hipMemPoolSetGetAccess.cc +++ b/catch/unit/memory/hipMemPoolSetGetAccess.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,9 +17,10 @@ THE SOFTWARE. */ -#include +#include +#include #include -#include +#include "mempool_common.hh" /** * @addtogroup hipMemPoolSetAccess hipMemPoolSetAccess @@ -63,18 +64,12 @@ static void MemPoolSetGetAccess(const MemPools mempool_type, int src_device, int * - /unit/memory/hipMemPoolSetGetAccess.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_Basic") { const auto device = GENERATE(range(0, HipTest::getDeviceCount())); - int mem_pool_support = 0; - HIP_CHECK( - hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device) const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created); @@ -101,7 +96,7 @@ int CheckP2PMemPoolSupport(int src_device, int dst_device) { * - /unit/memory/hipMemPoolSetGetAccess.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") { const auto device_count = HipTest::getDeviceCount(); @@ -115,13 +110,12 @@ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") { int mem_pool_support = CheckP2PMemPoolSupport(src_device, dst_device); if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + HipTest::HIP_SKIP_TEST("Runtime doesn't support Memory Pool. Skip the test case."); return; } const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created); - const auto access_flag = GENERATE(hipMemAccessFlagsProtNone, hipMemAccessFlagsProtRead, - hipMemAccessFlagsProtReadWrite); + const auto access_flag = hipMemAccessFlagsProtReadWrite; int can_access_peer = 0; HIP_CHECK(hipSetDevice(src_device)); @@ -140,7 +134,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) { int mem_pool_support = CheckP2PMemPoolSupport(src_device, dst_device); if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + HipTest::HIP_SKIP_TEST("Runtime doesn't support Memory Pool. Skip the test case."); return; } @@ -216,7 +210,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) { * - /unit/memory/hipMemPoolSetGetAccess.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") { const auto device_count = HipTest::getDeviceCount(); @@ -246,13 +240,13 @@ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") { * - /unit/memory/hipMemPoolSetGetAccess.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") { CHECK_IMAGE_SUPPORT int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::dev_default, device_id); int num_dev = 0; @@ -296,6 +290,217 @@ TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") { } } +/** + * Local function to test hipMemPoolSetAccess function. + */ +static bool checkMempoolSetAccess(int N, int dev0, int dev1) { + // Set the current device context to dev0 + HIP_CHECK(hipSetDevice(dev0)); + // Create mempool in current device + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + + int *A_h, *B_h, *C_h; + size_t byte_size = N*sizeof(int); + // assign memory to host pointers + A_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(A_h != nullptr); + B_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(B_h != nullptr); + C_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(C_h != nullptr); + // set data to host + for (int i = 0; i < N; i++) { + A_h[i] = 2*i + 1; // Odd + B_h[i] = 2*i; // Even + C_h[i] = 0; + } + // create multiple streams + hipStream_t stream0; + HIP_CHECK(hipStreamCreate(&stream0)); + int *A_d0, *B_d0; + // Allocate memory on dev0 and initialize it on stream0 + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d0), + byte_size, mem_pool, stream0)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&B_d0), + byte_size, mem_pool, stream0)); + HIP_CHECK(hipMemcpyAsync(A_d0, A_h, byte_size, hipMemcpyHostToDevice, + stream0)); + HIP_CHECK(hipMemcpyAsync(B_d0, B_h, byte_size, hipMemcpyHostToDevice, + stream0)); + HIP_CHECK(hipStreamSynchronize(stream0)); + HIP_CHECK(hipStreamDestroy(stream0)); + // Set the current device context to dev1 + HIP_CHECK(hipSetDevice(dev1)); + // if withSetAccess is true set the access of mem_pool + // to both dev0 and dev1. + hipMemAccessDesc accessDesc; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = dev1; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(mem_pool, &accessDesc, 1)); + + int *A_d1, *B_d1, *C_d1; + hipStream_t stream1; + HIP_CHECK(hipStreamCreate(&stream1)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d1), + byte_size, mem_pool, stream1)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&B_d1), + byte_size, mem_pool, stream1)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&C_d1), + byte_size, mem_pool, stream1)); + HIP_CHECK(hipMemcpyAsync(A_d1, A_d0, byte_size, + hipMemcpyDeviceToDevice, stream1)); + HIP_CHECK(hipMemcpyAsync(B_d1, B_d0, byte_size, + hipMemcpyDeviceToDevice, stream1)); + // Launch Kernel on stream1 + hipLaunchKernelGGL(HipTest::vectorADD, dim3(N / THREADS_PER_BLOCK), + dim3(THREADS_PER_BLOCK), 0, stream1, + static_cast(A_d1), + static_cast(B_d1), C_d1, N); + HIP_CHECK(hipMemcpyAsync(C_h, C_d1, byte_size, hipMemcpyDeviceToHost, + stream1)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d1), stream1)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(B_d1), stream1)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(C_d1), stream1)); + HIP_CHECK(hipStreamSynchronize(stream1)); + HIP_CHECK(hipStreamDestroy(stream1)); + // Set the current device context back to dev0 + HIP_CHECK(hipSetDevice(dev0)); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + // verify and validate + for (int i = 0; i < N; i++) { + REQUIRE(C_h[i] == (A_h[i] + B_h[i])); + } + free(A_h); + free(B_h); + free(C_h); + return true; +} + +/** + * Local function to get pairs of devices. + */ +static void getDevicePairs(std::vector > *p2p_pairs, + int numDevices) { + for (int i = 0; i < (numDevices - 1); i++) { + for (int j = i + 1; j < numDevices; j++) { + std::pair p2p_pair = std::make_pair(i, j); + p2p_pairs->push_back(p2p_pair); + } + } +} + +/** + * Test Description + * ------------------------ + * - P2P Access Scenario for mempool: Precondition: NUM OF GPUs >= 2 + * and P2P is enabled. Create explicit memory pool (mempool) on default GPU. + * Allocate memory on device 0 and initialize it with data. Set current GPU + * to device 1. Set the access of mempool to device 1. Allocate memory on + * device 1 and transfer data from device 0 to device 1. Launch kernel to + * perform vector add on the data. Validate the data. Destroy the mempool. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess") { + constexpr int N = 1 << 14; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + checkIfMultiDev(numDevices) + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + } + std::vector > p2p_pairs; + getDevicePairs(&p2p_pairs, numDevices); + for (auto pair : p2p_pairs) { + int canAccessPeer = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, + pair.first, pair.second)); + if (canAccessPeer) { + REQUIRE(true == checkMempoolSetAccess(N, pair.first, + pair.second)); + } else { + WARN("P2P access not enabled between " << pair.first << + " and " << pair.second << " ."); + } + } +} + +/** + * Test Description + * ------------------------ + * - hipMemPoolSetAccess negative tests + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolSetAccess_NegTst") { + checkMempoolSupported(0) + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + constexpr size_t count = 1; + hipMemAccessDesc descList, descListNeg; + descList.flags = hipMemAccessFlagsProtReadWrite; + descList.location.type = hipMemLocationTypeDevice; + descList.location.id = 0; + // Scenario1 + SECTION("memPool NULL check") { + REQUIRE(hipMemPoolSetAccess(nullptr, &descList, count) == + hipErrorInvalidValue); + } + // Scenario2 + SECTION("Invalid Flag") { + descListNeg.flags = static_cast(0xffff); + descListNeg.location.type = hipMemLocationTypeDevice; + descListNeg.location.id = 0; + REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) == + hipErrorInvalidValue); + } + // Scenario3 +#if HT_AMD + SECTION("Invalid location type") { + descListNeg.flags = hipMemAccessFlagsProtReadWrite; + descListNeg.location.type = hipMemLocationTypeInvalid; + descListNeg.location.id = 0; + REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) == + hipErrorInvalidValue); + } +#endif + // Scenario4 + SECTION("Invalid device number") { + descListNeg.flags = hipMemAccessFlagsProtReadWrite; + descListNeg.location.type = hipMemLocationTypeDevice; + descListNeg.location.id = -1; + REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) == + hipErrorInvalidDevice); + } + // Scenario5 + SECTION("Unavailable device number") { + int num_devices = 0; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + descListNeg.flags = hipMemAccessFlagsProtReadWrite; + descListNeg.location.type = hipMemLocationTypeDevice; + descListNeg.location.id = num_devices; + REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) == + hipErrorInvalidDevice); + } + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + /** * End doxygen group hipMemPoolSetAccess. * @} @@ -323,12 +528,12 @@ TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") { * - /unit/memory/hipMemPoolSetGetAccess.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolGetAccess_Negative_Parameters") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::dev_default, device_id); int num_dev = 0; @@ -360,3 +565,129 @@ TEST_CASE("Unit_hipMemPoolGetAccess_Negative_Parameters") { location.id = device_id; } } + +/** + * Local function to test hipMemPoolSetAccess/hipMemPoolGetAccess + * function. + */ +static bool checkMempoolSetAccessWithGet(int dev0, int dev1) { + // Create mempool in current device + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + // Set access to dev1 + hipMemAccessDesc accessDesc; + accessDesc.location.type = hipMemLocationTypeDevice; + accessDesc.location.id = dev1; + accessDesc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(mem_pool, &accessDesc, 1)); + // Validate access for dev1 + hipMemAccessFlags flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = dev1; + HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + // Validate access for dev0 + location.id = dev0; + HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + return true; +} + +static bool checkMempoolSetAccessWithGetUsingArray(int dev0, int dev1) { + // Create mempool in current device + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + // Set access of dev0 and dev1 + hipMemAccessDesc accessDesc[2]; + accessDesc[0].location.type = hipMemLocationTypeDevice; + accessDesc[0].location.id = dev0; + accessDesc[0].flags = hipMemAccessFlagsProtReadWrite; + accessDesc[1].location.type = hipMemLocationTypeDevice; + accessDesc[1].location.id = dev1; + accessDesc[1].flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(mem_pool, accessDesc, 2)); + // Validate access for dev0 and dev1 + hipMemAccessFlags flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = dev0; + HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + location.id = dev1; + HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + return true; +} + +/** + * Test Description + * ------------------------ + * - Validate hipMemPoolSetAccess with hipMemPoolGetAccess for all + * devices on the system. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAccess_SetGet") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + checkIfMultiDev(numDevices) + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + } + std::vector > p2p_pairs; + getDevicePairs(&p2p_pairs, numDevices); + for (auto pair : p2p_pairs) { + int canAccessPeer = 0; + HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer, + pair.first, pair.second)); + if (canAccessPeer) { + REQUIRE(true == checkMempoolSetAccessWithGet(pair.first, + pair.second)); + REQUIRE(true == checkMempoolSetAccessWithGetUsingArray(pair.first, + pair.second)); + } else { + WARN("P2P access not enabled between " << pair.first << + " and " << pair.second << " ."); + } + } +} + +/** + * Test Description + * ------------------------ + * - Get the access of the default mempool of each device and verify + * its value. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAccess_GetDefMempoolOfEachDevice") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + hipMemAccessFlags flags; + hipMemLocation location; + hipMemPool_t mem_pool; + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, dev)); + location.id = dev; + location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location)); + REQUIRE(flags == hipMemAccessFlagsProtReadWrite); + } +} diff --git a/catch/unit/memory/hipMemPoolSetGetAttribute.cc b/catch/unit/memory/hipMemPoolSetGetAttribute.cc index 6e0781afce..32b596e19d 100644 --- a/catch/unit/memory/hipMemPoolSetGetAttribute.cc +++ b/catch/unit/memory/hipMemPoolSetGetAttribute.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,7 +17,6 @@ THE SOFTWARE. */ #include "mempool_common.hh" - #include #include @@ -48,18 +47,12 @@ static void MemPoolSetGetAttribute(const hipMemPool_t mempool, const hipMemPoolA * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_Default") { const auto device = GENERATE(range(0, HipTest::getDeviceCount())); - int mem_pool_support = 0; - HIP_CHECK( - hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device) const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created); MemPoolGuard mempool(mempool_type, device); @@ -87,18 +80,12 @@ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_Default") { * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_MemBasic") { const auto device = GENERATE(range(0, HipTest::getDeviceCount())); - int mem_pool_support = 0; - HIP_CHECK( - hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device) const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created); MemPoolGuard mempool(mempool_type, device); @@ -128,18 +115,13 @@ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_MemBasic") { * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetAttribute_Opportunistic") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::created, device_id); @@ -349,18 +331,13 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Opportunistic") { * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetAttribute_EventDependencies") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::created, device_id); @@ -494,12 +471,12 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_EventDependencies") { * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::dev_default, device_id); hipMemPoolAttr attr = hipMemPoolReuseFollowEventDependencies; @@ -536,6 +513,62 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") { } } +/** + * Local function to reset hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh. + */ +static void resetHighValue(hipMemPool_t &memPool) { + uint64_t value = 0; + HIP_CHECK(hipMemPoolSetAttribute(memPool, hipMemPoolAttrReservedMemHigh, + &value)); + HIP_CHECK(hipMemPoolSetAttribute(memPool, hipMemPoolAttrUsedMemHigh, + &value)); +} + +/** + * Test Description + * ------------------------ + * - Reset hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh values + * and validate their values. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolSetAttribute_ResetMemHighAttr") { + checkMempoolSupported(0) + // Create mempool + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + constexpr int N = 1 << 14; + size_t byte_size = (N * sizeof(int)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + // Reset High Attributes + resetHighValue(mem_pool); + + // Allocate from mempool + int *A_d; + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d), + byte_size, mem_pool, 0)); + // Deallocate + HIP_CHECK(hipFreeAsync(A_d, 0)); + HIP_CHECK(hipStreamSynchronize(0)); + // Reset High Attributes + resetHighValue(mem_pool); + // Validate usage statistics + uint64_t valueReservedHighAfterReset = 0, valueUsedHighAfterReset = 0; + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, hipMemPoolAttrReservedMemHigh, + &valueReservedHighAfterReset)); + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, hipMemPoolAttrUsedMemHigh, + &valueUsedHighAfterReset)); + REQUIRE(valueReservedHighAfterReset == 0); + REQUIRE(valueUsedHighAfterReset == 0); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + /** * End doxygen group hipMemPoolSetAttribute. * @} @@ -562,12 +595,12 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") { * - /unit/memory/hipMemPoolSetGetAttribute.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolGetAttribute_Negative_Parameters") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - + checkMempoolSupported(device_id) MemPoolGuard mempool(MemPools::dev_default, device_id); @@ -588,3 +621,392 @@ TEST_CASE("Unit_hipMemPoolGetAttribute_Negative_Parameters") { HIP_CHECK_ERROR(hipMemPoolGetAttribute(mempool.mempool(), attr, nullptr), hipErrorInvalidValue); } } + +constexpr int iterations = 20; +static int reservedHighExp = 0; +static int usedHighExp = 0; + +struct mempoolUsgStat { + uint64_t reservedMem; + uint64_t reservedMemHigh; + uint64_t usedMem; + uint64_t usedMemHigh; +}; + +/** + * Local function to fetch usage statistics. + */ +static void getUsageStatistics(hipMemPool_t &memPool, struct mempoolUsgStat *stat) { + HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrReservedMemCurrent, + &(stat->reservedMem))); + HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrReservedMemHigh, + &(stat->reservedMemHigh))); + HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrUsedMemCurrent, + &(stat->usedMem))); + HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrUsedMemHigh, + &(stat->usedMemHigh))); +} + +/** + * Local function to get default mempool attribute values. + */ +static bool checkDefaultAttributeValues(hipMemPoolAttr attr, int dev) { + // Create mempool in current device + uint64_t ui64_setValue = 0; + int i32_setValue = 0; + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + if (attr == hipMemPoolAttrReleaseThreshold) { + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &ui64_setValue)); + REQUIRE(ui64_setValue == 0); + } else { + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &i32_setValue)); + REQUIRE(i32_setValue == 1); + } + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + return true; +} + +/** + * Local function to set mempool attribute values and validate + * by getting the values. + */ +static bool checkhipMemPoolSetAttribute(hipMemPoolAttr attr, int dev) { + // Create mempool in current device + uint64_t ui64_setValue = 0; + int i32_setValue = 0; + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + if (attr == hipMemPoolAttrReleaseThreshold) { + uint64_t val = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &val)); + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &ui64_setValue)); + REQUIRE(ui64_setValue == val); + } else { + int val = 0; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &val)); + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &i32_setValue)); + REQUIRE(i32_setValue == val); + } + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + return true; +} + +/** + * Test Description + * ------------------------ + * - Validate hipMemPoolGetAttribute() by setting hipMemPoolSetAttribute(). + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_SetGet") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + REQUIRE(true == checkhipMemPoolSetAttribute( + hipMemPoolAttrReleaseThreshold, dev)); + REQUIRE(true == checkhipMemPoolSetAttribute( + hipMemPoolReuseFollowEventDependencies, dev)); + REQUIRE(true == checkhipMemPoolSetAttribute( + hipMemPoolReuseAllowOpportunistic, dev)); + REQUIRE(true == checkhipMemPoolSetAttribute( + hipMemPoolReuseAllowInternalDependencies, dev)); + } +} + +/** + * Test Description + * ------------------------ + * - Validate hipMemPoolAttrUsedMemCurrent value. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_UsedMem") { + checkMempoolSupported(0) + constexpr int N = 1 << 14; + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + size_t byte_size = (N * sizeof(int)); + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + // Get hipMemPoolAttrUsedMemCurrent value for mem_pool when no memory + // is allocated from this pool. + SECTION("Check created mempool") { + uint64_t val = 0; + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrUsedMemCurrent, &val)); + REQUIRE(val == 0); + int *A_d; + // Allocate memory on dev0 from mem_pool. + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d), + byte_size, mem_pool, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrUsedMemCurrent, &val)); + REQUIRE(val == byte_size); + // Free memory back to memory pool. + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Again get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrUsedMemCurrent, &val)); + REQUIRE(val == 0); + } + SECTION("Check default mempool") { + hipMemPool_t mem_pool_default = nullptr; + // assign default mem pool to device + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0)); + uint64_t valInitital = 0, valPostAlloc = 0, valPostFree = 0; + HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default, + hipMemPoolAttrUsedMemCurrent, &valInitital)); + int *A_d; + // Allocate memory on dev0 from mem_pool. + HIP_CHECK(hipMallocAsync(reinterpret_cast(&A_d), + byte_size, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default, + hipMemPoolAttrUsedMemCurrent, &valPostAlloc)); + uint64_t expVal = byte_size; + expVal = expVal + valInitital; + REQUIRE(valPostAlloc == expVal); + // Free memory back to memory pool. + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Again get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default, + hipMemPoolAttrUsedMemCurrent, &valPostFree)); + REQUIRE(valPostFree == valInitital); + } + SECTION("Default memory pool allocation") { + uint64_t val = 0; + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrUsedMemCurrent, &val)); + REQUIRE(val == 0); + int *A_d; + // Allocate memory on dev0 from mem_pool. + HIP_CHECK(hipMallocAsync(reinterpret_cast(&A_d), + byte_size, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrUsedMemCurrent, &val)); + REQUIRE(val == 0); + // Free memory back to memory pool. + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + +/** + * Test Description + * ------------------------ + * - Validate hipMemPoolAttrReservedMemCurrent value. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_ReservedMem") { + checkMempoolSupported(0) + constexpr int N = 1 << 14; + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + uint64_t val = 0; + // Verify that at the beginning mempool contains at least + // 0 memory reserved. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrReservedMemCurrent, &val)); + REQUIRE(val >= 0); + size_t byte_size = (N * sizeof(int)); + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + int *A_d; + // Allocate memory on dev0 from mem_pool. + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d), + byte_size, mem_pool, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrReservedMemCurrent, &val)); + REQUIRE(val >= byte_size); + // Free memory back to memory pool. + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + // Again get hipMemPoolAttrReservedMemCurrent value for mem_pool and validate + // its value. + HIP_CHECK(hipMemPoolGetAttribute(mem_pool, + hipMemPoolAttrReservedMemCurrent, &val)); + REQUIRE(val >= 0); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + +/** + * Test Description + * ------------------------ + * - Validate hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh value. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_UsageStatistics") { + checkMempoolSupported(0) + struct mempoolUsgStat stats; + // Create mempool + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + constexpr int N = 1 << 14; + size_t byte_size = (N * sizeof(int)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = 0; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + // Reset and Take Usage Statistics + resetHighValue(mem_pool); + getUsageStatistics(mem_pool, &stats); + // Validate usage statistics + REQUIRE(stats.reservedMem == stats.reservedMemHigh); + REQUIRE(stats.usedMem == 0); + REQUIRE(stats.usedMemHigh == 0); + + // Allocate from mempool + int *A_d[iterations]; + for (int i = 0; i < iterations; i++) { + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d[i]), + byte_size, mem_pool, 0)); + } + HIP_CHECK(hipStreamSynchronize(0)); + // Take Usage Statistics + getUsageStatistics(mem_pool, &stats); + // Validate usage statistics + REQUIRE(stats.reservedMem == stats.reservedMemHigh); + REQUIRE(stats.usedMem == (iterations*byte_size)); + REQUIRE(stats.usedMemHigh == (iterations*byte_size)); + reservedHighExp = stats.reservedMemHigh; + usedHighExp = (iterations*byte_size); + + // Deallocate half of the allocations + for (int i = 0; i < iterations/2; i++) { + HIP_CHECK(hipFreeAsync(A_d[i], 0)); + } + HIP_CHECK(hipStreamSynchronize(0)); + // Take Usage Statistics + getUsageStatistics(mem_pool, &stats); + // Validate usage statistics + REQUIRE(stats.reservedMemHigh == reservedHighExp); + REQUIRE(stats.usedMem == (iterations*byte_size - (iterations/2)*byte_size)); + REQUIRE(stats.usedMemHigh == usedHighExp); + + // Deallocate remaining allocations + for (int i = (iterations/2); i < iterations; i++) { + HIP_CHECK(hipFreeAsync(A_d[i], 0)); + } + HIP_CHECK(hipStreamSynchronize(0)); + // Take Usage Statistics + getUsageStatistics(mem_pool, &stats); + // Validate usage statistics + REQUIRE(stats.reservedMemHigh == reservedHighExp); + REQUIRE(stats.usedMem == 0); + REQUIRE(stats.usedMemHigh == usedHighExp); + + HIP_CHECK(hipMemPoolDestroy(mem_pool)); +} + +/** + * Test Description + * ------------------------ + * - Validate hipMalloc does not affect default mempool + * statistics. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_hipMalloc_DefMempool") { + checkMempoolSupported(0) + struct mempoolUsgStat stats; + // Create mempool + hipMemPool_t mem_pool; + constexpr int N = 1 << 14; + size_t byte_size = (N * sizeof(int)); + // Get default mempool + HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, 0)); + // Reset and Take Usage Statistics + resetHighValue(mem_pool); + getUsageStatistics(mem_pool, &stats); + uint64_t reservedMemStart, reservedMemHighStart, usedMemStart, + usedMemHighStart; + reservedMemStart = stats.reservedMem; + reservedMemHighStart = stats.reservedMemHigh; + usedMemStart = stats.usedMem; + usedMemHighStart = stats.usedMemHigh; + // Allocate using hipMalloc + int *Ad; + HIP_CHECK(hipMalloc(&Ad, byte_size)); + getUsageStatistics(mem_pool, &stats); + REQUIRE(reservedMemStart == stats.reservedMem); + REQUIRE(reservedMemHighStart == stats.reservedMemHigh); + REQUIRE(usedMemStart == stats.usedMem); + REQUIRE(usedMemHighStart == stats.usedMemHigh); + HIP_CHECK(hipFree(Ad)); +} +/** + * Test Description + * ------------------------ + * - Validate default attribute values. + * ------------------------ + * - catch\unit\memory\hipMemPoolSetGetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolGetAttribute_CheckDefaultValues") { + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + REQUIRE(true == checkDefaultAttributeValues( + hipMemPoolAttrReleaseThreshold, dev)); + REQUIRE(true == checkDefaultAttributeValues( + hipMemPoolReuseFollowEventDependencies, dev)); + REQUIRE(true == checkDefaultAttributeValues( + hipMemPoolReuseAllowOpportunistic, dev)); + REQUIRE(true == checkDefaultAttributeValues( + hipMemPoolReuseAllowInternalDependencies, dev)); + } +} diff --git a/catch/unit/memory/hipMemPoolTrimTo.cc b/catch/unit/memory/hipMemPoolTrimTo.cc index 26b1710194..b1007de94a 100644 --- a/catch/unit/memory/hipMemPoolTrimTo.cc +++ b/catch/unit/memory/hipMemPoolTrimTo.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -43,19 +43,12 @@ * - /unit/memory/hipMemPoolTrimTo.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolTrimTo_Negative_Parameter") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } - + checkMempoolSupported(device_id) size_t trim_size = 1024; SECTION("Passing nullptr to mem_pool") { @@ -73,18 +66,12 @@ TEST_CASE("Unit_hipMemPoolTrimTo_Negative_Parameter") { * - /unit/memory/hipMemPoolTrimTo.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 6.0 + * - HIP_VERSION >= 6.2 */ TEST_CASE("Unit_hipMemPoolTrimTo_Positive_Basic") { int device_id = 0; HIP_CHECK(hipSetDevice(device_id)); - - int mem_pool_support = 0; - HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); - if (!mem_pool_support) { - SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); - return; - } + checkMempoolSupported(device_id) const size_t allocation_size1 = kPageSize * kPageSize * 2; const size_t allocation_size2 = kPageSize / 2; @@ -164,6 +151,147 @@ TEST_CASE("Unit_hipMemPoolTrimTo_Positive_Basic") { HIP_CHECK(hipFreeAsync(reinterpret_cast(alloc_mem2), stream.stream())); } +static bool thread_results[NUMBER_OF_THREADS]; + +/** + * Local function to test hipMemPoolAttrReleaseThreshold. + */ +static bool checkhipMemPoolTrimTo(hipStream_t stream, int N, + int dev = 0) { + streamMemAllocTest testObj(N); + size_t byte_size = N*sizeof(int); + // assign memory to host pointers + testObj.createHostBufferWithData(); + // Create mempool in current device + hipMemPool_t mem_pool; + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + uint64_t setThreshold = UINT64_MAX; + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, hipMemPoolAttrReleaseThreshold, + &setThreshold)); + testObj.useCommonMempool(mem_pool); + for (int iter = 1; iter <= LAUNCH_ITERATIONS; iter++) { + // Set different min_bytes_to_hold for each iteration + size_t min_bytes_to_hold = + (byte_size * 3 * (LAUNCH_ITERATIONS - iter))/LAUNCH_ITERATIONS; + HIP_CHECK(hipMemPoolTrimTo(mem_pool, min_bytes_to_hold)); + // assign memory to device pointers + testObj.allocFromMempool(stream); + testObj.transferToMempool(stream); + testObj.runKernel(stream); + testObj.transferFromMempool(stream); + testObj.freeDevBuf(stream); + // verify and validate + REQUIRE(true == testObj.validateResult()); + HIP_CHECK(hipStreamSynchronize(stream)); + } + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + return true; +} + +/** + * Test Description + * ------------------------ + * - Create explicit mempool1 on default GPU and set attribute + * hipMemPoolAttrReleaseThreshold to UINT64_MAX. + * LOOP for 10 times: {Trim the memory pool in each iteration, then + * Allocate A_d1, B_d1, C_d1 from pool1, memcpy data to (A_d1, B_d1). + * Launch kernel to perform C_d1(x)=A_d1(x)+B_d1(x), verify + * result and free the memory.} After loop free the pool. + * ------------------------ + * - catch\unit\memory\hipMemPoolTrimTo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolTrimTo_VaryingMinBytesToHold") { + checkMempoolSupported(0) + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + constexpr int N = 1 << 20; + REQUIRE(true == checkhipMemPoolTrimTo(stream, N)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +/** + * Test Description + * ------------------------ + * - MultiGPU scenario: Execute the above scenario in each device. + * ------------------------ + * - catch\unit\memory\hipMemPoolTrimTo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolTrimTo_MGpuVaryingMinBytesToHold") { + constexpr int N = 1 << 20; + int numDevices = 0; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + if (numDevices < 2) { + WARN("Number of GPUs insufficient for test"); + } else { + for (int dev = 0; dev < numDevices; dev++) { + checkMempoolSupported(dev) + HIP_CHECK(hipSetDevice(dev)); + // create a stream + hipStream_t stream; + HIP_CHECK(hipStreamCreate(&stream)); + REQUIRE(true == checkhipMemPoolTrimTo(stream, N, dev)); + HIP_CHECK(hipStreamDestroy(stream)); + } + } +} + +/** + * Local Thread Functions + */ +static void thread_Test(hipStream_t stream, int N, int threadNum) { + thread_results[threadNum] = + checkhipMemPoolTrimTo(stream, N, false); +} + +/** + * Test Description + * ------------------------ + * - Multithread scenario: Execute the above scenario in each thread. + * ------------------------ + * - catch\unit\memory\hipMemPoolTrimTo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.2 + */ +TEST_CASE("Unit_hipMemPoolTrimTo_Multithreaded") { + checkMempoolSupported(0) + // create a stream + constexpr int N = 1 << 20; + std::vector tests; + hipStream_t stream[NUMBER_OF_THREADS]; + // Initialize and create streams + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + thread_results[idx] = false; + HIP_CHECK(hipStreamCreate(&stream[idx])); + } + // Spawn the test threads + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + tests.push_back(std::thread(thread_Test, stream[idx], + N, idx)); + } + // Wait for all threads to complete + for (std::thread &t : tests) { + t.join(); + } + // Wait for thread and destroy stream + bool status = true; + for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) { + status = status & thread_results[idx]; + HIP_CHECK(hipStreamDestroy(stream[idx])); + } +} + /** * End doxygen group StreamOTest. * @} diff --git a/catch/unit/memory/mempool_common.hh b/catch/unit/memory/mempool_common.hh index 8edc69ba5b..7e255984ec 100644 --- a/catch/unit/memory/mempool_common.hh +++ b/catch/unit/memory/mempool_common.hh @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -19,6 +19,7 @@ #pragma once #include +#include #include #include @@ -26,6 +27,37 @@ namespace { constexpr auto wait_ms = 500; } // anonymous namespace +/** + * Local Function to test if Hip Stream Ordered Memory allocator + * functionality is supoorted. + */ +static bool isStrmOrdMemAllocSupported(int dev) { + int deviceSupportsMemoryPools = 0; + bool supported = false; + HIP_CHECK(hipDeviceGetAttribute(&deviceSupportsMemoryPools, + hipDeviceAttributeMemoryPoolsSupported, dev)); + if (deviceSupportsMemoryPools != 0) { + supported = true; + } else { + supported = false; + } + return supported; +} + +#define checkMempoolSupported(device) {\ + if (false == isStrmOrdMemAllocSupported(device)) {\ + HipTest::HIP_SKIP_TEST("Memory Pool not supported. Skipping Test..");\ + return;\ + }\ +} + +#define checkIfMultiDev(numOfDev) {\ + if (numOfDev < 2) {\ + HipTest::HIP_SKIP_TEST("Multiple GPUs not available. Skipping Test..");\ + return;\ + }\ +} + template __global__ void kernel_500ms(T* host_res, int clk_rate) { int tid = threadIdx.x + blockIdx.x * blockDim.x; host_res[tid] = tid + 1; @@ -286,3 +318,135 @@ template void MallocMemPoolAsync_Reuse(F malloc_func, const MemPool HIP_CHECK(hipFreeAsync(reinterpret_cast(alloc_mem3), stream.stream())); } + +// definitions +#define THREADS_PER_BLOCK 512 +#define LAUNCH_ITERATIONS 5 +#define NUMBER_OF_THREADS 5 +#define NUM_OF_STREAM 3 + +enum eTestValue { + testdefault, + testMaximum, + testDisabled, + testEnabled +}; + +class streamMemAllocTest { + int *A_h, *B_h, *C_h; + int *A_d, *B_d, *C_d; + int size; + size_t byte_size; + hipMemPool_t mem_pool; + + public: + explicit streamMemAllocTest(int N) : size(N) { + byte_size = N*sizeof(int); + } + // Create host buffers and initialize them with input data + void createHostBufferWithData() { + A_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(A_h != nullptr); + B_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(B_h != nullptr); + C_h = reinterpret_cast(malloc(byte_size)); + REQUIRE(C_h != nullptr); + // set data to host + for (int i = 0; i < size; i++) { + A_h[i] = 2*i + 1; // Odd + B_h[i] = 2*i; // Even + C_h[i] = 0; + } + } + // Instead of creating a mempool in class use the global mempool. + void useCommonMempool(hipMemPool_t mempool) { + mem_pool = mempool; + } + // Create the mempool + void createMempool(hipMemPoolAttr attr, enum eTestValue testtype, + int dev) { + // Create mempool in current device + hipMemPoolProps pool_props{}; + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.location.id = dev; + pool_props.location.type = hipMemLocationTypeDevice; + HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props)); + if (attr == hipMemPoolAttrReleaseThreshold) { + uint64_t setThreshold = 0; + if (testtype == testMaximum) { + setThreshold = UINT64_MAX; + } + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &setThreshold)); + } else if ((attr == hipMemPoolReuseFollowEventDependencies) || + (attr == hipMemPoolReuseAllowOpportunistic) || + (attr == hipMemPoolReuseAllowInternalDependencies)) { + int value = 0; + if (testtype == testEnabled) { + value = 1; + } + HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value)); + } + } + // allocate device memory from mempool. + void allocFromMempool(hipStream_t stream) { + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&A_d), + byte_size, mem_pool, stream)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&B_d), + byte_size, mem_pool, stream)); + HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast(&C_d), + byte_size, mem_pool, stream)); + } + // Transfer data from host to device asynchronously. + void transferToMempool(hipStream_t stream) { + HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice, + stream)); + HIP_CHECK(hipMemcpyAsync(B_d, B_h, byte_size, hipMemcpyHostToDevice, + stream)); + } + // allocate from default mempool. + void allocFromDefMempool(hipStream_t stream) { + HIP_CHECK(hipMallocAsync(reinterpret_cast(&A_d), + byte_size, stream)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&B_d), + byte_size, stream)); + HIP_CHECK(hipMallocAsync(reinterpret_cast(&C_d), + byte_size, stream)); + } + // Execute Kernel to process input data and wait for it. + void runKernel(hipStream_t stream) { + hipLaunchKernelGGL(HipTest::vectorADD, dim3(size / THREADS_PER_BLOCK), + dim3(THREADS_PER_BLOCK), 0, stream, + static_cast(A_d), + static_cast(B_d), C_d, size); + } + // Transfer data from device to host asynchronously. + void transferFromMempool(hipStream_t stream) { + HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost, + stream)); + } + // Validate the data returned from device. + bool validateResult() { + for (int i = 0; i < size; i++) { + if (C_h[i] != (A_h[i] + B_h[i])) { + return false; + } + } + return true; + } + // Free device memory + void freeDevBuf(hipStream_t stream) { + HIP_CHECK(hipFreeAsync(reinterpret_cast(A_d), stream)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(B_d), stream)); + HIP_CHECK(hipFreeAsync(reinterpret_cast(C_d), stream)); + } + // Free mempool if not using global mempool + void freeMempool() { + HIP_CHECK(hipMemPoolDestroy(mem_pool)); + } + // Free all host buffers + void freeHostBuf() { + free(A_h); + free(B_h); + free(C_h); + } +};