SWDEV-311271 - [catch2][dtest] Adding test for mempool and stream ordered memory APIs

Change-Id: Iddeb111e4b512bfc7422abc8e784b0a8e8fb133d
This commit is contained in:
Rupam Chetia
2024-05-06 16:16:55 +05:30
والد 0b28ca3f04
کامیت 456001c308
13فایلهای تغییر یافته به همراه2851 افزوده شده و 127 حذف شده
@@ -9,6 +9,10 @@
],
"DisabledTests": [
#if defined COMMON
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
"Unit_hipMemPoolTrimTo_Multithreaded",
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
"Unit_hipStreamPerThread_DeviceReset_1",
"Unit_hipDeviceGetSharedMemConfig_Positive_Basic",
"Unit_hipDeviceGetSharedMemConfig_Positive_Threaded",
@@ -8,6 +8,10 @@
],
"DisabledTests": [
#if defined COMMON
"Unit_hipMallocFromPoolAsync_MThread_MaxThresh",
"Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool",
"Unit_hipMemPoolTrimTo_Multithreaded",
"Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU",
"Unit_hipMalloc_CoherentTst",
"Unit_hipTextureMipmapObj2D_Check",
"Unit_hipGraphAddHostNode_ClonedGraphwithHostNode",
@@ -114,6 +114,8 @@ set(TEST_SRC
hipMemcpyFromSymbol.cc
hipPtrGetAttribute.cc
hipMemPoolApi.cc
hipDeviceGetMemPool.cc
hipDeviceSetMemPool.cc
hipMemPoolSetGetAccess.cc
hipMemPoolSetGetAttribute.cc
hipMemPoolCreate.cc
@@ -0,0 +1,181 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipDeviceGetMemPool hipDeviceGetMemPool
* @{
* @ingroup MemoryTest
* `hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool,
* int device)` -
* Gets the current memory pool for the specified device.
*/
#include "mempool_common.hh" // NOLINT
#define THREADS_PER_BLOCK 512
static constexpr auto NUM_ELM {1024 * 1024};
/**
* Common function to allocate memory using hipMallocAsync API through a stream,
* launch kernel and perform vectorADD and validate results. Free memory using
* hipFreeAsync.
*/
static bool checkMallocAsync() {
streamMemAllocTest testObj(NUM_ELM);
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Create host buffer with test data.
testObj.createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on stream.
testObj.allocFromDefMempool(stream);
testObj.transferToMempool(stream);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
// Free Buffer Asynchronously on stream.
testObj.freeDevBuf(stream);
HIP_CHECK(hipStreamSynchronize(stream));
// verify and validate
REQUIRE(true == testObj.validateResult());
// Destroy resources
HIP_CHECK(hipStreamDestroy(stream));
testObj.freeHostBuf();
return true;
}
/**
* Test Description
* ------------------------
* - Test case to perform basic scenario, get device mem pool
* and default mem pool and validate both are same.
* ------------------------
* - catch\unit\memory\hipDeviceGetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceGetMemPool_Basic") {
checkMempoolSupported(0)
hipMemPool_t mem_pool_device = nullptr, mem_pool_default = nullptr;
SECTION("Check current mempool is default mempool") {
// assign default mem pool to device
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0));
// assign device mem pool to device
HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, 0));
// validate both are same
REQUIRE(mem_pool_device == mem_pool_default);
}
SECTION("Allocating a mempool does not impact default mempool ctx") {
hipMemPoolProps PoolProps{};
PoolProps.allocType = hipMemAllocationTypePinned;
PoolProps.location.id = 0;
PoolProps.location.type = hipMemLocationTypeDevice;
// assign default mem pool to device
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0));
// create explicit mem pool
hipMemPool_t user_mempool;
HIP_CHECK(hipMemPoolCreate(&user_mempool, &PoolProps));
// assign device mem pool to device
HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, 0));
// validate both are same
REQUIRE(mem_pool_device == mem_pool_default);
HIP_CHECK(hipMemPoolDestroy(user_mempool));
}
}
/**
* Test Description
* ------------------------
* - Test case to check functional scenario, Get the current mempool using
* hipDeviceGetMempool. Set attribute hipMemPoolAttrReleaseThreshold to
* UINT64_MAX. call checkMallocAsync().
* ------------------------
* - catch\unit\memory\hipDeviceGetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceGetMemPool_Functional") {
hipMemPool_t mem_pool = nullptr;
checkMempoolSupported(0)
// assign current mem pool to device
HIP_CHECK(hipDeviceGetMemPool(&mem_pool, 0));
// set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX
hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold;
std::uint64_t value = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value));
// call checkMallocAsync() and validate
REQUIRE(true == checkMallocAsync());
}
/**
* Test Description
* ------------------------
* - Test case to verify multi device, get number of devices available
* and verify device mem pool and default mem pool are same.
* ------------------------
* - catch\unit\memory\hipDeviceGetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceGetMemPool_Multidevice") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int i = 0; i < num_devices; i++) {
checkMempoolSupported(i)
HIP_CHECK(hipSetDevice(i));
hipMemPool_t mem_pool_device = nullptr, mem_pool_default = nullptr;
// assign default mem pool to device
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, i));
// assign device mem pool to device
HIP_CHECK(hipDeviceGetMemPool(&mem_pool_device, i));
// validate both are same
REQUIRE(mem_pool_device == mem_pool_default);
}
}
/**
* Test Description
* ------------------------
* - Test case to check functional scenario, Get the current mempool using
* hipDeviceGetDefaultMemPool. Set attribute hipMemPoolAttrReleaseThreshold
* to UINT64_MAX. call checkMallocAsync().
* ------------------------
* - catch\unit\memory\hipDeviceGetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceGetDefaultMemPool_Functional") {
hipMemPool_t mem_pool = nullptr;
checkMempoolSupported(0)
// assign current mem pool to device
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, 0));
// set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX
hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold;
std::uint64_t value = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value));
// call checkMallocAsync() and validate
REQUIRE(true == checkMallocAsync());
}
@@ -0,0 +1,195 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
* @addtogroup hipDeviceSetMemPool hipDeviceSetMemPool
* @{
* @ingroup MemoryTest
* `hipError_t hipDeviceSetMemPool(int device,
* hipMemPool_t mem_pool)` -
* Sets the current memory pool for the specified device.
*/
#include "mempool_common.hh" // NOLINT
#define THREADS_PER_BLOCK 512
static constexpr auto NUM_ELM {1024 * 1024};
/**
* Common function to allocate memory using hipMallocAsync API through a stream,
* launch kernel and perform vectorADD and validate results. Free memory using
* hipFreeAsync.
*/
static bool checkMallocAsync() {
streamMemAllocTest testObj(NUM_ELM);
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Create host buffer with test data.
testObj.createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on stream.
testObj.allocFromDefMempool(stream);
testObj.transferToMempool(stream);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
// Free Buffer Asynchronously on stream.
testObj.freeDevBuf(stream);
HIP_CHECK(hipStreamSynchronize(stream));
// verify and validate
REQUIRE(true == testObj.validateResult());
// Destroy resources
HIP_CHECK(hipStreamDestroy(stream));
testObj.freeHostBuf();
return true;
}
/**
* Test Description
* ------------------------
* - Test case to verify Basic scenario, create an explicit mem pool
* and validate current pool is same as created mem pool.
* ------------------------
* - catch\unit\memory\hipDeviceSetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceSetMemPool_Basic") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int dev = 0; dev < num_devices; dev++) {
checkMempoolSupported(dev)
hipMemPool_t mem_pool_device = nullptr, curr_mem_pool = nullptr;
// create explicit mem pool
hipMemPoolProps prop{};
prop.allocType = hipMemAllocationTypePinned;
prop.location.id = dev;
prop.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool_device, &prop));
HIP_CHECK(hipDeviceSetMemPool(dev, mem_pool_device));
// get current mem pool
HIP_CHECK(hipDeviceGetMemPool(&curr_mem_pool, dev));
// validate both memory are same.
REQUIRE(curr_mem_pool == mem_pool_device);
// free mem pool
HIP_CHECK(hipMemPoolDestroy(mem_pool_device));
}
}
/**
* Test Description
* ------------------------
* - Create a mempool and set it as the current mempool of the
* device. Validate that destroying the current mempool of a device
* sets the default mempool of that device as the current mempool
* for that device.
* ------------------------
* - catch\unit\memory\hipDeviceSetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceSetMemPool_DestroyCurrentMempool") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int dev = 0; dev < num_devices; dev++) {
checkMempoolSupported(dev)
HIP_CHECK(hipSetDevice(dev));
hipMemPool_t mem_pool_device, curr_mem_pool, def_mem_pool;
hipMemPoolProps prop{};
prop.allocType = hipMemAllocationTypePinned;
prop.location.id = dev;
prop.location.type = hipMemLocationTypeDevice;
// Create explicit mempool
HIP_CHECK(hipMemPoolCreate(&mem_pool_device, &prop));
// Set mempool
HIP_CHECK(hipDeviceSetMemPool(dev, mem_pool_device));
// Destroy mem pool
HIP_CHECK(hipMemPoolDestroy(mem_pool_device));
// Get current mem pool
HIP_CHECK(hipDeviceGetMemPool(&curr_mem_pool, dev));
// Get default mempool
HIP_CHECK(hipDeviceGetDefaultMemPool(&def_mem_pool, dev));
// validate the mempool is the default mempool
REQUIRE(curr_mem_pool == def_mem_pool);
}
}
/**
* Test Description
* ------------------------
* - Create explicit memory pool on default GPU. Set this as the current mempool
* call checkMallocAsync() and destroy the mem pool.
* ------------------------
* - catch\unit\memory\hipDeviceSetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceSetMemPool_functional") {
checkMempoolSupported(0)
hipMemPool_t mem_pool = nullptr;
// create explicit mem pool
hipMemPoolProps PoolProps{};
PoolProps.allocType = hipMemAllocationTypePinned;
PoolProps.location.id = 0;
PoolProps.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &PoolProps));
HIP_CHECK(hipDeviceSetMemPool(0, mem_pool));
// call checkMallocAsync function
REQUIRE(true == checkMallocAsync());
// destroy the mem pool.
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* Test Description
* ------------------------
* - Create explicit memory pool on default GPU. Set this as the current mempool
* Set attribute hipMemPoolAttrReleaseThreshold to UINT64_MAX. call checkMallocAsync()
* and destroy the mem pool.
* ------------------------
* - catch\unit\memory\hipDeviceSetMemPool.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipDeviceSetMemPool_functionalAttribute") {
checkMempoolSupported(0)
hipMemPool_t mem_pool = nullptr;
// create explicit mem pool
hipMemPoolProps PoolProps{};
PoolProps.allocType = hipMemAllocationTypePinned;
PoolProps.location.id = 0;
PoolProps.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &PoolProps));
HIP_CHECK(hipDeviceSetMemPool(0, mem_pool));
// set attribute hipMemPoolAttrReleaseThreshold as UINT64_MAX
hipMemPoolAttr attr = hipMemPoolAttrReleaseThreshold;
std::uint64_t value = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value));
// call checkMallocAsync function
REQUIRE(true == checkMallocAsync());
// destroy the mem pool.
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -23,6 +23,10 @@
#pragma clang diagnostic ignored "-Wunused-parameter"
static bool thread_results[NUMBER_OF_THREADS];
static constexpr auto NUM_ELM {1024 * 1024};
static constexpr int streamPerAsic = 2;
/**
* @addtogroup hipMallocAsync hipMallocAsync
* @{
@@ -41,7 +45,7 @@
* - /unit/memory/hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Basic_OneAlloc") {
MallocMemPoolAsync_OneAlloc(
@@ -61,7 +65,7 @@ TEST_CASE("Unit_hipMallocAsync_Basic_OneAlloc") {
* - /unit/memory/hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Basic_TwoAllocs") {
MallocMemPoolAsync_TwoAllocs(
@@ -80,7 +84,7 @@ TEST_CASE("Unit_hipMallocAsync_Basic_TwoAllocs") {
* - /unit/memory/hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Basic_Reuse") {
MallocMemPoolAsync_Reuse([](void** dev_ptr, size_t size, hipMemPool_t mem_pool,
@@ -102,18 +106,12 @@ TEST_CASE("Unit_hipMallocAsync_Basic_Reuse") {
* - /unit/memory/hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Negative_Parameters") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0)
int* p = nullptr;
size_t max_size = std::numeric_limits<size_t>::max();
@@ -137,6 +135,535 @@ TEST_CASE("Unit_hipMallocAsync_Negative_Parameters") {
}
}
/**
* Common function to allocate memory using hipMallocAsync API through a stream,
* launch kernel and perform vectorADD and validate results. Free memory using
* hipFreeAsync.
*/
static bool checkMallocAsync(hipStream_t stream) {
streamMemAllocTest testObj(NUM_ELM);
// Create host buffer with test data.
testObj.createHostBufferWithData();
// Allocate device memory.
testObj.allocFromDefMempool(stream);
// Transfer data to it asyncronously on stream.
testObj.transferToMempool(stream);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
// Free Buffer Asynchronously on stream.
testObj.freeDevBuf(stream);
HIP_CHECK(hipStreamSynchronize(stream));
// verify and validate
REQUIRE(true == testObj.validateResult());
// Destroy resources
testObj.freeHostBuf();
return true;
}
/**
* Test Description
* ------------------------
* - Test case to perform basic scenario.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_basic") {
checkMempoolSupported(0)
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
REQUIRE(true == checkMallocAsync(stream));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - Test case to perform multi stream, allocate memory using
* hipMallocAsync API for a stream1 and stream2, launch kernel and
* perform vectorADD, synchronize stream1 and stream2 and validate
* results. Free memory using hipFreeAsync.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Multistream_Concurrent") {
checkMempoolSupported(0)
streamMemAllocTest testObj1(NUM_ELM), testObj2(NUM_ELM);
// create multiple streams
hipStream_t stream1, stream2;
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
// Create host buffer with test data.
testObj1.createHostBufferWithData();
testObj2.createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on streams.
testObj1.allocFromDefMempool(stream1);
testObj2.allocFromDefMempool(stream2);
testObj1.transferToMempool(stream1);
testObj2.transferToMempool(stream2);
// Execute kernel and transfer result back to host asynchronously on streams.
testObj1.runKernel(stream1);
testObj2.runKernel(stream2);
testObj1.transferFromMempool(stream1);
testObj2.transferFromMempool(stream2);
// Free Buffer Asynchronously on streams.
testObj1.freeDevBuf(stream1);
testObj2.freeDevBuf(stream2);
// synchronize both stream1 and stream2
HIP_CHECK(hipStreamSynchronize(stream1));
HIP_CHECK(hipStreamSynchronize(stream2));
// verify and validate
REQUIRE(true == testObj1.validateResult());
REQUIRE(true == testObj2.validateResult());
// Destroy resources
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
testObj1.freeHostBuf();
testObj2.freeHostBuf();
}
/**
* Test Description
* ------------------------
* - Allocate memory using hipMallocAsync API through stream1 and record event1,
* allocate event1 to stream2 and put stream2 to wait, launch kernel through
* stream2 and perform vectorADD and validate results. Free memory using hipFreeAsync.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_StreamEvent_CrissCross") {
checkMempoolSupported(0)
streamMemAllocTest testObj1(NUM_ELM), testObj2(NUM_ELM);
// create two streams.
hipStream_t stream1, stream2;
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipStreamCreate(&stream2));
// create an event
hipEvent_t event1 = nullptr, event2 = nullptr;
HIP_CHECK(hipEventCreate(&event1));
HIP_CHECK(hipEventCreate(&event2));
// Create host buffer with test data.
testObj1.createHostBufferWithData();
testObj2.createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on streams.
testObj1.allocFromDefMempool(stream1);
testObj2.allocFromDefMempool(stream2);
testObj1.transferToMempool(stream1);
testObj2.transferToMempool(stream2);
// create event record
HIP_CHECK(hipEventRecord(event1, stream1));
HIP_CHECK(hipEventRecord(event2, stream2));
HIP_CHECK(hipStreamWaitEvent(stream2, event1, 0));
HIP_CHECK(hipStreamWaitEvent(stream1, event2, 0));
// Execute kernel and transfer result back to host asynchronously on streams.
testObj1.runKernel(stream2);
testObj2.runKernel(stream1);
testObj1.transferFromMempool(stream2);
testObj2.transferFromMempool(stream1);
// Free Buffer Asynchronously on streams.
testObj1.freeDevBuf(stream2);
testObj2.freeDevBuf(stream1);
// Wait for stream2.
HIP_CHECK(hipStreamSynchronize(stream2));
HIP_CHECK(hipStreamSynchronize(stream1));
// verify and validate
REQUIRE(true == testObj1.validateResult());
REQUIRE(true == testObj2.validateResult());
// Destroy resources
HIP_CHECK(hipStreamDestroy(stream1));
HIP_CHECK(hipStreamDestroy(stream2));
HIP_CHECK(hipEventDestroy(event1));
HIP_CHECK(hipEventDestroy(event2));
testObj1.freeHostBuf();
testObj2.freeHostBuf();
}
/**
* Test Description
* ------------------------
* - Test case to perform multi device scenario, get number of devices available
* and call checkMallocAsync function for each device available.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Multidevice") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
for (int i = 0; i < num_devices; i++) {
checkMempoolSupported(i)
HIP_CHECK(hipSetDevice(i));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
REQUIRE(true == checkMallocAsync(stream));
HIP_CHECK(hipStreamDestroy(stream));
}
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocAsync, transfer data
* to it asynchrously, launch Kernel, transfer results back to host
* asynchronously and free buffer async in streams across all GPUs.
* The execution in of the queued commands must happen concurrently.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
#if HT_AMD
static void threadQAsyncCommands(streamMemAllocTest* testObj,
hipStream_t strm) {
// Create host buffer with test data.
testObj->createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on stream.
testObj->allocFromDefMempool(strm);
testObj->transferToMempool(strm);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj->runKernel(strm);
testObj->transferFromMempool(strm);
// Free Buffer Asynchronously on stream.
testObj->freeDevBuf(strm);
}
TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
hipStream_t *stream_buf = new hipStream_t[num_devices];
std::vector<streamMemAllocTest*> tesObjBuf;
// Allocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
checkMempoolSupported(idx)
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamCreate(&stream_buf[idx]));
streamMemAllocTest *testObj = new streamMemAllocTest(NUM_ELM);
tesObjBuf.push_back(testObj);
}
// Queue commands in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]);
test.join();
}
// Wait for the streams
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamSynchronize(stream_buf[idx]));
// verify and validate
REQUIRE(true == tesObjBuf[idx]->validateResult());
}
// Deallocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
// Destroy resources
tesObjBuf[idx]->freeHostBuf();
HIP_CHECK(hipStreamDestroy(stream_buf[idx]));
delete tesObjBuf[idx];
}
delete[] stream_buf;
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocAsync, transfer data
* to it asynchrously, launch Kernel, transfer results back to host
* asynchronously and free buffer async in streams across all GPUs
* using multiple streams per GPU.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream") {
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
// 2 stream per ASIC
hipStream_t *stream_buf = new hipStream_t[streamPerAsic*num_devices];
std::vector<streamMemAllocTest*> tesObjBuf;
// Allocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
checkMempoolSupported(idx)
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx + 1]));
streamMemAllocTest *testObj1 = new streamMemAllocTest(NUM_ELM);
tesObjBuf.push_back(testObj1);
streamMemAllocTest *testObj2 = new streamMemAllocTest(NUM_ELM);
tesObjBuf.push_back(testObj2);
}
// Queue commands in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx],
stream_buf[streamPerAsic*idx]);
std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1],
stream_buf[streamPerAsic*idx + 1]);
test1.join();
test2.join();
}
// Wait for the streams
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx + 1]));
// verify and validate
REQUIRE(true == tesObjBuf[streamPerAsic*idx]->validateResult());
REQUIRE(true == tesObjBuf[streamPerAsic*idx + 1]->validateResult());
}
// Deallocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
// Destroy resources
tesObjBuf[streamPerAsic*idx]->freeHostBuf();
tesObjBuf[streamPerAsic*idx + 1]->freeHostBuf();
HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx + 1]));
delete tesObjBuf[streamPerAsic*idx];
delete tesObjBuf[streamPerAsic*idx + 1];
}
delete[] stream_buf;
}
#endif
/**
* Test Description
* ------------------------
* - Assign device memory using hipMalloc, launch kernel and perform
* vector square and validate. Free memory using hipFreeAsync API.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_ByUsinghipMalloc") {
checkMempoolSupported(0)
size_t byte_size = NUM_ELM * sizeof(float);
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
float *A_h, *C_h;
float *A_d, *C_d;
// assign memory to host pointers
A_h = reinterpret_cast<float*>(malloc(byte_size));
C_h = reinterpret_cast<float*>(malloc(byte_size));
// set data to host
for (int i = 0; i < NUM_ELM; i++) {
A_h[i] = 7.0f;
C_h[i] = 0;
}
// assign memory to device pointers
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&A_d), byte_size));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&C_d), byte_size));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice, stream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(NUM_ELM / THREADS_PER_BLOCK),
dim3(THREADS_PER_BLOCK), 0, stream,
static_cast<const float*>(A_d), C_d, NUM_ELM);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(C_d), stream));
HIP_CHECK(hipStreamSynchronize(stream));
// verify and validate
for (int i = 0; i < NUM_ELM; i++) {
REQUIRE(C_h[i] == (A_h[i] * A_h[i]));
}
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
free(C_h);
}
/**
* Test Description
* ------------------------
* - Assign device memory using hipMallocAsync, launch kernel and perform
* vector square and validate. Free memory using hipFree API.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_ByUsinghipFree") {
size_t byte_size = NUM_ELM * sizeof(float);
checkMempoolSupported(0)
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
float *A_h, *C_h;
float *A_d, *C_d;
// assign memory to host pointers
A_h = reinterpret_cast<float*>(malloc(byte_size));
C_h = reinterpret_cast<float*>(malloc(byte_size));
// set data to host
for (int i = 0; i < NUM_ELM; i++) {
A_h[i] = 5.0f;
C_h[i] = 0;
}
// assign memory to device pointers
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&A_d), byte_size, stream));
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&C_d), byte_size, stream));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice, stream));
hipLaunchKernelGGL(HipTest::vector_square, dim3(NUM_ELM / THREADS_PER_BLOCK),
dim3(THREADS_PER_BLOCK), 0, stream,
static_cast<const float*>(A_d), C_d, NUM_ELM);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d)));
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d)));
// verify and validate
for (int i = 0; i < NUM_ELM; i++) {
REQUIRE(C_h[i] == (A_h[i] * A_h[i]));
}
HIP_CHECK(hipStreamDestroy(stream));
free(A_h);
free(C_h);
}
/**
* Test Description
* ------------------------
* - Test case to check hipMallocAsync allocation and usage in multiple
* threads. Each thread will use a local stream.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
static void threadTestLocalStream(int threadNum) {
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
thread_results[threadNum] = checkMallocAsync(stream);
HIP_CHECK(hipStreamDestroy(stream));
}
static bool testhipMallocAsyncMThreadLocalStrm() {
std::vector<std::thread> tests;
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
tests.push_back(std::thread(threadTestLocalStream, idx));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
t.join();
}
// Wait for thread
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
}
return status;
}
TEST_CASE("Unit_hipMallocAsync_MThread_ThreadLocalStream") {
checkMempoolSupported(0)
REQUIRE(true == testhipMallocAsyncMThreadLocalStrm());
}
/**
* Test Description
* ------------------------
* - Test case to check hipMallocAsync allocation and usage in multiple
* threads. Threads will use a common shared stream.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
static void threadTestCommonStream(int threadNum, hipStream_t stream) {
thread_results[threadNum] = checkMallocAsync(stream);
}
static bool testhipMallocAsyncMThreadLocalStrm(hipStream_t stream) {
std::vector<std::thread> tests;
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
tests.push_back(std::thread(threadTestCommonStream, idx, stream));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
t.join();
}
// Wait for thread
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
}
return status;
}
TEST_CASE("Unit_hipMallocAsync_MThread_ThreadSharedStream") {
checkMempoolSupported(0)
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
REQUIRE(true == testhipMallocAsyncMThreadLocalStrm(stream));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - Test case to check MallocAsync functionality on user created stream,
* null stream and hipstreamperthread concurrently. launch kernel and wait
* for all streams to complete and validate results.
* ------------------------
* - catch\unit\memory\hipMallocAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocAsync_DefaultStreams_Concurrent") {
checkMempoolSupported(0)
streamMemAllocTest testObj[3] = {streamMemAllocTest(NUM_ELM),
streamMemAllocTest(NUM_ELM),
streamMemAllocTest(NUM_ELM)};
// create multiple streams
hipStream_t stream[3];
HIP_CHECK(hipStreamCreate(&stream[0]));
stream[1] = 0; // Null stream
stream[2] = hipStreamPerThread;
// Queue operations on the 3 streams
for (int idx = 0; idx < 3; idx++) {
// Create host buffer with test data.
testObj[idx].createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on stream.
testObj[idx].allocFromDefMempool(stream[idx]);
testObj[idx].transferToMempool(stream[idx]);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj[idx].runKernel(stream[idx]);
testObj[idx].transferFromMempool(stream[idx]);
// Free Buffer Asynchronously on stream.
testObj[idx].freeDevBuf(stream[idx]);
}
// Wait for the 3 streams
for (int idx = 0; idx < 3; idx++) {
HIP_CHECK(hipStreamSynchronize(stream[idx]));
// verify and validate
REQUIRE(true == testObj[idx].validateResult());
// Destroy resources
testObj[idx].freeHostBuf();
}
HIP_CHECK(hipStreamDestroy(stream[0]));
}
/**
* End doxygen group StreamOTest.
* @}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -21,6 +21,10 @@
#include <limits>
static bool thread_results[NUMBER_OF_THREADS];
static constexpr int streamPerAsic = 2;
static hipMemPool_t mem_pool_common;
/**
* @addtogroup hipMallocFromPoolAsync hipMallocFromPoolAsync
* @{
@@ -40,7 +44,7 @@
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_OneAlloc") {
MallocMemPoolAsync_OneAlloc(
@@ -61,7 +65,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_OneAlloc") {
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_TwoAllocs") {
MallocMemPoolAsync_TwoAllocs(
@@ -80,7 +84,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_TwoAllocs") {
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_Reuse") {
MallocMemPoolAsync_Reuse(
@@ -104,18 +108,13 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_Reuse") {
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Negative_Parameters") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0);
void* p = nullptr;
size_t max_size = std::numeric_limits<size_t>::max();
@@ -148,6 +147,731 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Negative_Parameters") {
}
}
/**
* Local function to test mempool allocation, usage and freeing on
* multiple user created Streams with inter Stream synchonization.
*/
static bool checkMempoolMultStreamSync(int N) {
streamMemAllocTest testObj(N);
// create multiple streams
hipStream_t streamMemCreate, streamMemAccess, streamMemDestroy;
HIP_CHECK(hipStreamCreate(&streamMemCreate));
HIP_CHECK(hipStreamCreate(&streamMemAccess));
HIP_CHECK(hipStreamCreate(&streamMemDestroy));
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = 0
testObj.createMempool(hipMemPoolAttrReleaseThreshold, testdefault, 0);
hipEvent_t Event1, Event2;
HIP_CHECK(hipEventCreate(&Event1));
HIP_CHECK(hipEventCreate(&Event2));
// Allocate memory and initialize it on streamMemCreate
testObj.allocFromMempool(streamMemCreate);
testObj.transferToMempool(streamMemCreate);
HIP_CHECK(hipEventRecord(Event1, streamMemCreate));
// Launch Kernel on streamMemAccess
HIP_CHECK(hipStreamWaitEvent(streamMemAccess, Event1, 0));
testObj.runKernel(streamMemAccess);
testObj.transferFromMempool(streamMemAccess);
HIP_CHECK(hipEventRecord(Event2, streamMemAccess));
// Launch Kernel on streamMemAccess
HIP_CHECK(hipStreamWaitEvent(streamMemDestroy, Event2, 0));
testObj.freeDevBuf(streamMemDestroy);
HIP_CHECK(hipStreamSynchronize(streamMemDestroy));
// Validate test result and clean all host buffers and mempool
bool results = false;
results = testObj.validateResult();
testObj.freeMempool();
testObj.freeHostBuf();
HIP_CHECK(hipEventDestroy(Event2));
HIP_CHECK(hipEventDestroy(Event1));
HIP_CHECK(hipStreamDestroy(streamMemDestroy));
HIP_CHECK(hipStreamDestroy(streamMemAccess));
HIP_CHECK(hipStreamDestroy(streamMemCreate));
return results;
}
/**
* Local function to test mempool functionality on a user created
* stream, null stream and hipStreamPerThread concurrently. Wait
* for all the streams to complete and validate result.
*/
static bool checkMempoolMultStreamConcurrentExec(int N,
bool useDefStrm = true) {
streamMemAllocTest testObj[3] = {streamMemAllocTest(N),
streamMemAllocTest(N),
streamMemAllocTest(N)};
// create multiple streams
hipStream_t testStreams[3];
HIP_CHECK(hipStreamCreate(&testStreams[0]));
if (useDefStrm) {
testStreams[1] = 0; // null stream
testStreams[2] = hipStreamPerThread;
} else {
HIP_CHECK(hipStreamCreate(&testStreams[1]));
HIP_CHECK(hipStreamCreate(&testStreams[2]));
}
// Create common mempool
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props));
bool results = true;
for (int idx = 0; idx < 3; idx++) {
// Create mempool in current device = 0
testObj[idx].useCommonMempool(mem_pool_common);
// Create host buffer with test data
testObj[idx].createHostBufferWithData();
// Allocate memory and initialize it on testStreams[idx]
testObj[idx].allocFromMempool(testStreams[idx]);
testObj[idx].transferToMempool(testStreams[idx]);
// Launch Kernel on testStreams[idx]
testObj[idx].runKernel(testStreams[idx]);
testObj[idx].transferFromMempool(testStreams[idx]);
testObj[idx].freeDevBuf(testStreams[idx]);
}
for (int idx = 0; idx < 3; idx++) {
HIP_CHECK(hipStreamSynchronize(testStreams[idx]));
// Validate test result and clean all host buffers and mempool
results &= testObj[idx].validateResult();
testObj[idx].freeHostBuf();
}
HIP_CHECK(hipStreamDestroy(testStreams[0]));
if (!useDefStrm) {
HIP_CHECK(hipStreamDestroy(testStreams[1]));
HIP_CHECK(hipStreamDestroy(testStreams[2]));
}
// Destroy common mempool
HIP_CHECK(hipMemPoolDestroy(mem_pool_common));
return results;
}
/**
* Local function to test hipMemPoolAttrReleaseThreshold.
*/
static bool checkMaximumAndDefaultThreshold(hipStream_t stream, int N,
enum eTestValue testtype, int dev = 0) {
streamMemAllocTest testObj(N);
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = dev
testObj.createMempool(hipMemPoolAttrReleaseThreshold, testtype, dev);
bool results = true;
for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
// Allocate memory and initialize it on stream
testObj.allocFromMempool(stream);
testObj.transferToMempool(stream);
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
// validate
testObj.freeDevBuf(stream);
HIP_CHECK(hipStreamSynchronize(stream));
results = testObj.validateResult();
if (!results) {
break;
}
}
testObj.freeMempool();
testObj.freeHostBuf();
return results;
}
/**
* Test Description
* ------------------------
* - Create explicit mempool1 on default GPU and set attribute
* hipMemPoolAttrReleaseThreshold to UINT64_MAX. Create another explicit
* mempool2 on default GPU with default attribute.
* LOOP for 10 times: {Allocate A_d1, B_d1, C_d1 from pool1, memcpy data to
* (A_d1, B_d1). Launch kernel to perform C_d1(x)=A_d1(x)+B_d1(x), verify
* result and free the memory.} After loop free the pool.
* LOOP for 10 times: {Allocate A_d2, B_d2, C_d2 from pool2, memcpy data to
* (A_d2, B_d2). Launch kernel to perform C_d2(x)=A_d2(x)+B_d2(x), verify
* result and free the memory.} After loop free the pool.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold") {
checkMempoolSupported(0)
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
constexpr int N = 1 << 20;
REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N,
testdefault));
REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N,
testMaximum));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - Validate hipMallocFromPoolAsync functionality on null stream.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_NullStream") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkMaximumAndDefaultThreshold(0, N,
testdefault));
REQUIRE(true == checkMaximumAndDefaultThreshold(0, N,
testMaximum));
}
/**
* Test Description
* ------------------------
* - Validate hipMallocFromPoolAsync functionality on hipStreamPerThread.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_hipStreamPerThread") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, N,
testdefault));
REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, N,
testMaximum));
}
/**
* Test Description
* ------------------------
* - Check Release Threshold for multiple device.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu") {
constexpr int N = 1 << 20;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
HIP_CHECK(hipSetDevice(dev));
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N,
testdefault, dev));
REQUIRE(true == checkMaximumAndDefaultThreshold(stream, N,
testMaximum, dev));
HIP_CHECK(hipStreamDestroy(stream));
}
}
/**
* Local Thread Functions
*/
static void threadQAsyncCommands(streamMemAllocTest* testObj,
hipStream_t strm) {
// Create host buffer with test data.
testObj->createHostBufferWithData();
// Allocate device memory and transfer data to it asyncronously on stream.
testObj->allocFromMempool(strm);
testObj->transferToMempool(strm);
// Execute kernel and transfer result back to host asynchronously on stream.
testObj->runKernel(strm);
testObj->transferFromMempool(strm);
// Free Buffer Asynchronously on stream.
testObj->freeDevBuf(strm);
}
static void thread_Test1(hipStream_t stream, int N,
enum eTestValue testtype, int threadNum) {
thread_results[threadNum] =
checkMaximumAndDefaultThreshold(stream, N, testtype, 0);
}
static bool test_hipMallocFromPoolAsync_MThread(
enum eTestValue testtype) {
// create a stream
constexpr int N = 1 << 20;
std::vector<std::thread> tests;
hipStream_t stream[NUMBER_OF_THREADS];
// Initialize and create streams
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
HIP_CHECK(hipStreamCreate(&stream[idx]));
}
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
tests.push_back(std::thread(thread_Test1, stream[idx],
N, testtype, idx));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
t.join();
}
// Wait for thread and destroy stream
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
HIP_CHECK(hipStreamDestroy(stream[idx]));
}
return status;
}
static void thread_Test2(hipMemPool_t mempool, hipStream_t stream,
int N, int threadNum) {
streamMemAllocTest testObj(N);
// Create host buffer with test data
testObj.createHostBufferWithData();
// Use the common mempool
testObj.useCommonMempool(mempool);
bool results = true;
for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
// Allocate memory and initialize it on stream
testObj.allocFromMempool(stream);
testObj.transferToMempool(stream);
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
testObj.freeDevBuf(stream);
// verify and validate
HIP_CHECK(hipStreamSynchronize(stream));
results = testObj.validateResult();
if (!results) {
break;
}
}
testObj.freeHostBuf();
thread_results[threadNum] = results;
}
static bool test_hipMallocFromPoolAsync_MThread_CommonMpool(
enum eTestValue testtype, bool bUseDefault = false) {
// create a stream
constexpr int N = 1 << 20;
std::vector<std::thread> tests;
hipStream_t stream[NUMBER_OF_THREADS];
// Create common mempool
if (bUseDefault) {
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_common, 0));
} else {
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props));
}
if (testtype == testMaximum) {
uint64_t setThreshold = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool_common,
hipMemPoolAttrReleaseThreshold, &setThreshold));
}
// Initialize and create streams
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
HIP_CHECK(hipStreamCreate(&stream[idx]));
}
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
tests.push_back(std::thread(thread_Test2, mem_pool_common,
stream[idx], N, idx));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
t.join();
}
// Wait for thread and destroy stream
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
HIP_CHECK(hipStreamDestroy(stream[idx]));
}
// Destroy common mempool
if (!bUseDefault) {
HIP_CHECK(hipMemPoolDestroy(mem_pool_common));
}
return status;
}
/**
* Local function to test hipMemPoolReuseFollowEventDependencies.
*/
static bool checkReuseFollowEventDepFlag(int N, enum eTestValue testtype) {
streamMemAllocTest testObj(N);
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = 0
testObj.createMempool(hipMemPoolReuseFollowEventDependencies,
testtype, 0);
hipStream_t testStream1, testStream2;
HIP_CHECK(hipStreamCreate(&testStream1));
HIP_CHECK(hipStreamCreate(&testStream2));
bool results = true;
for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
hipEvent_t Event1;
HIP_CHECK(hipEventCreate(&Event1));
// Allocate memory and initialize it on testStream1
testObj.allocFromMempool(testStream1);
testObj.transferToMempool(testStream1);
testObj.runKernel(testStream1);
testObj.transferFromMempool(testStream1);
testObj.freeDevBuf(testStream1);
HIP_CHECK(hipEventRecord(Event1, testStream1));
HIP_CHECK(hipStreamWaitEvent(testStream2, Event1, 0));
// Allocate memory and initialize it on testStream2
testObj.allocFromMempool(testStream2);
testObj.transferToMempool(testStream2);
testObj.runKernel(testStream2);
testObj.transferFromMempool(testStream2);
testObj.freeDevBuf(testStream2);
// validate
HIP_CHECK(hipStreamSynchronize(testStream2));
HIP_CHECK(hipEventDestroy(Event1));
results = testObj.validateResult();
if (!results) {
break;
}
}
testObj.freeMempool();
testObj.freeHostBuf();
HIP_CHECK(hipStreamDestroy(testStream2));
HIP_CHECK(hipStreamDestroy(testStream1));
return results;
}
/**
* Local function to test hipMemPoolReuseAllowOpportunistic and
* hipMemPoolReuseAllowInternalDependencies.
*/
static bool checkReuseAllowOtherFlags(int N, hipMemPoolAttr attr,
enum eTestValue testtype) {
streamMemAllocTest testObj(N);
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = 0
testObj.createMempool(attr, testtype, 0);
hipStream_t testStream1, testStream2;
HIP_CHECK(hipStreamCreate(&testStream1));
HIP_CHECK(hipStreamCreate(&testStream2));
bool results = true;
for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
// Allocate memory and initialize it on testStream1
testObj.allocFromMempool(testStream1);
testObj.transferToMempool(testStream1);
testObj.runKernel(testStream1);
testObj.transferFromMempool(testStream1);
testObj.freeDevBuf(testStream1);
// Allocate memory and initialize it on testStream2
testObj.allocFromMempool(testStream2);
testObj.transferToMempool(testStream2);
testObj.runKernel(testStream2);
testObj.transferFromMempool(testStream2);
testObj.freeDevBuf(testStream2);
// validate
HIP_CHECK(hipStreamSynchronize(testStream2));
results = testObj.validateResult();
if (!results) {
break;
}
}
testObj.freeMempool();
testObj.freeHostBuf();
HIP_CHECK(hipStreamDestroy(testStream2));
HIP_CHECK(hipStreamDestroy(testStream1));
return results;
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocFromPoolAsync, transfer data to it
* asynchrously, launch Kernel, transfer results back to host asynchronously and
* free buffer async in streams across all GPUs. The execution in of the queued
* commands must happen concurrently.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
#if HT_AMD
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") {
auto testType = GENERATE(testdefault, testMaximum);
constexpr int N = 1 << 20;
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
hipStream_t *stream_buf = new hipStream_t[num_devices];
std::vector<streamMemAllocTest*> tesObjBuf;
// Allocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
checkMempoolSupported(idx)
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamCreate(&stream_buf[idx]));
streamMemAllocTest *testObj = new streamMemAllocTest(N);
testObj->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
tesObjBuf.push_back(testObj);
}
// Queue commands in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]);
test.join();
}
// Wait for the streams
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamSynchronize(stream_buf[idx]));
// verify and validate
REQUIRE(true == tesObjBuf[idx]->validateResult());
}
// Deallocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
// Destroy resources
tesObjBuf[idx]->freeMempool();
tesObjBuf[idx]->freeHostBuf();
HIP_CHECK(hipStreamDestroy(stream_buf[idx]));
delete tesObjBuf[idx];
}
delete[] stream_buf;
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocFromPoolAsync, transfer data to it
* asynchrously, launch Kernel, transfer results back to host asynchronously and
* free buffer async in streams across all GPUs using multiple streams per GPU.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream") {
int num_devices;
auto testType = GENERATE(testdefault, testMaximum);
constexpr int N = 1 << 20;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
// 2 stream per ASIC
hipStream_t *stream_buf = new hipStream_t[streamPerAsic*num_devices];
std::vector<streamMemAllocTest*> tesObjBuf;
// Allocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
checkMempoolSupported(idx)
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic*idx + 1]));
streamMemAllocTest *testObj1 = new streamMemAllocTest(N);
testObj1->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
tesObjBuf.push_back(testObj1);
streamMemAllocTest *testObj2 = new streamMemAllocTest(N);
testObj2->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
tesObjBuf.push_back(testObj2);
}
// Queue commands in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx],
stream_buf[streamPerAsic*idx]);
std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1],
stream_buf[streamPerAsic*idx + 1]);
test1.join();
test2.join();
}
// Wait for the streams
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic*idx + 1]));
// verify and validate
REQUIRE(true == tesObjBuf[streamPerAsic*idx]->validateResult());
REQUIRE(true == tesObjBuf[streamPerAsic*idx + 1]->validateResult());
}
// Deallocate resources in each device
for (int idx = 0; idx < num_devices; idx++) {
HIP_CHECK(hipSetDevice(idx));
// Destroy resources
tesObjBuf[streamPerAsic*idx]->freeMempool();
tesObjBuf[streamPerAsic*idx]->freeHostBuf();
tesObjBuf[streamPerAsic*idx + 1]->freeMempool();
tesObjBuf[streamPerAsic*idx + 1]->freeHostBuf();
HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx]));
HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic*idx + 1]));
delete tesObjBuf[streamPerAsic*idx];
delete tesObjBuf[streamPerAsic*idx + 1];
}
delete[] stream_buf;
}
#endif
/**
* Test Description
* ------------------------
* - Validate memory pool creation, allocation of memory from the
* memory pool and usage in multithreaded environment.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_DefaultThresh") {
checkMempoolSupported(0)
REQUIRE(true == test_hipMallocFromPoolAsync_MThread(testdefault));
}
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_MaxThresh") {
checkMempoolSupported(0)
REQUIRE(true == test_hipMallocFromPoolAsync_MThread(testMaximum));
}
/**
* Test Description
* ------------------------
* - Validate memory pool creation in main thread and its usage -
* device memory allocation, data transfer to and from device and
* kernel launch from multiple threads.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool") {
checkMempoolSupported(0)
REQUIRE(true == test_hipMallocFromPoolAsync_MThread_CommonMpool(
testdefault, true));
}
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_MaxThresh") {
checkMempoolSupported(0)
REQUIRE(true == test_hipMallocFromPoolAsync_MThread_CommonMpool(
testMaximum, false));
}
/**
* Test Description
* ------------------------
* - Multiple stream scenario: Create explicit memory pool. Create 3 streams.
* Allocate device memory and initialize on 1st stream, Invoke kernel to
* perform operation on 2nd stream and Free the device memory on 3rd stream.
* Synchronize between stream1, stream2 and stream3 using events.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_Sync") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkMempoolMultStreamSync(N));
}
/**
* Test Description
* ------------------------
* - Multiple stream concurrent execution scenario: Create common memory pool.
* Execute mempool functionality on a user created stream, null stream and
* hipStreamPerThread concurrently. Wait for all the streams to complete and
* validate result.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_DefaultStreams") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkMempoolMultStreamConcurrentExec(N, true));
}
/**
* Test Description
* ------------------------
* - Multiple stream concurrent execution scenario: Create common memory pool.
* Execute mempool functionality on multiple user created streams concurrently.
* Wait for all the streams to complete and validate result.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_UserStreams") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkMempoolMultStreamConcurrentExec(N, false));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseFollowEventDependencies attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseFollowEventDependencies") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkReuseFollowEventDepFlag(N, testDisabled));
REQUIRE(true == checkReuseFollowEventDepFlag(N, testEnabled));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseAllowOpportunistic attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowOpportunistic") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkReuseAllowOtherFlags(N,
hipMemPoolReuseAllowOpportunistic, testDisabled));
REQUIRE(true == checkReuseAllowOtherFlags(N,
hipMemPoolReuseAllowOpportunistic, testEnabled));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseAllowInternalDependencies attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowInternalDependencies") {
checkMempoolSupported(0)
constexpr int N = 1 << 20;
REQUIRE(true == checkReuseAllowOtherFlags(N,
hipMemPoolReuseAllowInternalDependencies, testDisabled));
REQUIRE(true == checkReuseAllowOtherFlags(N,
hipMemPoolReuseAllowInternalDependencies, testEnabled));
}
/**
* End doxygen group StreamOTest.
* @}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -18,6 +18,7 @@
*/
#include <hip_test_common.hh>
#include "mempool_common.hh"
/**
* @addtogroup hipMemPoolCreate hipMemPoolCreate
@@ -42,15 +43,10 @@
* - /unit/memory/hipMemPoolCreate.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolCreate_Negative_Parameter") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0)
int num_dev = 0;
HIP_CHECK(hipGetDeviceCount(&num_dev));
@@ -93,12 +89,7 @@ TEST_CASE("Unit_hipMemPoolCreate_Negative_Parameter") {
}
TEST_CASE("Unit_hipMemPoolCreate_With_maxSize") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0)
hipMemPoolProps pool_props;
memset(&pool_props, 0, sizeof(pool_props));
pool_props.allocType = hipMemAllocationTypePinned;
@@ -126,12 +117,7 @@ TEST_CASE("Unit_hipMemPoolCreate_With_maxSize") {
}
TEST_CASE("Unit_hipMemPoolCreate_Without_maxSize") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0)
hipMemPoolProps pool_props;
memset(&pool_props, 0, sizeof(pool_props));
pool_props.allocType = hipMemAllocationTypePinned;
@@ -151,6 +137,67 @@ TEST_CASE("Unit_hipMemPoolCreate_Without_maxSize") {
HIP_CHECK(hipStreamDestroy(stream));
}
static __global__ void setKer(int *devptr) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
devptr[tid] = tid;
}
/**
* Test Description
* ------------------------
* - hipMemPoolCreate functionality tests
* Create mempool for current device and other devices, if they exist, and
* destroy them.
* ------------------------
* - catch\unit\memory\hipMemPoolCreate.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolCreate_DeviceTest") {
checkMempoolSupported(0)
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
checkIfMultiDev(num_devices)
// Scenario1
SECTION("Simple Device Test") {
for (int dev = 0; dev < num_devices; dev++) {
hipMemPool_t mem_pool;
hipMemPoolProps prop{};
prop.allocType = hipMemAllocationTypePinned;
prop.location.id = dev;
prop.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &prop));
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
}
// Scenario2
SECTION("Accessibility Test") {
// Allocate a memory pool in current device
constexpr int N = 1 << 12;
constexpr int numThreadsPerBlk = 64;
hipMemPool_t mem_pool;
hipMemPoolProps prop{};
prop.allocType = hipMemAllocationTypePinned;
prop.location.id = 0;
prop.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &prop));
// Try allocating from mempool in other device context
for (int dev = 1; dev < num_devices; dev++) {
int *A_d;
HIP_CHECK(hipSetDevice(dev));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d),
N*sizeof(int), mem_pool, 0));
HIP_CHECK(hipStreamSynchronize(0));
HIP_CHECK(hipSetDevice(0));
// Launch kernel to access A_d and free it on dev 0 context
setKer<<<N/numThreadsPerBlk, numThreadsPerBlk, 0, 0>>>(A_d);
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), 0));
HIP_CHECK(hipStreamSynchronize(0));
}
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
}
/**
* End doxygen group StreamOTest.
* @}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -40,15 +40,10 @@
* - /unit/memory/hipMemPoolDestroy.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolDestroy_Negative_Parameter") {
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(0)
hipMemPool_t mem_pool = nullptr;
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -17,9 +17,10 @@
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <utility>
#include <vector>
#include <resource_guards.hh>
#include <utils.hh>
#include "mempool_common.hh"
/**
* @addtogroup hipMemPoolSetAccess hipMemPoolSetAccess
@@ -63,18 +64,12 @@ static void MemPoolSetGetAccess(const MemPools mempool_type, int src_device, int
* - /unit/memory/hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_Basic") {
const auto device = GENERATE(range(0, HipTest::getDeviceCount()));
int mem_pool_support = 0;
HIP_CHECK(
hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device)
const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created);
@@ -101,7 +96,7 @@ int CheckP2PMemPoolSupport(int src_device, int dst_device) {
* - /unit/memory/hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") {
const auto device_count = HipTest::getDeviceCount();
@@ -115,13 +110,12 @@ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_MultipleGPU") {
int mem_pool_support = CheckP2PMemPoolSupport(src_device, dst_device);
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
HipTest::HIP_SKIP_TEST("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created);
const auto access_flag = GENERATE(hipMemAccessFlagsProtNone, hipMemAccessFlagsProtRead,
hipMemAccessFlagsProtReadWrite);
const auto access_flag = hipMemAccessFlagsProtReadWrite;
int can_access_peer = 0;
HIP_CHECK(hipSetDevice(src_device));
@@ -140,7 +134,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) {
int mem_pool_support = CheckP2PMemPoolSupport(src_device, dst_device);
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
HipTest::HIP_SKIP_TEST("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
@@ -216,7 +210,7 @@ void MemPoolSetGetAccess_P2P(const MemPools mempool_type) {
* - /unit/memory/hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") {
const auto device_count = HipTest::getDeviceCount();
@@ -246,13 +240,13 @@ TEST_CASE("Unit_hipMemPoolSetGetAccess_Positive_P2P") {
* - /unit/memory/hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") {
CHECK_IMAGE_SUPPORT
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::dev_default, device_id);
int num_dev = 0;
@@ -296,6 +290,217 @@ TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") {
}
}
/**
* Local function to test hipMemPoolSetAccess function.
*/
static bool checkMempoolSetAccess(int N, int dev0, int dev1) {
// Set the current device context to dev0
HIP_CHECK(hipSetDevice(dev0));
// Create mempool in current device
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
int *A_h, *B_h, *C_h;
size_t byte_size = N*sizeof(int);
// assign memory to host pointers
A_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(A_h != nullptr);
B_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(B_h != nullptr);
C_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(C_h != nullptr);
// set data to host
for (int i = 0; i < N; i++) {
A_h[i] = 2*i + 1; // Odd
B_h[i] = 2*i; // Even
C_h[i] = 0;
}
// create multiple streams
hipStream_t stream0;
HIP_CHECK(hipStreamCreate(&stream0));
int *A_d0, *B_d0;
// Allocate memory on dev0 and initialize it on stream0
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d0),
byte_size, mem_pool, stream0));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&B_d0),
byte_size, mem_pool, stream0));
HIP_CHECK(hipMemcpyAsync(A_d0, A_h, byte_size, hipMemcpyHostToDevice,
stream0));
HIP_CHECK(hipMemcpyAsync(B_d0, B_h, byte_size, hipMemcpyHostToDevice,
stream0));
HIP_CHECK(hipStreamSynchronize(stream0));
HIP_CHECK(hipStreamDestroy(stream0));
// Set the current device context to dev1
HIP_CHECK(hipSetDevice(dev1));
// if withSetAccess is true set the access of mem_pool
// to both dev0 and dev1.
hipMemAccessDesc accessDesc;
accessDesc.location.type = hipMemLocationTypeDevice;
accessDesc.location.id = dev1;
accessDesc.flags = hipMemAccessFlagsProtReadWrite;
HIP_CHECK(hipMemPoolSetAccess(mem_pool, &accessDesc, 1));
int *A_d1, *B_d1, *C_d1;
hipStream_t stream1;
HIP_CHECK(hipStreamCreate(&stream1));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d1),
byte_size, mem_pool, stream1));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&B_d1),
byte_size, mem_pool, stream1));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&C_d1),
byte_size, mem_pool, stream1));
HIP_CHECK(hipMemcpyAsync(A_d1, A_d0, byte_size,
hipMemcpyDeviceToDevice, stream1));
HIP_CHECK(hipMemcpyAsync(B_d1, B_d0, byte_size,
hipMemcpyDeviceToDevice, stream1));
// Launch Kernel on stream1
hipLaunchKernelGGL(HipTest::vectorADD, dim3(N / THREADS_PER_BLOCK),
dim3(THREADS_PER_BLOCK), 0, stream1,
static_cast<const int*>(A_d1),
static_cast<const int*>(B_d1), C_d1, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d1, byte_size, hipMemcpyDeviceToHost,
stream1));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d1), stream1));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(B_d1), stream1));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(C_d1), stream1));
HIP_CHECK(hipStreamSynchronize(stream1));
HIP_CHECK(hipStreamDestroy(stream1));
// Set the current device context back to dev0
HIP_CHECK(hipSetDevice(dev0));
HIP_CHECK(hipMemPoolDestroy(mem_pool));
// verify and validate
for (int i = 0; i < N; i++) {
REQUIRE(C_h[i] == (A_h[i] + B_h[i]));
}
free(A_h);
free(B_h);
free(C_h);
return true;
}
/**
* Local function to get pairs of devices.
*/
static void getDevicePairs(std::vector <std::pair <int, int>> *p2p_pairs,
int numDevices) {
for (int i = 0; i < (numDevices - 1); i++) {
for (int j = i + 1; j < numDevices; j++) {
std::pair <int, int> p2p_pair = std::make_pair(i, j);
p2p_pairs->push_back(p2p_pair);
}
}
}
/**
* Test Description
* ------------------------
* - P2P Access Scenario for mempool: Precondition: NUM OF GPUs >= 2
* and P2P is enabled. Create explicit memory pool (mempool) on default GPU.
* Allocate memory on device 0 and initialize it with data. Set current GPU
* to device 1. Set the access of mempool to device 1. Allocate memory on
* device 1 and transfer data from device 0 to device 1. Launch kernel to
* perform vector add on the data. Validate the data. Destroy the mempool.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAccess_SetAccess") {
constexpr int N = 1 << 14;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
checkIfMultiDev(numDevices)
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
}
std::vector <std::pair <int, int>> p2p_pairs;
getDevicePairs(&p2p_pairs, numDevices);
for (auto pair : p2p_pairs) {
int canAccessPeer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer,
pair.first, pair.second));
if (canAccessPeer) {
REQUIRE(true == checkMempoolSetAccess(N, pair.first,
pair.second));
} else {
WARN("P2P access not enabled between " << pair.first <<
" and " << pair.second << " .");
}
}
}
/**
* Test Description
* ------------------------
* - hipMemPoolSetAccess negative tests
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAccess_NegTst") {
checkMempoolSupported(0)
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
constexpr size_t count = 1;
hipMemAccessDesc descList, descListNeg;
descList.flags = hipMemAccessFlagsProtReadWrite;
descList.location.type = hipMemLocationTypeDevice;
descList.location.id = 0;
// Scenario1
SECTION("memPool NULL check") {
REQUIRE(hipMemPoolSetAccess(nullptr, &descList, count) ==
hipErrorInvalidValue);
}
// Scenario2
SECTION("Invalid Flag") {
descListNeg.flags = static_cast<hipMemAccessFlags>(0xffff);
descListNeg.location.type = hipMemLocationTypeDevice;
descListNeg.location.id = 0;
REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) ==
hipErrorInvalidValue);
}
// Scenario3
#if HT_AMD
SECTION("Invalid location type") {
descListNeg.flags = hipMemAccessFlagsProtReadWrite;
descListNeg.location.type = hipMemLocationTypeInvalid;
descListNeg.location.id = 0;
REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) ==
hipErrorInvalidValue);
}
#endif
// Scenario4
SECTION("Invalid device number") {
descListNeg.flags = hipMemAccessFlagsProtReadWrite;
descListNeg.location.type = hipMemLocationTypeDevice;
descListNeg.location.id = -1;
REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) ==
hipErrorInvalidDevice);
}
// Scenario5
SECTION("Unavailable device number") {
int num_devices = 0;
HIP_CHECK(hipGetDeviceCount(&num_devices));
descListNeg.flags = hipMemAccessFlagsProtReadWrite;
descListNeg.location.type = hipMemLocationTypeDevice;
descListNeg.location.id = num_devices;
REQUIRE(hipMemPoolSetAccess(mem_pool, &descListNeg, count) ==
hipErrorInvalidDevice);
}
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* End doxygen group hipMemPoolSetAccess.
* @}
@@ -323,12 +528,12 @@ TEST_CASE("Unit_hipMemPoolSetAccess_Negative_Parameters") {
* - /unit/memory/hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAccess_Negative_Parameters") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::dev_default, device_id);
int num_dev = 0;
@@ -360,3 +565,129 @@ TEST_CASE("Unit_hipMemPoolGetAccess_Negative_Parameters") {
location.id = device_id;
}
}
/**
* Local function to test hipMemPoolSetAccess/hipMemPoolGetAccess
* function.
*/
static bool checkMempoolSetAccessWithGet(int dev0, int dev1) {
// Create mempool in current device
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
// Set access to dev1
hipMemAccessDesc accessDesc;
accessDesc.location.type = hipMemLocationTypeDevice;
accessDesc.location.id = dev1;
accessDesc.flags = hipMemAccessFlagsProtReadWrite;
HIP_CHECK(hipMemPoolSetAccess(mem_pool, &accessDesc, 1));
// Validate access for dev1
hipMemAccessFlags flags;
hipMemLocation location;
location.type = hipMemLocationTypeDevice;
location.id = dev1;
HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location));
REQUIRE(flags == hipMemAccessFlagsProtReadWrite);
// Validate access for dev0
location.id = dev0;
HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location));
REQUIRE(flags == hipMemAccessFlagsProtReadWrite);
HIP_CHECK(hipMemPoolDestroy(mem_pool));
return true;
}
static bool checkMempoolSetAccessWithGetUsingArray(int dev0, int dev1) {
// Create mempool in current device
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
// Set access of dev0 and dev1
hipMemAccessDesc accessDesc[2];
accessDesc[0].location.type = hipMemLocationTypeDevice;
accessDesc[0].location.id = dev0;
accessDesc[0].flags = hipMemAccessFlagsProtReadWrite;
accessDesc[1].location.type = hipMemLocationTypeDevice;
accessDesc[1].location.id = dev1;
accessDesc[1].flags = hipMemAccessFlagsProtReadWrite;
HIP_CHECK(hipMemPoolSetAccess(mem_pool, accessDesc, 2));
// Validate access for dev0 and dev1
hipMemAccessFlags flags;
hipMemLocation location;
location.type = hipMemLocationTypeDevice;
location.id = dev0;
HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location));
REQUIRE(flags == hipMemAccessFlagsProtReadWrite);
location.id = dev1;
HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location));
REQUIRE(flags == hipMemAccessFlagsProtReadWrite);
return true;
}
/**
* Test Description
* ------------------------
* - Validate hipMemPoolSetAccess with hipMemPoolGetAccess for all
* devices on the system.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAccess_SetGet") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
checkIfMultiDev(numDevices)
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
}
std::vector <std::pair <int, int>> p2p_pairs;
getDevicePairs(&p2p_pairs, numDevices);
for (auto pair : p2p_pairs) {
int canAccessPeer = 0;
HIP_CHECK(hipDeviceCanAccessPeer(&canAccessPeer,
pair.first, pair.second));
if (canAccessPeer) {
REQUIRE(true == checkMempoolSetAccessWithGet(pair.first,
pair.second));
REQUIRE(true == checkMempoolSetAccessWithGetUsingArray(pair.first,
pair.second));
} else {
WARN("P2P access not enabled between " << pair.first <<
" and " << pair.second << " .");
}
}
}
/**
* Test Description
* ------------------------
* - Get the access of the default mempool of each device and verify
* its value.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAccess.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAccess_GetDefMempoolOfEachDevice") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
hipMemAccessFlags flags;
hipMemLocation location;
hipMemPool_t mem_pool;
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, dev));
location.id = dev;
location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolGetAccess(&flags, mem_pool, &location));
REQUIRE(flags == hipMemAccessFlagsProtReadWrite);
}
}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -17,7 +17,6 @@
THE SOFTWARE.
*/
#include "mempool_common.hh"
#include <resource_guards.hh>
#include <utils.hh>
@@ -48,18 +47,12 @@ static void MemPoolSetGetAttribute(const hipMemPool_t mempool, const hipMemPoolA
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_Default") {
const auto device = GENERATE(range(0, HipTest::getDeviceCount()));
int mem_pool_support = 0;
HIP_CHECK(
hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device)
const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created);
MemPoolGuard mempool(mempool_type, device);
@@ -87,18 +80,12 @@ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_Default") {
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_MemBasic") {
const auto device = GENERATE(range(0, HipTest::getDeviceCount()));
int mem_pool_support = 0;
HIP_CHECK(
hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, device));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device)
const auto mempool_type = GENERATE(MemPools::dev_default, MemPools::created);
MemPoolGuard mempool(mempool_type, device);
@@ -128,18 +115,13 @@ TEST_CASE("Unit_hipMemPoolSetGetAttribute_Positive_MemBasic") {
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAttribute_Opportunistic") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::created, device_id);
@@ -349,18 +331,13 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Opportunistic") {
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAttribute_EventDependencies") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::created, device_id);
@@ -494,12 +471,12 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_EventDependencies") {
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::dev_default, device_id);
hipMemPoolAttr attr = hipMemPoolReuseFollowEventDependencies;
@@ -536,6 +513,62 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") {
}
}
/**
* Local function to reset hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh.
*/
static void resetHighValue(hipMemPool_t &memPool) {
uint64_t value = 0;
HIP_CHECK(hipMemPoolSetAttribute(memPool, hipMemPoolAttrReservedMemHigh,
&value));
HIP_CHECK(hipMemPoolSetAttribute(memPool, hipMemPoolAttrUsedMemHigh,
&value));
}
/**
* Test Description
* ------------------------
* - Reset hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh values
* and validate their values.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolSetAttribute_ResetMemHighAttr") {
checkMempoolSupported(0)
// Create mempool
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
constexpr int N = 1 << 14;
size_t byte_size = (N * sizeof(int));
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
// Reset High Attributes
resetHighValue(mem_pool);
// Allocate from mempool
int *A_d;
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d),
byte_size, mem_pool, 0));
// Deallocate
HIP_CHECK(hipFreeAsync(A_d, 0));
HIP_CHECK(hipStreamSynchronize(0));
// Reset High Attributes
resetHighValue(mem_pool);
// Validate usage statistics
uint64_t valueReservedHighAfterReset = 0, valueUsedHighAfterReset = 0;
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, hipMemPoolAttrReservedMemHigh,
&valueReservedHighAfterReset));
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, hipMemPoolAttrUsedMemHigh,
&valueUsedHighAfterReset));
REQUIRE(valueReservedHighAfterReset == 0);
REQUIRE(valueUsedHighAfterReset == 0);
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* End doxygen group hipMemPoolSetAttribute.
* @}
@@ -562,12 +595,12 @@ TEST_CASE("Unit_hipMemPoolSetAttribute_Negative_Parameters") {
* - /unit/memory/hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_Negative_Parameters") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
checkMempoolSupported(device_id)
MemPoolGuard mempool(MemPools::dev_default, device_id);
@@ -588,3 +621,392 @@ TEST_CASE("Unit_hipMemPoolGetAttribute_Negative_Parameters") {
HIP_CHECK_ERROR(hipMemPoolGetAttribute(mempool.mempool(), attr, nullptr), hipErrorInvalidValue);
}
}
constexpr int iterations = 20;
static int reservedHighExp = 0;
static int usedHighExp = 0;
struct mempoolUsgStat {
uint64_t reservedMem;
uint64_t reservedMemHigh;
uint64_t usedMem;
uint64_t usedMemHigh;
};
/**
* Local function to fetch usage statistics.
*/
static void getUsageStatistics(hipMemPool_t &memPool, struct mempoolUsgStat *stat) {
HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrReservedMemCurrent,
&(stat->reservedMem)));
HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrReservedMemHigh,
&(stat->reservedMemHigh)));
HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrUsedMemCurrent,
&(stat->usedMem)));
HIP_CHECK(hipMemPoolGetAttribute(memPool, hipMemPoolAttrUsedMemHigh,
&(stat->usedMemHigh)));
}
/**
* Local function to get default mempool attribute values.
*/
static bool checkDefaultAttributeValues(hipMemPoolAttr attr, int dev) {
// Create mempool in current device
uint64_t ui64_setValue = 0;
int i32_setValue = 0;
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
if (attr == hipMemPoolAttrReleaseThreshold) {
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &ui64_setValue));
REQUIRE(ui64_setValue == 0);
} else {
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &i32_setValue));
REQUIRE(i32_setValue == 1);
}
HIP_CHECK(hipMemPoolDestroy(mem_pool));
return true;
}
/**
* Local function to set mempool attribute values and validate
* by getting the values.
*/
static bool checkhipMemPoolSetAttribute(hipMemPoolAttr attr, int dev) {
// Create mempool in current device
uint64_t ui64_setValue = 0;
int i32_setValue = 0;
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
if (attr == hipMemPoolAttrReleaseThreshold) {
uint64_t val = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &val));
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &ui64_setValue));
REQUIRE(ui64_setValue == val);
} else {
int val = 0;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &val));
HIP_CHECK(hipMemPoolGetAttribute(mem_pool, attr, &i32_setValue));
REQUIRE(i32_setValue == val);
}
HIP_CHECK(hipMemPoolDestroy(mem_pool));
return true;
}
/**
* Test Description
* ------------------------
* - Validate hipMemPoolGetAttribute() by setting hipMemPoolSetAttribute().
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_SetGet") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
REQUIRE(true == checkhipMemPoolSetAttribute(
hipMemPoolAttrReleaseThreshold, dev));
REQUIRE(true == checkhipMemPoolSetAttribute(
hipMemPoolReuseFollowEventDependencies, dev));
REQUIRE(true == checkhipMemPoolSetAttribute(
hipMemPoolReuseAllowOpportunistic, dev));
REQUIRE(true == checkhipMemPoolSetAttribute(
hipMemPoolReuseAllowInternalDependencies, dev));
}
}
/**
* Test Description
* ------------------------
* - Validate hipMemPoolAttrUsedMemCurrent value.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_UsedMem") {
checkMempoolSupported(0)
constexpr int N = 1 << 14;
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
size_t byte_size = (N * sizeof(int));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
// Get hipMemPoolAttrUsedMemCurrent value for mem_pool when no memory
// is allocated from this pool.
SECTION("Check created mempool") {
uint64_t val = 0;
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrUsedMemCurrent, &val));
REQUIRE(val == 0);
int *A_d;
// Allocate memory on dev0 from mem_pool.
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d),
byte_size, mem_pool, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrUsedMemCurrent, &val));
REQUIRE(val == byte_size);
// Free memory back to memory pool.
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Again get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrUsedMemCurrent, &val));
REQUIRE(val == 0);
}
SECTION("Check default mempool") {
hipMemPool_t mem_pool_default = nullptr;
// assign default mem pool to device
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_default, 0));
uint64_t valInitital = 0, valPostAlloc = 0, valPostFree = 0;
HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default,
hipMemPoolAttrUsedMemCurrent, &valInitital));
int *A_d;
// Allocate memory on dev0 from mem_pool.
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&A_d),
byte_size, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default,
hipMemPoolAttrUsedMemCurrent, &valPostAlloc));
uint64_t expVal = byte_size;
expVal = expVal + valInitital;
REQUIRE(valPostAlloc == expVal);
// Free memory back to memory pool.
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Again get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool_default,
hipMemPoolAttrUsedMemCurrent, &valPostFree));
REQUIRE(valPostFree == valInitital);
}
SECTION("Default memory pool allocation") {
uint64_t val = 0;
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrUsedMemCurrent, &val));
REQUIRE(val == 0);
int *A_d;
// Allocate memory on dev0 from mem_pool.
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&A_d),
byte_size, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Get hipMemPoolAttrUsedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrUsedMemCurrent, &val));
REQUIRE(val == 0);
// Free memory back to memory pool.
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipStreamSynchronize(stream));
}
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* Test Description
* ------------------------
* - Validate hipMemPoolAttrReservedMemCurrent value.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_ReservedMem") {
checkMempoolSupported(0)
constexpr int N = 1 << 14;
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
uint64_t val = 0;
// Verify that at the beginning mempool contains at least
// 0 memory reserved.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrReservedMemCurrent, &val));
REQUIRE(val >= 0);
size_t byte_size = (N * sizeof(int));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
int *A_d;
// Allocate memory on dev0 from mem_pool.
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d),
byte_size, mem_pool, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrReservedMemCurrent, &val));
REQUIRE(val >= byte_size);
// Free memory back to memory pool.
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Again get hipMemPoolAttrReservedMemCurrent value for mem_pool and validate
// its value.
HIP_CHECK(hipMemPoolGetAttribute(mem_pool,
hipMemPoolAttrReservedMemCurrent, &val));
REQUIRE(val >= 0);
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* Test Description
* ------------------------
* - Validate hipMemPoolAttrReservedMemHigh and hipMemPoolAttrUsedMemHigh value.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_UsageStatistics") {
checkMempoolSupported(0)
struct mempoolUsgStat stats;
// Create mempool
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
constexpr int N = 1 << 14;
size_t byte_size = (N * sizeof(int));
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
// Reset and Take Usage Statistics
resetHighValue(mem_pool);
getUsageStatistics(mem_pool, &stats);
// Validate usage statistics
REQUIRE(stats.reservedMem == stats.reservedMemHigh);
REQUIRE(stats.usedMem == 0);
REQUIRE(stats.usedMemHigh == 0);
// Allocate from mempool
int *A_d[iterations];
for (int i = 0; i < iterations; i++) {
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d[i]),
byte_size, mem_pool, 0));
}
HIP_CHECK(hipStreamSynchronize(0));
// Take Usage Statistics
getUsageStatistics(mem_pool, &stats);
// Validate usage statistics
REQUIRE(stats.reservedMem == stats.reservedMemHigh);
REQUIRE(stats.usedMem == (iterations*byte_size));
REQUIRE(stats.usedMemHigh == (iterations*byte_size));
reservedHighExp = stats.reservedMemHigh;
usedHighExp = (iterations*byte_size);
// Deallocate half of the allocations
for (int i = 0; i < iterations/2; i++) {
HIP_CHECK(hipFreeAsync(A_d[i], 0));
}
HIP_CHECK(hipStreamSynchronize(0));
// Take Usage Statistics
getUsageStatistics(mem_pool, &stats);
// Validate usage statistics
REQUIRE(stats.reservedMemHigh == reservedHighExp);
REQUIRE(stats.usedMem == (iterations*byte_size - (iterations/2)*byte_size));
REQUIRE(stats.usedMemHigh == usedHighExp);
// Deallocate remaining allocations
for (int i = (iterations/2); i < iterations; i++) {
HIP_CHECK(hipFreeAsync(A_d[i], 0));
}
HIP_CHECK(hipStreamSynchronize(0));
// Take Usage Statistics
getUsageStatistics(mem_pool, &stats);
// Validate usage statistics
REQUIRE(stats.reservedMemHigh == reservedHighExp);
REQUIRE(stats.usedMem == 0);
REQUIRE(stats.usedMemHigh == usedHighExp);
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
/**
* Test Description
* ------------------------
* - Validate hipMalloc does not affect default mempool
* statistics.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_hipMalloc_DefMempool") {
checkMempoolSupported(0)
struct mempoolUsgStat stats;
// Create mempool
hipMemPool_t mem_pool;
constexpr int N = 1 << 14;
size_t byte_size = (N * sizeof(int));
// Get default mempool
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool, 0));
// Reset and Take Usage Statistics
resetHighValue(mem_pool);
getUsageStatistics(mem_pool, &stats);
uint64_t reservedMemStart, reservedMemHighStart, usedMemStart,
usedMemHighStart;
reservedMemStart = stats.reservedMem;
reservedMemHighStart = stats.reservedMemHigh;
usedMemStart = stats.usedMem;
usedMemHighStart = stats.usedMemHigh;
// Allocate using hipMalloc
int *Ad;
HIP_CHECK(hipMalloc(&Ad, byte_size));
getUsageStatistics(mem_pool, &stats);
REQUIRE(reservedMemStart == stats.reservedMem);
REQUIRE(reservedMemHighStart == stats.reservedMemHigh);
REQUIRE(usedMemStart == stats.usedMem);
REQUIRE(usedMemHighStart == stats.usedMemHigh);
HIP_CHECK(hipFree(Ad));
}
/**
* Test Description
* ------------------------
* - Validate default attribute values.
* ------------------------
* - catch\unit\memory\hipMemPoolSetGetAttribute.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolGetAttribute_CheckDefaultValues") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
REQUIRE(true == checkDefaultAttributeValues(
hipMemPoolAttrReleaseThreshold, dev));
REQUIRE(true == checkDefaultAttributeValues(
hipMemPoolReuseFollowEventDependencies, dev));
REQUIRE(true == checkDefaultAttributeValues(
hipMemPoolReuseAllowOpportunistic, dev));
REQUIRE(true == checkDefaultAttributeValues(
hipMemPoolReuseAllowInternalDependencies, dev));
}
}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -43,19 +43,12 @@
* - /unit/memory/hipMemPoolTrimTo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolTrimTo_Negative_Parameter") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device_id)
size_t trim_size = 1024;
SECTION("Passing nullptr to mem_pool") {
@@ -73,18 +66,12 @@ TEST_CASE("Unit_hipMemPoolTrimTo_Negative_Parameter") {
* - /unit/memory/hipMemPoolTrimTo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.0
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolTrimTo_Positive_Basic") {
int device_id = 0;
HIP_CHECK(hipSetDevice(device_id));
int mem_pool_support = 0;
HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0));
if (!mem_pool_support) {
SUCCEED("Runtime doesn't support Memory Pool. Skip the test case.");
return;
}
checkMempoolSupported(device_id)
const size_t allocation_size1 = kPageSize * kPageSize * 2;
const size_t allocation_size2 = kPageSize / 2;
@@ -164,6 +151,147 @@ TEST_CASE("Unit_hipMemPoolTrimTo_Positive_Basic") {
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(alloc_mem2), stream.stream()));
}
static bool thread_results[NUMBER_OF_THREADS];
/**
* Local function to test hipMemPoolAttrReleaseThreshold.
*/
static bool checkhipMemPoolTrimTo(hipStream_t stream, int N,
int dev = 0) {
streamMemAllocTest testObj(N);
size_t byte_size = N*sizeof(int);
// assign memory to host pointers
testObj.createHostBufferWithData();
// Create mempool in current device
hipMemPool_t mem_pool;
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
uint64_t setThreshold = UINT64_MAX;
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, hipMemPoolAttrReleaseThreshold,
&setThreshold));
testObj.useCommonMempool(mem_pool);
for (int iter = 1; iter <= LAUNCH_ITERATIONS; iter++) {
// Set different min_bytes_to_hold for each iteration
size_t min_bytes_to_hold =
(byte_size * 3 * (LAUNCH_ITERATIONS - iter))/LAUNCH_ITERATIONS;
HIP_CHECK(hipMemPoolTrimTo(mem_pool, min_bytes_to_hold));
// assign memory to device pointers
testObj.allocFromMempool(stream);
testObj.transferToMempool(stream);
testObj.runKernel(stream);
testObj.transferFromMempool(stream);
testObj.freeDevBuf(stream);
// verify and validate
REQUIRE(true == testObj.validateResult());
HIP_CHECK(hipStreamSynchronize(stream));
}
HIP_CHECK(hipMemPoolDestroy(mem_pool));
return true;
}
/**
* Test Description
* ------------------------
* - Create explicit mempool1 on default GPU and set attribute
* hipMemPoolAttrReleaseThreshold to UINT64_MAX.
* LOOP for 10 times: {Trim the memory pool in each iteration, then
* Allocate A_d1, B_d1, C_d1 from pool1, memcpy data to (A_d1, B_d1).
* Launch kernel to perform C_d1(x)=A_d1(x)+B_d1(x), verify
* result and free the memory.} After loop free the pool.
* ------------------------
* - catch\unit\memory\hipMemPoolTrimTo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolTrimTo_VaryingMinBytesToHold") {
checkMempoolSupported(0)
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
constexpr int N = 1 << 20;
REQUIRE(true == checkhipMemPoolTrimTo(stream, N));
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Test Description
* ------------------------
* - MultiGPU scenario: Execute the above scenario in each device.
* ------------------------
* - catch\unit\memory\hipMemPoolTrimTo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolTrimTo_MGpuVaryingMinBytesToHold") {
constexpr int N = 1 << 20;
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
if (numDevices < 2) {
WARN("Number of GPUs insufficient for test");
} else {
for (int dev = 0; dev < numDevices; dev++) {
checkMempoolSupported(dev)
HIP_CHECK(hipSetDevice(dev));
// create a stream
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
REQUIRE(true == checkhipMemPoolTrimTo(stream, N, dev));
HIP_CHECK(hipStreamDestroy(stream));
}
}
}
/**
* Local Thread Functions
*/
static void thread_Test(hipStream_t stream, int N, int threadNum) {
thread_results[threadNum] =
checkhipMemPoolTrimTo(stream, N, false);
}
/**
* Test Description
* ------------------------
* - Multithread scenario: Execute the above scenario in each thread.
* ------------------------
* - catch\unit\memory\hipMemPoolTrimTo.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMemPoolTrimTo_Multithreaded") {
checkMempoolSupported(0)
// create a stream
constexpr int N = 1 << 20;
std::vector<std::thread> tests;
hipStream_t stream[NUMBER_OF_THREADS];
// Initialize and create streams
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
HIP_CHECK(hipStreamCreate(&stream[idx]));
}
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
tests.push_back(std::thread(thread_Test, stream[idx],
N, idx));
}
// Wait for all threads to complete
for (std::thread &t : tests) {
t.join();
}
// Wait for thread and destroy stream
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
HIP_CHECK(hipStreamDestroy(stream[idx]));
}
}
/**
* End doxygen group StreamOTest.
* @}
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
@@ -19,6 +19,7 @@
#pragma once
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <resource_guards.hh>
#include <utils.hh>
@@ -26,6 +27,37 @@ namespace {
constexpr auto wait_ms = 500;
} // anonymous namespace
/**
* Local Function to test if Hip Stream Ordered Memory allocator
* functionality is supoorted.
*/
static bool isStrmOrdMemAllocSupported(int dev) {
int deviceSupportsMemoryPools = 0;
bool supported = false;
HIP_CHECK(hipDeviceGetAttribute(&deviceSupportsMemoryPools,
hipDeviceAttributeMemoryPoolsSupported, dev));
if (deviceSupportsMemoryPools != 0) {
supported = true;
} else {
supported = false;
}
return supported;
}
#define checkMempoolSupported(device) {\
if (false == isStrmOrdMemAllocSupported(device)) {\
HipTest::HIP_SKIP_TEST("Memory Pool not supported. Skipping Test..");\
return;\
}\
}
#define checkIfMultiDev(numOfDev) {\
if (numOfDev < 2) {\
HipTest::HIP_SKIP_TEST("Multiple GPUs not available. Skipping Test..");\
return;\
}\
}
template <typename T> __global__ void kernel_500ms(T* host_res, int clk_rate) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
host_res[tid] = tid + 1;
@@ -286,3 +318,135 @@ template <typename F> void MallocMemPoolAsync_Reuse(F malloc_func, const MemPool
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(alloc_mem3), stream.stream()));
}
// definitions
#define THREADS_PER_BLOCK 512
#define LAUNCH_ITERATIONS 5
#define NUMBER_OF_THREADS 5
#define NUM_OF_STREAM 3
enum eTestValue {
testdefault,
testMaximum,
testDisabled,
testEnabled
};
class streamMemAllocTest {
int *A_h, *B_h, *C_h;
int *A_d, *B_d, *C_d;
int size;
size_t byte_size;
hipMemPool_t mem_pool;
public:
explicit streamMemAllocTest(int N) : size(N) {
byte_size = N*sizeof(int);
}
// Create host buffers and initialize them with input data
void createHostBufferWithData() {
A_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(A_h != nullptr);
B_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(B_h != nullptr);
C_h = reinterpret_cast<int*>(malloc(byte_size));
REQUIRE(C_h != nullptr);
// set data to host
for (int i = 0; i < size; i++) {
A_h[i] = 2*i + 1; // Odd
B_h[i] = 2*i; // Even
C_h[i] = 0;
}
}
// Instead of creating a mempool in class use the global mempool.
void useCommonMempool(hipMemPool_t mempool) {
mem_pool = mempool;
}
// Create the mempool
void createMempool(hipMemPoolAttr attr, enum eTestValue testtype,
int dev) {
// Create mempool in current device
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = dev;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool, &pool_props));
if (attr == hipMemPoolAttrReleaseThreshold) {
uint64_t setThreshold = 0;
if (testtype == testMaximum) {
setThreshold = UINT64_MAX;
}
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &setThreshold));
} else if ((attr == hipMemPoolReuseFollowEventDependencies) ||
(attr == hipMemPoolReuseAllowOpportunistic) ||
(attr == hipMemPoolReuseAllowInternalDependencies)) {
int value = 0;
if (testtype == testEnabled) {
value = 1;
}
HIP_CHECK(hipMemPoolSetAttribute(mem_pool, attr, &value));
}
}
// allocate device memory from mempool.
void allocFromMempool(hipStream_t stream) {
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&A_d),
byte_size, mem_pool, stream));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&B_d),
byte_size, mem_pool, stream));
HIP_CHECK(hipMallocFromPoolAsync(reinterpret_cast<void**>(&C_d),
byte_size, mem_pool, stream));
}
// Transfer data from host to device asynchronously.
void transferToMempool(hipStream_t stream) {
HIP_CHECK(hipMemcpyAsync(A_d, A_h, byte_size, hipMemcpyHostToDevice,
stream));
HIP_CHECK(hipMemcpyAsync(B_d, B_h, byte_size, hipMemcpyHostToDevice,
stream));
}
// allocate from default mempool.
void allocFromDefMempool(hipStream_t stream) {
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&A_d),
byte_size, stream));
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&B_d),
byte_size, stream));
HIP_CHECK(hipMallocAsync(reinterpret_cast<void**>(&C_d),
byte_size, stream));
}
// Execute Kernel to process input data and wait for it.
void runKernel(hipStream_t stream) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(size / THREADS_PER_BLOCK),
dim3(THREADS_PER_BLOCK), 0, stream,
static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, size);
}
// Transfer data from device to host asynchronously.
void transferFromMempool(hipStream_t stream) {
HIP_CHECK(hipMemcpyAsync(C_h, C_d, byte_size, hipMemcpyDeviceToHost,
stream));
}
// Validate the data returned from device.
bool validateResult() {
for (int i = 0; i < size; i++) {
if (C_h[i] != (A_h[i] + B_h[i])) {
return false;
}
}
return true;
}
// Free device memory
void freeDevBuf(hipStream_t stream) {
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(A_d), stream));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(B_d), stream));
HIP_CHECK(hipFreeAsync(reinterpret_cast<void*>(C_d), stream));
}
// Free mempool if not using global mempool
void freeMempool() {
HIP_CHECK(hipMemPoolDestroy(mem_pool));
}
// Free all host buffers
void freeHostBuf() {
free(A_h);
free(B_h);
free(C_h);
}
};