Files
rocm-systems/projects/hip-tests/catch/unit/memory/hipMallocFromPoolAsync.cc
T

837 خطوط
30 KiB
C++

/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "mempool_common.hh"

#include <limits>
#include <memory>
#include <thread>
#include <vector>
// Per-thread pass/fail flags; each worker thread writes only the slot matching its thread number.
static bool thread_results[NUMBER_OF_THREADS] = {};
// Number of streams created on each device by the multi-stream, multi-device test.
static constexpr int streamPerAsic{2};
// Memory pool handle shared by the tests that drive one pool from several streams or threads.
static hipMemPool_t mem_pool_common = nullptr;
/**
* @addtogroup hipMallocFromPoolAsync hipMallocFromPoolAsync
* @{
* @ingroup StreamOTest
* `hipMallocFromPoolAsync(void** dev_ptr, size_t size, hipMemPool_t mem_pool, hipStream_t stream)`
* - Allocates memory from a specified pool with stream ordered semantics
*/
/**
* Test Description
* ------------------------
* - Basic test to verify proper allocation and stream ordering of hipMallocFromPoolAsync when one
* memory allocation is performed.
* Test source
* ------------------------
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_OneAlloc") {
  // Adapter handed to the shared helper so that it allocates through hipMallocFromPoolAsync.
  const auto allocFromPool = [](void** ptr, size_t bytes, hipMemPool_t pool, hipStream_t strm) {
    return hipMallocFromPoolAsync(ptr, bytes, pool, strm);
  };
  MallocMemPoolAsync_OneAlloc(allocFromPool, MemPools::created);
}
/**
* Test Description
* ------------------------
* - Basic test to verify proper allocation and stream ordering of hipMallocFromPoolAsync when two
* memory allocations are performed.
* Test source
* ------------------------
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_TwoAllocs") {
  // Adapter handed to the shared helper so that it allocates through hipMallocFromPoolAsync.
  const auto allocFromPool = [](void** ptr, size_t bytes, hipMemPool_t pool, hipStream_t strm) {
    return hipMallocFromPoolAsync(ptr, bytes, pool, strm);
  };
  MallocMemPoolAsync_TwoAllocs(allocFromPool, MemPools::created);
}
/**
* Test Description
* ------------------------
* - Basic test to verify that memory allocated with hipMallocFromPoolAsync can be properly reused.
* Test source
* ------------------------
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Basic_Reuse") {
  // Adapter handed to the shared helper so that it allocates through hipMallocFromPoolAsync.
  const auto allocFromPool = [](void** ptr, size_t bytes, hipMemPool_t pool, hipStream_t strm) {
    return hipMallocFromPoolAsync(ptr, bytes, pool, strm);
  };
  MallocMemPoolAsync_Reuse(allocFromPool, MemPools::created);
}
/**
* Test Description
* ------------------------
* - Test to verify hipMallocFromPoolAsync behavior with invalid arguments:
* -# Nullptr dev_ptr
* -# Nullptr mem_pool
* -# Invalid stream handle
* -# Size is max size_t
*
* Test source
* ------------------------
* - /unit/memory/hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Negative_Parameters") {
  int device_id = 0;
  HIP_CHECK(hipSetDevice(device_id));
  checkMempoolSupported(0);

  // Output pointer shared by the sections that pass a valid dev_ptr.
  void* devPtr = nullptr;
  MemPoolGuard mempool(MemPools::created, device_id);
  StreamGuard stream(Streams::created);

  SECTION("dev_ptr is nullptr") {
    const size_t bytes = 1024;
    HIP_CHECK_ERROR(hipMallocFromPoolAsync(nullptr, bytes, mempool.mempool(), stream.stream()),
                    hipErrorInvalidValue);
  }

  SECTION("Mempool not created") {
    const size_t bytes = 1024;
    hipMemPool_t nullPool = nullptr;
    HIP_CHECK_ERROR(hipMallocFromPoolAsync(&devPtr, bytes, nullPool, stream.stream()),
                    hipErrorInvalidValue);
  }

  SECTION("Size is max size_t") {
    // Request the largest representable size; the call is expected to report out-of-memory.
    const size_t hugeBytes = std::numeric_limits<size_t>::max();
    HIP_CHECK_ERROR(hipMallocFromPoolAsync(&devPtr, hugeBytes, mempool.mempool(), stream.stream()),
                    hipErrorOutOfMemory);
  }
}
/**
* Local function to test mempool allocation, usage and freeing on
* multiple user created Streams with inter Stream synchronization.
*/
static bool checkMempoolMultStreamSync(int N) {
streamMemAllocTest testObj(N);
// create multiple streams
hipStream_t streamMemCreate, streamMemAccess, streamMemDestroy;
HIP_CHECK(hipStreamCreate(&streamMemCreate));
HIP_CHECK(hipStreamCreate(&streamMemAccess));
HIP_CHECK(hipStreamCreate(&streamMemDestroy));
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = 0
testObj.createMempool(hipMemPoolAttrReleaseThreshold, testdefault, 0);
hipEvent_t Event1, Event2;
HIP_CHECK(hipEventCreate(&Event1));
HIP_CHECK(hipEventCreate(&Event2));
// Allocate memory and initialize it on streamMemCreate
testObj.allocFromMempool(streamMemCreate);
testObj.transferToMempool(streamMemCreate);
HIP_CHECK(hipEventRecord(Event1, streamMemCreate));
// Launch Kernel on streamMemAccess
HIP_CHECK(hipStreamWaitEvent(streamMemAccess, Event1, 0));
testObj.runKernel(streamMemAccess);
testObj.transferFromMempool(streamMemAccess);
HIP_CHECK(hipEventRecord(Event2, streamMemAccess));
// Launch Kernel on streamMemAccess
HIP_CHECK(hipStreamWaitEvent(streamMemDestroy, Event2, 0));
testObj.freeDevBuf(streamMemDestroy);
HIP_CHECK(hipStreamSynchronize(streamMemDestroy));
// Validate test result and clean all host buffers and mempool
bool results = false;
results = testObj.validateResult();
testObj.freeMempool();
testObj.freeHostBuf();
HIP_CHECK(hipEventDestroy(Event2));
HIP_CHECK(hipEventDestroy(Event1));
HIP_CHECK(hipStreamDestroy(streamMemDestroy));
HIP_CHECK(hipStreamDestroy(streamMemAccess));
HIP_CHECK(hipStreamDestroy(streamMemCreate));
return results;
}
/**
* Local function to test mempool functionality on a user created
* stream, null stream and hipStreamPerThread concurrently. Wait
* for all the streams to complete and validate result.
*/
static bool checkMempoolMultStreamConcurrentExec(int N, bool useDefStrm = true) {
streamMemAllocTest testObj[3] = {streamMemAllocTest(N), streamMemAllocTest(N),
streamMemAllocTest(N)};
// create multiple streams
hipStream_t testStreams[3];
HIP_CHECK(hipStreamCreate(&testStreams[0]));
if (useDefStrm) {
testStreams[1] = 0; // null stream
testStreams[2] = hipStreamPerThread;
} else {
HIP_CHECK(hipStreamCreate(&testStreams[1]));
HIP_CHECK(hipStreamCreate(&testStreams[2]));
}
// Create common mempool
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props));
bool results = true;
for (int idx = 0; idx < 3; idx++) {
// Create mempool in current device = 0
testObj[idx].useCommonMempool(mem_pool_common);
// Create host buffer with test data
testObj[idx].createHostBufferWithData();
// Allocate memory and initialize it on testStreams[idx]
testObj[idx].allocFromMempool(testStreams[idx]);
testObj[idx].transferToMempool(testStreams[idx]);
// Launch Kernel on testStreams[idx]
testObj[idx].runKernel(testStreams[idx]);
testObj[idx].transferFromMempool(testStreams[idx]);
testObj[idx].freeDevBuf(testStreams[idx]);
}
for (int idx = 0; idx < 3; idx++) {
HIP_CHECK(hipStreamSynchronize(testStreams[idx]));
// Validate test result and clean all host buffers and mempool
results &= testObj[idx].validateResult();
testObj[idx].freeHostBuf();
}
HIP_CHECK(hipStreamDestroy(testStreams[0]));
if (!useDefStrm) {
HIP_CHECK(hipStreamDestroy(testStreams[1]));
HIP_CHECK(hipStreamDestroy(testStreams[2]));
}
// Destroy common mempool
HIP_CHECK(hipMemPoolDestroy(mem_pool_common));
return results;
}
/**
* Local function to test hipMemPoolAttrReleaseThreshold.
*/
static bool checkMaximumAndDefaultThreshold(hipStream_t stream, int N, enum eTestValue testtype,
                                            int dev = 0) {
  streamMemAllocTest fixture(N);
  fixture.createHostBufferWithData();
  // The pool lives on device `dev`; `testtype` selects the release-threshold setting.
  fixture.createMempool(hipMemPoolAttrReleaseThreshold, testtype, dev);

  // Repeat the allocate/compute/free cycle, stopping at the first iteration that fails.
  bool passed = true;
  for (int iter = 0; passed && iter < LAUNCH_ITERATIONS; ++iter) {
    fixture.allocFromMempool(stream);
    fixture.transferToMempool(stream);
    fixture.runKernel(stream);
    fixture.transferFromMempool(stream);
    fixture.freeDevBuf(stream);
    // Everything above is queued on `stream`; wait before inspecting the host result.
    HIP_CHECK(hipStreamSynchronize(stream));
    passed = fixture.validateResult();
  }

  fixture.freeMempool();
  fixture.freeHostBuf();
  return passed;
}
/**
* Test Description
* ------------------------
* - Create explicit mempool1 on default GPU and set attribute
* hipMemPoolAttrReleaseThreshold to UINT64_MAX. Create another explicit
* mempool2 on default GPU with default attribute.
* LOOP for 10 times: {Allocate A_d1, B_d1, C_d1 from pool1, memcpy data to
* (A_d1, B_d1). Launch kernel to perform C_d1(x)=A_d1(x)+B_d1(x), verify
* result and free the memory.} After loop free the pool.
* LOOP for 10 times: {Allocate A_d2, B_d2, C_d2 from pool2, memcpy data to
* (A_d2, B_d2). Launch kernel to perform C_d2(x)=A_d2(x)+B_d2(x), verify
* result and free the memory.} After loop free the pool.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;

  // Both threshold variants run on the same user created stream.
  hipStream_t userStream;
  HIP_CHECK(hipStreamCreate(&userStream));
  REQUIRE(true == checkMaximumAndDefaultThreshold(userStream, kNumElements, testdefault));
  REQUIRE(true == checkMaximumAndDefaultThreshold(userStream, kNumElements, testMaximum));
  HIP_CHECK(hipStreamDestroy(userStream));
}
/**
* Test Description
* ------------------------
* - Validate hipMallocFromPoolAsync functionality on null stream.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_NullStream") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // A stream argument of 0 selects the null stream.
  REQUIRE(true == checkMaximumAndDefaultThreshold(0, kNumElements, testdefault));
  REQUIRE(true == checkMaximumAndDefaultThreshold(0, kNumElements, testMaximum));
}
/**
* Test Description
* ------------------------
* - Validate hipMallocFromPoolAsync functionality on hipStreamPerThread.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_hipStreamPerThread") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // Run both threshold variants on the per-thread default stream.
  REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, kNumElements, testdefault));
  REQUIRE(true == checkMaximumAndDefaultThreshold(hipStreamPerThread, kNumElements, testMaximum));
}
/**
* Test Description
* ------------------------
* - Check Release Threshold for multiple device.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu", "[multigpu]") {
  constexpr int kNumElements = 1 << 20;
  int deviceCount = 0;
  HIP_CHECK(hipGetDeviceCount(&deviceCount));

  // Repeat the release-threshold check on every visible device, each with its own stream.
  for (int dev = 0; dev < deviceCount; ++dev) {
    checkMempoolSupported(dev)
    HIP_CHECK(hipSetDevice(dev));
    hipStream_t devStream;
    HIP_CHECK(hipStreamCreate(&devStream));
    REQUIRE(true == checkMaximumAndDefaultThreshold(devStream, kNumElements, testdefault, dev));
    REQUIRE(true == checkMaximumAndDefaultThreshold(devStream, kNumElements, testMaximum, dev));
    HIP_CHECK(hipStreamDestroy(devStream));
  }
}
/**
* Local Thread Functions
*/
static void threadQAsyncCommands(streamMemAllocTest* testObj, hipStream_t strm, int idx) {
  // `idx` is the device this thread should target.
  HIP_CHECK(hipSetDevice(idx));
  streamMemAllocTest& work = *testObj;

  // Prepare the host inputs, then queue the whole device-side sequence on `strm`:
  // allocate, upload, compute, download, free. This function does not wait for it.
  work.createHostBufferWithData();
  work.allocFromMempool(strm);
  work.transferToMempool(strm);
  work.runKernel(strm);
  work.transferFromMempool(strm);
  work.freeDevBuf(strm);
}
static void thread_Test1(hipStream_t stream, int N, enum eTestValue testtype, int threadNum) {
  // Thread entry point: run the threshold check on device 0 and publish the outcome in this
  // thread's slot of thread_results.
  const bool passed = checkMaximumAndDefaultThreshold(stream, N, testtype, 0);
  thread_results[threadNum] = passed;
}
static bool test_hipMallocFromPoolAsync_MThread(enum eTestValue testtype) {
// create a stream
constexpr int N = 1 << 20;
std::vector<std::thread> tests;
hipStream_t stream[NUMBER_OF_THREADS];
// Initialize and create streams
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
HIP_CHECK(hipStreamCreate(&stream[idx]));
}
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
tests.push_back(std::thread(thread_Test1, stream[idx], N, testtype, idx));
}
// Wait for all threads to complete
for (std::thread& t : tests) {
t.join();
}
// Wait for thread and destroy stream
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
HIP_CHECK(hipStreamDestroy(stream[idx]));
}
return status;
}
static void thread_Test2(hipMemPool_t mempool, hipStream_t stream, int N, int threadNum) {
  streamMemAllocTest fixture(N);
  fixture.createHostBufferWithData();
  // Allocate from the caller's pool instead of creating one here.
  fixture.useCommonMempool(mempool);

  // Repeat the allocate/compute/free cycle, stopping at the first iteration that fails.
  bool passed = true;
  for (int iter = 0; passed && iter < LAUNCH_ITERATIONS; ++iter) {
    fixture.allocFromMempool(stream);
    fixture.transferToMempool(stream);
    fixture.runKernel(stream);
    fixture.transferFromMempool(stream);
    fixture.freeDevBuf(stream);
    // Wait for the queued work before inspecting the host result.
    HIP_CHECK(hipStreamSynchronize(stream));
    passed = fixture.validateResult();
  }

  fixture.freeHostBuf();
  // Publish this thread's verdict in its own slot.
  thread_results[threadNum] = passed;
}
static bool test_hipMallocFromPoolAsync_MThread_CommonMpool(enum eTestValue testtype,
bool bUseDefault = false) {
// create a stream
constexpr int N = 1 << 20;
std::vector<std::thread> tests;
hipStream_t stream[NUMBER_OF_THREADS];
// Create common mempool
if (bUseDefault) {
HIP_CHECK(hipDeviceGetDefaultMemPool(&mem_pool_common, 0));
} else {
hipMemPoolProps pool_props{};
pool_props.allocType = hipMemAllocationTypePinned;
pool_props.location.id = 0;
pool_props.location.type = hipMemLocationTypeDevice;
HIP_CHECK(hipMemPoolCreate(&mem_pool_common, &pool_props));
}
if (testtype == testMaximum) {
uint64_t setThreshold = UINT64_MAX;
HIP_CHECK(
hipMemPoolSetAttribute(mem_pool_common, hipMemPoolAttrReleaseThreshold, &setThreshold));
}
// Initialize and create streams
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
thread_results[idx] = false;
HIP_CHECK(hipStreamCreate(&stream[idx]));
}
// Spawn the test threads
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
tests.push_back(std::thread(thread_Test2, mem_pool_common, stream[idx], N, idx));
}
// Wait for all threads to complete
for (std::thread& t : tests) {
t.join();
}
// Wait for thread and destroy stream
bool status = true;
for (int idx = 0; idx < NUMBER_OF_THREADS; idx++) {
status = status & thread_results[idx];
HIP_CHECK(hipStreamDestroy(stream[idx]));
}
// Destroy common mempool
if (!bUseDefault) {
HIP_CHECK(hipMemPoolDestroy(mem_pool_common));
}
return status;
}
/**
* Local function to test hipMemPoolReuseFollowEventDependencies.
*/
static bool checkReuseFollowEventDepFlag(int N, enum eTestValue testtype) {
streamMemAllocTest testObj(N);
// Create host buffer with test data
testObj.createHostBufferWithData();
// Create mempool in current device = 0
testObj.createMempool(hipMemPoolReuseFollowEventDependencies, testtype, 0);
hipStream_t testStream1, testStream2;
HIP_CHECK(hipStreamCreate(&testStream1));
HIP_CHECK(hipStreamCreate(&testStream2));
bool results = true;
for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
hipEvent_t Event1;
HIP_CHECK(hipEventCreate(&Event1));
// Allocate memory and initialize it on testStream1
testObj.allocFromMempool(testStream1);
testObj.transferToMempool(testStream1);
testObj.runKernel(testStream1);
testObj.transferFromMempool(testStream1);
testObj.freeDevBuf(testStream1);
HIP_CHECK(hipEventRecord(Event1, testStream1));
HIP_CHECK(hipStreamWaitEvent(testStream2, Event1, 0));
// Allocate memory and initialize it on testStream2
testObj.allocFromMempool(testStream2);
testObj.transferToMempool(testStream2);
testObj.runKernel(testStream2);
testObj.transferFromMempool(testStream2);
testObj.freeDevBuf(testStream2);
// validate
HIP_CHECK(hipStreamSynchronize(testStream2));
HIP_CHECK(hipEventDestroy(Event1));
results = testObj.validateResult();
if (!results) {
break;
}
}
testObj.freeMempool();
testObj.freeHostBuf();
HIP_CHECK(hipStreamDestroy(testStream2));
HIP_CHECK(hipStreamDestroy(testStream1));
return results;
}
/**
* Local function to test hipMemPoolReuseAllowOpportunistic and
* hipMemPoolReuseAllowInternalDependencies.
*/
static bool checkReuseAllowOtherFlags(int N, hipMemPoolAttr attr, enum eTestValue testtype) {
  streamMemAllocTest testObj(N);
  // Create host buffer with test data
  testObj.createHostBufferWithData();
  // Create mempool on device 0 with the reuse attribute `attr` set according to `testtype`
  testObj.createMempool(attr, testtype, 0);
  hipStream_t testStream1, testStream2;
  HIP_CHECK(hipStreamCreate(&testStream1));
  HIP_CHECK(hipStreamCreate(&testStream2));
  bool results = true;
  for (int iter = 0; iter < LAUNCH_ITERATIONS; iter++) {
    // Full allocate/compute/free cycle on testStream1
    testObj.allocFromMempool(testStream1);
    testObj.transferToMempool(testStream1);
    testObj.runKernel(testStream1);
    testObj.transferFromMempool(testStream1);
    testObj.freeDevBuf(testStream1);
    // Same cycle on testStream2, deliberately without any event dependency on testStream1
    testObj.allocFromMempool(testStream2);
    testObj.transferToMempool(testStream2);
    testObj.runKernel(testStream2);
    testObj.transferFromMempool(testStream2);
    testObj.freeDevBuf(testStream2);
    // Nothing orders the two streams against each other, so both must be drained before the
    // host result is read. Waiting on testStream2 alone could leave testStream1's queued
    // work (including its copy back to the host) in flight during validation, the next
    // iteration, or the cleanup below.
    HIP_CHECK(hipStreamSynchronize(testStream1));
    HIP_CHECK(hipStreamSynchronize(testStream2));
    results = testObj.validateResult();
    if (!results) {
      break;
    }
  }
  testObj.freeMempool();
  testObj.freeHostBuf();
  HIP_CHECK(hipStreamDestroy(testStream2));
  HIP_CHECK(hipStreamDestroy(testStream1));
  return results;
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocFromPoolAsync, transfer data to it
* asynchronously, launch Kernel, transfer results back to host asynchronously and
* free buffer async in streams across all GPUs. The execution of the queued
* commands must happen concurrently.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
#if HT_AMD
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent", "[multigpu]") {
  auto testType = GENERATE(testdefault, testMaximum);
  constexpr int N = 1 << 20;
  int num_devices = 0;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  checkIfMultiDev(num_devices)
  // Host-side bookkeeping is owned by containers/smart pointers so it is released on every
  // exit path, including a failed REQUIRE/HIP_CHECK.
  std::vector<hipStream_t> stream_buf(static_cast<size_t>(num_devices));
  std::vector<std::unique_ptr<streamMemAllocTest>> tesObjBuf;
  tesObjBuf.reserve(static_cast<size_t>(num_devices));
  // Allocate resources in each device: one stream and one test object with its own pool
  for (int idx = 0; idx < num_devices; idx++) {
    checkMempoolSupported(idx)
    HIP_CHECK(hipSetDevice(idx));
    HIP_CHECK(hipStreamCreate(&stream_buf[idx]));
    tesObjBuf.push_back(std::make_unique<streamMemAllocTest>(N));
    tesObjBuf.back()->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
  }
  // Queue commands in each device from a helper thread.
  // NOTE(review): each thread is joined before the next one starts, so only the queued device
  // work (not the queuing itself) can overlap across devices - confirm this is intended.
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    std::thread test(threadQAsyncCommands, tesObjBuf[idx].get(), stream_buf[idx], idx);
    test.join();
  }
  // Wait for the streams
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    HIP_CHECK(hipStreamSynchronize(stream_buf[idx]));
    // verify and validate
    REQUIRE(true == tesObjBuf[idx]->validateResult());
  }
  // Deallocate resources in each device
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    // Destroy resources
    tesObjBuf[idx]->freeMempool();
    tesObjBuf[idx]->freeHostBuf();
    HIP_CHECK(hipStreamDestroy(stream_buf[idx]));
    // Destroy the test object now, while its device is still the current one.
    tesObjBuf[idx].reset();
  }
}
/**
* Test Description
* ------------------------
* - Queue the following commands hipMallocFromPoolAsync, transfer data to it
* asynchronously, launch Kernel, transfer results back to host asynchronously and
* free buffer async in streams across all GPUs using multiple streams per GPU.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream", "[multigpu]") {
  int num_devices = 0;
  auto testType = GENERATE(testdefault, testMaximum);
  constexpr int N = 1 << 20;
  HIP_CHECK(hipGetDeviceCount(&num_devices));
  checkIfMultiDev(num_devices)
  // streamPerAsic streams (and test objects) per device, stored at
  // [streamPerAsic * device + k]. Host-side bookkeeping is owned by containers/smart pointers
  // so it is released on every exit path, including a failed REQUIRE/HIP_CHECK.
  const size_t total_slots = static_cast<size_t>(streamPerAsic) * static_cast<size_t>(num_devices);
  std::vector<hipStream_t> stream_buf(total_slots);
  std::vector<std::unique_ptr<streamMemAllocTest>> tesObjBuf;
  tesObjBuf.reserve(total_slots);
  // Allocate resources in each device: two streams and two test objects, each with its own pool
  for (int idx = 0; idx < num_devices; idx++) {
    checkMempoolSupported(idx)
    HIP_CHECK(hipSetDevice(idx));
    HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic * idx]));
    HIP_CHECK(hipStreamCreate(&stream_buf[streamPerAsic * idx + 1]));
    tesObjBuf.push_back(std::make_unique<streamMemAllocTest>(N));
    tesObjBuf.back()->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
    tesObjBuf.push_back(std::make_unique<streamMemAllocTest>(N));
    tesObjBuf.back()->createMempool(hipMemPoolAttrReleaseThreshold, testType, idx);
  }
  // Queue commands in each device: the two streams of a device are driven by two threads that
  // run at the same time
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx].get(),
                      stream_buf[streamPerAsic * idx], idx);
    std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx + 1].get(),
                      stream_buf[streamPerAsic * idx + 1], idx);
    test1.join();
    test2.join();
  }
  // Wait for the streams
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic * idx]));
    HIP_CHECK(hipStreamSynchronize(stream_buf[streamPerAsic * idx + 1]));
    // verify and validate
    REQUIRE(true == tesObjBuf[streamPerAsic * idx]->validateResult());
    REQUIRE(true == tesObjBuf[streamPerAsic * idx + 1]->validateResult());
  }
  // Deallocate resources in each device
  for (int idx = 0; idx < num_devices; idx++) {
    HIP_CHECK(hipSetDevice(idx));
    // Destroy resources
    tesObjBuf[streamPerAsic * idx]->freeMempool();
    tesObjBuf[streamPerAsic * idx]->freeHostBuf();
    tesObjBuf[streamPerAsic * idx + 1]->freeMempool();
    tesObjBuf[streamPerAsic * idx + 1]->freeHostBuf();
    HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic * idx]));
    HIP_CHECK(hipStreamDestroy(stream_buf[streamPerAsic * idx + 1]));
    // Destroy the test objects now, while their device is still the current one.
    tesObjBuf[streamPerAsic * idx].reset();
    tesObjBuf[streamPerAsic * idx + 1].reset();
  }
}
#endif
/**
* Test Description
* ------------------------
* - Validate memory pool creation, allocation of memory from the
* memory pool and usage in multithreaded environment.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_DefaultThresh") {
  checkMempoolSupported(0)
  // Every thread creates its own pool with the default release threshold.
  const bool allThreadsPassed = test_hipMallocFromPoolAsync_MThread(testdefault);
  REQUIRE(true == allThreadsPassed);
}
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_MaxThresh") {
  checkMempoolSupported(0)
  // Every thread creates its own pool with the maximum release threshold.
  const bool allThreadsPassed = test_hipMallocFromPoolAsync_MThread(testMaximum);
  REQUIRE(true == allThreadsPassed);
}
/**
* Test Description
* ------------------------
* - Validate memory pool creation in main thread and its usage -
* device memory allocation, data transfer to and from device and
* kernel launch from multiple threads.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_DefaultMempool") {
  checkMempoolSupported(0)
  // All threads share device 0's default memory pool.
  const bool allThreadsPassed = test_hipMallocFromPoolAsync_MThread_CommonMpool(testdefault, true);
  REQUIRE(true == allThreadsPassed);
}
TEST_CASE("Unit_hipMallocFromPoolAsync_MThread_CommonMpool_MaxThresh") {
  checkMempoolSupported(0)
  // All threads share one explicitly created pool with the maximum release threshold.
  const bool allThreadsPassed = test_hipMallocFromPoolAsync_MThread_CommonMpool(testMaximum, false);
  REQUIRE(true == allThreadsPassed);
}
/**
* Test Description
* ------------------------
* - Multiple stream scenario: Create explicit memory pool. Create 3 streams.
* Allocate device memory and initialize on 1st stream, Invoke kernel to
* perform operation on 2nd stream and Free the device memory on 3rd stream.
* Synchronize between stream1, stream2 and stream3 using events.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_Sync") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // Allocate, compute and free on three different streams chained by events.
  REQUIRE(true == checkMempoolMultStreamSync(kNumElements));
}
/**
* Test Description
* ------------------------
* - Multiple stream concurrent execution scenario: Create common memory pool.
* Execute mempool functionality on a user created stream, null stream and
* hipStreamPerThread concurrently. Wait for all the streams to complete and
* validate result.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_DefaultStreams") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // true: use one user created stream plus the null stream and hipStreamPerThread.
  REQUIRE(true == checkMempoolMultStreamConcurrentExec(kNumElements, true));
}
/**
* Test Description
* ------------------------
* - Multiple stream concurrent execution scenario: Create common memory pool.
* Execute mempool functionality on multiple user created streams concurrently.
* Wait for all the streams to complete and validate result.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_MultStream_UserStreams") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // false: use three user created streams.
  REQUIRE(true == checkMempoolMultStreamConcurrentExec(kNumElements, false));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseFollowEventDependencies attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseFollowEventDependencies") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  // Exercise the pool with the attribute disabled first, then enabled.
  REQUIRE(true == checkReuseFollowEventDepFlag(kNumElements, testDisabled));
  REQUIRE(true == checkReuseFollowEventDepFlag(kNumElements, testEnabled));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseAllowOpportunistic attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowOpportunistic") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  const hipMemPoolAttr kAttr = hipMemPoolReuseAllowOpportunistic;
  // Exercise the pool with the attribute disabled first, then enabled.
  REQUIRE(true == checkReuseAllowOtherFlags(kNumElements, kAttr, testDisabled));
  REQUIRE(true == checkReuseAllowOtherFlags(kNumElements, kAttr, testEnabled));
}
/**
* Test Description
* ------------------------
* - Test to validate mempool functionality when enabling and disabling
* hipMemPoolReuseAllowInternalDependencies attribute.
* ------------------------
* - catch\unit\memory\hipMallocFromPoolAsync.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.2
*/
TEST_CASE("Unit_hipMallocFromPoolAsync_ReuseAllowInternalDependencies") {
  checkMempoolSupported(0)
  constexpr int kNumElements = 1 << 20;
  const hipMemPoolAttr kAttr = hipMemPoolReuseAllowInternalDependencies;
  // Exercise the pool with the attribute disabled first, then enabled.
  REQUIRE(true == checkReuseAllowOtherFlags(kNumElements, kAttr, testDisabled));
  REQUIRE(true == checkReuseAllowOtherFlags(kNumElements, kAttr, testEnabled));
}
/**
* End doxygen group StreamOTest.
* @}
*/