SWDEV-312831 - Adding device side malloc/new tests (#2635)

Following tests are added in this patch
1) Functional Tests
2) Stress Tests
3) Multiprocess Tests

Change-Id: Ifd2310fe8f5d555a9af9b87c296fdf6dd4a0a552

[ROCm/hip-tests commit: de0eb21023]
This commit is contained in:
ROCm CI Service Account
2022-08-05 16:21:15 +05:30
gecommit door GitHub
bovenliggende 9d7601de4c
commit aa5f494517
13 gewijzigde bestanden met toevoegingen van 2664 en 0 verwijderingen
@@ -14,6 +14,7 @@ set(LINUX_TEST_SRC
hipMemCoherencyTstMProc.cc
hipIpcEventHandle.cc
hipIpcMemAccessTest.cc
deviceAllocationMproc.cc
)
# the last argument linker libraries is required for this test but optional to the function
@@ -0,0 +1,319 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#ifdef __linux__
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <dlfcn.h>
#endif
#define SIZE 2097152
// GPU threads
#define BLOCKSIZE 512
#define GRIDSIZE 256
__device__ static char* dev_common_ptr = nullptr;
/**
* This kernel allocates a memory chunk using malloc().
*/
static __global__ void kerTestDeviceMalloc(size_t size) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
if (myId == 0) {
dev_common_ptr = reinterpret_cast<char*> (malloc(size));
if (dev_common_ptr == nullptr) {
printf("Device Allocation Failed! \n");
return;
}
}
}
/**
* This kernel writes to the memory location allocated in kernel
* kerTestDeviceMalloc or kerTestDeviceNew.
*/
static __global__ void kerTestDeviceWrite() {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
if (dev_common_ptr == nullptr) {
printf("Device Allocation Failed! \n");
return;
}
*(dev_common_ptr + myId) = SCHAR_MAX;
}
/**
* This kernel frees the memory chunk allocated in kernel
* kerTestDeviceMalloc using free().
*/
static __global__ void kerTestDeviceFree(int *result) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
if (myId == 0) {
if (dev_common_ptr != nullptr) {
*result = 1;
for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
if (*(dev_common_ptr + myId) != SCHAR_MAX) {
*result = 0;
break;
}
}
free(dev_common_ptr);
} else {
*result = 0;
}
}
}
/**
* This kernel allocates a memory chunk using new operator.
*/
static __global__ void kerTestDeviceNew(size_t size) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
if (myId == 0) {
dev_common_ptr = new char[size];
if (dev_common_ptr == nullptr) {
printf("Device Allocation Failed! \n");
return;
}
}
}
/**
* This kernel frees the memory chunk allocated in kernel
* kerTestDeviceNew using delete operator.
*/
static __global__ void kerTestDeviceDelete(int *result) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
if (myId == 0) {
if (dev_common_ptr != nullptr) {
*result = 1;
for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
if (*(dev_common_ptr + myId) != SCHAR_MAX) {
*result = 0;
break;
}
}
delete[] dev_common_ptr;
} else {
*result = 0;
}
}
}
/**
* Test device malloc()/new in both Parent and Child Process.
* Allocate SIZE bytes in both parent and child process. Verify
* the allocated size in both parent and child process.
*/
static bool testDeviceAllocMulProc(bool testmalloc) {
int fd[2];
pid_t childpid;
bool testResult = false;
size_t avail = 0, tot = 0;
// create pipe descriptors
pipe(fd);
// fork process
childpid = fork();
if (childpid > 0) { // Parent
close(fd[1]);
// Allocate in parent
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
} else {
kerTestDeviceNew<<<1, 1>>>(SIZE);
}
HIP_CHECK(hipDeviceSynchronize());
// Check allocated memory size
HIP_CHECK(hipMemGetInfo(&avail, &tot));
if ((tot - avail) < SIZE) {
return false;
}
// parent will wait to read the device cnt
read(fd[0], &testResult, sizeof(testResult));
// close the read-descriptor
close(fd[0]);
// wait for child exit
wait(NULL);
// At this point the child process exits.
// Ensure that device memory allocated from child is freed.
HIP_CHECK(hipMemGetInfo(&avail, &tot));
if ((tot - avail) < SIZE) {
testResult = false;
}
} else if (!childpid) { // Child
// Wait for hipDeviceSetLimit() completion in parent.
close(fd[0]);
// Allocate in child
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
} else {
kerTestDeviceNew<<<1, 1>>>(SIZE);
}
HIP_CHECK(hipDeviceSynchronize());
// Check allocated memory size
HIP_CHECK(hipMemGetInfo(&avail, &tot));
if ((tot - avail) < SIZE) {
testResult = false;
} else {
testResult = true;
}
// send the value on the write-descriptor:
write(fd[1], &testResult, sizeof(testResult));
// close the write descriptor:
close(fd[1]);
exit(0);
}
return testResult;
}
/**
* Test device malloc()/new, write and free()/delete[]
* from both Parent and Child Process. From both Parent and
* Child Process invoke the kernel to allocate memory, the
* kernel to write to the allocated memory and a third kernel
* to verify the memory contents and free it.
*/
static bool testDeviceMemMulProc(bool testmalloc) {
int fd[2];
bool testResult = false;
pid_t childpid;
int testResultChild = 0;
size_t size = BLOCKSIZE*GRIDSIZE;
// create pipe descriptors
pipe(fd);
// fork process
childpid = fork();
if (childpid > 0) { // Parent
close(fd[1]);
int *result_d{nullptr}, *result_h{nullptr};
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
REQUIRE(result_h != nullptr);
// Allocate in parent
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(size);
} else {
kerTestDeviceNew<<<1, 1>>>(size);
}
// Write
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
// Free
if (testmalloc) {
kerTestDeviceFree<<<1, 1>>>(result_d);
} else {
kerTestDeviceDelete<<<1, 1>>>(result_d);
}
HIP_CHECK(hipDeviceSynchronize());
*result_h = 0;
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
hipMemcpyDefault));
if (*result_h == 0) {
testResult = false;
} else {
testResult = true;
}
// parent will wait to read the device cnt
read(fd[0], &testResultChild, sizeof(int));
if (testResultChild == 0) {
testResult &= false;
} else {
testResult &= true;
}
// close the read-descriptor
close(fd[0]);
hipFree(result_d);
free(result_h);
// wait for child exit
wait(NULL);
} else if (!childpid) { // Child
// Wait for hipDeviceSetLimit() completion in parent.
close(fd[0]);
int *result_d{nullptr}, *result_h{nullptr};
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
REQUIRE(result_h != nullptr);
// Allocate in child
if (testmalloc) {
kerTestDeviceMalloc<<<1, 1>>>(size);
} else {
kerTestDeviceNew<<<1, 1>>>(size);
}
// Write
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
// Free
if (testmalloc) {
kerTestDeviceFree<<<1, 1>>>(result_d);
} else {
kerTestDeviceDelete<<<1, 1>>>(result_d);
}
HIP_CHECK(hipDeviceSynchronize());
*result_h = 0;
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
hipMemcpyDefault));
// send the value on the write-descriptor:
write(fd[1], result_h, sizeof(int));
// close the write descriptor:
close(fd[1]);
hipFree(result_d);
free(result_h);
exit(0);
}
return testResult;
}
/**
* Multiprocess device side malloc test.
*/
TEST_CASE("Unit_deviceAllocation_Malloc_MultProcess") {
auto res = testDeviceAllocMulProc(true);
REQUIRE(res == true);
}
/**
* Multiprocess device side new test.
*/
TEST_CASE("Unit_deviceAllocation_New_MultProcess") {
auto res = testDeviceAllocMulProc(false);
REQUIRE(res == true);
}
/**
* Multiprocess device side malloc, write and free test.
*/
TEST_CASE("Unit_deviceAllocation_MallocFree_MultProcess") {
auto res = testDeviceMemMulProc(true);
REQUIRE(res == true);
}
/**
* Multiprocess device side new, write and delete test.
*/
TEST_CASE("Unit_deviceAllocation_NewDelete_MultProcess") {
auto res = testDeviceMemMulProc(false);
REQUIRE(res == true);
}
@@ -6,3 +6,4 @@ if(HIP_PLATFORM MATCHES "amd")
add_subdirectory(printf)
add_subdirectory(stream)
endif()
add_subdirectory(deviceallocation)
@@ -0,0 +1,8 @@
# Common Tests - Test independent of all platforms
set(TEST_SRC
Stress_deviceAllocationStress.cc
)
hip_add_exe_to_target(NAME devalloc
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME stress_test)
@@ -0,0 +1,487 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <unistd.h>
// Size Macros
#define MEMORY_CHUNK_SIZE (1024*1024)
#define MEMORY_CHUNK_SIZE_ODD (1025*1025)
#define MAXIMUM_CHUNKS (256*1024)
// Subtest Macros
#define NO_ALLOCATION_ONHOST 0
#define ALLOCATE_ONHOST_HIPMALLOCMANAGED 1
#define ALLOCATE_ONHOST_HIPMALLOC 2
// Test Type Macros
#define TEST_MALLOC_FREE 1
#define TEST_NEW_DELETE 2
// GPU threads
#define BLOCKSIZE 512
#define GRIDSIZE 512
// Test parameters
// Two different loops
#define NUM_OF_LOOP_SINGLE_KER 100000
#define NUM_OF_LOOP_MULTIPLE_KER 20000
// The following flag is defined for platforms (nvidia)
// which honors device memory limit. For AMD this flag
// is disabled and defect is raised.
#if HT_NVIDIA
#define HT_HONORS_DEVICEMEMORY_LIMIT
#endif
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
__device__ static char* dev_mem_glob[MAXIMUM_CHUNKS];
#endif
__device__ static int* dev_mem[GRIDSIZE];
__device__ static int* dev_common_ptr;
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
/**
* This kernel checks kernel allocation of size more than available
* memory.
*/
static __global__ void kerTestDynamicAllocNeg(int test_type,
size_t perThreadSize,
int *ret) {
// Allocate
char* ptr = nullptr;
printf("Memory to allocate in GPU = %zu \n", perThreadSize);
if (test_type == TEST_MALLOC_FREE) {
ptr = reinterpret_cast<char*> (malloc(perThreadSize));
} else {
ptr = new char[perThreadSize];
}
printf("Allocation Done \n");
if (ptr == nullptr) {
printf("Allocation Failed. PASSED! \n");
*ret = 0;
return;
} else {
// Free memory
if (test_type == TEST_MALLOC_FREE) {
free(ptr);
} else {
delete[] ptr;
}
*ret = -1;
}
}
/**
* This kernel allocates memory till nullptr is returned.
*/
static __global__ void kerAllocTillExhaust(int test_type,
size_t *total_allocated_mem,
size_t mem_chunk_size) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate memory in thread 0 of block 0
if (0 == myId) {
for (int idx = 0; idx < MAXIMUM_CHUNKS; idx++) {
dev_mem_glob[idx] = nullptr;
}
int idx = 0;
if (test_type == TEST_MALLOC_FREE) {
do {
dev_mem_glob[idx] =
reinterpret_cast<char*> (malloc(mem_chunk_size));
if (idx >= MAXIMUM_CHUNKS) {
break;
}
} while (dev_mem_glob[idx++] != nullptr);
} else {
do {
dev_mem_glob[idx] =
reinterpret_cast<char*> (new char[mem_chunk_size]);
if (idx >= MAXIMUM_CHUNKS) {
break;
}
} while (dev_mem_glob[idx++] != nullptr);
}
idx = 0;
*total_allocated_mem = 0;
while ((dev_mem_glob[idx] != nullptr) &&
(idx < MAXIMUM_CHUNKS)) {
*total_allocated_mem = *total_allocated_mem + mem_chunk_size;
idx++;
}
}
}
/**
* This kernel deletes the memory.
*/
static __global__ void kerFreeAll(int test_type) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
if (0 == myId) {
if (test_type == TEST_MALLOC_FREE) {
int idx = 0;
while (dev_mem_glob[idx] != nullptr) {
free(dev_mem_glob[idx++]);
if (idx >= MAXIMUM_CHUNKS) {
break;
}
}
} else {
int idx = 0;
while (dev_mem_glob[idx] != nullptr) {
delete[] (dev_mem_glob[idx++]);
if (idx >= MAXIMUM_CHUNKS) {
break;
}
}
}
}
}
#endif
/**
* This kernel allocates memory once in thread 0 of each block and
* access this memory in all threads of the block. The memory is
* finally deleted in last thread of each block.
*/
static __global__ void kerBlockLevelMemoryAllocation(int *outputBuf,
int test_type) {
int myThreadId = threadIdx.x, lastThreadId = (blockDim.x - 1);
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate memory in thread 0
if (0 == myThreadId) {
if (test_type == TEST_MALLOC_FREE) {
dev_mem[blockIdx.x] =
reinterpret_cast<int*> (malloc(blockDim.x*sizeof(int)));
} else {
dev_mem[blockIdx.x] =
reinterpret_cast<int*> (new int[blockDim.x]);
}
}
// All threads wait at this barrier
__syncthreads();
// Check allocated memory in all threads in block before access
if (dev_mem[blockIdx.x] == nullptr) {
printf("Device Allocation Failed in thread = %d \n", myId);
return;
}
int *ptr = reinterpret_cast<int*> (dev_mem[blockIdx.x]);
// Copy to buffer
ptr[myThreadId] = myId;
// All threads wait
__syncthreads();
// Copy memory to host and free the memory in thread <blockDim.x - 1>
if (lastThreadId == myThreadId) {
for (size_t idx = 0; idx < blockDim.x; idx++) {
outputBuf[idx + blockDim.x * blockIdx.x] = ptr[idx];
}
if (test_type == TEST_MALLOC_FREE) {
free(ptr);
} else {
delete[] ptr;
}
}
}
/**
* This kernel allocates memory in one thread.
*/
static __global__ void kerAlloc(int test_type) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate memory in thread 0 of block 0
if (0 == myId) {
if (test_type == TEST_MALLOC_FREE) {
dev_common_ptr =
reinterpret_cast<int*> (malloc(blockDim.x*gridDim.x*sizeof(int)));
} else {
dev_common_ptr =
reinterpret_cast<int*> (new int[blockDim.x*gridDim.x]);
}
}
}
/**
* This kernel writes to memory allocated in <kerAlloc>.
*/
static __global__ void kerWrite() {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Check allocated memory in all threads in block before access
if (dev_common_ptr == nullptr) {
printf("Device Allocation Failed in thread = %d \n", myId);
return;
}
// Copy to buffer
dev_common_ptr[myId] = myId;
}
/**
* This kernel copies the contents of memory allocated in <kerAlloc>
* to host and deletes the memory from thread 0.
*/
static __global__ void kerFree(int *outputBuf, int test_type) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Check allocated memory in all threads in block before access
if (dev_common_ptr == nullptr) {
printf("Device Allocation Failed in thread = %d \n", myId);
return;
}
if (0 == myId) {
for (size_t idx = 0; idx < (blockDim.x*gridDim.x); idx++) {
outputBuf[idx] = dev_common_ptr[idx];
}
if (test_type == TEST_MALLOC_FREE) {
free(dev_common_ptr);
} else {
delete[] dev_common_ptr;
}
}
}
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
/**
* Local function: Launch kerAllocTillExhaust<<<>>> and
* kerFreeAll<<<>>> to test memory allocation till all device
* memory is exhausted.
*/
static bool TestAllocationOfAllAvailableMemory(int test_type,
int category, size_t mem_chunk_size) {
size_t avail1 = 0, avail2 = 0, tot = 0;
constexpr size_t host_alloc = 2147483648; // 2 GB
HIP_CHECK(hipMemGetInfo(&avail1, &tot));
#if HT_NVIDIA
HIP_CHECK(hipDeviceSetLimit(hipLimitMallocHeapSize, avail1));
#endif
size_t *tot_alloc_mem_d = nullptr, *tot_alloc_mem_h = nullptr;
tot_alloc_mem_h =
reinterpret_cast<size_t*> (malloc(sizeof(size_t)));
REQUIRE(nullptr != tot_alloc_mem_h);
HIP_CHECK(hipMalloc(&tot_alloc_mem_d, sizeof(size_t)));
REQUIRE(nullptr != tot_alloc_mem_d);
char *devptrHost = nullptr;
if (category == ALLOCATE_ONHOST_HIPMALLOCMANAGED) {
HIP_CHECK(hipMallocManaged(&devptrHost, host_alloc));
} else if (category == ALLOCATE_ONHOST_HIPMALLOC) {
HIP_CHECK(hipMalloc(&devptrHost, host_alloc));
}
HIP_CHECK(hipMemGetInfo(&avail1, &tot));
INFO("Total available memory " << tot);
INFO("Available memory before allocation " << avail1);
// Launch Test Kernel
kerAllocTillExhaust<<<1, 1>>>(test_type, tot_alloc_mem_d,
mem_chunk_size);
HIP_CHECK(hipDeviceSynchronize());
// Copy to host buffer
HIP_CHECK(hipMemcpy(tot_alloc_mem_h, tot_alloc_mem_d,
sizeof(size_t), hipMemcpyDefault));
HIP_CHECK(hipMemGetInfo(&avail2, &tot));
kerFreeAll<<<1, 1>>>(test_type);
HIP_CHECK(hipDeviceSynchronize());
// Copy to host buffer
bool bPassed = false;
INFO("Available memory after allocation " << avail2);
if (category == NO_ALLOCATION_ONHOST) {
size_t allocated_dev_mem = (tot - avail2);
if (allocated_dev_mem >= *tot_alloc_mem_h) {
bPassed = true;
}
} else if ((category == ALLOCATE_ONHOST_HIPMALLOCMANAGED) ||
(category == ALLOCATE_ONHOST_HIPMALLOC)) {
size_t allocated_dev_mem = (tot - avail2 - host_alloc);
if (allocated_dev_mem >= *tot_alloc_mem_h) {
bPassed = true;
}
hipFree(devptrHost);
}
hipFree(tot_alloc_mem_d);
free(tot_alloc_mem_h);
return bPassed;
}
#endif
/**
* Local function: Launch kerBlockLevelMemoryAllocation<<<>>>
* in a loop to stress test allocation and deallocation.
*/
static bool TestMemoryAllocationInLoop(int test_type,
bool isMultikernel = false) {
int *outputVec_d{nullptr}, *outputVec_h{nullptr};
int arraysize = (BLOCKSIZE * GRIDSIZE);
outputVec_h = reinterpret_cast<int*> (malloc(sizeof(int) * arraysize));
REQUIRE(outputVec_h != nullptr);
HIP_CHECK(hipMalloc(&outputVec_d, (sizeof(int) * arraysize)));
bool bPassed = true;
// Launch Test Kernel
int max_index = 0;
if (isMultikernel) {
max_index = NUM_OF_LOOP_MULTIPLE_KER;
} else {
max_index = NUM_OF_LOOP_SINGLE_KER;
}
for (int idx = 0; idx < max_index; idx++) {
if (isMultikernel) {
kerAlloc<<<GRIDSIZE, BLOCKSIZE>>>(test_type);
kerWrite<<<GRIDSIZE, BLOCKSIZE>>>();
kerFree<<<GRIDSIZE, BLOCKSIZE>>>(outputVec_d, test_type);
} else {
kerBlockLevelMemoryAllocation<<<GRIDSIZE, BLOCKSIZE>>>(outputVec_d,
test_type);
}
HIP_CHECK(hipDeviceSynchronize());
// Copy to host buffer
HIP_CHECK(hipMemcpy(outputVec_h, outputVec_d, sizeof(int) * arraysize,
hipMemcpyDefault));
bPassed = true;
for (int idx = 0; idx < arraysize; idx++) {
if (outputVec_h[idx] != idx) {
bPassed = false;
break;
}
}
if (!bPassed) break;
}
hipFree(outputVec_d);
free(outputVec_h);
return bPassed;
}
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
/**
* Scenario: Test malloc till nullptr is returned using even chunksize.
*/
TEST_CASE("Stress_deviceAllocation_malloc_Even") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: Test malloc till nullptr is returned using odd chunksize.
*/
TEST_CASE("Stress_deviceAllocation_malloc_Odd") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE_ODD));
}
/**
* Scenario: Test new till nullptr is returned using even chunksize.
*/
TEST_CASE("Stress_deviceAllocation_new_Even") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: Test new till nullptr is returned using odd chunksize.
*/
TEST_CASE("Stress_deviceAllocation_new_Odd") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE_ODD));
}
/**
* Scenario: This test checks device allocation using malloc till nullptr
* is returned. Device memory is also allocated using hipmallocmanaged
* from host.
*/
TEST_CASE("Stress_deviceAllocation_malloc_hipmallocmanaged") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
ALLOCATE_ONHOST_HIPMALLOCMANAGED, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: This test checks device allocation using new till nullptr
* is returned. Device memory is also allocated using hipmallocmanaged
* from host.
*/
TEST_CASE("Stress_deviceAllocation_new_hipmallocmanaged") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
ALLOCATE_ONHOST_HIPMALLOCMANAGED, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: This test checks device allocation using malloc till nullptr
* is returned. Device memory is also allocated using hipmalloc from host.
*/
TEST_CASE("Stress_deviceAllocation_malloc_hipmalloc") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
ALLOCATE_ONHOST_HIPMALLOC, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: This test checks device allocation using new till nullptr
* is returned. Device memory is also allocated using hipmalloc from host.
*/
TEST_CASE("Stress_deviceAllocation_new_hipmalloc") {
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
ALLOCATE_ONHOST_HIPMALLOC, MEMORY_CHUNK_SIZE));
}
/**
* Scenario: This test validates device allocation negative scenario
* when size > available memory.
*/
TEST_CASE("Stress_deviceAllocation_Negative") {
int *ret_d{nullptr}, *ret_h{nullptr};
size_t avail = 0, tot = 0;
HIP_CHECK(hipMemGetInfo(&avail, &tot));
printf("Available Memory in GPU = %zu \n", avail);
ret_h = reinterpret_cast<int*> (malloc(sizeof(int)));
REQUIRE(ret_h != nullptr);
HIP_CHECK(hipMalloc(&ret_d, (sizeof(int))));
SECTION("Test allocation with malloc") {
kerTestDynamicAllocNeg<<<1, 1>>>(TEST_MALLOC_FREE, (avail + 1), ret_d);
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(ret_h, ret_d, sizeof(int), hipMemcpyDefault));
REQUIRE(0 == *ret_h);
}
SECTION("Test allocation with new") {
kerTestDynamicAllocNeg<<<1, 1>>>(TEST_NEW_DELETE, (avail + 1), ret_d);
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(ret_h, ret_d, sizeof(int), hipMemcpyDefault));
REQUIRE(0 == *ret_h);
}
hipFree(ret_d);
free(ret_h);
}
#endif
/**
* Scenario: This test performs stress test of malloc/free in a loop
* using single kernel.
*/
TEST_CASE("Stress_deviceAllocation_malloc_loop_singlekernel") {
REQUIRE(true == TestMemoryAllocationInLoop(TEST_MALLOC_FREE, false));
}
/**
* Scenario: This test performs stress test of new/delete in a loop
* using single kernel.
*/
TEST_CASE("Stress_deviceAllocation_new_loop_singlekernel") {
REQUIRE(true == TestMemoryAllocationInLoop(TEST_NEW_DELETE, false));
}
/**
* Scenario: This test performs stress test of malloc/free in a loop
* using multiple kernel.
*/
TEST_CASE("Stress_deviceAllocation_malloc_loop_multkernel") {
REQUIRE(true == TestMemoryAllocationInLoop(TEST_MALLOC_FREE, true));
}
/**
* Scenario: This test performs stress test of new/delete in a loop
* using multiple kernel.
*/
TEST_CASE("Stress_deviceAllocation_new_loop_multkernel") {
REQUIRE(true == TestMemoryAllocationInLoop(TEST_NEW_DELETE, true));
}
@@ -13,6 +13,7 @@ set(TEST_SRC
syncthreadsand.cc
syncthreadscount.cc
syncthreadsor.cc
deviceAllocation.cc
)
# AMD only tests
@@ -43,6 +44,11 @@ set(AMD_ARCH_SPEC_TEST_SRC
unsafeAtomicAdd_NonCoherent_withunsafeflag.cc
)
add_custom_target(kerDevAllocMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevAllocMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevAllocMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
add_custom_target(kerDevWriteMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevWriteMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevWriteMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
add_custom_target(kerDevFreeMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevFreeMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevFreeMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
add_custom_target(kerDevAllocSingleKer.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevAllocSingleKer.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevAllocSingleKer.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
if(HIP_PLATFORM MATCHES "amd")
if (DEFINED OFFLOAD_ARCH_STR)
string(FIND ${OFFLOAD_ARCH_STR} "gfx90a" ARCH_CHECK)
@@ -78,3 +84,4 @@ elseif(HIP_PLATFORM MATCHES "nvidia")
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS --Wno-deprecated-declarations)
endif()
add_dependencies(build_tests kerDevAllocMultCO.code kerDevWriteMultCO.code kerDevFreeMultCO.code kerDevAllocSingleKer.code)
@@ -0,0 +1,36 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#define INTERNAL_BUFFER_SIZE 8
// Test Type
#define TEST_MALLOC_FREE 1
#define TEST_NEW_DELETE 2
// Kernel Params
#define BLOCKSIZE 64
#define GRIDSIZE 32
// Code Obj
#define DEV_ALLOC_SINGKER_COBJ "kerDevAllocSingleKer.code"
#define DEV_ALLOC_SINGKER_COBJ_FUNC "ker_TestDynamicAllocInAllThreads_CodeObj"
#define DEV_ALLOC_MULCOBJ "kerDevAllocMultCO.code"
#define DEV_WRITE_MULCOBJ "kerDevWriteMultCO.code"
#define DEV_FREE_MULCOBJ "kerDevFreeMultCO.code"
#define DEV_ALLOC_MULCODEOBJ_ALLOC "ker_Alloc_MultCodeObj"
#define DEV_ALLOC_MULCODEOBJ_WRITE "ker_Write_MultCodeObj"
#define DEV_ALLOC_MULCODEOBJ_FREE "ker_Free_MultCodeObj"
@@ -0,0 +1,132 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <cfloat>
#include <atomic>
#include "./defs.h"
static __device__ int* deviceAlloc(int test_type);
static __device__ void deviceWrite(int myId, int *devmem);
static __device__ void deviceFree(int *outputBuf, int *devmem,
int test_type, int myId);
/**
* Allocation base and derived class to test dynamic allocation.
*/
class baseAlloc{
public:
virtual __device__ int* alloc(size_t size) = 0;
virtual __device__ void free(int* ptr) = 0;
};
class derivedAlloc: public baseAlloc{
public:
virtual __device__ int* alloc(size_t size) {
return new int[size];
}
virtual __device__ void free(int* ptr) {
delete ptr;
}
};
/**
* Allocation Structure to test dynamic allocation.
*/
struct deviceAllocFunc{
int* (*alloc)(int);
void (*write)(int, int*);
void (*free)(int*, int*, int, int);
};
/**
* Simple Structure to test dynamic allocation.
*/
struct simpleStruct{
int32_t i;
double d;
float f;
int16_t s;
char c;
int32_t iarr[INTERNAL_BUFFER_SIZE];
bool operator!=(const struct simpleStruct &inpStr) {
if ((i != inpStr.i) || (d != inpStr.d) ||
(f != inpStr.f) || (s != inpStr.s) || (c != inpStr.c)) {
return true;
}
for (int32_t idx = 0; idx < INTERNAL_BUFFER_SIZE; idx++) {
if (iarr[idx] != inpStr.iarr[idx]) {
return true;
}
}
return false;
}
};
/**
* Simple Structure containing thread information
*/
struct threadInfo{
int threadid;
int blockid;
int32_t ival;
double dval;
float fval;
int16_t sval;
char cval;
};
/**
* C/C++ Union
*/
union testInfoUnion{
int32_t ival;
double dval;
float fval;
int16_t sval;
char cval;
};
/**
* Complex (nested) Structure to test dynamic allocation using malloc.
*/
struct complexStructure{
struct threadInfo *sthreadInfo;
__device__ void alloc_internal_members(int test_type, size_t size) {
sthreadInfo = nullptr;
if (test_type == TEST_MALLOC_FREE) {
sthreadInfo = reinterpret_cast<struct threadInfo*>(
malloc(size*sizeof(struct threadInfo)));
} else {
sthreadInfo = new struct threadInfo[size];
}
}
__device__ void free_internal_members(int test_type) {
if (test_type == TEST_MALLOC_FREE) {
free(sthreadInfo);
} else {
delete[] sthreadInfo;
}
sthreadInfo = nullptr;
}
};
Diff onderdrukt omdat het te groot bestand Laad Diff
@@ -0,0 +1,39 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "./defs.h"
/**
* This kernel allocates memory in thread 0.
*/
extern "C" __global__ void ker_Alloc_MultCodeObj(int **dev_mem,
int test_type) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate memory in thread 0 of block 0
if (0 == myId) {
if (test_type == TEST_MALLOC_FREE) {
*dev_mem =
reinterpret_cast<int*> (malloc(blockDim.x*gridDim.x*sizeof(int)));
} else {
*dev_mem =
reinterpret_cast<int*> (new int[blockDim.x*gridDim.x]);
}
}
}
@@ -0,0 +1,57 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "./defs.h"
/**
* This kernel allocates and deallocates memory in every thread.
*/
extern "C" __global__ void ker_TestDynamicAllocInAllThreads_CodeObj(
int *outputBuf, int test_type, int value,
size_t perThreadSize) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Allocate
size_t size = 0;
int* ptr = nullptr;
if (test_type == TEST_MALLOC_FREE) {
size = perThreadSize * sizeof(int);
ptr = reinterpret_cast<int*> (malloc(size));
} else {
size = perThreadSize;
ptr = new int[perThreadSize];
}
if (ptr == nullptr) {
printf("Device Allocation in thread %d Failed! \n", myId);
return;
}
// Set memory
for (size_t idx = 0; idx < perThreadSize; idx++) {
ptr[idx] = value;
}
// Copy to output buffer
for (size_t idx = 0; idx < perThreadSize; idx++) {
outputBuf[myId*perThreadSize + idx] = ptr[idx];
}
// Free memory
if (test_type == TEST_MALLOC_FREE) {
free(ptr);
} else {
delete[] ptr;
}
}
@@ -0,0 +1,47 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "./defs.h"
/**
* This kernel copies the contents of memory allocated in
* ker_Alloc_MultCodeObj<<<>>> to host and deletes the memory
* from thread 0.
*/
extern "C" __global__ void ker_Free_MultCodeObj(int *outputBuf,
int **dev_mem, int test_type) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Check allocated memory in all threads in block before access
if (*dev_mem == nullptr) {
printf("Device Allocation Failed in thread = %d \n", myId);
return;
}
if (0 == myId) {
for (size_t idx = 0; idx < (blockDim.x*gridDim.x); idx++) {
outputBuf[idx] = (*dev_mem)[idx];
}
if (test_type == TEST_MALLOC_FREE) {
free(*dev_mem);
} else {
delete[] (*dev_mem);
}
}
}
@@ -0,0 +1,36 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "./defs.h"
/**
* This kernel writes to memory allocated in ker_Alloc_MultCodeObj<<<>>>.
*/
extern "C" __global__ void ker_Write_MultCodeObj(int **dev_mem,
int value) {
int myId = threadIdx.x + blockDim.x * blockIdx.x;
// Check allocated memory in all threads in block before access
if (*dev_mem == nullptr) {
printf("Device Allocation Failed in thread = %d \n", myId);
return;
}
// Copy to buffer
(*dev_mem)[myId] = value;
}