SWDEV-312831 - Adding device side malloc/new tests (#2635)
Following tests are added in this patch
1) Functional Tests
2) Stress Tests
3) Multiprocess Tests
Change-Id: Ifd2310fe8f5d555a9af9b87c296fdf6dd4a0a552
[ROCm/hip-tests commit: de0eb21023]
This commit is contained in:
gecommit door
GitHub
bovenliggende
9d7601de4c
commit
aa5f494517
@@ -14,6 +14,7 @@ set(LINUX_TEST_SRC
|
||||
hipMemCoherencyTstMProc.cc
|
||||
hipIpcEventHandle.cc
|
||||
hipIpcMemAccessTest.cc
|
||||
deviceAllocationMproc.cc
|
||||
)
|
||||
|
||||
# the last argument linker libraries is required for this test but optional to the function
|
||||
|
||||
@@ -0,0 +1,319 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
#define SIZE 2097152
|
||||
// GPU threads
|
||||
#define BLOCKSIZE 512
|
||||
#define GRIDSIZE 256
|
||||
|
||||
__device__ static char* dev_common_ptr = nullptr;
|
||||
|
||||
/**
|
||||
* This kernel allocates a memory chunk using malloc().
|
||||
*/
|
||||
static __global__ void kerTestDeviceMalloc(size_t size) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
if (myId == 0) {
|
||||
dev_common_ptr = reinterpret_cast<char*> (malloc(size));
|
||||
if (dev_common_ptr == nullptr) {
|
||||
printf("Device Allocation Failed! \n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel writes to the memory location allocated in kernel
|
||||
* kerTestDeviceMalloc or kerTestDeviceNew.
|
||||
*/
|
||||
static __global__ void kerTestDeviceWrite() {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
if (dev_common_ptr == nullptr) {
|
||||
printf("Device Allocation Failed! \n");
|
||||
return;
|
||||
}
|
||||
*(dev_common_ptr + myId) = SCHAR_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel frees the memory chunk allocated in kernel
|
||||
* kerTestDeviceMalloc using free().
|
||||
*/
|
||||
static __global__ void kerTestDeviceFree(int *result) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
if (myId == 0) {
|
||||
if (dev_common_ptr != nullptr) {
|
||||
*result = 1;
|
||||
for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
|
||||
if (*(dev_common_ptr + myId) != SCHAR_MAX) {
|
||||
*result = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(dev_common_ptr);
|
||||
} else {
|
||||
*result = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel allocates a memory chunk using new operator.
|
||||
*/
|
||||
static __global__ void kerTestDeviceNew(size_t size) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
if (myId == 0) {
|
||||
dev_common_ptr = new char[size];
|
||||
if (dev_common_ptr == nullptr) {
|
||||
printf("Device Allocation Failed! \n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel frees the memory chunk allocated in kernel
|
||||
* kerTestDeviceNew using delete operator.
|
||||
*/
|
||||
static __global__ void kerTestDeviceDelete(int *result) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
if (myId == 0) {
|
||||
if (dev_common_ptr != nullptr) {
|
||||
*result = 1;
|
||||
for (int idx = 0; idx < (BLOCKSIZE*GRIDSIZE); idx++) {
|
||||
if (*(dev_common_ptr + myId) != SCHAR_MAX) {
|
||||
*result = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
delete[] dev_common_ptr;
|
||||
} else {
|
||||
*result = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test device malloc()/new in both Parent and Child Process.
|
||||
* Allocate SIZE bytes in both parent and child process. Verify
|
||||
* the allocated size in both parent and child process.
|
||||
*/
|
||||
static bool testDeviceAllocMulProc(bool testmalloc) {
|
||||
int fd[2];
|
||||
pid_t childpid;
|
||||
bool testResult = false;
|
||||
size_t avail = 0, tot = 0;
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
// fork process
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// Allocate in parent
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(SIZE);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Check allocated memory size
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
return false;
|
||||
}
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
// At this point the child process exits.
|
||||
// Ensure that device memory allocated from child is freed.
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
testResult = false;
|
||||
}
|
||||
} else if (!childpid) { // Child
|
||||
// Wait for hipDeviceSetLimit() completion in parent.
|
||||
close(fd[0]);
|
||||
// Allocate in child
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(SIZE);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(SIZE);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Check allocated memory size
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
if ((tot - avail) < SIZE) {
|
||||
testResult = false;
|
||||
} else {
|
||||
testResult = true;
|
||||
}
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test device malloc()/new, write and free()/delete[]
|
||||
* from both Parent and Child Process. From both Parent and
|
||||
* Child Process invoke the kernel to allocate memory, the
|
||||
* kernel to write to the allocated memory and a third kernel
|
||||
* to verify the memory contents and free it.
|
||||
*/
|
||||
static bool testDeviceMemMulProc(bool testmalloc) {
|
||||
int fd[2];
|
||||
bool testResult = false;
|
||||
pid_t childpid;
|
||||
int testResultChild = 0;
|
||||
size_t size = BLOCKSIZE*GRIDSIZE;
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
// fork process
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
int *result_d{nullptr}, *result_h{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
|
||||
REQUIRE(result_h != nullptr);
|
||||
// Allocate in parent
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(size);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(size);
|
||||
}
|
||||
// Write
|
||||
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
|
||||
// Free
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
*result_h = 0;
|
||||
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
|
||||
hipMemcpyDefault));
|
||||
if (*result_h == 0) {
|
||||
testResult = false;
|
||||
} else {
|
||||
testResult = true;
|
||||
}
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResultChild, sizeof(int));
|
||||
if (testResultChild == 0) {
|
||||
testResult &= false;
|
||||
} else {
|
||||
testResult &= true;
|
||||
}
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
hipFree(result_d);
|
||||
free(result_h);
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
} else if (!childpid) { // Child
|
||||
// Wait for hipDeviceSetLimit() completion in parent.
|
||||
close(fd[0]);
|
||||
int *result_d{nullptr}, *result_h{nullptr};
|
||||
HIP_CHECK(hipMalloc(&result_d, sizeof(int)));
|
||||
result_h = reinterpret_cast<int*> (malloc(sizeof(int)));
|
||||
REQUIRE(result_h != nullptr);
|
||||
// Allocate in child
|
||||
if (testmalloc) {
|
||||
kerTestDeviceMalloc<<<1, 1>>>(size);
|
||||
} else {
|
||||
kerTestDeviceNew<<<1, 1>>>(size);
|
||||
}
|
||||
// Write
|
||||
kerTestDeviceWrite<<<GRIDSIZE, BLOCKSIZE>>>();
|
||||
// Free
|
||||
if (testmalloc) {
|
||||
kerTestDeviceFree<<<1, 1>>>(result_d);
|
||||
} else {
|
||||
kerTestDeviceDelete<<<1, 1>>>(result_d);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
*result_h = 0;
|
||||
HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(int),
|
||||
hipMemcpyDefault));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], result_h, sizeof(int));
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
hipFree(result_d);
|
||||
free(result_h);
|
||||
exit(0);
|
||||
}
|
||||
return testResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side malloc test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_Malloc_MultProcess") {
|
||||
auto res = testDeviceAllocMulProc(true);
|
||||
REQUIRE(res == true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side new test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_New_MultProcess") {
|
||||
auto res = testDeviceAllocMulProc(false);
|
||||
REQUIRE(res == true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side malloc, write and free test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_MallocFree_MultProcess") {
|
||||
auto res = testDeviceMemMulProc(true);
|
||||
REQUIRE(res == true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiprocess device side new, write and delete test.
|
||||
*/
|
||||
TEST_CASE("Unit_deviceAllocation_NewDelete_MultProcess") {
|
||||
auto res = testDeviceMemMulProc(false);
|
||||
REQUIRE(res == true);
|
||||
}
|
||||
@@ -6,3 +6,4 @@ if(HIP_PLATFORM MATCHES "amd")
|
||||
add_subdirectory(printf)
|
||||
add_subdirectory(stream)
|
||||
endif()
|
||||
add_subdirectory(deviceallocation)
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
Stress_deviceAllocationStress.cc
|
||||
)
|
||||
|
||||
hip_add_exe_to_target(NAME devalloc
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME stress_test)
|
||||
+487
@@ -0,0 +1,487 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <unistd.h>
|
||||
// Size Macros
|
||||
#define MEMORY_CHUNK_SIZE (1024*1024)
|
||||
#define MEMORY_CHUNK_SIZE_ODD (1025*1025)
|
||||
#define MAXIMUM_CHUNKS (256*1024)
|
||||
// Subtest Macros
|
||||
#define NO_ALLOCATION_ONHOST 0
|
||||
#define ALLOCATE_ONHOST_HIPMALLOCMANAGED 1
|
||||
#define ALLOCATE_ONHOST_HIPMALLOC 2
|
||||
// Test Type Macros
|
||||
#define TEST_MALLOC_FREE 1
|
||||
#define TEST_NEW_DELETE 2
|
||||
// GPU threads
|
||||
#define BLOCKSIZE 512
|
||||
#define GRIDSIZE 512
|
||||
// Test parameters
|
||||
// Two different loops
|
||||
#define NUM_OF_LOOP_SINGLE_KER 100000
|
||||
#define NUM_OF_LOOP_MULTIPLE_KER 20000
|
||||
|
||||
// The following flag is defined for platforms (nvidia)
|
||||
// which honors device memory limit. For AMD this flag
|
||||
// is disabled and defect is raised.
|
||||
#if HT_NVIDIA
|
||||
#define HT_HONORS_DEVICEMEMORY_LIMIT
|
||||
#endif
|
||||
|
||||
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
|
||||
__device__ static char* dev_mem_glob[MAXIMUM_CHUNKS];
|
||||
#endif
|
||||
__device__ static int* dev_mem[GRIDSIZE];
|
||||
__device__ static int* dev_common_ptr;
|
||||
|
||||
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
|
||||
/**
|
||||
* This kernel checks kernel allocation of size more than available
|
||||
* memory.
|
||||
*/
|
||||
static __global__ void kerTestDynamicAllocNeg(int test_type,
|
||||
size_t perThreadSize,
|
||||
int *ret) {
|
||||
// Allocate
|
||||
char* ptr = nullptr;
|
||||
printf("Memory to allocate in GPU = %zu \n", perThreadSize);
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
ptr = reinterpret_cast<char*> (malloc(perThreadSize));
|
||||
} else {
|
||||
ptr = new char[perThreadSize];
|
||||
}
|
||||
printf("Allocation Done \n");
|
||||
if (ptr == nullptr) {
|
||||
printf("Allocation Failed. PASSED! \n");
|
||||
*ret = 0;
|
||||
return;
|
||||
} else {
|
||||
// Free memory
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(ptr);
|
||||
} else {
|
||||
delete[] ptr;
|
||||
}
|
||||
*ret = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel allocates memory till nullptr is returned.
|
||||
*/
|
||||
static __global__ void kerAllocTillExhaust(int test_type,
|
||||
size_t *total_allocated_mem,
|
||||
size_t mem_chunk_size) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate memory in thread 0 of block 0
|
||||
if (0 == myId) {
|
||||
for (int idx = 0; idx < MAXIMUM_CHUNKS; idx++) {
|
||||
dev_mem_glob[idx] = nullptr;
|
||||
}
|
||||
int idx = 0;
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
do {
|
||||
dev_mem_glob[idx] =
|
||||
reinterpret_cast<char*> (malloc(mem_chunk_size));
|
||||
if (idx >= MAXIMUM_CHUNKS) {
|
||||
break;
|
||||
}
|
||||
} while (dev_mem_glob[idx++] != nullptr);
|
||||
} else {
|
||||
do {
|
||||
dev_mem_glob[idx] =
|
||||
reinterpret_cast<char*> (new char[mem_chunk_size]);
|
||||
if (idx >= MAXIMUM_CHUNKS) {
|
||||
break;
|
||||
}
|
||||
} while (dev_mem_glob[idx++] != nullptr);
|
||||
}
|
||||
idx = 0;
|
||||
*total_allocated_mem = 0;
|
||||
while ((dev_mem_glob[idx] != nullptr) &&
|
||||
(idx < MAXIMUM_CHUNKS)) {
|
||||
*total_allocated_mem = *total_allocated_mem + mem_chunk_size;
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel deletes the memory.
|
||||
*/
|
||||
static __global__ void kerFreeAll(int test_type) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
if (0 == myId) {
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
int idx = 0;
|
||||
while (dev_mem_glob[idx] != nullptr) {
|
||||
free(dev_mem_glob[idx++]);
|
||||
if (idx >= MAXIMUM_CHUNKS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int idx = 0;
|
||||
while (dev_mem_glob[idx] != nullptr) {
|
||||
delete[] (dev_mem_glob[idx++]);
|
||||
if (idx >= MAXIMUM_CHUNKS) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* This kernel allocates memory once in thread 0 of each block and
|
||||
* access this memory in all threads of the block. The memory is
|
||||
* finally deleted in last thread of each block.
|
||||
*/
|
||||
static __global__ void kerBlockLevelMemoryAllocation(int *outputBuf,
|
||||
int test_type) {
|
||||
int myThreadId = threadIdx.x, lastThreadId = (blockDim.x - 1);
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate memory in thread 0
|
||||
if (0 == myThreadId) {
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
dev_mem[blockIdx.x] =
|
||||
reinterpret_cast<int*> (malloc(blockDim.x*sizeof(int)));
|
||||
} else {
|
||||
dev_mem[blockIdx.x] =
|
||||
reinterpret_cast<int*> (new int[blockDim.x]);
|
||||
}
|
||||
}
|
||||
// All threads wait at this barrier
|
||||
__syncthreads();
|
||||
// Check allocated memory in all threads in block before access
|
||||
if (dev_mem[blockIdx.x] == nullptr) {
|
||||
printf("Device Allocation Failed in thread = %d \n", myId);
|
||||
return;
|
||||
}
|
||||
int *ptr = reinterpret_cast<int*> (dev_mem[blockIdx.x]);
|
||||
// Copy to buffer
|
||||
ptr[myThreadId] = myId;
|
||||
// All threads wait
|
||||
__syncthreads();
|
||||
// Copy memory to host and free the memory in thread <blockDim.x - 1>
|
||||
if (lastThreadId == myThreadId) {
|
||||
for (size_t idx = 0; idx < blockDim.x; idx++) {
|
||||
outputBuf[idx + blockDim.x * blockIdx.x] = ptr[idx];
|
||||
}
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(ptr);
|
||||
} else {
|
||||
delete[] ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel allocates memory in one thread.
|
||||
*/
|
||||
static __global__ void kerAlloc(int test_type) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate memory in thread 0 of block 0
|
||||
if (0 == myId) {
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
dev_common_ptr =
|
||||
reinterpret_cast<int*> (malloc(blockDim.x*gridDim.x*sizeof(int)));
|
||||
} else {
|
||||
dev_common_ptr =
|
||||
reinterpret_cast<int*> (new int[blockDim.x*gridDim.x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel writes to memory allocated in <kerAlloc>.
|
||||
*/
|
||||
static __global__ void kerWrite() {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Check allocated memory in all threads in block before access
|
||||
if (dev_common_ptr == nullptr) {
|
||||
printf("Device Allocation Failed in thread = %d \n", myId);
|
||||
return;
|
||||
}
|
||||
// Copy to buffer
|
||||
dev_common_ptr[myId] = myId;
|
||||
}
|
||||
|
||||
/**
|
||||
* This kernel copies the contents of memory allocated in <kerAlloc>
|
||||
* to host and deletes the memory from thread 0.
|
||||
*/
|
||||
static __global__ void kerFree(int *outputBuf, int test_type) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Check allocated memory in all threads in block before access
|
||||
if (dev_common_ptr == nullptr) {
|
||||
printf("Device Allocation Failed in thread = %d \n", myId);
|
||||
return;
|
||||
}
|
||||
if (0 == myId) {
|
||||
for (size_t idx = 0; idx < (blockDim.x*gridDim.x); idx++) {
|
||||
outputBuf[idx] = dev_common_ptr[idx];
|
||||
}
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(dev_common_ptr);
|
||||
} else {
|
||||
delete[] dev_common_ptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
|
||||
/**
|
||||
* Local function: Launch kerAllocTillExhaust<<<>>> and
|
||||
* kerFreeAll<<<>>> to test memory allocation till all device
|
||||
* memory is exhausted.
|
||||
*/
|
||||
static bool TestAllocationOfAllAvailableMemory(int test_type,
|
||||
int category, size_t mem_chunk_size) {
|
||||
size_t avail1 = 0, avail2 = 0, tot = 0;
|
||||
constexpr size_t host_alloc = 2147483648; // 2 GB
|
||||
HIP_CHECK(hipMemGetInfo(&avail1, &tot));
|
||||
#if HT_NVIDIA
|
||||
HIP_CHECK(hipDeviceSetLimit(hipLimitMallocHeapSize, avail1));
|
||||
#endif
|
||||
size_t *tot_alloc_mem_d = nullptr, *tot_alloc_mem_h = nullptr;
|
||||
tot_alloc_mem_h =
|
||||
reinterpret_cast<size_t*> (malloc(sizeof(size_t)));
|
||||
REQUIRE(nullptr != tot_alloc_mem_h);
|
||||
HIP_CHECK(hipMalloc(&tot_alloc_mem_d, sizeof(size_t)));
|
||||
REQUIRE(nullptr != tot_alloc_mem_d);
|
||||
char *devptrHost = nullptr;
|
||||
if (category == ALLOCATE_ONHOST_HIPMALLOCMANAGED) {
|
||||
HIP_CHECK(hipMallocManaged(&devptrHost, host_alloc));
|
||||
} else if (category == ALLOCATE_ONHOST_HIPMALLOC) {
|
||||
HIP_CHECK(hipMalloc(&devptrHost, host_alloc));
|
||||
}
|
||||
HIP_CHECK(hipMemGetInfo(&avail1, &tot));
|
||||
INFO("Total available memory " << tot);
|
||||
INFO("Available memory before allocation " << avail1);
|
||||
// Launch Test Kernel
|
||||
kerAllocTillExhaust<<<1, 1>>>(test_type, tot_alloc_mem_d,
|
||||
mem_chunk_size);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Copy to host buffer
|
||||
HIP_CHECK(hipMemcpy(tot_alloc_mem_h, tot_alloc_mem_d,
|
||||
sizeof(size_t), hipMemcpyDefault));
|
||||
HIP_CHECK(hipMemGetInfo(&avail2, &tot));
|
||||
kerFreeAll<<<1, 1>>>(test_type);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Copy to host buffer
|
||||
bool bPassed = false;
|
||||
INFO("Available memory after allocation " << avail2);
|
||||
if (category == NO_ALLOCATION_ONHOST) {
|
||||
size_t allocated_dev_mem = (tot - avail2);
|
||||
if (allocated_dev_mem >= *tot_alloc_mem_h) {
|
||||
bPassed = true;
|
||||
}
|
||||
} else if ((category == ALLOCATE_ONHOST_HIPMALLOCMANAGED) ||
|
||||
(category == ALLOCATE_ONHOST_HIPMALLOC)) {
|
||||
size_t allocated_dev_mem = (tot - avail2 - host_alloc);
|
||||
if (allocated_dev_mem >= *tot_alloc_mem_h) {
|
||||
bPassed = true;
|
||||
}
|
||||
hipFree(devptrHost);
|
||||
}
|
||||
hipFree(tot_alloc_mem_d);
|
||||
free(tot_alloc_mem_h);
|
||||
return bPassed;
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* Local function: Launch kerBlockLevelMemoryAllocation<<<>>>
|
||||
* in a loop to stress test allocation and deallocation.
|
||||
*/
|
||||
static bool TestMemoryAllocationInLoop(int test_type,
|
||||
bool isMultikernel = false) {
|
||||
int *outputVec_d{nullptr}, *outputVec_h{nullptr};
|
||||
int arraysize = (BLOCKSIZE * GRIDSIZE);
|
||||
outputVec_h = reinterpret_cast<int*> (malloc(sizeof(int) * arraysize));
|
||||
REQUIRE(outputVec_h != nullptr);
|
||||
HIP_CHECK(hipMalloc(&outputVec_d, (sizeof(int) * arraysize)));
|
||||
bool bPassed = true;
|
||||
// Launch Test Kernel
|
||||
int max_index = 0;
|
||||
if (isMultikernel) {
|
||||
max_index = NUM_OF_LOOP_MULTIPLE_KER;
|
||||
} else {
|
||||
max_index = NUM_OF_LOOP_SINGLE_KER;
|
||||
}
|
||||
for (int idx = 0; idx < max_index; idx++) {
|
||||
if (isMultikernel) {
|
||||
kerAlloc<<<GRIDSIZE, BLOCKSIZE>>>(test_type);
|
||||
kerWrite<<<GRIDSIZE, BLOCKSIZE>>>();
|
||||
kerFree<<<GRIDSIZE, BLOCKSIZE>>>(outputVec_d, test_type);
|
||||
} else {
|
||||
kerBlockLevelMemoryAllocation<<<GRIDSIZE, BLOCKSIZE>>>(outputVec_d,
|
||||
test_type);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Copy to host buffer
|
||||
HIP_CHECK(hipMemcpy(outputVec_h, outputVec_d, sizeof(int) * arraysize,
|
||||
hipMemcpyDefault));
|
||||
bPassed = true;
|
||||
for (int idx = 0; idx < arraysize; idx++) {
|
||||
if (outputVec_h[idx] != idx) {
|
||||
bPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bPassed) break;
|
||||
}
|
||||
hipFree(outputVec_d);
|
||||
free(outputVec_h);
|
||||
return bPassed;
|
||||
}
|
||||
|
||||
#ifdef HT_HONORS_DEVICEMEMORY_LIMIT
|
||||
/**
|
||||
* Scenario: Test malloc till nullptr is returned using even chunksize.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_Even") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
|
||||
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Test malloc till nullptr is returned using odd chunksize.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_Odd") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
|
||||
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE_ODD));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Test new till nullptr is returned using even chunksize.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_Even") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
|
||||
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Test new till nullptr is returned using odd chunksize.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_Odd") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
|
||||
NO_ALLOCATION_ONHOST, MEMORY_CHUNK_SIZE_ODD));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test checks device allocation using malloc till nullptr
|
||||
* is returned. Device memory is also allocated using hipmallocmanaged
|
||||
* from host.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_hipmallocmanaged") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
|
||||
ALLOCATE_ONHOST_HIPMALLOCMANAGED, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test checks device allocation using new till nullptr
|
||||
* is returned. Device memory is also allocated using hipmallocmanaged
|
||||
* from host.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_hipmallocmanaged") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
|
||||
ALLOCATE_ONHOST_HIPMALLOCMANAGED, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test checks device allocation using malloc till nullptr
|
||||
* is returned. Device memory is also allocated using hipmalloc from host.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_hipmalloc") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_MALLOC_FREE,
|
||||
ALLOCATE_ONHOST_HIPMALLOC, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test checks device allocation using new till nullptr
|
||||
* is returned. Device memory is also allocated using hipmalloc from host.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_hipmalloc") {
|
||||
REQUIRE(true == TestAllocationOfAllAvailableMemory(TEST_NEW_DELETE,
|
||||
ALLOCATE_ONHOST_HIPMALLOC, MEMORY_CHUNK_SIZE));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test validates device allocation negative scenario
|
||||
* when size > available memory.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_Negative") {
|
||||
int *ret_d{nullptr}, *ret_h{nullptr};
|
||||
size_t avail = 0, tot = 0;
|
||||
HIP_CHECK(hipMemGetInfo(&avail, &tot));
|
||||
printf("Available Memory in GPU = %zu \n", avail);
|
||||
ret_h = reinterpret_cast<int*> (malloc(sizeof(int)));
|
||||
REQUIRE(ret_h != nullptr);
|
||||
HIP_CHECK(hipMalloc(&ret_d, (sizeof(int))));
|
||||
SECTION("Test allocation with malloc") {
|
||||
kerTestDynamicAllocNeg<<<1, 1>>>(TEST_MALLOC_FREE, (avail + 1), ret_d);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(ret_h, ret_d, sizeof(int), hipMemcpyDefault));
|
||||
REQUIRE(0 == *ret_h);
|
||||
}
|
||||
|
||||
SECTION("Test allocation with new") {
|
||||
kerTestDynamicAllocNeg<<<1, 1>>>(TEST_NEW_DELETE, (avail + 1), ret_d);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(ret_h, ret_d, sizeof(int), hipMemcpyDefault));
|
||||
REQUIRE(0 == *ret_h);
|
||||
}
|
||||
hipFree(ret_d);
|
||||
free(ret_h);
|
||||
}
|
||||
#endif
|
||||
/**
|
||||
* Scenario: This test performs stress test of malloc/free in a loop
|
||||
* using single kernel.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_loop_singlekernel") {
|
||||
REQUIRE(true == TestMemoryAllocationInLoop(TEST_MALLOC_FREE, false));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test performs stress test of new/delete in a loop
|
||||
* using single kernel.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_loop_singlekernel") {
|
||||
REQUIRE(true == TestMemoryAllocationInLoop(TEST_NEW_DELETE, false));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test performs stress test of malloc/free in a loop
|
||||
* using multiple kernel.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_malloc_loop_multkernel") {
|
||||
REQUIRE(true == TestMemoryAllocationInLoop(TEST_MALLOC_FREE, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: This test performs stress test of new/delete in a loop
|
||||
* using multiple kernel.
|
||||
*/
|
||||
TEST_CASE("Stress_deviceAllocation_new_loop_multkernel") {
|
||||
REQUIRE(true == TestMemoryAllocationInLoop(TEST_NEW_DELETE, true));
|
||||
}
|
||||
@@ -13,6 +13,7 @@ set(TEST_SRC
|
||||
syncthreadsand.cc
|
||||
syncthreadscount.cc
|
||||
syncthreadsor.cc
|
||||
deviceAllocation.cc
|
||||
)
|
||||
|
||||
# AMD only tests
|
||||
@@ -43,6 +44,11 @@ set(AMD_ARCH_SPEC_TEST_SRC
|
||||
unsafeAtomicAdd_NonCoherent_withunsafeflag.cc
|
||||
)
|
||||
|
||||
add_custom_target(kerDevAllocMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevAllocMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevAllocMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
add_custom_target(kerDevWriteMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevWriteMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevWriteMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
add_custom_target(kerDevFreeMultCO.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevFreeMultCO.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevFreeMultCO.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
add_custom_target(kerDevAllocSingleKer.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/kerDevAllocSingleKer.cc -o ${CMAKE_CURRENT_BINARY_DIR}/../../unit/deviceLib/kerDevAllocSingleKer.code -I${CMAKE_CURRENT_SOURCE_DIR}/../../../../include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
if (DEFINED OFFLOAD_ARCH_STR)
|
||||
string(FIND ${OFFLOAD_ARCH_STR} "gfx90a" ARCH_CHECK)
|
||||
@@ -78,3 +84,4 @@ elseif(HIP_PLATFORM MATCHES "nvidia")
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS --Wno-deprecated-declarations)
|
||||
endif()
|
||||
add_dependencies(build_tests kerDevAllocMultCO.code kerDevWriteMultCO.code kerDevFreeMultCO.code kerDevAllocSingleKer.code)
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#define INTERNAL_BUFFER_SIZE 8
|
||||
// Test Type
|
||||
#define TEST_MALLOC_FREE 1
|
||||
#define TEST_NEW_DELETE 2
|
||||
// Kernel Params
|
||||
#define BLOCKSIZE 64
|
||||
#define GRIDSIZE 32
|
||||
// Code Obj
|
||||
#define DEV_ALLOC_SINGKER_COBJ "kerDevAllocSingleKer.code"
|
||||
#define DEV_ALLOC_SINGKER_COBJ_FUNC "ker_TestDynamicAllocInAllThreads_CodeObj"
|
||||
#define DEV_ALLOC_MULCOBJ "kerDevAllocMultCO.code"
|
||||
#define DEV_WRITE_MULCOBJ "kerDevWriteMultCO.code"
|
||||
#define DEV_FREE_MULCOBJ "kerDevFreeMultCO.code"
|
||||
#define DEV_ALLOC_MULCODEOBJ_ALLOC "ker_Alloc_MultCodeObj"
|
||||
#define DEV_ALLOC_MULCODEOBJ_WRITE "ker_Write_MultCodeObj"
|
||||
#define DEV_ALLOC_MULCODEOBJ_FREE "ker_Free_MultCodeObj"
|
||||
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <cfloat>
|
||||
#include <atomic>
|
||||
#include "./defs.h"
|
||||
|
||||
static __device__ int* deviceAlloc(int test_type);
|
||||
static __device__ void deviceWrite(int myId, int *devmem);
|
||||
static __device__ void deviceFree(int *outputBuf, int *devmem,
|
||||
int test_type, int myId);
|
||||
|
||||
/**
|
||||
* Allocation base and derived class to test dynamic allocation.
|
||||
*/
|
||||
class baseAlloc{
|
||||
public:
|
||||
virtual __device__ int* alloc(size_t size) = 0;
|
||||
virtual __device__ void free(int* ptr) = 0;
|
||||
};
|
||||
|
||||
class derivedAlloc: public baseAlloc{
|
||||
public:
|
||||
virtual __device__ int* alloc(size_t size) {
|
||||
return new int[size];
|
||||
}
|
||||
virtual __device__ void free(int* ptr) {
|
||||
delete ptr;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Allocation Structure to test dynamic allocation.
|
||||
*/
|
||||
struct deviceAllocFunc{
|
||||
int* (*alloc)(int);
|
||||
void (*write)(int, int*);
|
||||
void (*free)(int*, int*, int, int);
|
||||
};
|
||||
|
||||
/**
|
||||
* Simple Structure to test dynamic allocation.
|
||||
*/
|
||||
struct simpleStruct{
|
||||
int32_t i;
|
||||
double d;
|
||||
float f;
|
||||
int16_t s;
|
||||
char c;
|
||||
int32_t iarr[INTERNAL_BUFFER_SIZE];
|
||||
bool operator!=(const struct simpleStruct &inpStr) {
|
||||
if ((i != inpStr.i) || (d != inpStr.d) ||
|
||||
(f != inpStr.f) || (s != inpStr.s) || (c != inpStr.c)) {
|
||||
return true;
|
||||
}
|
||||
for (int32_t idx = 0; idx < INTERNAL_BUFFER_SIZE; idx++) {
|
||||
if (iarr[idx] != inpStr.iarr[idx]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Simple Structure containing thread information
|
||||
*/
|
||||
struct threadInfo{
|
||||
int threadid;
|
||||
int blockid;
|
||||
int32_t ival;
|
||||
double dval;
|
||||
float fval;
|
||||
int16_t sval;
|
||||
char cval;
|
||||
};
|
||||
|
||||
/**
|
||||
* C/C++ Union
|
||||
*/
|
||||
union testInfoUnion{
|
||||
int32_t ival;
|
||||
double dval;
|
||||
float fval;
|
||||
int16_t sval;
|
||||
char cval;
|
||||
};
|
||||
|
||||
/**
|
||||
* Complex (nested) Structure to test dynamic allocation using malloc.
|
||||
*/
|
||||
struct complexStructure{
|
||||
struct threadInfo *sthreadInfo;
|
||||
__device__ void alloc_internal_members(int test_type, size_t size) {
|
||||
sthreadInfo = nullptr;
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
sthreadInfo = reinterpret_cast<struct threadInfo*>(
|
||||
malloc(size*sizeof(struct threadInfo)));
|
||||
} else {
|
||||
sthreadInfo = new struct threadInfo[size];
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void free_internal_members(int test_type) {
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(sthreadInfo);
|
||||
} else {
|
||||
delete[] sthreadInfo;
|
||||
}
|
||||
sthreadInfo = nullptr;
|
||||
}
|
||||
};
|
||||
Diff onderdrukt omdat het te groot bestand
Laad Diff
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "./defs.h"
|
||||
|
||||
/**
|
||||
* This kernel allocates memory in thread 0.
|
||||
*/
|
||||
extern "C" __global__ void ker_Alloc_MultCodeObj(int **dev_mem,
|
||||
int test_type) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate memory in thread 0 of block 0
|
||||
if (0 == myId) {
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
*dev_mem =
|
||||
reinterpret_cast<int*> (malloc(blockDim.x*gridDim.x*sizeof(int)));
|
||||
} else {
|
||||
*dev_mem =
|
||||
reinterpret_cast<int*> (new int[blockDim.x*gridDim.x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "./defs.h"
|
||||
/**
|
||||
* This kernel allocates and deallocates memory in every thread.
|
||||
*/
|
||||
extern "C" __global__ void ker_TestDynamicAllocInAllThreads_CodeObj(
|
||||
int *outputBuf, int test_type, int value,
|
||||
size_t perThreadSize) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Allocate
|
||||
size_t size = 0;
|
||||
int* ptr = nullptr;
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
size = perThreadSize * sizeof(int);
|
||||
ptr = reinterpret_cast<int*> (malloc(size));
|
||||
} else {
|
||||
size = perThreadSize;
|
||||
ptr = new int[perThreadSize];
|
||||
}
|
||||
if (ptr == nullptr) {
|
||||
printf("Device Allocation in thread %d Failed! \n", myId);
|
||||
return;
|
||||
}
|
||||
// Set memory
|
||||
for (size_t idx = 0; idx < perThreadSize; idx++) {
|
||||
ptr[idx] = value;
|
||||
}
|
||||
// Copy to output buffer
|
||||
for (size_t idx = 0; idx < perThreadSize; idx++) {
|
||||
outputBuf[myId*perThreadSize + idx] = ptr[idx];
|
||||
}
|
||||
// Free memory
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(ptr);
|
||||
} else {
|
||||
delete[] ptr;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "./defs.h"
|
||||
|
||||
/**
|
||||
* This kernel copies the contents of memory allocated in
|
||||
* ker_Alloc_MultCodeObj<<<>>> to host and deletes the memory
|
||||
* from thread 0.
|
||||
*/
|
||||
extern "C" __global__ void ker_Free_MultCodeObj(int *outputBuf,
|
||||
int **dev_mem, int test_type) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Check allocated memory in all threads in block before access
|
||||
if (*dev_mem == nullptr) {
|
||||
printf("Device Allocation Failed in thread = %d \n", myId);
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 == myId) {
|
||||
for (size_t idx = 0; idx < (blockDim.x*gridDim.x); idx++) {
|
||||
outputBuf[idx] = (*dev_mem)[idx];
|
||||
}
|
||||
if (test_type == TEST_MALLOC_FREE) {
|
||||
free(*dev_mem);
|
||||
} else {
|
||||
delete[] (*dev_mem);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "./defs.h"
|
||||
|
||||
/**
|
||||
* This kernel writes to memory allocated in ker_Alloc_MultCodeObj<<<>>>.
|
||||
*/
|
||||
extern "C" __global__ void ker_Write_MultCodeObj(int **dev_mem,
|
||||
int value) {
|
||||
int myId = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
// Check allocated memory in all threads in block before access
|
||||
if (*dev_mem == nullptr) {
|
||||
printf("Device Allocation Failed in thread = %d \n", myId);
|
||||
return;
|
||||
}
|
||||
// Copy to buffer
|
||||
(*dev_mem)[myId] = value;
|
||||
}
|
||||
Verwijs in nieuw issue
Block a user