SWDEV-294643 - [catch2][dtest] Migration of Malloc related files to Catch2 (#2314)

Migrated malloc related files under memory folder into catch2 framework

Change-Id: I5aa07fc8148bdf6bef135947091aaf1d3c54663b

[ROCm/hip-tests commit: 4287af6a2f]
This commit is contained in:
dkrottap
2021-08-19 10:29:07 +05:30
committed by GitHub
parent 1edf1934d5
commit c088dceed2
16 changed files with 1551 additions and 84 deletions
@@ -9,6 +9,7 @@ set(LINUX_TEST_SRC
hipGetDeviceCountMproc.cc
hipGetDevicePropertiesMproc.cc
hipSetGetDeviceMproc.cc
hipIpcMemAccessTest.cc
)
if(UNIX)
@@ -0,0 +1,156 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the hipIpcMemAccess APIs by creating memory handle
in parent process and access it in child process.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#ifdef __linux__
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <semaphore.h>
#include <unistd.h>
typedef struct mem_handle {
int device;
hipIpcMemHandle_t memHandle;
bool IfTestPassed;
} hip_ipc_t;
// This testcase verifies the hipIpcMemAccess APIs as follows
// The following program spawns a child process and does the following
// Parent iterate through each device, create memory -- create hipIpcMemhandle
// stores the mem handle in mmaped memory, release the child using sem_post()
// and wait for child to release itself(parent process)
// child process:
// Child process get the ipc mem handle using hipIpcOpenMemHandle
// Iterate through all the available gpus and do Device to Device copies
// and check for data consistencies and close the hipIpcCloseMemHandle
// release the parent and wait for parent to release itself(child)
TEST_CASE("Unit_hipIpcMemAccess_Semaphores") {
hip_ipc_t *shrd_mem = NULL;
pid_t pid;
size_t N = 1024;
size_t Nbytes = N * sizeof(int);
int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
int *A_h{nullptr}, *C_h{nullptr};
sem_t *sem_ob1{nullptr}, *sem_ob2{nullptr};
int Num_devices = 0, CanAccessPeer = 0;
std::string cmd_line = "rm -rf /dev/shm/sem.my-sem-object*";
int res = system(cmd_line.c_str());
REQUIRE(res != -1);
sem_ob1 = sem_open("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0);
sem_ob2 = sem_open("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0);
REQUIRE(sem_ob1 != SEM_FAILED);
REQUIRE(sem_ob2 != SEM_FAILED);
shrd_mem = reinterpret_cast<hip_ipc_t *>(mmap(NULL, sizeof(hip_ipc_t),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS,
0, 0));
REQUIRE(shrd_mem != NULL);
shrd_mem->IfTestPassed = true;
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
&A_h, nullptr, &C_h, N, false);
pid = fork();
if (pid != 0) {
// Parent process
HIP_CHECK(hipGetDeviceCount(&Num_devices));
for (int i = 0; i < Num_devices; ++i) {
if (shrd_mem->IfTestPassed == true) {
HIP_CHECK(hipSetDevice(i));
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipIpcGetMemHandle(reinterpret_cast<hipIpcMemHandle_t *>
(&shrd_mem->memHandle),
A_d));
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
shrd_mem->device = i;
if ((sem_post(sem_ob1)) == -1) {
// Need to use inline function to release resources.
shrd_mem->IfTestPassed = false;
WARN("sem_post() call failed in parent process.");
}
if ((sem_wait(sem_ob2)) == -1) {
shrd_mem->IfTestPassed = false;
WARN("sem_wait() call failed in parent process.");
}
HIP_CHECK(hipFree(A_d));
}
}
} else {
// Child process
HIP_CHECK(hipGetDeviceCount(&Num_devices));
for (int j = 0; j < Num_devices; ++j) {
HIP_CHECK(hipSetDevice(j));
if ((sem_wait(sem_ob1)) == -1) {
shrd_mem->IfTestPassed = false;
WARN("sem_wait() call failed in child process.");
if ((sem_post(sem_ob2)) == -1) {
shrd_mem->IfTestPassed = false;
WARN("sem_post() call on sem_ob2 failed");
exit(1);
}
}
for (int i = 0; i < Num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&B_d),
shrd_mem->memHandle,
hipIpcMemLazyEnablePeerAccess));
HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device));
if (CanAccessPeer == 1) {
HIP_CHECK(hipMemcpy(C_d, B_d, Nbytes, hipMemcpyDeviceToDevice));
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkTest<int>(A_h, C_h, N);
memset(reinterpret_cast<void*>(C_h), 0, Nbytes);
// Checking if the data obtained from Ipc shared memory is consistent
HIP_CHECK(hipMemcpy(C_h, B_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkTest<int>(A_h, C_h, N);
}
HIP_CHECK(hipIpcCloseMemHandle(reinterpret_cast<void*>(B_d)));
HIP_CHECK(hipFree(C_d));
}
if ((sem_post(sem_ob2)) == -1) {
shrd_mem->IfTestPassed = false;
WARN("sem_post() call on sem_ob2 failed");
exit(1);
}
}
exit(0);
}
if ((sem_unlink("/my-sem-object1")) == -1) {
WARN("sem_unlink() call on /my-sem-object1 failed");
}
if ((sem_unlink("/my-sem-object2")) == -1) {
WARN("sem_unlink() call on /my-sem-object2 failed");
}
int rFlag = 0;
waitpid(pid, &rFlag, 0);
REQUIRE(shrd_mem->IfTestPassed == true);
}
#endif
@@ -1,4 +1,5 @@
# Common Tests - Test independent of all platforms
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
memset.cc
malloc.cc
@@ -24,8 +25,49 @@ set(TEST_SRC
hipMemcpyAllApiNegative.cc
hipMemcpyWithStreamMultiThread.cc
hipMemcpy_MultiThread.cc
hipHostMalloc.cc
hipHostRegister.cc
hipMemPtrGetInfo.cc
hipPointerGetAttributes.cc
hipHostGetFlags.cc
hipMemoryAllocateCoherent.cc
hipMallocManaged_MultiScenario.cc
hipManagedKeyword.cc
)
else()
set(TEST_SRC
memset.cc
malloc.cc
hipMemcpy2DToArray.cc
hipMemcpy2DToArrayAsync.cc
hipMemcpyPeer.cc
hipMemcpyPeerAsync.cc
hipMemcpy3D.cc
hipMemcpy3DAsync.cc
hipMemcpyParam2D.cc
hipMemcpyParam2DAsync.cc
hipMemcpy2D.cc
hipMemcpy2DAsync.cc
hipMemcpy2DFromArray.cc
hipMemcpy2DFromArrayAsync.cc
hipMemcpyAtoH.cc
hipMemcpyHtoA.cc
hipMemcpyDtoD.cc
hipMemcpyDtoDAsync.cc
hipMemcpyAsync.cc
hipMemcpy.cc
hipMemcpyWithStream.cc
hipMemcpyAllApiNegative.cc
hipMemcpyWithStreamMultiThread.cc
hipMemcpy_MultiThread.cc
hipHostMalloc.cc
hipHostRegister.cc
hipHostGetFlags.cc
hipMemoryAllocateCoherent.cc
hipMallocManaged_MultiScenario.cc
hipManagedKeyword.cc
)
endif()
# Create shared lib of all tests
add_library(MemoryTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
@@ -0,0 +1,94 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the basic scenario of hipHostGetFlags API
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
static constexpr auto LEN{1024*1024};
/*
This testcase verifies hipHostGetFlags API basic scenario
1. Allocates the memory using different flags
2. Gets the flags of the respective variable using
hipHostGetFlags API
3. Validates it with the initial flags used while allocating
memory
*/
TEMPLATE_TEST_CASE("Unit_hipHostGetFlags_Basic", "", int,
float, double) {
constexpr auto SIZE{LEN * sizeof(TestType)};
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr};
TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
unsigned int FlagA, FlagB, FlagC;
FlagA = hipHostMallocWriteCombined | hipHostMallocMapped;
FlagB = hipHostMallocWriteCombined | hipHostMallocMapped;
FlagC = hipHostMallocMapped;
hipDeviceProp_t prop;
int device;
HIP_CHECK(hipGetDevice(&device));
HIP_CHECK(hipGetDeviceProperties(&prop, device));
if (prop.canMapHostMemory != 1) {
SUCCEED("Device Property canMapHostMemory is not set");
} else {
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_h), SIZE,
hipHostMallocWriteCombined | hipHostMallocMapped));
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_h), SIZE,
hipHostMallocWriteCombined | hipHostMallocMapped));
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C_h), SIZE,
hipHostMallocMapped));
unsigned int flagA, flagB, flagC;
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&A_d), A_h, 0));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&C_d), C_h, 0));
HIP_CHECK(hipHostGetFlags(&flagA, A_h));
HIP_CHECK(hipHostGetFlags(&flagB, B_h));
HIP_CHECK(hipHostGetFlags(&flagC, C_h));
HipTest::setDefaultData<TestType>(LEN, A_h, B_h, C_h);
dim3 dimGrid(LEN / 512, 1, 1);
dim3 dimBlock(512, 1, 1);
hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock,
0, 0, static_cast<const TestType*>(A_d),
static_cast<const TestType*>(B_d), C_d, LEN);
HIP_CHECK(hipMemcpy(C_h, C_d, SIZE, hipMemcpyDeviceToHost));
// Note this really HostToHost not
// DeviceToHost, since memory is mapped...
HipTest::checkVectorADD(A_h, B_h, C_h, LEN);
REQUIRE(flagA == FlagA);
REQUIRE(flagB == FlagB);
REQUIRE(flagC == FlagC);
HIP_CHECK(hipHostFree(A_h));
HIP_CHECK(hipHostFree(B_h));
HIP_CHECK(hipHostFree(C_h));
}
}
@@ -0,0 +1,236 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipHostMalloc API
1. Basic scenario of hipHostMalloc API
2. Negative Scenarios of hipHostMalloc API
3. Allocating memory using hipHostMalloc with Coherent flag
4. Allocating memory using hipHostMalloc with NonCoherent flag
5. Allocating memory using hipHostMalloc with default flag
*/
#include<hip_test_checkers.hh>
#include<hip_test_kernels.hh>
#include<hip_test_common.hh>
#define SYNC_EVENT 0
#define SYNC_STREAM 1
#define SYNC_DEVICE 2
std::vector<std::string> syncMsg = {"event", "stream", "device"};
static constexpr int numElements{1024 * 16};
static constexpr size_t sizeBytes{numElements * sizeof(int)};
__global__ void Set(int* Ad, int val) {
int tx = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tx] = val;
}
void CheckHostPointer(int numElements, int* ptr, unsigned eventFlags,
int syncMethod, std::string msg) {
std::cerr << "test: CheckHostPointer "
<< msg
<< " eventFlags = " << std::hex << eventFlags
<< ((eventFlags & hipEventReleaseToDevice) ?
" hipEventReleaseToDevice" : "")
<< ((eventFlags & hipEventReleaseToSystem) ?
" hipEventReleaseToSystem" : "")
<< " ptr=" << ptr << " syncMethod="
<< syncMsg[syncMethod] << "\n";
hipStream_t s;
hipEvent_t e;
// Init:
HIP_CHECK(hipStreamCreate(&s));
HIP_CHECK(hipEventCreateWithFlags(&e, eventFlags))
dim3 dimBlock(64, 1, 1);
dim3 dimGrid(numElements / dimBlock.x, 1, 1);
const int expected = 13;
// Init array to know state:
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, ptr, -42);
HIP_CHECK(hipDeviceSynchronize());
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, ptr, expected);
HIP_CHECK(hipEventRecord(e, s));
// Host waits for event :
switch (syncMethod) {
case SYNC_EVENT:
HIP_CHECK(hipEventSynchronize(e));
break;
case SYNC_STREAM:
HIP_CHECK(hipStreamSynchronize(s));
break;
case SYNC_DEVICE:
HIP_CHECK(hipDeviceSynchronize());
break;
default:
assert(0);
}
for (int i = 0; i < numElements; i++) {
if (ptr[i] != expected) {
printf("mismatch at %d: %d != %d\n", i, ptr[i], expected);
REQUIRE(ptr[i] == expected);
}
}
HIP_CHECK(hipStreamDestroy(s));
HIP_CHECK(hipEventDestroy(e));
}
/*
This testcase performs the basic scenario of hipHostMalloc API
Allocates the memory using hipHostMalloc API
Launches the kernel and performs vector addition.
validates thes result.
*/
TEST_CASE("Unit_hipHostMalloc_Basic") {
static constexpr auto LEN{1024 * 1024};
static constexpr auto SIZE{LEN * sizeof(float)};
hipDeviceProp_t prop;
int device;
HIP_CHECK(hipGetDevice(&device));
HIP_CHECK(hipGetDeviceProperties(&prop, device));
if (prop.canMapHostMemory != 1) {
SUCCEED("Does support HostPinned Memory");
} else {
float *A_h, *B_h, *C_h;
float *A_d, *B_d, *C_d;
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_h), SIZE,
hipHostMallocWriteCombined | hipHostMallocMapped));
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_h), SIZE,
hipHostMallocDefault));
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C_h), SIZE,
hipHostMallocMapped));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&A_d), A_h, 0));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&C_d), C_h, 0));
HipTest::setDefaultData<float>(LEN, A_h, B_h, C_h);
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), SIZE));
HIP_CHECK(hipMemcpy(B_d, B_h, SIZE, hipMemcpyHostToDevice));
dim3 dimGrid(LEN / 512, 1, 1);
dim3 dimBlock(512, 1, 1);
hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock,
0, 0, static_cast<const float*>(A_d),
static_cast<const float*>(B_d), C_d, LEN);
HIP_CHECK(hipMemcpy(C_h, C_d, LEN*sizeof(float),
hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkVectorADD<float>(A_h, B_h, C_h, numElements);
HIP_CHECK(hipHostFree(A_h));
HIP_CHECK(hipHostFree(B_h));
HIP_CHECK(hipHostFree(C_h));
}
}
/*
This testcase verifies the hipHostMalloc API by passing nullptr
to the pointer variable
*/
TEST_CASE("Unit_hipHostMalloc_Negative") {
#if HT_AMD
{
// Stimulate error condition:
int* A = nullptr;
REQUIRE(hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes,
hipHostMallocCoherent | hipHostMallocNonCoherent)
!= hipSuccess);
REQUIRE(A == nullptr);
}
#endif
}
/*
This testcase verifies the hipHostMalloc API by
1.Allocating memory using noncoherent flag
2. Launches the kernel and modifies the variable
using different synchronization
techniquies
3. validates the result.
*/
TEST_CASE("Unit_hipHostMalloc_NonCoherent") {
int* A = nullptr;
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A),
sizeBytes, hipHostMallocNonCoherent));
const char* ptrType = "non-coherent";
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_EVENT, ptrType);
}
/*
This testcase verifies the hipHostMalloc API by
1.Allocating memory using coherent flag
2. Launches the kernel and modifies the variable
using different synchronization
techniquies
3. validates the result.
*/
TEST_CASE("Unit_hipHostMalloc_Coherent") {
int* A = nullptr;
if (hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes,
hipHostMallocCoherent) == hipSuccess) {
const char* ptrType = "coherent";
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
SYNC_EVENT, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
SYNC_EVENT, ptrType);
} else {
SUCCEED("Coherence memory allocation failed. Is SVM atomic supported?");
}
}
/*
This testcase verifies the hipHostMalloc API by
1.Allocating memory using default flag
2. Launches the kernel and modifies the variable
using different synchronization
techniquies
3. validates the result.
*/
TEST_CASE("Unit_hipHostMalloc_Default") {
int* A = nullptr;
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes));
const char* ptrType = "default";
CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType);
}
@@ -0,0 +1,160 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the following scenarios of hipHostRegister API
1. Referencing the hipHostRegister variable from kernel and performing
memset on that variable.This is verified for different datatypes.
2. hipHostRegister and perform hipMemcpy on it.
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#include <hip_test_kernels.hh>
#define OFFSET 128
static constexpr auto LEN{1024*1024};
template<typename T>
__global__ void Inc(T* Ad) {
int tx = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tx] = Ad[tx] + static_cast<T>(1);
}
template <typename T>
void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd,
bool internalRegister) {
constexpr auto memsetval = 13.0f;
A = A + offset;
numElements -= offset;
size_t sizeBytes = numElements * sizeof(T);
if (internalRegister) {
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
}
// Reset
for (size_t i = 0; i < numElements; i++) {
A[i] = static_cast<float>(i);
Bh[i] = 0.0f;
}
HIP_CHECK(hipMemset(Bd, memsetval, sizeBytes));
HIP_CHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost));
// Make sure the copy worked
for (size_t i = 0; i < numElements; i++) {
REQUIRE(Bh[i] == A[i]);
}
if (internalRegister) {
HIP_CHECK(hipHostUnregister(A));
}
}
/*
This testcase verifies the hipHostRegister API by
1. Allocating the memory using malloc
2. hipHostRegister that variable
3. Getting the corresponding device pointer of the registered varible
4. Launching kernel and access the device pointer variable
5. performing hipMemset on the device pointer variable
*/
TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset",
"", int,
float, double) {
size_t sizeBytes{LEN * sizeof(TestType)};
TestType *A, **Ad;
int num_devices;
HIP_CHECK(hipGetDeviceCount(&num_devices));
Ad = new TestType*[num_devices];
A = reinterpret_cast<TestType*>(malloc(sizeBytes));
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
for (int i = 0; i < LEN; i++) {
A[i] = static_cast<TestType>(1);
}
for (int i = 0; i < num_devices; i++) {
HIP_CHECK(hipSetDevice(i));
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&Ad[i]), A, 0));
}
// Reference the registered device pointer Ad from inside the kernel:
for (int i = 0; i < num_devices; i++) {
HIP_CHECK(hipSetDevice(i));
hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]);
HIP_CHECK(hipDeviceSynchronize());
}
REQUIRE(A[10] == 1 + static_cast<TestType>(num_devices));
// Reference the registered device pointer Ad in hipMemset:
for (int i = 0; i < num_devices; i++) {
HIP_CHECK(hipSetDevice(i));
HIP_CHECK(hipMemset(Ad[i], 0, sizeBytes));
}
REQUIRE(A[10] == 0);
HIP_CHECK(hipHostUnregister(A));
free(A);
delete [] Ad;
}
/*
This testcase verifies hipHostRegister API by
performing memcpy on the hipHostRegistered variable.
*/
TEMPLATE_TEST_CASE("Unit_hipHostRegister_Memcpy", "",
int, float, double) {
// 1 refers to hipHostRegister
// 0 refers to malloc
auto mem_type = GENERATE(0, 1);
HIP_CHECK(hipSetDevice(0));
size_t sizeBytes = LEN * sizeof(TestType);
TestType* A = reinterpret_cast<TestType*>(malloc(sizeBytes));
// Copy to B, this should be optimal pinned malloc copy:
// Note we are using the host pointer here:
TestType *Bh, *Bd;
Bh = reinterpret_cast<TestType*>(malloc(sizeBytes));
HIP_CHECK(hipMalloc(&Bd, sizeBytes));
REQUIRE(LEN > OFFSET);
if (mem_type) {
for (size_t i = 0; i < OFFSET; i++) {
doMemCopy<TestType>(LEN, i, A, Bh, Bd, true /*internalRegister*/);
}
} else {
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
for (size_t i = 0; i < OFFSET; i++) {
doMemCopy<TestType>(LEN, i, A, Bh, Bd, false /*internalRegister*/);
}
HIP_CHECK(hipHostUnregister(A));
}
free(A);
free(Bh);
hipFree(Bd);
}
@@ -0,0 +1,323 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the hipMallocManaged API in the following scenarios
1. MultiChunkSingleDevice Scenario
2. MultiChunkMultiDevice Scenario
3. Negative Scenarios
4. OverSubscription scenario
5. Device context change
6. Multiple Pointers
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
#include <atomic>
const size_t MAX_GPU{256};
static size_t N{4*1024*1024};
#define INIT_VAL 123
/*
* Kernel function to perform addition operation.
*/
template <typename T>
__global__ void
vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) {
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x;
for (size_t i = offset; i < NUM_ELMTS; i += stride) {
Ad2[i] = Ad1[i] + Ad1[i];
}
}
// The following Test case tests the following scenario:
// A large chunk of hipMallocManaged() memory(Hmm) is created
// Equal parts of Hmm is accessed and
// kernel is launched on acessed chunk of hmm memory
// and checks if there are any inconsistencies or access issues
TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") {
std::atomic<int> DataMismatch{0};
constexpr int Chunks = 4;
int Counter = 0;
int NUM_ELMS = (1024 * 1024);
float *Ad[Chunks], *Hmm = nullptr, *Ah = new float[NUM_ELMS];
hipStream_t stream[Chunks];
for (int i = 0; i < Chunks; ++i) {
HIP_CHECK(hipMalloc(&Ad[i], NUM_ELMS * sizeof(float)));
HIP_CHECK(hipMemset(Ad[i], 0, NUM_ELMS * sizeof(float)));
HIP_CHECK(hipStreamCreate(&stream[i]));
}
HIP_CHECK(hipMallocManaged(&Hmm, (Chunks * NUM_ELMS * sizeof(float))));
for (int i = 0; i < Chunks; ++i) {
for (; Counter < ((i + 1) * NUM_ELMS); ++Counter) {
Hmm[Counter] = (INIT_VAL + i);
}
}
const unsigned threadsPerBlock = 256;
const unsigned blocks = (NUM_ELMS + 255)/256;
for (int k = 0; k < Chunks; ++k) {
vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[k]>>>
(&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS);
}
HIP_CHECK(hipDeviceSynchronize());
for (int m = 0; m < Chunks; ++m) {
HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
hipMemcpyDeviceToHost));
for (int n = 0; n < NUM_ELMS; ++n) {
if (Ah[n] != ((INIT_VAL + m) * 2)) {
DataMismatch++;
}
}
}
REQUIRE(DataMismatch.load() == 0);
for (int i = 0; i < Chunks; ++i) {
HIP_CHECK(hipFree(Ad[i]));
HIP_CHECK(hipStreamDestroy(stream[i]));
}
HIP_CHECK(hipFree(Hmm));
delete [] Ah;
}
// The following Test case tests the following scenario:
// A large chunk of hipMallocManaged() memory(Hmm) is created
// Equal parts of Hmm is accessed on available gpus and
// kernel is launched on acessed chunk of hmm memory
// and checks if there are any inconsistencies or access issues
TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
std::atomic<int> DataMismatch{0};
int Counter = 0;
int NumDevices = 0;
HIP_CHECK(hipGetDeviceCount(&NumDevices));
unsigned int NUM_ELMS = (1024 * 1024);
float *Ad[MAX_GPU], *Hmm = NULL, *Ah = new float[NUM_ELMS];
hipStream_t stream[MAX_GPU];
for (int Oloop = 0; Oloop < NumDevices; ++Oloop) {
HIP_CHECK(hipSetDevice(Oloop));
HIP_CHECK(hipMalloc(&Ad[Oloop], NUM_ELMS * sizeof(float)));
HIP_CHECK(hipMemset(Ad[Oloop], 0, NUM_ELMS * sizeof(float)));
HIP_CHECK(hipStreamCreate(&stream[Oloop]));
}
HIP_CHECK(hipMallocManaged(&Hmm, (NumDevices * NUM_ELMS * sizeof(float))));
for (int i = 0; i < NumDevices; ++i) {
for (; Counter < static_cast<int>((i + 1) * NUM_ELMS); ++Counter) {
Hmm[Counter] = INIT_VAL + i;
}
}
const unsigned threadsPerBlock = 256;
const unsigned blocks = (NUM_ELMS + 255)/256;
for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) {
HIP_CHECK(hipSetDevice(Klaunch));
vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[Klaunch]>>>
(&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS);
}
HIP_CHECK(hipDeviceSynchronize());
for (int m = 0; m < NumDevices; ++m) {
HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
hipMemcpyDeviceToHost));
for (size_t n = 0; n < NUM_ELMS; ++n) {
if (Ah[n] != ((INIT_VAL + m) * 2)) {
DataMismatch++;
}
}
memset(reinterpret_cast<void*>(Ah), 0, NUM_ELMS * sizeof(float));
}
REQUIRE(DataMismatch.load() == 0);
for (int i = 0; i < NumDevices; ++i) {
HIP_CHECK(hipFree(Ad[i]));
HIP_CHECK(hipStreamDestroy(stream[i]));
}
HIP_CHECK(hipFree(Hmm));
delete [] Ah;
}
// The following tests oversubscription hipMallocManaged() api
// Currently disabled.
TEST_CASE("Unit_hipMallocManaged_OverSubscription") {
void *A = nullptr;
size_t total = 0, free = 0;
HIP_CHECK(hipMemGetInfo(&free, &total));
// ToDo: In case of HMM, memory over-subscription is allowed. Hence, relook
// into how out of memory can be tested.
// Demanding more mem size than available
#if HT_AMD
REQUIRE(hipMallocManaged(&A, (free +1), hipMemAttachGlobal) != hipSuccess);
#endif
}
// The following test does negative testing of hipMallocManaged() api
// by passing invalid values and check if the behavior is as expected
TEST_CASE("Unit_hipMallocManaged_Negative") {
void *A;
size_t total = 0, free = 0;
HIP_CHECK(hipMemGetInfo(&free, &total));
SECTION("Nullptr to devPtr") {
REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachGlobal) != hipSuccess);
}
// cuda api doc says : If size is 0, cudaMallocManaged returns
// cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns
// success and contradicts with api doc.
// With size(0), api expected to return error code (or)
// reset ptr while returning success (to accommodate cuda 11.2 api behavior).
SECTION("size 0 with flag hipMemAttachGlobal") {
#if HT_AMD
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachGlobal) != hipSuccess);
#else
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
#endif
}
SECTION("devptr is nullptr with flag hipMemAttachHost") {
REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachHost) != hipSuccess);
}
// cuda api doc says : If size is 0, cudaMallocManaged returns
// cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns
// success and contradicts with api doc.
// With size(0), api expected to return error code (or)
// reset ptr while returning success (to accommodate cuda 11.2 api behavior).
SECTION("size 0 with flag hipMemAttachHost") {
#if HT_AMD
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) != hipSuccess);
#else
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
#endif
}
SECTION("nullptr to devptr, size 0 and flag 0") {
REQUIRE(hipMallocManaged(NULL, 0, 0) != hipSuccess);
}
SECTION("Numeric value to flag parameter") {
REQUIRE(hipMallocManaged(&A, 1024, 145) != hipSuccess);
}
SECTION("Negative value to size") {
REQUIRE(hipMallocManaged(&A, -10, hipMemAttachGlobal));
}
}
// Allocate two pointers using hipMallocManaged(), initialize,
// then launch kernel using these pointers directly and
// later validate the content without using any Memcpy.
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "",
int, float, double) {
int NumDevices = 0;
HIP_CHECK(hipGetDeviceCount(&NumDevices));
TestType *Hmm1 = nullptr, *Hmm2 = nullptr;
for (int i = 0; i < NumDevices; ++i) {
HIP_CHECK(hipSetDevice(i));
std::atomic<int> DataMismatch{0};
HIP_CHECK(hipMallocManaged(&Hmm1, N * sizeof(TestType)));
HIP_CHECK(hipMallocManaged(&Hmm2, N * sizeof(TestType)));
for (size_t m = 0; m < N; ++m) {
Hmm1[m] = m;
Hmm2[m] = 0;
}
const unsigned threadsPerBlock = 256;
const unsigned blocks = (N + 255)/256;
// Kernel launch
vector_sum <<<blocks, threadsPerBlock>>> (Hmm1, Hmm2, N);
HIP_CHECK(hipDeviceSynchronize());
for (size_t v = 0; v < N; ++v) {
if (Hmm2[v] != static_cast<TestType>(v + v)) {
DataMismatch++;
}
}
REQUIRE(DataMismatch.load() == 0);
HIP_CHECK(hipFree(Hmm1));
HIP_CHECK(hipFree(Hmm2));
}
}
// In the following test, a memory is created using hipMallocManaged() by
// setting a device and verified if it is accessible when the context is set
// to all other devices. This include verification and Device two Device
// transfers and kernel launch o discover if there any access issues.
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
unsigned char, int, float, double) {
std::atomic<unsigned int> DataMismatch;
TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr,
*Hmm = nullptr;
int NumDevices = 0;
HIP_CHECK(hipGetDeviceCount(&NumDevices));
for (size_t i =0; i < N; ++i) {
Ah1[i] = INIT_VAL;
Ah2[i] = 0;
}
for (int Oloop = 0; Oloop < NumDevices; ++Oloop) {
DataMismatch = 0;
HIP_CHECK(hipSetDevice(Oloop));
HIP_CHECK(hipMallocManaged(&Hmm, N * sizeof(TestType)));
for (int Iloop = 0; Iloop < NumDevices; ++Iloop) {
HIP_CHECK(hipSetDevice(Iloop));
HIP_CHECK(hipMalloc(&Ad, N * sizeof(TestType)));
// Copy data from host to hipMallocMananged memory and verify
HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType),
hipMemcpyHostToDevice));
for (size_t v = 0; v < N; ++v) {
if (Hmm[v] != INIT_VAL) {
DataMismatch++;
}
}
REQUIRE(DataMismatch.load() == 0);
// Executing D2D transfer with hipMallocManaged memory and verify
HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType),
hipMemcpyDeviceToDevice));
HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
hipMemcpyDeviceToHost));
for (size_t k = 0; k < N; ++k) {
if (Ah2[k] != INIT_VAL) {
DataMismatch++;
}
}
REQUIRE(DataMismatch.load() == 0);
HIP_CHECK(hipMemset(Ad, 0, N * sizeof(TestType)));
const unsigned threadsPerBlock = 256;
const unsigned blocks = (N + 255)/256;
// Launching the kernel to check if there is any access issue with
// hipMallocManaged memory and local device's memory
vector_sum <<<blocks, threadsPerBlock>>> (Hmm, Ad, N);
hipDeviceSynchronize();
HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
hipMemcpyDeviceToHost));
for (size_t m = 0; m < N; ++m) {
if (Ah2[m] != 246) {
DataMismatch++;
}
}
REQUIRE(DataMismatch.load() == 0);
HIP_CHECK(hipFree(Ad));
}
HIP_CHECK(hipFree(Hmm));
}
free(Ah1);
free(Ah2);
}
@@ -0,0 +1,75 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the hipManagedKeyword basic scenario
*/
#include <hip_test_common.hh>
#include <hip_test_checkers.hh>
#define N 1048576
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
__managed__ float B[N];
__managed__ int x = 0;
__global__ void add(const float *A, float *B) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < N; i += stride)
B[i] = A[i] + B[i];
}
__global__ void GPU_func() {
x++;
}
TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
for (int i = 0; i < N; i++) {
A[i] = 1.0f;
B[i] = 2.0f;
}
int blockSize = 256;
int numBlocks = (N + blockSize - 1) / blockSize;
dim3 dimGrid(numBlocks, 1, 1);
dim3 dimBlock(blockSize, 1, 1);
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast<const float*>(A),
static_cast<float*>(B));
hipDeviceSynchronize();
float maxError = 0.0f;
for (int i = 0; i < N; i++)
maxError = fmax(maxError, fabs(B[i]-3.0f));
REQUIRE(maxError == 0.0f);
}
TEST_CASE("Unit_hipManagedKeyword_MultiGpu") {
int numDevices = 0;
hipGetDeviceCount(&numDevices);
for (int i = 0; i < numDevices; i++) {
hipSetDevice(i);
GPU_func<<< 1, 1 >>>();
hipDeviceSynchronize();
}
REQUIRE(x == numDevices);
}
@@ -0,0 +1,54 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testfile verifies the basic scenario of hipMemPtrGetInfo API
*/
#include <hip_test_common.hh>
struct MemInfo{
float a;
int b;
void* c;
};
/*
This testcase verifies the basic scenario of
hipMemPtrGetInfo API
1. Allocates specific size of memory for the variables
2. Gets the allocated size of that variable using hipMemPtrGetInfo API
3. Validates the initial size and allocated size
*/
TEST_CASE("Unit_hipMemPtrGetInfo_Basic") {
int* iPtr;
float* fPtr;
MemInfo* sPtr;
size_t sSetSize = 1024, sGetSize;
HIP_CHECK(hipMalloc(&iPtr, sSetSize));
HIP_CHECK(hipMalloc(&fPtr, sSetSize));
HIP_CHECK(hipMalloc(&sPtr, sSetSize));
HIP_CHECK(hipMemPtrGetInfo(iPtr, &sGetSize));
REQUIRE(sGetSize == sSetSize);
HIP_CHECK(hipMemPtrGetInfo(fPtr, &sGetSize));
REQUIRE(sGetSize == sSetSize);
HIP_CHECK(hipMemPtrGetInfo(sPtr, &sGetSize));
REQUIRE(sGetSize == sSetSize);
}
@@ -512,23 +512,6 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_H2H-H2D-D2H-H2PinMem", "", int,
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_Ph,
B_Ph, nullptr, true);
}
/*
This testcase verfies the boundary checks of hipMemcpy API for different sizes
*/
TEST_CASE("Unit_hipMemcpy_BoundaryCheck") {
size_t maxElem = 32 * 1024 * 1024;
DeviceMemory<float> memD(maxElem);
HostMemory<float> memU(maxElem, 0 /*usePinnedHost*/);
HostMemory<float> memP(maxElem, 0 /*usePinnedHost*/);
memcpytest2<float>(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0);
auto sizes = GENERATE(15 * 1024 * 1024, 16 * 1024 * 1024,
16 * 1024 * 1024 + 16 * 1024,
16 * 1024 * 1024 + 512 * 1024,
17 * 1024 * 1024 + 1024,
32 * 1024 * 1024);
memcpytest2<float>(&memD, &memP, sizes, 0, 0, 0);
}
/*
This testcase verifies the multi thread scenario
*/
@@ -543,16 +526,6 @@ TEST_CASE("Unit_hipMemcpy_MultiThreadWithSerialization") {
multiThread_1<float>(true, false);
}
/*
This testcase verifies the device offsets
*/
TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) {
HIP_CHECK(hipDeviceReset());
size_t maxSize = 256 * 1024;
memcpytest2_offsets<TestType>(maxSize, true, false);
memcpytest2_offsets<TestType>(maxSize, false, true);
}
/*
This testcase verifies hipMemcpy API with pinnedMemory and hostRegister
along with kernel launches
@@ -71,13 +71,14 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
&pitch_E, width, NUM_H));
// Initalizing A_d with C_h
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
NUM_W*sizeof(TestType), NUM_H,
hipMemcpyHostToDevice));
HIP_CHECK(hipSetDevice(1));
hipStream_t stream;
hipStreamCreate(&stream);
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
NUM_W*sizeof(TestType), NUM_H,
hipMemcpyHostToDevice, stream));
HIP_CHECK(hipStreamSynchronize(stream));
// Device to Device
hip_Memcpy2D desc = {};
#ifdef __HIP_PLATFORM_NVCC__
@@ -33,7 +33,6 @@ This testfile verifies the following scenarios
7. hipMemcpyWithStream API with testkind TestkindDefault
8. hipMemcpyWithStream API with testkind TestkindDefaultForDtoD
9. hipMemcpyWithStream API DtoD on same device
10.Multi threaded scenario
*/
@@ -540,55 +539,6 @@ void TestkindHtoH(void) {
HIP_CHECK(hipStreamDestroy(stream));
}
TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") {
size_t thread_count = 10;
std::vector<joinable_thread> threads;
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
SUCCEED("deviceCount < 2");
} else {
for (int op = static_cast<int>(ops::TestwithOnestream);
op < static_cast<int>(ops::END_OF_LIST); ++op) {
for (uint32_t i = 0; i < thread_count; i++) {
threads.emplace_back(std::thread{[&] {
switch ( op ) {
case static_cast<int>(ops::TestwithOnestream):
TestwithOnestream();
break;
case static_cast<int>(ops::TestwithTwoStream):
TestwithTwoStream();
break;
case static_cast<int>(ops::TestkindDtoH):
TestkindDtoH();
break;
case static_cast<int>(ops::TestkindHtoH):
TestkindHtoH();
break;
case static_cast<int>(ops::TestkindDtoD):
TestkindDtoD();
break;
case static_cast<int>(ops::TestOnMultiGPUwithOneStream):
TestOnMultiGPUwithOneStream();
break;
case static_cast<int>(ops::TestkindDefault):
TestkindDefault();
break;
#ifndef __HIP_PLATFORM_NVCC__
case static_cast<int>(ops::TestkindDefaultForDtoD):
TestkindDefaultForDtoD();
break;
#endif
case static_cast<int>(ops::TestDtoDonSameDevice):
TestDtoDonSameDevice();
break;
default:{}
}
}});
}
}
}
}
TEST_CASE("Unit_hipMemcpyWithStream_TestWithOneStream") {
TestwithOnestream();
@@ -610,8 +610,8 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(bool &val_res) {
HIPCHECK(hipStreamDestroy(stream));
}
TEST_CASE("Unit_hipMemcpyWithStream_NewMultiThread") {
const auto Threadcount{100};
TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") {
const auto Threadcount{10};
bool ret_val[Threadcount];
std::thread th[Threadcount];
for (int op = static_cast<int>(ops::TestwithOnestream);
@@ -27,7 +27,7 @@ This testfile verifies the following scenarios of all hipMemcpy API
1. Multi thread
*/
static constexpr auto NUM_ELM{1024};
static constexpr auto NUM_THREADS{10};
static constexpr auto NUM_THREADS{5};
static auto Available_Gpus{0};
static constexpr auto MAX_GPU{256};
@@ -0,0 +1,64 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
This testcase verifies the following scenario
1. Allocating the memory and modifying it coherently
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <hip_test_checkers.hh>
constexpr auto wait_sec = 5000;
__global__ void Kernel(float* hostRes, int clkRate) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
hostRes[tid] = tid + 1;
__threadfence_system();
// expecting that the data is getting flushed to host here!
uint64_t start = clock64()/clkRate, cur;
if (clkRate > 1) {
do { cur = clock64()/clkRate-start;}while (cur < wait_sec);
} else {
do { cur = clock64()/start;}while (cur < wait_sec);
}
}
TEST_CASE("Unit_hipHostMalloc_CoherentAccess") {
int blocks = 2;
float* hostRes;
hipHostMalloc(&hostRes, blocks * sizeof(float),
hipHostMallocMapped);
hostRes[0] = 0;
hostRes[1] = 0;
int clkRate;
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
std::cout << clkRate << std::endl;
hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks),
0, 0, hostRes, clkRate);
int eleCounter = 0;
while (eleCounter < blocks) {
// blocks until the value changes
while (hostRes[eleCounter] == 0) {printf("waiting for counter inc\n");}
eleCounter++;
}
hipHostFree(reinterpret_cast<void *>(hostRes));
}
@@ -0,0 +1,338 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/*
Following scenarios are verified for hipPointerGetAttributes API
1. Run through a couple simple cases to test lookups host pointer arithmetic
2. Allocates memory across all devices withing the specified size range
3. Allocates tiny memory across all devices
4. Multi-threaded test with many simul allocs.
*/
#include<hip_test_common.hh>
#include <vector>
#include <iostream>
#include <string>
size_t Nbytes = 0;
constexpr size_t N{1000000};
//=================================================================================================
// Utility Functions:
//=================================================================================================
bool operator==(const hipPointerAttribute_t& lhs,
const hipPointerAttribute_t& rhs) {
return ((lhs.hostPointer == rhs.hostPointer) &&
(lhs.devicePointer == rhs.devicePointer) &&
(lhs.memoryType == rhs.memoryType) && (lhs.device == rhs.device) &&
(lhs.allocationFlags == rhs.allocationFlags));
}
bool operator!=(const hipPointerAttribute_t& lhs,
const hipPointerAttribute_t& rhs) {
return !(lhs == rhs);
}
const char* memoryTypeToString(hipMemoryType memoryType) {
switch (memoryType) {
case hipMemoryTypeHost:
return "[Host]";
case hipMemoryTypeDevice:
return "[Device]";
default:
return "[Unknown]";
}
}
void resetAttribs(hipPointerAttribute_t* attribs) {
attribs->hostPointer = reinterpret_cast<void*>(-1);
attribs->devicePointer = reinterpret_cast<void*>(-1);
attribs->memoryType = hipMemoryTypeHost;
attribs->device = -2;
attribs->isManaged = -1;
attribs->allocationFlags = 0xffff;
}
void printAttribs(const hipPointerAttribute_t* attribs) {
printf(
"hostPointer:%p devicePointer:%p memType:%s deviceId:%d isManaged:%d "
"allocationFlags:%u\n",
attribs->hostPointer, attribs->devicePointer,
memoryTypeToString(attribs->memoryType),
attribs->device, attribs->isManaged, attribs->allocationFlags);
}
inline int zrand(int max) { return rand() % max; }
// Store the hipPointer attrib and some extra info
// so can later compare the looked-up info against
// the reference expectation
struct SuperPointerAttribute {
void* _pointer;
size_t _sizeBytes;
hipPointerAttribute_t _attrib;
};
// Support function to check result against a reference:
void checkPointer(const SuperPointerAttribute& ref, int major,
int minor, void* pointer) {
hipPointerAttribute_t attribs;
resetAttribs(&attribs);
hipError_t e = hipPointerGetAttributes(&attribs, pointer);
if ((e != hipSuccess) || (attribs != ref._attrib)) {
HIP_CHECK(e);
REQUIRE(attribs != ref._attrib);
} else {
printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer);
printAttribs(&attribs);
}
}
// Test that allocates memory across all devices withing the
// specified size range
// (minSize...maxSize). Then does lookups to make sure the
// info reported by the tracker matches
// expecations Then deallocates it all.
// Multiple threads can call this function and in fact
// we do this in the testMultiThreaded_1 test.
void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) {
Nbytes = N * sizeof(char);
printf("clusterAllocs numAllocs=%d size=%lu..%lu\n",
numAllocs, minSize, maxSize);
const int Max_Devices = 256;
std::vector<SuperPointerAttribute> reference(numAllocs);
REQUIRE(minSize > 0);
REQUIRE(maxSize >= minSize);
int numDevices;
HIP_CHECK(hipGetDeviceCount(&numDevices));
//---
// Populate with device and host allocations.
size_t totalDeviceAllocated[Max_Devices];
for (int i = 0; i < numDevices; i++) {
totalDeviceAllocated[i] = 0;
}
for (int i = 0; i < numAllocs; i++) {
unsigned rand_seed = time(NULL);
bool isDevice = rand_r(&rand_seed) & 0x1;
reference[i]._sizeBytes = zrand(maxSize - minSize) + minSize;
reference[i]._attrib.device = zrand(numDevices);
HIP_CHECK(hipSetDevice(reference[i]._attrib.device));
reference[i]._attrib.isManaged = 0;
void* ptr;
if (isDevice) {
totalDeviceAllocated[reference[i]._attrib.device] +=
reference[i]._sizeBytes;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr),
reference[i]._sizeBytes));
reference[i]._attrib.memoryType = hipMemoryTypeDevice;
reference[i]._attrib.devicePointer = ptr;
reference[i]._attrib.hostPointer = NULL;
reference[i]._attrib.allocationFlags = 0;
} else {
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr),
reference[i]._sizeBytes,
hipHostMallocDefault));
reference[i]._attrib.memoryType = hipMemoryTypeHost;
reference[i]._attrib.devicePointer = ptr;
reference[i]._attrib.hostPointer = ptr;
reference[i]._attrib.allocationFlags = 0;
}
reference[i]._pointer = ptr;
}
for (int i = 0; i < numDevices; i++) {
size_t free, total;
HIP_CHECK(hipSetDevice(i));
HIP_CHECK(hipMemGetInfo(&free, &total));
printf(
" device#%d: hipMemGetInfo: "
"free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu "
"(%4.2fMB)\n",
i, free, (free / 1024.0 / 1024.0), totalDeviceAllocated[i],
(totalDeviceAllocated[i]) / 1024.0 / 1024.0, total,
(total / 1024.0 / 1024.0));
REQUIRE(free + totalDeviceAllocated[i] <= total);
}
// Now look up each pointer we inserted and verify we can find it:
char * ptr;
for (int i = 0; i < numAllocs; i++) {
SuperPointerAttribute& ref = reference[i];
ptr = static_cast<char *>(ref._pointer);
checkPointer(ref, i, 0, ref._pointer);
checkPointer(ref, i, 1, (ptr +
ref._sizeBytes / 2));
if (ref._sizeBytes > 1) {
checkPointer(ref, i, 2, (ptr +
ref._sizeBytes - 1));
}
if (ref._attrib.memoryType == hipMemoryTypeDevice) {
hipFree(ref._pointer);
} else {
hipHostFree(ref._pointer);
}
}
}
//========================================================================
// Functions to run tests
//=======================================================================
//--
// Run through a couple simple cases to test lookups host pointer arithmetic:
TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
HIP_CHECK(hipSetDevice(0));
Nbytes = N * sizeof(char);
printf("\n");
printf("=============================================================\n");
printf("Simple Tests\n");
printf("=============================================================\n");
char* A_d;
char* A_Pinned_h;
char* A_OSAlloc_h;
hipError_t e;
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_Pinned_h), Nbytes,
hipHostMallocDefault));
A_OSAlloc_h = reinterpret_cast<char*>(malloc(Nbytes));
size_t free, total;
HIP_CHECK(hipMemGetInfo(&free, &total));
printf("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free,
(free / 1024.0 / 1024.0), Nbytes, total,
(total / 1024.0 / 1024.0));
REQUIRE(free + Nbytes <= total);
hipPointerAttribute_t attribs;
hipPointerAttribute_t attribs2;
// Device memory
printf("\nDevice memory (hipMalloc)\n");
HIP_CHECK(hipPointerGetAttributes(&attribs, A_d));
// Check pointer arithmetic cases:
resetAttribs(&attribs2);
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + 100));
char *ptr = reinterpret_cast<char *>(attribs.devicePointer);
REQUIRE(ptr + 100 ==
reinterpret_cast<char*>(attribs2.devicePointer));
// Corner case at end of array:
resetAttribs(&attribs2);
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + Nbytes - 1));
REQUIRE((ptr + Nbytes - 1) ==
reinterpret_cast<char*>(attribs2.devicePointer));
// Pointer just beyond array must be invalid or at least a different pointer
resetAttribs(&attribs2);
e = hipPointerGetAttributes(&attribs2, A_d + Nbytes + 1);
if (e != hipErrorInvalidValue) {
// We might have strayed into another pointer area.
REQUIRE(reinterpret_cast<char*>(ptr) !=
reinterpret_cast<char*>(attribs2.devicePointer));
}
resetAttribs(&attribs2);
e = hipPointerGetAttributes(&attribs2, A_d + Nbytes);
if (e != hipErrorInvalidValue) {
REQUIRE(attribs.devicePointer != attribs2.devicePointer);
}
hipFree(A_d);
e = hipPointerGetAttributes(&attribs, A_d);
REQUIRE(e == hipErrorInvalidValue);
// Device-visible host memory
printf("\nDevice-visible host memory (hipHostMalloc)\n");
HIP_CHECK(hipPointerGetAttributes(&attribs, A_Pinned_h));
resetAttribs(&attribs2);
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_Pinned_h + Nbytes / 2));
char *ptr1 = reinterpret_cast<char *>(attribs.hostPointer);
REQUIRE((ptr1 + Nbytes / 2)
== reinterpret_cast<char*>(attribs2.hostPointer));
hipHostFree(A_Pinned_h);
e = hipPointerGetAttributes(&attribs, A_Pinned_h);
REQUIRE(e == hipErrorInvalidValue);
// OS memory
printf("\nOS-allocated memory (malloc)\n");
e = hipPointerGetAttributes(&attribs, A_OSAlloc_h);
REQUIRE(e == hipErrorInvalidValue);
}
TEST_CASE("Unit_hipPointerGetAttributes_ClusterAlloc") {
srand(0x100);
printf("\n=============================================\n");
clusterAllocs(100, 1024 * 1, 1024 * 1024);
}
TEST_CASE("Unit_hipPointerGetAttributes_TinyClusterAlloc") {
srand(0x200);
printf("\n=============================================\n");
clusterAllocs(1000, 1, 10); // Many tiny allocations;
}
// Multi-threaded test with many simul allocs.
// IN : serialize will force the test to run in serial fashion.
TEST_CASE("Unit_hipPointerGetAttributes_MultiThread") {
srand(0x300);
auto serialize = 1;
printf("\n=============================================\n");
printf("MultiThreaded_1\n");
if (serialize) printf("[SERIALIZE]\n");
printf("===============================================\n");
std::thread t1(clusterAllocs, 1000, 101, 1000);
if (serialize) t1.join();
std::thread t2(clusterAllocs, 1000, 11, 100);
if (serialize) t2.join();
std::thread t3(clusterAllocs, 1000, 5, 10);
if (serialize) t3.join();
std::thread t4(clusterAllocs, 1000, 1, 4);
if (serialize) t4.join();
}