SWDEV-294643 - [catch2][dtest] Migration of Malloc related files to Catch2 (#2314)
Migrated malloc related files under memory folder into catch2 framework
Change-Id: I5aa07fc8148bdf6bef135947091aaf1d3c54663b
[ROCm/hip-tests commit: 4287af6a2f]
This commit is contained in:
@@ -9,6 +9,7 @@ set(LINUX_TEST_SRC
|
||||
hipGetDeviceCountMproc.cc
|
||||
hipGetDevicePropertiesMproc.cc
|
||||
hipSetGetDeviceMproc.cc
|
||||
hipIpcMemAccessTest.cc
|
||||
)
|
||||
|
||||
if(UNIX)
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the hipIpcMemAccess APIs by creating memory handle
|
||||
in parent process and access it in child process.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <fcntl.h>
|
||||
#include <semaphore.h>
|
||||
#include <unistd.h>
|
||||
|
||||
typedef struct mem_handle {
|
||||
int device;
|
||||
hipIpcMemHandle_t memHandle;
|
||||
bool IfTestPassed;
|
||||
} hip_ipc_t;
|
||||
|
||||
|
||||
// This testcase verifies the hipIpcMemAccess APIs as follows
|
||||
// The following program spawns a child process and does the following
|
||||
// Parent iterate through each device, create memory -- create hipIpcMemhandle
|
||||
// stores the mem handle in mmaped memory, release the child using sem_post()
|
||||
// and wait for child to release itself(parent process)
|
||||
// child process:
|
||||
// Child process get the ipc mem handle using hipIpcOpenMemHandle
|
||||
// Iterate through all the available gpus and do Device to Device copies
|
||||
// and check for data consistencies and close the hipIpcCloseMemHandle
|
||||
// release the parent and wait for parent to release itself(child)
|
||||
|
||||
TEST_CASE("Unit_hipIpcMemAccess_Semaphores") {
|
||||
hip_ipc_t *shrd_mem = NULL;
|
||||
pid_t pid;
|
||||
size_t N = 1024;
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
|
||||
int *A_h{nullptr}, *C_h{nullptr};
|
||||
sem_t *sem_ob1{nullptr}, *sem_ob2{nullptr};
|
||||
int Num_devices = 0, CanAccessPeer = 0;
|
||||
|
||||
std::string cmd_line = "rm -rf /dev/shm/sem.my-sem-object*";
|
||||
int res = system(cmd_line.c_str());
|
||||
REQUIRE(res != -1);
|
||||
sem_ob1 = sem_open("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0);
|
||||
sem_ob2 = sem_open("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0);
|
||||
REQUIRE(sem_ob1 != SEM_FAILED);
|
||||
REQUIRE(sem_ob2 != SEM_FAILED);
|
||||
|
||||
shrd_mem = reinterpret_cast<hip_ipc_t *>(mmap(NULL, sizeof(hip_ipc_t),
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS,
|
||||
0, 0));
|
||||
REQUIRE(shrd_mem != NULL);
|
||||
shrd_mem->IfTestPassed = true;
|
||||
HipTest::initArrays<int>(nullptr, nullptr, nullptr,
|
||||
&A_h, nullptr, &C_h, N, false);
|
||||
pid = fork();
|
||||
if (pid != 0) {
|
||||
// Parent process
|
||||
HIP_CHECK(hipGetDeviceCount(&Num_devices));
|
||||
for (int i = 0; i < Num_devices; ++i) {
|
||||
if (shrd_mem->IfTestPassed == true) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipIpcGetMemHandle(reinterpret_cast<hipIpcMemHandle_t *>
|
||||
(&shrd_mem->memHandle),
|
||||
A_d));
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
shrd_mem->device = i;
|
||||
if ((sem_post(sem_ob1)) == -1) {
|
||||
// Need to use inline function to release resources.
|
||||
shrd_mem->IfTestPassed = false;
|
||||
WARN("sem_post() call failed in parent process.");
|
||||
}
|
||||
if ((sem_wait(sem_ob2)) == -1) {
|
||||
shrd_mem->IfTestPassed = false;
|
||||
WARN("sem_wait() call failed in parent process.");
|
||||
}
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Child process
|
||||
HIP_CHECK(hipGetDeviceCount(&Num_devices));
|
||||
for (int j = 0; j < Num_devices; ++j) {
|
||||
HIP_CHECK(hipSetDevice(j));
|
||||
if ((sem_wait(sem_ob1)) == -1) {
|
||||
shrd_mem->IfTestPassed = false;
|
||||
WARN("sem_wait() call failed in child process.");
|
||||
if ((sem_post(sem_ob2)) == -1) {
|
||||
shrd_mem->IfTestPassed = false;
|
||||
WARN("sem_post() call on sem_ob2 failed");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < Num_devices; ++i) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast<void **>(&B_d),
|
||||
shrd_mem->memHandle,
|
||||
hipIpcMemLazyEnablePeerAccess));
|
||||
HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device));
|
||||
if (CanAccessPeer == 1) {
|
||||
HIP_CHECK(hipMemcpy(C_d, B_d, Nbytes, hipMemcpyDeviceToDevice));
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HipTest::checkTest<int>(A_h, C_h, N);
|
||||
memset(reinterpret_cast<void*>(C_h), 0, Nbytes);
|
||||
// Checking if the data obtained from Ipc shared memory is consistent
|
||||
HIP_CHECK(hipMemcpy(C_h, B_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HipTest::checkTest<int>(A_h, C_h, N);
|
||||
}
|
||||
HIP_CHECK(hipIpcCloseMemHandle(reinterpret_cast<void*>(B_d)));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
}
|
||||
if ((sem_post(sem_ob2)) == -1) {
|
||||
shrd_mem->IfTestPassed = false;
|
||||
WARN("sem_post() call on sem_ob2 failed");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
if ((sem_unlink("/my-sem-object1")) == -1) {
|
||||
WARN("sem_unlink() call on /my-sem-object1 failed");
|
||||
}
|
||||
if ((sem_unlink("/my-sem-object2")) == -1) {
|
||||
WARN("sem_unlink() call on /my-sem-object2 failed");
|
||||
}
|
||||
int rFlag = 0;
|
||||
waitpid(pid, &rFlag, 0);
|
||||
REQUIRE(shrd_mem->IfTestPassed == true);
|
||||
}
|
||||
#endif
|
||||
@@ -1,4 +1,5 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC
|
||||
memset.cc
|
||||
malloc.cc
|
||||
@@ -24,8 +25,49 @@ set(TEST_SRC
|
||||
hipMemcpyAllApiNegative.cc
|
||||
hipMemcpyWithStreamMultiThread.cc
|
||||
hipMemcpy_MultiThread.cc
|
||||
hipHostMalloc.cc
|
||||
hipHostRegister.cc
|
||||
hipMemPtrGetInfo.cc
|
||||
hipPointerGetAttributes.cc
|
||||
hipHostGetFlags.cc
|
||||
hipMemoryAllocateCoherent.cc
|
||||
hipMallocManaged_MultiScenario.cc
|
||||
hipManagedKeyword.cc
|
||||
)
|
||||
|
||||
else()
|
||||
set(TEST_SRC
|
||||
memset.cc
|
||||
malloc.cc
|
||||
hipMemcpy2DToArray.cc
|
||||
hipMemcpy2DToArrayAsync.cc
|
||||
hipMemcpyPeer.cc
|
||||
hipMemcpyPeerAsync.cc
|
||||
hipMemcpy3D.cc
|
||||
hipMemcpy3DAsync.cc
|
||||
hipMemcpyParam2D.cc
|
||||
hipMemcpyParam2DAsync.cc
|
||||
hipMemcpy2D.cc
|
||||
hipMemcpy2DAsync.cc
|
||||
hipMemcpy2DFromArray.cc
|
||||
hipMemcpy2DFromArrayAsync.cc
|
||||
hipMemcpyAtoH.cc
|
||||
hipMemcpyHtoA.cc
|
||||
hipMemcpyDtoD.cc
|
||||
hipMemcpyDtoDAsync.cc
|
||||
hipMemcpyAsync.cc
|
||||
hipMemcpy.cc
|
||||
hipMemcpyWithStream.cc
|
||||
hipMemcpyAllApiNegative.cc
|
||||
hipMemcpyWithStreamMultiThread.cc
|
||||
hipMemcpy_MultiThread.cc
|
||||
hipHostMalloc.cc
|
||||
hipHostRegister.cc
|
||||
hipHostGetFlags.cc
|
||||
hipMemoryAllocateCoherent.cc
|
||||
hipMallocManaged_MultiScenario.cc
|
||||
hipManagedKeyword.cc
|
||||
)
|
||||
endif()
|
||||
# Create shared lib of all tests
|
||||
add_library(MemoryTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the basic scenario of hipHostGetFlags API
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
static constexpr auto LEN{1024*1024};
|
||||
|
||||
/*
|
||||
This testcase verifies hipHostGetFlags API basic scenario
|
||||
1. Allocates the memory using different flags
|
||||
2. Gets the flags of the respective variable using
|
||||
hipHostGetFlags API
|
||||
3. Validates it with the initial flags used while allocating
|
||||
memory
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipHostGetFlags_Basic", "", int,
|
||||
float, double) {
|
||||
constexpr auto SIZE{LEN * sizeof(TestType)};
|
||||
TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr};
|
||||
TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
|
||||
unsigned int FlagA, FlagB, FlagC;
|
||||
|
||||
FlagA = hipHostMallocWriteCombined | hipHostMallocMapped;
|
||||
FlagB = hipHostMallocWriteCombined | hipHostMallocMapped;
|
||||
FlagC = hipHostMallocMapped;
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
int device;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop, device));
|
||||
if (prop.canMapHostMemory != 1) {
|
||||
SUCCEED("Device Property canMapHostMemory is not set");
|
||||
} else {
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_h), SIZE,
|
||||
hipHostMallocWriteCombined | hipHostMallocMapped));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_h), SIZE,
|
||||
hipHostMallocWriteCombined | hipHostMallocMapped));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C_h), SIZE,
|
||||
hipHostMallocMapped));
|
||||
|
||||
unsigned int flagA, flagB, flagC;
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&A_d), A_h, 0));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&C_d), C_h, 0));
|
||||
HIP_CHECK(hipHostGetFlags(&flagA, A_h));
|
||||
HIP_CHECK(hipHostGetFlags(&flagB, B_h));
|
||||
HIP_CHECK(hipHostGetFlags(&flagC, C_h));
|
||||
|
||||
HipTest::setDefaultData<TestType>(LEN, A_h, B_h, C_h);
|
||||
|
||||
dim3 dimGrid(LEN / 512, 1, 1);
|
||||
dim3 dimBlock(512, 1, 1);
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock,
|
||||
0, 0, static_cast<const TestType*>(A_d),
|
||||
static_cast<const TestType*>(B_d), C_d, LEN);
|
||||
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, SIZE, hipMemcpyDeviceToHost));
|
||||
// Note this really HostToHost not
|
||||
// DeviceToHost, since memory is mapped...
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, LEN);
|
||||
|
||||
REQUIRE(flagA == FlagA);
|
||||
REQUIRE(flagB == FlagB);
|
||||
REQUIRE(flagC == FlagC);
|
||||
HIP_CHECK(hipHostFree(A_h));
|
||||
HIP_CHECK(hipHostFree(B_h));
|
||||
HIP_CHECK(hipHostFree(C_h));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipHostMalloc API
|
||||
1. Basic scenario of hipHostMalloc API
|
||||
2. Negative Scenarios of hipHostMalloc API
|
||||
3. Allocating memory using hipHostMalloc with Coherent flag
|
||||
4. Allocating memory using hipHostMalloc with NonCoherent flag
|
||||
5. Allocating memory using hipHostMalloc with default flag
|
||||
*/
|
||||
|
||||
#include<hip_test_checkers.hh>
|
||||
#include<hip_test_kernels.hh>
|
||||
#include<hip_test_common.hh>
|
||||
|
||||
#define SYNC_EVENT 0
|
||||
#define SYNC_STREAM 1
|
||||
#define SYNC_DEVICE 2
|
||||
|
||||
std::vector<std::string> syncMsg = {"event", "stream", "device"};
|
||||
static constexpr int numElements{1024 * 16};
|
||||
static constexpr size_t sizeBytes{numElements * sizeof(int)};
|
||||
|
||||
__global__ void Set(int* Ad, int val) {
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tx] = val;
|
||||
}
|
||||
|
||||
void CheckHostPointer(int numElements, int* ptr, unsigned eventFlags,
|
||||
int syncMethod, std::string msg) {
|
||||
std::cerr << "test: CheckHostPointer "
|
||||
<< msg
|
||||
<< " eventFlags = " << std::hex << eventFlags
|
||||
<< ((eventFlags & hipEventReleaseToDevice) ?
|
||||
" hipEventReleaseToDevice" : "")
|
||||
<< ((eventFlags & hipEventReleaseToSystem) ?
|
||||
" hipEventReleaseToSystem" : "")
|
||||
<< " ptr=" << ptr << " syncMethod="
|
||||
<< syncMsg[syncMethod] << "\n";
|
||||
|
||||
hipStream_t s;
|
||||
hipEvent_t e;
|
||||
|
||||
// Init:
|
||||
HIP_CHECK(hipStreamCreate(&s));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&e, eventFlags))
|
||||
dim3 dimBlock(64, 1, 1);
|
||||
dim3 dimGrid(numElements / dimBlock.x, 1, 1);
|
||||
|
||||
const int expected = 13;
|
||||
|
||||
// Init array to know state:
|
||||
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, ptr, -42);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, ptr, expected);
|
||||
HIP_CHECK(hipEventRecord(e, s));
|
||||
|
||||
// Host waits for event :
|
||||
switch (syncMethod) {
|
||||
case SYNC_EVENT:
|
||||
HIP_CHECK(hipEventSynchronize(e));
|
||||
break;
|
||||
case SYNC_STREAM:
|
||||
HIP_CHECK(hipStreamSynchronize(s));
|
||||
break;
|
||||
case SYNC_DEVICE:
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < numElements; i++) {
|
||||
if (ptr[i] != expected) {
|
||||
printf("mismatch at %d: %d != %d\n", i, ptr[i], expected);
|
||||
REQUIRE(ptr[i] == expected);
|
||||
}
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamDestroy(s));
|
||||
HIP_CHECK(hipEventDestroy(e));
|
||||
}
|
||||
/*
|
||||
This testcase performs the basic scenario of hipHostMalloc API
|
||||
Allocates the memory using hipHostMalloc API
|
||||
Launches the kernel and performs vector addition.
|
||||
validates thes result.
|
||||
*/
|
||||
TEST_CASE("Unit_hipHostMalloc_Basic") {
|
||||
static constexpr auto LEN{1024 * 1024};
|
||||
static constexpr auto SIZE{LEN * sizeof(float)};
|
||||
|
||||
hipDeviceProp_t prop;
|
||||
int device;
|
||||
HIP_CHECK(hipGetDevice(&device));
|
||||
HIP_CHECK(hipGetDeviceProperties(&prop, device));
|
||||
if (prop.canMapHostMemory != 1) {
|
||||
SUCCEED("Does support HostPinned Memory");
|
||||
} else {
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_h), SIZE,
|
||||
hipHostMallocWriteCombined | hipHostMallocMapped));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&B_h), SIZE,
|
||||
hipHostMallocDefault));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C_h), SIZE,
|
||||
hipHostMallocMapped));
|
||||
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&A_d), A_h, 0));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&C_d), C_h, 0));
|
||||
|
||||
HipTest::setDefaultData<float>(LEN, A_h, B_h, C_h);
|
||||
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&B_d), SIZE));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, SIZE, hipMemcpyHostToDevice));
|
||||
|
||||
dim3 dimGrid(LEN / 512, 1, 1);
|
||||
dim3 dimBlock(512, 1, 1);
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock,
|
||||
0, 0, static_cast<const float*>(A_d),
|
||||
static_cast<const float*>(B_d), C_d, LEN);
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, LEN*sizeof(float),
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD<float>(A_h, B_h, C_h, numElements);
|
||||
|
||||
HIP_CHECK(hipHostFree(A_h));
|
||||
HIP_CHECK(hipHostFree(B_h));
|
||||
HIP_CHECK(hipHostFree(C_h));
|
||||
}
|
||||
}
|
||||
/*
|
||||
This testcase verifies the hipHostMalloc API by passing nullptr
|
||||
to the pointer variable
|
||||
*/
|
||||
TEST_CASE("Unit_hipHostMalloc_Negative") {
|
||||
#if HT_AMD
|
||||
{
|
||||
// Stimulate error condition:
|
||||
int* A = nullptr;
|
||||
REQUIRE(hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes,
|
||||
hipHostMallocCoherent | hipHostMallocNonCoherent)
|
||||
!= hipSuccess);
|
||||
REQUIRE(A == nullptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/*
|
||||
This testcase verifies the hipHostMalloc API by
|
||||
1.Allocating memory using noncoherent flag
|
||||
2. Launches the kernel and modifies the variable
|
||||
using different synchronization
|
||||
techniquies
|
||||
3. validates the result.
|
||||
*/
|
||||
TEST_CASE("Unit_hipHostMalloc_NonCoherent") {
|
||||
int* A = nullptr;
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A),
|
||||
sizeBytes, hipHostMallocNonCoherent));
|
||||
const char* ptrType = "non-coherent";
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_EVENT, ptrType);
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the hipHostMalloc API by
|
||||
1.Allocating memory using coherent flag
|
||||
2. Launches the kernel and modifies the variable
|
||||
using different synchronization
|
||||
techniquies
|
||||
3. validates the result.
|
||||
*/
|
||||
TEST_CASE("Unit_hipHostMalloc_Coherent") {
|
||||
int* A = nullptr;
|
||||
if (hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes,
|
||||
hipHostMallocCoherent) == hipSuccess) {
|
||||
const char* ptrType = "coherent";
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
|
||||
SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
|
||||
SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToDevice,
|
||||
SYNC_EVENT, ptrType);
|
||||
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, hipEventReleaseToSystem,
|
||||
SYNC_EVENT, ptrType);
|
||||
} else {
|
||||
SUCCEED("Coherence memory allocation failed. Is SVM atomic supported?");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the hipHostMalloc API by
|
||||
1.Allocating memory using default flag
|
||||
2. Launches the kernel and modifies the variable
|
||||
using different synchronization
|
||||
techniquies
|
||||
3. validates the result.
|
||||
*/
|
||||
TEST_CASE("Unit_hipHostMalloc_Default") {
|
||||
int* A = nullptr;
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A), sizeBytes));
|
||||
const char* ptrType = "default";
|
||||
CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType);
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the following scenarios of hipHostRegister API
|
||||
1. Referencing the hipHostRegister variable from kernel and performing
|
||||
memset on that variable.This is verified for different datatypes.
|
||||
2. hipHostRegister and perform hipMemcpy on it.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#define OFFSET 128
|
||||
static constexpr auto LEN{1024*1024};
|
||||
|
||||
template<typename T>
|
||||
__global__ void Inc(T* Ad) {
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tx] = Ad[tx] + static_cast<T>(1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd,
|
||||
bool internalRegister) {
|
||||
constexpr auto memsetval = 13.0f;
|
||||
A = A + offset;
|
||||
numElements -= offset;
|
||||
|
||||
size_t sizeBytes = numElements * sizeof(T);
|
||||
|
||||
if (internalRegister) {
|
||||
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
|
||||
}
|
||||
|
||||
// Reset
|
||||
for (size_t i = 0; i < numElements; i++) {
|
||||
A[i] = static_cast<float>(i);
|
||||
Bh[i] = 0.0f;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemset(Bd, memsetval, sizeBytes));
|
||||
|
||||
HIP_CHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
// Make sure the copy worked
|
||||
for (size_t i = 0; i < numElements; i++) {
|
||||
REQUIRE(Bh[i] == A[i]);
|
||||
}
|
||||
|
||||
if (internalRegister) {
|
||||
HIP_CHECK(hipHostUnregister(A));
|
||||
}
|
||||
}
|
||||
/*
|
||||
This testcase verifies the hipHostRegister API by
|
||||
1. Allocating the memory using malloc
|
||||
2. hipHostRegister that variable
|
||||
3. Getting the corresponding device pointer of the registered varible
|
||||
4. Launching kernel and access the device pointer variable
|
||||
5. performing hipMemset on the device pointer variable
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset",
|
||||
"", int,
|
||||
float, double) {
|
||||
size_t sizeBytes{LEN * sizeof(TestType)};
|
||||
TestType *A, **Ad;
|
||||
int num_devices;
|
||||
HIP_CHECK(hipGetDeviceCount(&num_devices));
|
||||
Ad = new TestType*[num_devices];
|
||||
A = reinterpret_cast<TestType*>(malloc(sizeBytes));
|
||||
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
|
||||
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
A[i] = static_cast<TestType>(1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&Ad[i]), A, 0));
|
||||
}
|
||||
|
||||
// Reference the registered device pointer Ad from inside the kernel:
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
REQUIRE(A[10] == 1 + static_cast<TestType>(num_devices));
|
||||
// Reference the registered device pointer Ad in hipMemset:
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipMemset(Ad[i], 0, sizeBytes));
|
||||
}
|
||||
REQUIRE(A[10] == 0);
|
||||
|
||||
HIP_CHECK(hipHostUnregister(A));
|
||||
|
||||
free(A);
|
||||
delete [] Ad;
|
||||
}
|
||||
/*
|
||||
This testcase verifies hipHostRegister API by
|
||||
performing memcpy on the hipHostRegistered variable.
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipHostRegister_Memcpy", "",
|
||||
int, float, double) {
|
||||
// 1 refers to hipHostRegister
|
||||
// 0 refers to malloc
|
||||
auto mem_type = GENERATE(0, 1);
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
|
||||
|
||||
size_t sizeBytes = LEN * sizeof(TestType);
|
||||
TestType* A = reinterpret_cast<TestType*>(malloc(sizeBytes));
|
||||
|
||||
// Copy to B, this should be optimal pinned malloc copy:
|
||||
// Note we are using the host pointer here:
|
||||
TestType *Bh, *Bd;
|
||||
Bh = reinterpret_cast<TestType*>(malloc(sizeBytes));
|
||||
HIP_CHECK(hipMalloc(&Bd, sizeBytes));
|
||||
|
||||
REQUIRE(LEN > OFFSET);
|
||||
if (mem_type) {
|
||||
for (size_t i = 0; i < OFFSET; i++) {
|
||||
doMemCopy<TestType>(LEN, i, A, Bh, Bd, true /*internalRegister*/);
|
||||
}
|
||||
} else {
|
||||
HIP_CHECK(hipHostRegister(A, sizeBytes, 0));
|
||||
for (size_t i = 0; i < OFFSET; i++) {
|
||||
doMemCopy<TestType>(LEN, i, A, Bh, Bd, false /*internalRegister*/);
|
||||
}
|
||||
HIP_CHECK(hipHostUnregister(A));
|
||||
}
|
||||
|
||||
free(A);
|
||||
free(Bh);
|
||||
hipFree(Bd);
|
||||
}
|
||||
@@ -0,0 +1,323 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the hipMallocManaged API in the following scenarios
|
||||
1. MultiChunkSingleDevice Scenario
|
||||
2. MultiChunkMultiDevice Scenario
|
||||
3. Negative Scenarios
|
||||
4. OverSubscription scenario
|
||||
5. Device context change
|
||||
6. Multiple Pointers
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <atomic>
|
||||
|
||||
const size_t MAX_GPU{256};
|
||||
static size_t N{4*1024*1024};
|
||||
#define INIT_VAL 123
|
||||
|
||||
|
||||
/*
|
||||
* Kernel function to perform addition operation.
|
||||
*/
|
||||
template <typename T>
|
||||
__global__ void
|
||||
vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NUM_ELMTS; i += stride) {
|
||||
Ad2[i] = Ad1[i] + Ad1[i];
|
||||
}
|
||||
}
|
||||
|
||||
// The following Test case tests the following scenario:
|
||||
// A large chunk of hipMallocManaged() memory(Hmm) is created
|
||||
// Equal parts of Hmm is accessed and
|
||||
// kernel is launched on acessed chunk of hmm memory
|
||||
// and checks if there are any inconsistencies or access issues
|
||||
TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") {
|
||||
std::atomic<int> DataMismatch{0};
|
||||
constexpr int Chunks = 4;
|
||||
int Counter = 0;
|
||||
int NUM_ELMS = (1024 * 1024);
|
||||
float *Ad[Chunks], *Hmm = nullptr, *Ah = new float[NUM_ELMS];
|
||||
hipStream_t stream[Chunks];
|
||||
for (int i = 0; i < Chunks; ++i) {
|
||||
HIP_CHECK(hipMalloc(&Ad[i], NUM_ELMS * sizeof(float)));
|
||||
HIP_CHECK(hipMemset(Ad[i], 0, NUM_ELMS * sizeof(float)));
|
||||
HIP_CHECK(hipStreamCreate(&stream[i]));
|
||||
}
|
||||
HIP_CHECK(hipMallocManaged(&Hmm, (Chunks * NUM_ELMS * sizeof(float))));
|
||||
for (int i = 0; i < Chunks; ++i) {
|
||||
for (; Counter < ((i + 1) * NUM_ELMS); ++Counter) {
|
||||
Hmm[Counter] = (INIT_VAL + i);
|
||||
}
|
||||
}
|
||||
const unsigned threadsPerBlock = 256;
|
||||
const unsigned blocks = (NUM_ELMS + 255)/256;
|
||||
for (int k = 0; k < Chunks; ++k) {
|
||||
vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[k]>>>
|
||||
(&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
for (int m = 0; m < Chunks; ++m) {
|
||||
HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (int n = 0; n < NUM_ELMS; ++n) {
|
||||
if (Ah[n] != ((INIT_VAL + m) * 2)) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
for (int i = 0; i < Chunks; ++i) {
|
||||
HIP_CHECK(hipFree(Ad[i]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[i]));
|
||||
}
|
||||
HIP_CHECK(hipFree(Hmm));
|
||||
delete [] Ah;
|
||||
}
|
||||
|
||||
// The following Test case tests the following scenario:
|
||||
// A large chunk of hipMallocManaged() memory(Hmm) is created
|
||||
// Equal parts of Hmm is accessed on available gpus and
|
||||
// kernel is launched on acessed chunk of hmm memory
|
||||
// and checks if there are any inconsistencies or access issues
|
||||
TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") {
|
||||
std::atomic<int> DataMismatch{0};
|
||||
int Counter = 0;
|
||||
int NumDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&NumDevices));
|
||||
unsigned int NUM_ELMS = (1024 * 1024);
|
||||
float *Ad[MAX_GPU], *Hmm = NULL, *Ah = new float[NUM_ELMS];
|
||||
hipStream_t stream[MAX_GPU];
|
||||
for (int Oloop = 0; Oloop < NumDevices; ++Oloop) {
|
||||
HIP_CHECK(hipSetDevice(Oloop));
|
||||
HIP_CHECK(hipMalloc(&Ad[Oloop], NUM_ELMS * sizeof(float)));
|
||||
HIP_CHECK(hipMemset(Ad[Oloop], 0, NUM_ELMS * sizeof(float)));
|
||||
HIP_CHECK(hipStreamCreate(&stream[Oloop]));
|
||||
}
|
||||
HIP_CHECK(hipMallocManaged(&Hmm, (NumDevices * NUM_ELMS * sizeof(float))));
|
||||
for (int i = 0; i < NumDevices; ++i) {
|
||||
for (; Counter < static_cast<int>((i + 1) * NUM_ELMS); ++Counter) {
|
||||
Hmm[Counter] = INIT_VAL + i;
|
||||
}
|
||||
}
|
||||
const unsigned threadsPerBlock = 256;
|
||||
const unsigned blocks = (NUM_ELMS + 255)/256;
|
||||
for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) {
|
||||
HIP_CHECK(hipSetDevice(Klaunch));
|
||||
vector_sum<float> <<<blocks, threadsPerBlock, 0, stream[Klaunch]>>>
|
||||
(&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS);
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
for (int m = 0; m < NumDevices; ++m) {
|
||||
HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (size_t n = 0; n < NUM_ELMS; ++n) {
|
||||
if (Ah[n] != ((INIT_VAL + m) * 2)) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
memset(reinterpret_cast<void*>(Ah), 0, NUM_ELMS * sizeof(float));
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
for (int i = 0; i < NumDevices; ++i) {
|
||||
HIP_CHECK(hipFree(Ad[i]));
|
||||
HIP_CHECK(hipStreamDestroy(stream[i]));
|
||||
}
|
||||
HIP_CHECK(hipFree(Hmm));
|
||||
delete [] Ah;
|
||||
}
|
||||
|
||||
// The following tests oversubscription hipMallocManaged() api
|
||||
// Currently disabled.
|
||||
TEST_CASE("Unit_hipMallocManaged_OverSubscription") {
|
||||
void *A = nullptr;
|
||||
size_t total = 0, free = 0;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
// ToDo: In case of HMM, memory over-subscription is allowed. Hence, relook
|
||||
// into how out of memory can be tested.
|
||||
// Demanding more mem size than available
|
||||
#if HT_AMD
|
||||
REQUIRE(hipMallocManaged(&A, (free +1), hipMemAttachGlobal) != hipSuccess);
|
||||
#endif
|
||||
}
|
||||
|
||||
// The following test does negative testing of hipMallocManaged() api
|
||||
// by passing invalid values and check if the behavior is as expected
|
||||
TEST_CASE("Unit_hipMallocManaged_Negative") {
|
||||
void *A;
|
||||
size_t total = 0, free = 0;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
|
||||
SECTION("Nullptr to devPtr") {
|
||||
REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachGlobal) != hipSuccess);
|
||||
}
|
||||
|
||||
// cuda api doc says : If size is 0, cudaMallocManaged returns
|
||||
// cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns
|
||||
// success and contradicts with api doc.
|
||||
|
||||
// With size(0), api expected to return error code (or)
|
||||
// reset ptr while returning success (to accommodate cuda 11.2 api behavior).
|
||||
SECTION("size 0 with flag hipMemAttachGlobal") {
|
||||
#if HT_AMD
|
||||
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachGlobal) != hipSuccess);
|
||||
#else
|
||||
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
|
||||
#endif
|
||||
}
|
||||
|
||||
SECTION("devptr is nullptr with flag hipMemAttachHost") {
|
||||
REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachHost) != hipSuccess);
|
||||
}
|
||||
|
||||
// cuda api doc says : If size is 0, cudaMallocManaged returns
|
||||
// cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns
|
||||
// success and contradicts with api doc.
|
||||
|
||||
// With size(0), api expected to return error code (or)
|
||||
// reset ptr while returning success (to accommodate cuda 11.2 api behavior).
|
||||
SECTION("size 0 with flag hipMemAttachHost") {
|
||||
#if HT_AMD
|
||||
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) != hipSuccess);
|
||||
#else
|
||||
REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess);
|
||||
#endif
|
||||
}
|
||||
SECTION("nullptr to devptr, size 0 and flag 0") {
|
||||
REQUIRE(hipMallocManaged(NULL, 0, 0) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Numeric value to flag parameter") {
|
||||
REQUIRE(hipMallocManaged(&A, 1024, 145) != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("Negative value to size") {
|
||||
REQUIRE(hipMallocManaged(&A, -10, hipMemAttachGlobal));
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate two pointers using hipMallocManaged(), initialize,
|
||||
// then launch kernel using these pointers directly and
|
||||
// later validate the content without using any Memcpy.
|
||||
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "",
|
||||
int, float, double) {
|
||||
int NumDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&NumDevices));
|
||||
TestType *Hmm1 = nullptr, *Hmm2 = nullptr;
|
||||
|
||||
for (int i = 0; i < NumDevices; ++i) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
std::atomic<int> DataMismatch{0};
|
||||
HIP_CHECK(hipMallocManaged(&Hmm1, N * sizeof(TestType)));
|
||||
HIP_CHECK(hipMallocManaged(&Hmm2, N * sizeof(TestType)));
|
||||
for (size_t m = 0; m < N; ++m) {
|
||||
Hmm1[m] = m;
|
||||
Hmm2[m] = 0;
|
||||
}
|
||||
const unsigned threadsPerBlock = 256;
|
||||
const unsigned blocks = (N + 255)/256;
|
||||
// Kernel launch
|
||||
vector_sum <<<blocks, threadsPerBlock>>> (Hmm1, Hmm2, N);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
for (size_t v = 0; v < N; ++v) {
|
||||
if (Hmm2[v] != static_cast<TestType>(v + v)) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
HIP_CHECK(hipFree(Hmm1));
|
||||
HIP_CHECK(hipFree(Hmm2));
|
||||
}
|
||||
}
|
||||
|
||||
// In the following test, a memory is created using hipMallocManaged() by
|
||||
// setting a device and verified if it is accessible when the context is set
|
||||
// to all other devices. This include verification and Device two Device
|
||||
// transfers and kernel launch o discover if there any access issues.
|
||||
|
||||
TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "",
|
||||
unsigned char, int, float, double) {
|
||||
std::atomic<unsigned int> DataMismatch;
|
||||
TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr,
|
||||
*Hmm = nullptr;
|
||||
int NumDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&NumDevices));
|
||||
|
||||
for (size_t i =0; i < N; ++i) {
|
||||
Ah1[i] = INIT_VAL;
|
||||
Ah2[i] = 0;
|
||||
}
|
||||
for (int Oloop = 0; Oloop < NumDevices; ++Oloop) {
|
||||
DataMismatch = 0;
|
||||
HIP_CHECK(hipSetDevice(Oloop));
|
||||
HIP_CHECK(hipMallocManaged(&Hmm, N * sizeof(TestType)));
|
||||
for (int Iloop = 0; Iloop < NumDevices; ++Iloop) {
|
||||
HIP_CHECK(hipSetDevice(Iloop));
|
||||
HIP_CHECK(hipMalloc(&Ad, N * sizeof(TestType)));
|
||||
// Copy data from host to hipMallocMananged memory and verify
|
||||
HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType),
|
||||
hipMemcpyHostToDevice));
|
||||
for (size_t v = 0; v < N; ++v) {
|
||||
if (Hmm[v] != INIT_VAL) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
|
||||
// Executing D2D transfer with hipMallocManaged memory and verify
|
||||
HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType),
|
||||
hipMemcpyDeviceToDevice));
|
||||
HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (size_t k = 0; k < N; ++k) {
|
||||
if (Ah2[k] != INIT_VAL) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
HIP_CHECK(hipMemset(Ad, 0, N * sizeof(TestType)));
|
||||
const unsigned threadsPerBlock = 256;
|
||||
const unsigned blocks = (N + 255)/256;
|
||||
// Launching the kernel to check if there is any access issue with
|
||||
// hipMallocManaged memory and local device's memory
|
||||
vector_sum <<<blocks, threadsPerBlock>>> (Hmm, Ad, N);
|
||||
hipDeviceSynchronize();
|
||||
HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType),
|
||||
hipMemcpyDeviceToHost));
|
||||
for (size_t m = 0; m < N; ++m) {
|
||||
if (Ah2[m] != 246) {
|
||||
DataMismatch++;
|
||||
}
|
||||
}
|
||||
REQUIRE(DataMismatch.load() == 0);
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
}
|
||||
HIP_CHECK(hipFree(Hmm));
|
||||
}
|
||||
free(Ah1);
|
||||
free(Ah2);
|
||||
}
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the hipManagedKeyword basic scenario
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
#define N 1048576
|
||||
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
|
||||
__managed__ float B[N];
|
||||
__managed__ int x = 0;
|
||||
|
||||
__global__ void add(const float *A, float *B) {
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (int i = index; i < N; i += stride)
|
||||
B[i] = A[i] + B[i];
|
||||
}
|
||||
|
||||
__global__ void GPU_func() {
|
||||
x++;
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
|
||||
for (int i = 0; i < N; i++) {
|
||||
A[i] = 1.0f;
|
||||
B[i] = 2.0f;
|
||||
}
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (N + blockSize - 1) / blockSize;
|
||||
dim3 dimGrid(numBlocks, 1, 1);
|
||||
dim3 dimBlock(blockSize, 1, 1);
|
||||
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast<const float*>(A),
|
||||
static_cast<float*>(B));
|
||||
|
||||
hipDeviceSynchronize();
|
||||
|
||||
float maxError = 0.0f;
|
||||
for (int i = 0; i < N; i++)
|
||||
maxError = fmax(maxError, fabs(B[i]-3.0f));
|
||||
|
||||
REQUIRE(maxError == 0.0f);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipManagedKeyword_MultiGpu") {
|
||||
int numDevices = 0;
|
||||
hipGetDeviceCount(&numDevices);
|
||||
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
hipSetDevice(i);
|
||||
GPU_func<<< 1, 1 >>>();
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
REQUIRE(x == numDevices);
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testfile verifies the basic scenario of hipMemPtrGetInfo API
|
||||
*/
|
||||
#include <hip_test_common.hh>
|
||||
struct MemInfo{
|
||||
float a;
|
||||
int b;
|
||||
void* c;
|
||||
};
|
||||
|
||||
/*
|
||||
This testcase verifies the basic scenario of
|
||||
hipMemPtrGetInfo API
|
||||
1. Allocates specific size of memory for the variables
|
||||
2. Gets the allocated size of that variable using hipMemPtrGetInfo API
|
||||
3. Validates the initial size and allocated size
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemPtrGetInfo_Basic") {
|
||||
int* iPtr;
|
||||
float* fPtr;
|
||||
MemInfo* sPtr;
|
||||
size_t sSetSize = 1024, sGetSize;
|
||||
HIP_CHECK(hipMalloc(&iPtr, sSetSize));
|
||||
HIP_CHECK(hipMalloc(&fPtr, sSetSize));
|
||||
HIP_CHECK(hipMalloc(&sPtr, sSetSize));
|
||||
HIP_CHECK(hipMemPtrGetInfo(iPtr, &sGetSize));
|
||||
REQUIRE(sGetSize == sSetSize);
|
||||
HIP_CHECK(hipMemPtrGetInfo(fPtr, &sGetSize));
|
||||
REQUIRE(sGetSize == sSetSize);
|
||||
HIP_CHECK(hipMemPtrGetInfo(sPtr, &sGetSize));
|
||||
REQUIRE(sGetSize == sSetSize);
|
||||
}
|
||||
@@ -512,23 +512,6 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_H2H-H2D-D2H-H2PinMem", "", int,
|
||||
HipTest::freeArrays<TestType>(nullptr, nullptr, nullptr, A_Ph,
|
||||
B_Ph, nullptr, true);
|
||||
}
|
||||
/*
|
||||
This testcase verfies the boundary checks of hipMemcpy API for different sizes
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemcpy_BoundaryCheck") {
|
||||
size_t maxElem = 32 * 1024 * 1024;
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0 /*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0 /*usePinnedHost*/);
|
||||
memcpytest2<float>(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0);
|
||||
auto sizes = GENERATE(15 * 1024 * 1024, 16 * 1024 * 1024,
|
||||
16 * 1024 * 1024 + 16 * 1024,
|
||||
16 * 1024 * 1024 + 512 * 1024,
|
||||
17 * 1024 * 1024 + 1024,
|
||||
32 * 1024 * 1024);
|
||||
memcpytest2<float>(&memD, &memP, sizes, 0, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the multi thread scenario
|
||||
*/
|
||||
@@ -543,16 +526,6 @@ TEST_CASE("Unit_hipMemcpy_MultiThreadWithSerialization") {
|
||||
multiThread_1<float>(true, false);
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies the device offsets
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) {
|
||||
HIP_CHECK(hipDeviceReset());
|
||||
size_t maxSize = 256 * 1024;
|
||||
memcpytest2_offsets<TestType>(maxSize, true, false);
|
||||
memcpytest2_offsets<TestType>(maxSize, false, true);
|
||||
}
|
||||
|
||||
/*
|
||||
This testcase verifies hipMemcpy API with pinnedMemory and hostRegister
|
||||
along with kernel launches
|
||||
|
||||
@@ -71,13 +71,14 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice",
|
||||
&pitch_E, width, NUM_H));
|
||||
|
||||
// Initalizing A_d with C_h
|
||||
HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipSetDevice(1));
|
||||
hipStream_t stream;
|
||||
hipStreamCreate(&stream);
|
||||
|
||||
HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width,
|
||||
NUM_W*sizeof(TestType), NUM_H,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
// Device to Device
|
||||
hip_Memcpy2D desc = {};
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
|
||||
@@ -33,7 +33,6 @@ This testfile verifies the following scenarios
|
||||
7. hipMemcpyWithStream API with testkind TestkindDefault
|
||||
8. hipMemcpyWithStream API with testkind TestkindDefaultForDtoD
|
||||
9. hipMemcpyWithStream API DtoD on same device
|
||||
10.Multi threaded scenario
|
||||
*/
|
||||
|
||||
|
||||
@@ -540,55 +539,6 @@ void TestkindHtoH(void) {
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") {
|
||||
size_t thread_count = 10;
|
||||
std::vector<joinable_thread> threads;
|
||||
int deviceCount = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&deviceCount));
|
||||
if (deviceCount < 2) {
|
||||
SUCCEED("deviceCount < 2");
|
||||
} else {
|
||||
for (int op = static_cast<int>(ops::TestwithOnestream);
|
||||
op < static_cast<int>(ops::END_OF_LIST); ++op) {
|
||||
for (uint32_t i = 0; i < thread_count; i++) {
|
||||
threads.emplace_back(std::thread{[&] {
|
||||
switch ( op ) {
|
||||
case static_cast<int>(ops::TestwithOnestream):
|
||||
TestwithOnestream();
|
||||
break;
|
||||
case static_cast<int>(ops::TestwithTwoStream):
|
||||
TestwithTwoStream();
|
||||
break;
|
||||
case static_cast<int>(ops::TestkindDtoH):
|
||||
TestkindDtoH();
|
||||
break;
|
||||
case static_cast<int>(ops::TestkindHtoH):
|
||||
TestkindHtoH();
|
||||
break;
|
||||
case static_cast<int>(ops::TestkindDtoD):
|
||||
TestkindDtoD();
|
||||
break;
|
||||
case static_cast<int>(ops::TestOnMultiGPUwithOneStream):
|
||||
TestOnMultiGPUwithOneStream();
|
||||
break;
|
||||
case static_cast<int>(ops::TestkindDefault):
|
||||
TestkindDefault();
|
||||
break;
|
||||
#ifndef __HIP_PLATFORM_NVCC__
|
||||
case static_cast<int>(ops::TestkindDefaultForDtoD):
|
||||
TestkindDefaultForDtoD();
|
||||
break;
|
||||
#endif
|
||||
case static_cast<int>(ops::TestDtoDonSameDevice):
|
||||
TestDtoDonSameDevice();
|
||||
break;
|
||||
default:{}
|
||||
}
|
||||
}});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyWithStream_TestWithOneStream") {
|
||||
TestwithOnestream();
|
||||
|
||||
@@ -610,8 +610,8 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(bool &val_res) {
|
||||
HIPCHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipMemcpyWithStream_NewMultiThread") {
|
||||
const auto Threadcount{100};
|
||||
TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") {
|
||||
const auto Threadcount{10};
|
||||
bool ret_val[Threadcount];
|
||||
std::thread th[Threadcount];
|
||||
for (int op = static_cast<int>(ops::TestwithOnestream);
|
||||
|
||||
@@ -27,7 +27,7 @@ This testfile verifies the following scenarios of all hipMemcpy API
|
||||
1. Multi thread
|
||||
*/
|
||||
static constexpr auto NUM_ELM{1024};
|
||||
static constexpr auto NUM_THREADS{10};
|
||||
static constexpr auto NUM_THREADS{5};
|
||||
static auto Available_Gpus{0};
|
||||
static constexpr auto MAX_GPU{256};
|
||||
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This testcase verifies the following scenario
|
||||
1. Allocating the memory and modifying it coherently
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
|
||||
constexpr auto wait_sec = 5000;
|
||||
|
||||
__global__ void Kernel(float* hostRes, int clkRate) {
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
hostRes[tid] = tid + 1;
|
||||
__threadfence_system();
|
||||
// expecting that the data is getting flushed to host here!
|
||||
uint64_t start = clock64()/clkRate, cur;
|
||||
if (clkRate > 1) {
|
||||
do { cur = clock64()/clkRate-start;}while (cur < wait_sec);
|
||||
} else {
|
||||
do { cur = clock64()/start;}while (cur < wait_sec);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipHostMalloc_CoherentAccess") {
|
||||
int blocks = 2;
|
||||
float* hostRes;
|
||||
hipHostMalloc(&hostRes, blocks * sizeof(float),
|
||||
hipHostMallocMapped);
|
||||
hostRes[0] = 0;
|
||||
hostRes[1] = 0;
|
||||
int clkRate;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
std::cout << clkRate << std::endl;
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks),
|
||||
0, 0, hostRes, clkRate);
|
||||
int eleCounter = 0;
|
||||
while (eleCounter < blocks) {
|
||||
// blocks until the value changes
|
||||
while (hostRes[eleCounter] == 0) {printf("waiting for counter inc\n");}
|
||||
eleCounter++;
|
||||
}
|
||||
hipHostFree(reinterpret_cast<void *>(hostRes));
|
||||
}
|
||||
|
||||
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
Following scenarios are verified for hipPointerGetAttributes API
|
||||
1. Run through a couple simple cases to test lookups host pointer arithmetic
|
||||
2. Allocates memory across all devices withing the specified size range
|
||||
3. Allocates tiny memory across all devices
|
||||
4. Multi-threaded test with many simul allocs.
|
||||
|
||||
*/
|
||||
#include<hip_test_common.hh>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
size_t Nbytes = 0;
|
||||
constexpr size_t N{1000000};
|
||||
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
// Utility Functions:
|
||||
//=================================================================================================
|
||||
|
||||
bool operator==(const hipPointerAttribute_t& lhs,
|
||||
const hipPointerAttribute_t& rhs) {
|
||||
return ((lhs.hostPointer == rhs.hostPointer) &&
|
||||
(lhs.devicePointer == rhs.devicePointer) &&
|
||||
(lhs.memoryType == rhs.memoryType) && (lhs.device == rhs.device) &&
|
||||
(lhs.allocationFlags == rhs.allocationFlags));
|
||||
}
|
||||
|
||||
|
||||
bool operator!=(const hipPointerAttribute_t& lhs,
|
||||
const hipPointerAttribute_t& rhs) {
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
|
||||
const char* memoryTypeToString(hipMemoryType memoryType) {
|
||||
switch (memoryType) {
|
||||
case hipMemoryTypeHost:
|
||||
return "[Host]";
|
||||
case hipMemoryTypeDevice:
|
||||
return "[Device]";
|
||||
default:
|
||||
return "[Unknown]";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void resetAttribs(hipPointerAttribute_t* attribs) {
|
||||
attribs->hostPointer = reinterpret_cast<void*>(-1);
|
||||
attribs->devicePointer = reinterpret_cast<void*>(-1);
|
||||
attribs->memoryType = hipMemoryTypeHost;
|
||||
attribs->device = -2;
|
||||
attribs->isManaged = -1;
|
||||
attribs->allocationFlags = 0xffff;
|
||||
}
|
||||
|
||||
|
||||
void printAttribs(const hipPointerAttribute_t* attribs) {
|
||||
printf(
|
||||
"hostPointer:%p devicePointer:%p memType:%s deviceId:%d isManaged:%d "
|
||||
"allocationFlags:%u\n",
|
||||
attribs->hostPointer, attribs->devicePointer,
|
||||
memoryTypeToString(attribs->memoryType),
|
||||
attribs->device, attribs->isManaged, attribs->allocationFlags);
|
||||
}
|
||||
|
||||
|
||||
inline int zrand(int max) { return rand() % max; }
|
||||
|
||||
|
||||
|
||||
// Store the hipPointer attrib and some extra info
|
||||
// so can later compare the looked-up info against
|
||||
// the reference expectation
|
||||
struct SuperPointerAttribute {
|
||||
void* _pointer;
|
||||
size_t _sizeBytes;
|
||||
hipPointerAttribute_t _attrib;
|
||||
};
|
||||
|
||||
|
||||
// Support function to check result against a reference:
|
||||
void checkPointer(const SuperPointerAttribute& ref, int major,
|
||||
int minor, void* pointer) {
|
||||
hipPointerAttribute_t attribs;
|
||||
resetAttribs(&attribs);
|
||||
|
||||
hipError_t e = hipPointerGetAttributes(&attribs, pointer);
|
||||
if ((e != hipSuccess) || (attribs != ref._attrib)) {
|
||||
HIP_CHECK(e);
|
||||
REQUIRE(attribs != ref._attrib);
|
||||
} else {
|
||||
printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer);
|
||||
printAttribs(&attribs);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Test that allocates memory across all devices withing the
|
||||
// specified size range
|
||||
// (minSize...maxSize). Then does lookups to make sure the
|
||||
// info reported by the tracker matches
|
||||
// expecations Then deallocates it all.
|
||||
// Multiple threads can call this function and in fact
|
||||
// we do this in the testMultiThreaded_1 test.
|
||||
void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) {
|
||||
Nbytes = N * sizeof(char);
|
||||
printf("clusterAllocs numAllocs=%d size=%lu..%lu\n",
|
||||
numAllocs, minSize, maxSize);
|
||||
const int Max_Devices = 256;
|
||||
std::vector<SuperPointerAttribute> reference(numAllocs);
|
||||
|
||||
REQUIRE(minSize > 0);
|
||||
REQUIRE(maxSize >= minSize);
|
||||
|
||||
int numDevices;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
|
||||
//---
|
||||
// Populate with device and host allocations.
|
||||
size_t totalDeviceAllocated[Max_Devices];
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
totalDeviceAllocated[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < numAllocs; i++) {
|
||||
unsigned rand_seed = time(NULL);
|
||||
bool isDevice = rand_r(&rand_seed) & 0x1;
|
||||
reference[i]._sizeBytes = zrand(maxSize - minSize) + minSize;
|
||||
|
||||
reference[i]._attrib.device = zrand(numDevices);
|
||||
HIP_CHECK(hipSetDevice(reference[i]._attrib.device));
|
||||
reference[i]._attrib.isManaged = 0;
|
||||
|
||||
void* ptr;
|
||||
if (isDevice) {
|
||||
totalDeviceAllocated[reference[i]._attrib.device] +=
|
||||
reference[i]._sizeBytes;
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ptr),
|
||||
reference[i]._sizeBytes));
|
||||
reference[i]._attrib.memoryType = hipMemoryTypeDevice;
|
||||
reference[i]._attrib.devicePointer = ptr;
|
||||
reference[i]._attrib.hostPointer = NULL;
|
||||
reference[i]._attrib.allocationFlags = 0;
|
||||
} else {
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&ptr),
|
||||
reference[i]._sizeBytes,
|
||||
hipHostMallocDefault));
|
||||
reference[i]._attrib.memoryType = hipMemoryTypeHost;
|
||||
reference[i]._attrib.devicePointer = ptr;
|
||||
reference[i]._attrib.hostPointer = ptr;
|
||||
reference[i]._attrib.allocationFlags = 0;
|
||||
}
|
||||
reference[i]._pointer = ptr;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
size_t free, total;
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
printf(
|
||||
" device#%d: hipMemGetInfo: "
|
||||
"free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu "
|
||||
"(%4.2fMB)\n",
|
||||
i, free, (free / 1024.0 / 1024.0), totalDeviceAllocated[i],
|
||||
(totalDeviceAllocated[i]) / 1024.0 / 1024.0, total,
|
||||
(total / 1024.0 / 1024.0));
|
||||
REQUIRE(free + totalDeviceAllocated[i] <= total);
|
||||
}
|
||||
|
||||
// Now look up each pointer we inserted and verify we can find it:
|
||||
char * ptr;
|
||||
for (int i = 0; i < numAllocs; i++) {
|
||||
SuperPointerAttribute& ref = reference[i];
|
||||
ptr = static_cast<char *>(ref._pointer);
|
||||
checkPointer(ref, i, 0, ref._pointer);
|
||||
checkPointer(ref, i, 1, (ptr +
|
||||
ref._sizeBytes / 2));
|
||||
if (ref._sizeBytes > 1) {
|
||||
checkPointer(ref, i, 2, (ptr +
|
||||
ref._sizeBytes - 1));
|
||||
}
|
||||
|
||||
if (ref._attrib.memoryType == hipMemoryTypeDevice) {
|
||||
hipFree(ref._pointer);
|
||||
} else {
|
||||
hipHostFree(ref._pointer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//========================================================================
|
||||
// Functions to run tests
|
||||
//=======================================================================
|
||||
//--
|
||||
// Run through a couple simple cases to test lookups host pointer arithmetic:
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_Basic") {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
Nbytes = N * sizeof(char);
|
||||
printf("\n");
|
||||
printf("=============================================================\n");
|
||||
printf("Simple Tests\n");
|
||||
printf("=============================================================\n");
|
||||
|
||||
char* A_d;
|
||||
char* A_Pinned_h;
|
||||
char* A_OSAlloc_h;
|
||||
hipError_t e;
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&A_Pinned_h), Nbytes,
|
||||
hipHostMallocDefault));
|
||||
A_OSAlloc_h = reinterpret_cast<char*>(malloc(Nbytes));
|
||||
|
||||
size_t free, total;
|
||||
HIP_CHECK(hipMemGetInfo(&free, &total));
|
||||
printf("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free,
|
||||
(free / 1024.0 / 1024.0), Nbytes, total,
|
||||
(total / 1024.0 / 1024.0));
|
||||
REQUIRE(free + Nbytes <= total);
|
||||
|
||||
|
||||
hipPointerAttribute_t attribs;
|
||||
hipPointerAttribute_t attribs2;
|
||||
|
||||
// Device memory
|
||||
printf("\nDevice memory (hipMalloc)\n");
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs, A_d));
|
||||
|
||||
// Check pointer arithmetic cases:
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + 100));
|
||||
char *ptr = reinterpret_cast<char *>(attribs.devicePointer);
|
||||
REQUIRE(ptr + 100 ==
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
|
||||
// Corner case at end of array:
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + Nbytes - 1));
|
||||
REQUIRE((ptr + Nbytes - 1) ==
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
|
||||
// Pointer just beyond array must be invalid or at least a different pointer
|
||||
resetAttribs(&attribs2);
|
||||
e = hipPointerGetAttributes(&attribs2, A_d + Nbytes + 1);
|
||||
if (e != hipErrorInvalidValue) {
|
||||
// We might have strayed into another pointer area.
|
||||
REQUIRE(reinterpret_cast<char*>(ptr) !=
|
||||
reinterpret_cast<char*>(attribs2.devicePointer));
|
||||
}
|
||||
|
||||
|
||||
resetAttribs(&attribs2);
|
||||
e = hipPointerGetAttributes(&attribs2, A_d + Nbytes);
|
||||
if (e != hipErrorInvalidValue) {
|
||||
REQUIRE(attribs.devicePointer != attribs2.devicePointer);
|
||||
}
|
||||
hipFree(A_d);
|
||||
e = hipPointerGetAttributes(&attribs, A_d);
|
||||
REQUIRE(e == hipErrorInvalidValue);
|
||||
|
||||
// Device-visible host memory
|
||||
printf("\nDevice-visible host memory (hipHostMalloc)\n");
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs, A_Pinned_h));
|
||||
|
||||
resetAttribs(&attribs2);
|
||||
HIP_CHECK(hipPointerGetAttributes(&attribs2, A_Pinned_h + Nbytes / 2));
|
||||
char *ptr1 = reinterpret_cast<char *>(attribs.hostPointer);
|
||||
REQUIRE((ptr1 + Nbytes / 2)
|
||||
== reinterpret_cast<char*>(attribs2.hostPointer));
|
||||
|
||||
|
||||
hipHostFree(A_Pinned_h);
|
||||
e = hipPointerGetAttributes(&attribs, A_Pinned_h);
|
||||
REQUIRE(e == hipErrorInvalidValue);
|
||||
|
||||
// OS memory
|
||||
printf("\nOS-allocated memory (malloc)\n");
|
||||
e = hipPointerGetAttributes(&attribs, A_OSAlloc_h);
|
||||
REQUIRE(e == hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_ClusterAlloc") {
|
||||
srand(0x100);
|
||||
printf("\n=============================================\n");
|
||||
clusterAllocs(100, 1024 * 1, 1024 * 1024);
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_TinyClusterAlloc") {
|
||||
srand(0x200);
|
||||
printf("\n=============================================\n");
|
||||
clusterAllocs(1000, 1, 10); // Many tiny allocations;
|
||||
}
|
||||
|
||||
// Multi-threaded test with many simul allocs.
|
||||
// IN : serialize will force the test to run in serial fashion.
|
||||
TEST_CASE("Unit_hipPointerGetAttributes_MultiThread") {
|
||||
srand(0x300);
|
||||
auto serialize = 1;
|
||||
printf("\n=============================================\n");
|
||||
printf("MultiThreaded_1\n");
|
||||
if (serialize) printf("[SERIALIZE]\n");
|
||||
printf("===============================================\n");
|
||||
std::thread t1(clusterAllocs, 1000, 101, 1000);
|
||||
if (serialize) t1.join();
|
||||
|
||||
std::thread t2(clusterAllocs, 1000, 11, 100);
|
||||
if (serialize) t2.join();
|
||||
|
||||
std::thread t3(clusterAllocs, 1000, 5, 10);
|
||||
if (serialize) t3.join();
|
||||
|
||||
std::thread t4(clusterAllocs, 1000, 1, 4);
|
||||
if (serialize) t4.join();
|
||||
}
|
||||
Reference in New Issue
Block a user