Files

832 regels
26 KiB
C++

/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip_test_helper.hh>
#include <utils.hh>
#include <hip_test_process.hh>
static constexpr int N = 2 * 1024 * 1024;
static constexpr size_t NBYTES = N * sizeof(int);
/**
* Local Function to fill the array with given value
*/
static void fillHostArray(int* arr, int size, int value) {
for (int i = 0; i < size; i++) {
arr[i] = value;
}
}
/**
* Kernel to fill the array with given value
*/
static __global__ void fillArray(int* arr, int size, int value) {
for (int i = 0; i < size; i++) {
arr[i] = value;
}
}
/**
* Local Function to fill the device array with given value
*/
static void fillDeviceArray(int* arr, int size, int value) {
fillArray<<<1, 1>>>(arr, size, value);
}
/**
* In addOneKernel function, all elements of the array a increased by 1
*/
static __global__ void addOneKernel(int* a, int size) {
int offset = blockDim.x * blockIdx.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = offset; i < size; i += stride) {
a[i] += 1;
}
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario:-
* - Do Memory copy Asynchronously H2D in user defined blocking stream,
* - do Memory copy Asynchronously D2H in Legacy stream.
* - The task 2 which is in legacy stream should wait till the
* - task 1 completes its execution which is in userdefined stream.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_WithBlockingStream") {
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 1);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
fillDeviceArray(devArr, N, 2);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 3);
hipStream_t stream;
HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault));
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 1 \n");
REQUIRE(hostArrDst[i] == 1);
}
HIP_CHECK(hipStreamDestroy(stream));
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
/**
* Local Function to perform the below operations:-
* Do Memory copy Asynchronously H2D in Legacy stream
* and then do Memory copy Asynchronously D2H in Legacy stream.
* Task 2 should wait till the execution of task 1.
*/
void launchFunction(hipStream_t stream) {
int* hostArrSrc = new int[N];
REQUIRE_THREAD(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 5);
int* devArr = nullptr;
HIP_CHECK_THREAD(hipMalloc(&devArr, NBYTES));
REQUIRE_THREAD(devArr != nullptr);
fillDeviceArray(devArr, N, 6);
int* hostArrDst = new int[N];
REQUIRE_THREAD(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 7);
HIP_CHECK_THREAD(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, stream));
HIP_CHECK_THREAD(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, stream));
HIP_CHECK_THREAD(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
if (hostArrDst[i] != 5) {
std::cout << "At index : " << i << " Got value : " << hostArrDst[i]
<< " Expected value : 5 \n"
<< std::endl;
REQUIRE_THREAD(false);
}
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK_THREAD(hipFree(devArr));
}
/**
* Test Description
* ------------------------
* - This test case tests hipStreamLegacy in multi threaded scenario:-
* - All threads should launch successfully and run independently
* - and uses the same legacy stream.
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_MultipleThreads") {
const unsigned int threadsSupported = std::thread::hardware_concurrency();
const int numberOfThreads = (threadsSupported >= 10) ? 10 : threadsSupported;
std::vector<std::thread> threads;
for (int t = 0; t < numberOfThreads; t++) {
threads.push_back(std::thread(launchFunction, hipStreamLegacy));
}
for (int t = 0; (t < numberOfThreads) && (t < threads.size()); t++) {
threads[t].join();
}
HIP_CHECK_THREAD_FINALIZE();
}
/**
* Test Description
* ------------------------
* - Pass the hipStreamLegacy to hipStreamBeginCapture() api
* - and the api should return hipErrorStreamCaptureUnsupported.
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_NegetiveCase") {
hipStream_t stream = hipStreamLegacy;
REQUIRE(hipStreamBeginCapture(stream, hipStreamCaptureModeGlobal) ==
hipErrorStreamCaptureUnsupported);
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario:-
* - Do Memory copy Asynchronously H2D in user defined Non-blocking stream
* - and do Memory copy Asynchronously D2H in Legacy stream.
* - The task 2 which is in legacy stream should not wait till the
* - task 1 completes its execution, two streams should run concurrently.
* - And the host thread should wait for the task 2 which is in legacy stream.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_WithNonBlockingStream") {
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 10);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
fillDeviceArray(devArr, N, 11);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 12);
hipStream_t stream;
HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 10 or 11 \n");
REQUIRE(((hostArrDst[i] == 10) || (hostArrDst[i] == 11)));
}
HIP_CHECK(hipStreamDestroy(stream));
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario:-
* - Do Memory copy Asynchronously H2D using hipStreamPerThread
* - and do Memory copy Asynchronously D2H using hipStreamLegacy.
* - The task 2 which is in legacy stream should wait till the
* - task 1 completes its execution which is in hipStreamPerThread.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_WithStreamPerThread") {
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 15);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
fillDeviceArray(devArr, N, 16);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, hipStreamPerThread));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 15 \n");
REQUIRE(hostArrDst[i] == 15);
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario with all available devices :-
* - Do Memory copy Asynchronously H2D in Legacy stream
* - and do Memory copy Asynchronously D2H in Legacy stream.
* - The task 2 which is in legacy stream should wait till the
* - task 1 completes its execution, in all the devices.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_MultiDevice", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2");
return;
}
for (int deviceId = 0; deviceId < deviceCount; deviceId++) {
HIP_CHECK(hipSetDevice(deviceId));
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 20);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
fillDeviceArray(devArr, N, 21);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 22);
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i]
<< " Expected value : 20 "
" For deviceId : "
<< deviceId << "\n");
REQUIRE(hostArrDst[i] == 20);
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
}
/**
* Test Description
* ------------------------
* - This test case testing the hipStreamLegacy with hipMemcpyAsync
* - in all the ways like H2H, H2D, D2D, D2H, also with the hipMemcpyDefault.
* - All the operations should success.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_H2H_H2D_D2D_D2H_Default") {
int* hostArr1 = new int[N];
REQUIRE(hostArr1 != nullptr);
fillHostArray(hostArr1, N, 30);
int* hostArr2 = new int[N];
REQUIRE(hostArr2 != nullptr);
fillHostArray(hostArr2, N, 31);
int* devArr1 = nullptr;
HIP_CHECK(hipMalloc(&devArr1, NBYTES));
REQUIRE(devArr1 != nullptr);
fillDeviceArray(devArr1, N, 32);
int* devArr2 = nullptr;
HIP_CHECK(hipMalloc(&devArr2, NBYTES));
REQUIRE(devArr2 != nullptr);
fillDeviceArray(devArr2, N, 33);
int* hostArr3 = new int[N];
REQUIRE(hostArr3 != nullptr);
fillHostArray(hostArr3, N, 34);
int* hostArr4 = new int[N];
REQUIRE(hostArr4 != nullptr);
fillHostArray(hostArr4, N, 35);
HIP_CHECK(hipMemcpyAsync(hostArr2, hostArr1, NBYTES, hipMemcpyHostToHost, hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(devArr1, hostArr2, NBYTES, hipMemcpyHostToDevice, hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(devArr2, devArr1, NBYTES, hipMemcpyDeviceToDevice, hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(hostArr3, devArr2, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(hostArr4, hostArr3, NBYTES, hipMemcpyDefault, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArr4[i] << " Expected value : 30 \n");
REQUIRE(hostArr4[i] == 30);
}
delete[] hostArr1;
delete[] hostArr2;
HIP_CHECK(hipFree(devArr1));
HIP_CHECK(hipFree(devArr2));
delete[] hostArr3;
delete[] hostArr4;
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario with two devices:-
* - h1Dev0 -> d1Dev0 : memcpy from host to device 0
* - d1Dev0 -> d1Dev1 : memcpy from device 0 to device 1
* - d1Dev1 -> d2Dev1 : memcpy from device to device with in device 1
* - d2Dev1 -> d2Dev0 : memcpy from device 1 to device 0
* - d2Dev0 -> h2Dev0 : memcpy from device 0 to host
* - The opeations in device 0 and device 1 should run concurrently.
* - Multiple opeations in multiple devices should run without
* - any conflicts.
* - And the final host array should have value other than 45,
* - since host thread waits.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_MultiDeviceMultiOperation", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2");
return;
}
int currentDevice = 0;
int peerDevice = 1;
// Set arrays in device 0
HIP_CHECK(hipSetDevice(currentDevice));
int* h1Dev0 = new int[N];
REQUIRE(h1Dev0 != nullptr);
fillHostArray(h1Dev0, N, 40);
int* d1Dev0 = nullptr;
HIP_CHECK(hipMalloc(&d1Dev0, NBYTES));
REQUIRE(d1Dev0 != nullptr);
fillDeviceArray(d1Dev0, N, 41);
// Set arrays in device 1
HIP_CHECK(hipSetDevice(peerDevice));
int* d1Dev1 = nullptr;
HIP_CHECK(hipMalloc(&d1Dev1, NBYTES));
REQUIRE(d1Dev1 != nullptr);
fillDeviceArray(d1Dev1, N, 42);
int* d2Dev1 = nullptr;
HIP_CHECK(hipMalloc(&d2Dev1, NBYTES));
REQUIRE(d2Dev1 != nullptr);
fillDeviceArray(d2Dev1, N, 43);
// Set destination arrays in device 0
HIP_CHECK(hipSetDevice(currentDevice));
int* d2Dev0 = nullptr;
HIP_CHECK(hipMalloc(&d2Dev0, NBYTES));
REQUIRE(d2Dev0 != nullptr);
fillDeviceArray(d2Dev0, N, 44);
int* h2Dev0 = new int[N];
REQUIRE(h2Dev0 != nullptr);
fillHostArray(h2Dev0, N, 45);
// Do operations in current device
HIP_CHECK(hipSetDevice(currentDevice));
HIP_CHECK(hipMemcpyAsync(d1Dev0, h1Dev0, NBYTES, hipMemcpyHostToHost, hipStreamLegacy));
// Copy from current device to peer device
HIP_CHECK(hipMemcpyPeerAsync(d1Dev1, peerDevice, // des
d1Dev0, currentDevice, // src
NBYTES, hipStreamLegacy));
// Do operations in peer device
HIP_CHECK(hipSetDevice(peerDevice));
HIP_CHECK(hipMemcpyAsync(d2Dev1, d1Dev1, NBYTES, hipMemcpyDeviceToDevice, hipStreamLegacy));
// Copy from peer device to current device
HIP_CHECK(hipMemcpyPeerAsync(d2Dev0, currentDevice, // des
d2Dev1, peerDevice, // src
NBYTES, hipStreamLegacy));
// Finally copy daat to hostArr4
HIP_CHECK(hipSetDevice(currentDevice));
HIP_CHECK(hipMemcpyAsync(h2Dev0, d2Dev0, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << h2Dev0[i]
<< " Expected value : 40/41/42/43/44 \n");
REQUIRE(h2Dev0[i] != 45);
}
HIP_CHECK(hipSetDevice(currentDevice));
delete[] h1Dev0;
delete[] h2Dev0;
HIP_CHECK(hipFree(d1Dev0));
HIP_CHECK(hipFree(d2Dev0));
HIP_CHECK(hipSetDevice(peerDevice));
HIP_CHECK(hipFree(d1Dev1));
HIP_CHECK(hipFree(d2Dev1));
HIP_CHECK(hipSetDevice(currentDevice));
}
/*
* Local helper function to copy data from host to device
*/
static void copyFromHostToDevice(int* hostArr, int* devArr) {
HIP_CHECK(hipMemcpyAsync(devArr, hostArr, NBYTES, hipMemcpyHostToDevice, hipStreamLegacy));
}
/*
* Local helper function to copy data from device to host
*/
static void copyFromDeviceToHost(int* devArr, int* hostArr) {
HIP_CHECK(hipMemcpyAsync(hostArr, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario:-
* - Launch two threads,
* - In thread 1 : H -> D Copy
* - In thread 2 : D -> H Copy
* - These two thredas should run sequentially as they are using one stream.
* - Note : Joined first thread before launching second thread just to avoid,
* - the scenario of second thread can be launched before first
* - thread while scheduling.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_TwoThreadsEachOneDiffOperation") {
const unsigned int threadsSupported = std::thread::hardware_concurrency();
if (threadsSupported < 2) {
HipTest::HIP_SKIP_TEST(
"Skipping due to machine does't "
"support two concurrent threads");
return;
}
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 50);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
fillDeviceArray(devArr, N, 51);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 52);
std::thread H2D_Thread(copyFromHostToDevice, hostArrSrc, devArr);
H2D_Thread.join();
std::thread D2H_Thread(copyFromDeviceToHost, devArr, hostArrDst);
D2H_Thread.join();
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 50 \n");
REQUIRE(hostArrDst[i] == 50);
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
/**
* Test Description
* ------------------------
* - This test case tests the following scenario with two devices:-
* - devArrDev0 -> devArrDev1 : memcpy from device 0 to device 1
* - devArrDev1 -> hostArrDst : memcpy from device 1 to host
* - The opeations in device 0 and device 1 should run concurrently.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_TwoDevicesEachOneDiffOperation", "[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2");
return;
}
// Set arrays in device 0
HIP_CHECK(hipSetDevice(0));
int* devArrDev0 = nullptr;
HIP_CHECK(hipMalloc(&devArrDev0, NBYTES));
REQUIRE(devArrDev0 != nullptr);
fillDeviceArray(devArrDev0, N, 500);
// Set arrays in device 1
HIP_CHECK(hipSetDevice(1));
int* devArrDev1 = nullptr;
HIP_CHECK(hipMalloc(&devArrDev1, NBYTES));
REQUIRE(devArrDev1 != nullptr);
fillDeviceArray(devArrDev1, N, 501);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 502);
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipMemcpyPeerAsync(devArrDev1, 1, // des
devArrDev0, 0, // src
NBYTES, hipStreamLegacy));
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArrDev1, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i]
<< " Expected value : 500 or 501 \n");
REQUIRE(((hostArrDst[i] == 500) || (hostArrDst[i] == 501)));
}
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipFree(devArrDev1));
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipFree(devArrDev0));
delete[] hostArrDst;
}
/*
* Local helper function to copy data from device 0 to device 1
*/
static void operationsInDev0(int* devArrDev0, int* devArrDev1) {
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipMemcpyPeerAsync(devArrDev1, 1, // des
devArrDev0, 0, // src
NBYTES, hipStreamLegacy));
}
/*
* Local helper function to copy data from device to host
*/
static void operationsInDev1(int* devArrDev1, int* hostArrDst) {
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArrDev1, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
}
/**
* Test Description
* ------------------------
* - This test case tests the below scenario with two devices in two threads:-
* - In thread 1 Dev 0 : devArrDev0 -> devArrDev1 : memcpy from dev 0 to dev 1
* - In thread 2 Dev 1 : devArrDev1 -> hostArrDst : memcpy from dev 1 to host
* - The opeations in device 0 and device 1, thread 1 and thread 2
* - should run concurrently.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_TwoThreadsInTwoDevicesEachOneDiffOperation",
"[multigpu]") {
int deviceCount = 0;
HIP_CHECK(hipGetDeviceCount(&deviceCount));
if (deviceCount < 2) {
HipTest::HIP_SKIP_TEST("Skipping because this machine has total GPUs < 2");
return;
}
// Set arrays in device 0
HIP_CHECK(hipSetDevice(0));
int* devArrDev0 = nullptr;
HIP_CHECK(hipMalloc(&devArrDev0, NBYTES));
REQUIRE(devArrDev0 != nullptr);
fillDeviceArray(devArrDev0, N, 999);
// Set arrays in device 1
HIP_CHECK(hipSetDevice(1));
int* devArrDev1 = nullptr;
HIP_CHECK(hipMalloc(&devArrDev1, NBYTES));
REQUIRE(devArrDev1 != nullptr);
fillDeviceArray(devArrDev1, N, 888);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 777);
HIP_CHECK(hipSetDevice(0));
std::thread dev0Thread(operationsInDev0, devArrDev0, devArrDev1);
dev0Thread.join();
std::thread dev1Thread(operationsInDev1, devArrDev1, hostArrDst);
dev1Thread.join();
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i]
<< " Expected value : 999 or 888 \n");
REQUIRE(((hostArrDst[i] == 999) || (hostArrDst[i] == 888)));
}
HIP_CHECK(hipSetDevice(1));
HIP_CHECK(hipFree(devArrDev1));
HIP_CHECK(hipSetDevice(0));
HIP_CHECK(hipFree(devArrDev0));
delete[] hostArrDst;
}
/**
* Test Description
* ------------------------
* - This test creates the child process and use the hipStreamLegacy flag
* - in the child process. The hipStreamLegacy flag should work properly
* - in child process.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_InChildProcess") {
hip::SpawnProc proc("hipStreamLegacy_exe", true);
REQUIRE(proc.run() == 0);
}
/**
* Test Description
* ------------------------
* - This test case, tests the hipStreamLegacy with Kernel.
* - Kernel launch should success and should give proper result.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_WithKernel") {
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 1);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 6);
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, hipStreamLegacy));
addOneKernel<<<1, 1, 0, hipStreamLegacy>>>(devArr, N);
addOneKernel<<<1, 1, 0, hipStreamLegacy>>>(devArr, N);
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 3 \n");
REQUIRE(hostArrDst[i] == 3);
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}
/**
* Test Description
* ------------------------
* - This test case, tests the hipStreamSynchronize with
* - the hipStreamLegacy. It should work without any conflicts.
* Test source
* ------------------------
* - unit/stream/hipStreamLegacy_Ext.cc
* Test requirements
* ------------------------
* - HIP_VERSION >= 6.3
*/
TEST_CASE("Unit_hipStreamLegacy_hipStreamSynchronize") {
int* hostArrSrc = new int[N];
REQUIRE(hostArrSrc != nullptr);
fillHostArray(hostArrSrc, N, 1);
int* devArr = nullptr;
HIP_CHECK(hipMalloc(&devArr, NBYTES));
REQUIRE(devArr != nullptr);
int* hostArrDst = new int[N];
REQUIRE(hostArrDst != nullptr);
fillHostArray(hostArrDst, N, 3);
HIP_CHECK(hipMemcpyAsync(devArr, hostArrSrc, NBYTES, hipMemcpyHostToDevice, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
HIP_CHECK(hipMemcpyAsync(hostArrDst, devArr, NBYTES, hipMemcpyDeviceToHost, hipStreamLegacy));
HIP_CHECK(hipStreamSynchronize(hipStreamLegacy));
for (int i = 0; i < N; i++) {
INFO("At index : " << i << " Got value : " << hostArrDst[i] << " Expected value : 1 \n");
REQUIRE(hostArrDst[i] == 1);
}
delete[] hostArrSrc;
delete[] hostArrDst;
HIP_CHECK(hipFree(devArr));
}