SWDEV-292643 - [dtest] Catch2 unit tests for stream management apis. (#2343)

Catch2 tests for : hipStreamAddCallback, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipExtStreamCreateWithCUMask

Change-Id: Ia99c06b1e97fc945f1a740e47710f4dcd70f38cd

[ROCm/hip commit: 2a17c5662d]
Этот коммит содержится в:
sumanthtg
2021-09-14 13:38:07 +05:30
коммит произвёл GitHub
родитель b8cc5e4a33
Коммит bc851a78ac
6 изменённых файлов: 1411 добавлений и 0 удалений
+18
Просмотреть файл
@@ -1,9 +1,27 @@
if(HIP_PLATFORM MATCHES "amd")
set(TEST_SRC
hipStreamCreate.cc
hipStreamGetFlags.cc
hipStreamGetPriority.cc
hipMultiStream.cc
hipStreamACb_MultiThread.cc
hipStreamAddCallback.cc
hipStreamCreateWithFlags.cc
hipStreamCreateWithPriority.cc
hipStreamWithCUMask.cc
)
else()
set(TEST_SRC
hipStreamCreate.cc
hipStreamGetFlags.cc
hipStreamGetPriority.cc
hipMultiStream.cc
hipStreamACb_MultiThread.cc
hipStreamAddCallback.cc
hipStreamCreateWithFlags.cc
hipStreamCreateWithPriority.cc
)
endif()
# Create shared lib of all tests
add_library(StreamTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
+168
Просмотреть файл
@@ -0,0 +1,168 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenario :
Validate behaviour of HIP when multiple hipStreaAddCallback() are called over
multiple Threads.
*/
#include <hip_test_common.hh>
#include <atomic>
static constexpr size_t N = 4096;
static constexpr int numThreads = 1000;
static std::atomic<int> Cb_count{0}, Data_mismatch{0};
static hipStream_t mystream;
static float *A1_h, *C1_h;
#if HT_AMD
#define HIPRT_CB
#endif
static __global__ void device_function(float* C_d, float* A_d, size_t Num) {
size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x);
size_t stride = blockDim.x * gridDim.x;
for (size_t i = gputhread; i < Num; i += stride) {
C_d[i] = A_d[i] * A_d[i];
}
// Delay thread 1 only in the GPU
if (gputhread == 1) {
uint64_t wait_t = 3200000000, start = clock64(), cur;
do {
cur = clock64() - start;
} while (cur < wait_t);
}
}
static void HIPRT_CB Thread1_Callback(hipStream_t stream, hipError_t status,
void* userData) {
HIPASSERT(stream == mystream);
HIPASSERT(userData == nullptr);
HIPCHECK(status);
for (size_t i = 0; i < N; i++) {
// Validate the data and update Data_mismatch
if (C1_h[i] != A1_h[i] * A1_h[i]) {
Data_mismatch++;
}
}
// Increment the Cb_count to indicate that the callback is processed.
++Cb_count;
}
static void HIPRT_CB Thread2_Callback(hipStream_t stream, hipError_t status,
void* userData) {
HIPASSERT(stream == mystream);
HIPASSERT(userData == nullptr);
HIPCHECK(status);
for (size_t i = 0; i < N; i++) {
// Validate the data and update Data_mismatch
if (C1_h[i] != A1_h[i] * A1_h[i]) {
Data_mismatch++;
}
}
// Increment the Cb_count to indicate that the callback is processed.
++Cb_count;
}
void Thread1_func() {
HIPCHECK(hipStreamAddCallback(mystream, Thread1_Callback, nullptr, 0));
}
void Thread2_func() {
HIPCHECK(hipStreamAddCallback(mystream, Thread2_Callback, nullptr, 0));
}
/**
Test multiple hipStreamAddCallback() called over
multiple Threads.
*/
TEST_CASE("Unit_hipStreamAddCallback_MultipleThreads") {
float *A_d, *C_d;
size_t Nbytes = (N) * sizeof(float);
constexpr float Phi = 1.618f;
A1_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A1_h != nullptr);
C1_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(C1_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A1_h[i] = Phi + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
HIP_CHECK(
hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking));
HIP_CHECK(
hipMemcpyAsync(A_d, A1_h, Nbytes, hipMemcpyHostToDevice,
mystream));
constexpr unsigned threadsPerBlock = 256;
constexpr unsigned blocks = (N + 255)/threadsPerBlock;
hipLaunchKernelGGL((device_function), dim3(blocks),
dim3(threadsPerBlock), 0,
mystream, C_d, A_d, N);
HIP_CHECK(
hipMemcpyAsync(C1_h, C_d, Nbytes,
hipMemcpyDeviceToHost, mystream));
std::thread *T = new std::thread[numThreads];
for (int i = 0; i < numThreads; i++) {
// Use different callback for every even thread
// The callbacks will be added to same stream from different threads
if ((i%2) == 0)
T[i] = std::thread(Thread1_func);
else
T[i] = std::thread(Thread2_func);
}
// Wait until all the threads finish their execution
for (int i = 0; i < numThreads; i++) {
T[i].join();
}
HIP_CHECK(hipStreamSynchronize(mystream));
HIP_CHECK(hipStreamDestroy(mystream));
HIP_CHECK(hipFree(A_d));
HIP_CHECK(hipFree(C_d));
free(A1_h);
free(C1_h);
// Cb_count should match total number of callbacks added from both threads
// Data_mismatch will be updated if there is problem in data validation
REQUIRE(Cb_count.load() == numThreads);
REQUIRE(Data_mismatch.load() == 0);
delete[] T;
}
+231
Просмотреть файл
@@ -0,0 +1,231 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
1) Validates parameter list of hipStreamAddCallback.
2) Validates hipStreamAddCallback functionality with default stream.
3) Validates hipStreamAddCallback functionality with defined stream.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <unistd.h>
#define UNUSED(expr) do { (void)(expr); } while (0)
#ifdef __HIP_PLATFORM_AMD__
#define HIPRT_CB
#endif
namespace hipStreaAddCallbackTest {
size_t NSize = 4 * 1024 * 1024;
float *A_h, *C_h;
bool gcbDone = false;
bool gPassed = true;
void *ptr0xff = reinterpret_cast<void *>(0xffffffff);
void *gusrptr;
hipStream_t gstream;
void HIPRT_CB Callback(hipStream_t stream, hipError_t status,
void* userData) {
UNUSED(stream);
HIP_CHECK(status);
REQUIRE(userData == NULL);
gPassed = true;
for (size_t i = 0; i < NSize; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
UNSCOPED_INFO("Data mismatch :" << i);
gPassed = false;
break;
}
}
gcbDone = true;
}
/**
* Validates functionality of hipStreamAddCallback with default/created stream.
*/
bool testStreamCallbackFunctionality(bool isDefault) {
float *A_d, *C_d;
size_t Nbytes = NSize * sizeof(float);
gcbDone = false;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(C_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < NSize; i++) {
A_h[i] = 1.618f + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
if (isDefault) {
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
0));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, 0, A_d, C_d, NSize);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
0));
HIP_CHECK(hipStreamAddCallback(0, Callback, nullptr, 0));
while (!gcbDone) usleep(100000); // Sleep for 100 ms
} else {
hipStream_t mystream;
HIP_CHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking));
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
mystream));
const unsigned blocks = 512;
const unsigned threadsPerBlock = 256;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, mystream, A_d, C_d, NSize);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
mystream));
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
while (!gcbDone) usleep(100000); // Sleep for 100 ms
HIP_CHECK(hipStreamDestroy(mystream));
}
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d)));
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d)));
free(C_h);
free(A_h);
return gPassed;
}
/**
* Scenario1: Validates if callback = nullptr returns error code for created stream.
* Scenario2: Validates if callback = nullptr returns error code for default stream.
* Scenario3: Validates if flag != 0 returns error code for created stream.
* Scenario4: Validates if flag != 0 returns error code for default stream.
* Scenario5: Validates if userData pointer is passed properly to callback.
* Scenario6: Validates if stream value is passed properly to callback.
*/
void Callback_ChkUsrdataPtr(hipStream_t stream, hipError_t status,
void* userData) {
REQUIRE(stream == gstream);
HIP_CHECK(status);
gPassed = true;
if (gusrptr != userData) {
gPassed = false;
}
gcbDone = true;
}
void Callback_ChkStreamValue(hipStream_t stream, hipError_t status,
void* userData) {
REQUIRE(userData == nullptr);
HIP_CHECK(status);
gPassed = true;
if (stream != gstream) {
gPassed = false;
}
gcbDone = true;
}
} // namespace hipStreaAddCallbackTest
using hipStreaAddCallbackTest::gcbDone;
using hipStreaAddCallbackTest::gPassed;
using hipStreaAddCallbackTest::ptr0xff;
using hipStreaAddCallbackTest::gusrptr;
using hipStreaAddCallbackTest::gstream;
using hipStreaAddCallbackTest::testStreamCallbackFunctionality;
using hipStreaAddCallbackTest::Callback;
using hipStreaAddCallbackTest::Callback_ChkUsrdataPtr;
using hipStreaAddCallbackTest::Callback_ChkStreamValue;
/*
* Validates parameter list of hipStreamAddCallback.
*/
TEST_CASE("Unit_hipStreamAddCallback_ParamTst") {
hipStream_t mystream;
HIP_CHECK(hipStreamCreate(&mystream));
// Scenario1
SECTION("callback is nullptr") {
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(mystream, nullptr,
nullptr, 0));
}
// Scenario2
SECTION("stream is default") {
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(0, nullptr,
nullptr, 0));
}
// Scenario3
SECTION("flag is nonzero for non-default stream") {
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(mystream, Callback,
nullptr, 10));
}
// Scenario4
SECTION("flag is nonzero for default stream") {
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(0, Callback,
nullptr, 10));
}
// Scenario5
SECTION("userData pointer value validation") {
gstream = mystream;
gusrptr = ptr0xff;
gPassed = true;
gcbDone = false;
HIP_CHECK(hipStreamAddCallback(mystream, Callback_ChkUsrdataPtr,
gusrptr, 0));
while (!gcbDone) {
usleep(100000); // Sleep for 100 ms
}
REQUIRE_FALSE(!gPassed);
}
// Scenario6
SECTION("stream value validation") {
gstream = mystream;
gPassed = true;
gcbDone = false;
HIP_CHECK(hipStreamAddCallback(mystream, Callback_ChkStreamValue,
nullptr, 0));
while (!gcbDone) {
usleep(100000); // Sleep for 100 ms
}
REQUIRE_FALSE(!gPassed);
}
HIP_CHECK(hipStreamDestroy(mystream));
}
/*
* Validates hipStreamAddCallback functionality with default stream.
*/
TEST_CASE("Unit_hipStreamAddCallback_WithDefaultStream") {
bool TestPassed = true;
TestPassed = testStreamCallbackFunctionality(true);
REQUIRE(TestPassed);
}
/*
* Validates hipStreamAddCallback functionality with defined stream.
*/
TEST_CASE("Unit_hipStreamAddCallback_WithCreatedStream") {
bool TestPassed = true;
TestPassed = testStreamCallbackFunctionality(false);
REQUIRE(TestPassed);
}
+39
Просмотреть файл
@@ -0,0 +1,39 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
1) Validates functionality of hipStreamCreateWithFlags when stream = nullptr.
2) Validates functionality of hipStreamCreateWithFlags when flag = 0xffffffff.
*/
#include <hip_test_common.hh>
TEST_CASE("Unit_hipStreamCreateWithFlags_ArgValidation") {
// stream = nullptr test
SECTION("stream is nullptr") {
REQUIRE(hipStreamCreateWithFlags(nullptr, hipStreamDefault) != hipSuccess);
}
// flag value invalid test
SECTION("flag value invalid") {
hipStream_t stream;
REQUIRE(hipStreamCreateWithFlags(&stream, 0xffffffff) != hipSuccess);
}
}
+605
Просмотреть файл
@@ -0,0 +1,605 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
1)Create streams with default flag for all available priority levels and
queue tasks in each of these streams, perform device synchronize and validate behavior.
2)Create streams with non-blocking flag for all available priority levels and
queue tasks in each of these streams, perform stream synchronize and validate behavior.
3)Create streams with default flag for all available priority levels and
queue tasks in each of these streams, perform stream synchronize and validate behavior.
4)Create streams with non-blocking flag for all available priority levels and
queue tasks in each of these streams, perform device synchronize and validate behavior.
5)Create a stream for each priority level with default flag, Launch memcpy and kernel
tasks on these streams from multiple threads. Validate all the results.
6)Create a stream for each priority level with non-blocking flag, Launch memcpy and
kernel tasks on these streams from multiple threads. Validate all the results.
7) Validate negative scenarios for hipStreamCreateWithPriority api.
8) Validate stream priorities with event after classifying them as low, medium, high.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <atomic>
#include <vector>
#define MEMCPYSIZE 64*1024*1024
#define MEMCPYSIZE2 1024*1024
#define NUMITERS 2
#define GRIDSIZE 1024
#define BLOCKSIZE 256
#define TOTALTHREADS 16
namespace hipStreamCreateWithPriorityTest {
std::atomic<int> g_thTestPassed(1);
// helper rountine to initialize memory
template <typename T>
void mem_init(T* buf, size_t n) {
for (size_t i = 0; i < n; i++) {
buf[i] = i;
}
}
// kernel to copy n elements from src to dst
template <typename T>
__global__ void memcpy_kernel(T* dst, T* src, size_t n) {
int num = gridDim.x * blockDim.x;
int id = blockDim.x * blockIdx.x + threadIdx.x;
for (size_t i = id; i < n; i += num) {
dst[i] = src[i];
}
}
/**
* Scenario: Create a stream for all available priority levels
* and queue tasks in each of these streams and default stream.
* Validate the calculated results.
*/
void funcTestsForAllPriorityLevelsWrtNullStrm(unsigned int flags,
bool deviceSynchronize) {
int priority;
int priority_low{};
int priority_high{};
size_t size = MEMCPYSIZE2*sizeof(int);
// Test is to get the Stream Priority Range
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
// Check if priorities are indeed supported
if (priority_low == priority_high) {
WARN("Stream priority range not supported. Skipping test.");
return;
}
int numOfPriorities = priority_low - priority_high;
INFO("numOfPriorities = " << numOfPriorities);
const int arr_size = numOfPriorities + 1;
// 0 idx is for default stream
hipStream_t *stream = reinterpret_cast<hipStream_t*>(
malloc(arr_size*sizeof(hipStream_t)));
REQUIRE(stream != nullptr);
stream[0] = 0;
int count = 1;
// Create a stream for each of the priority levels
for (priority = priority_high; priority < priority_low; priority++) {
HIP_CHECK(hipStreamCreateWithPriority(&stream[count++],
flags, priority));
}
// Allocate memory
int **A_d = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
int **C_d = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
int **A_h = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
int **C_h = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
REQUIRE(A_d != nullptr);
REQUIRE(C_d != nullptr);
REQUIRE(A_h != nullptr);
REQUIRE(C_h != nullptr);
for (int idx = 0; idx < arr_size; idx++) {
A_h[idx] = reinterpret_cast<int*>(malloc(size));
REQUIRE(A_h[idx] != nullptr);
C_h[idx] = reinterpret_cast<int*>(malloc(size));
REQUIRE(C_h[idx] != nullptr);
HIP_CHECK(hipMalloc(&A_d[idx], size));
HIP_CHECK(hipMalloc(&C_d[idx], size));
}
// Initialize host memory
constexpr int initVal = 2;
for (int idx = 0; idx < arr_size; idx++) {
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
A_h[idx][idy] = initVal;
}
}
// Launch task on each stream
for (int idx = 0; idx < arr_size; idx++) {
HIP_CHECK(hipMemcpyAsync(A_d[idx], A_h[idx], size,
hipMemcpyHostToDevice, stream[idx]));
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
dim3(BLOCKSIZE), 0, stream[idx], A_d[idx],
C_d[idx], MEMCPYSIZE2);
HIP_CHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
hipMemcpyDeviceToHost, stream[idx]));
}
if (deviceSynchronize) {
HIP_CHECK(hipDeviceSynchronize());
}
// Validate the output of each queue
for (int idx = 0; idx < arr_size; idx++) {
if (!deviceSynchronize) {
HIP_CHECK(hipStreamSynchronize(stream[idx]));
}
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
if (C_h[idx][idy] != A_h[idx][idy] * A_h[idx][idy]) {
INFO("Data mismatch at idx:" << idx << " idy:" << idy);
REQUIRE(false);
}
}
}
// Deallocate memory
for (int idx = 0; idx < arr_size; idx++) {
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d[idx])));
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d[idx])));
free(C_h[idx]);
free(A_h[idx]);
}
// Destroy the stream for each of the priority levels
count = 1;
for (priority = priority_high; priority < priority_low; priority++) {
HIP_CHECK(hipStreamDestroy(stream[count++]));
}
free(stream);
free(A_d);
free(C_d);
free(A_h);
free(C_h);
}
/**
* Scenario: Queue tasks in each of these streams and default stream.
* Validate the calculated results.
*/
void queueTasksInStreams(std::vector<hipStream_t> *stream,
const int arrsize) {
size_t size = MEMCPYSIZE2*sizeof(int);
// Allocate memory
int **A_d = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
int **C_d = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
int **A_h = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
int **C_h = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
HIPASSERT(A_d != nullptr);
HIPASSERT(C_d != nullptr);
HIPASSERT(A_h != nullptr);
HIPASSERT(C_h != nullptr);
for (int idx = 0; idx < arrsize; idx++) {
A_h[idx] = reinterpret_cast<int*>(malloc(size));
HIPASSERT(A_h[idx] != nullptr);
C_h[idx] = reinterpret_cast<int*>(malloc(size));
HIPASSERT(C_h[idx] != nullptr);
HIPCHECK(hipMalloc(&A_d[idx], size));
HIPCHECK(hipMalloc(&C_d[idx], size));
}
// Initialize host memory
constexpr int initVal = 2;
for (int idx = 0; idx < arrsize; idx++) {
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
A_h[idx][idy] = initVal;
}
}
// Launch task on each stream
for (int idx = 0; idx < arrsize; idx++) {
HIPCHECK(hipMemcpyAsync(A_d[idx], A_h[idx], size,
hipMemcpyHostToDevice, (*stream)[idx]));
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
dim3(BLOCKSIZE), 0, (*stream)[idx], A_d[idx],
C_d[idx], MEMCPYSIZE2);
HIPCHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
hipMemcpyDeviceToHost, (*stream)[idx]));
}
bool isPassed = true;
// Validate the output of each queue
for (int idx = 0; idx < arrsize; idx++) {
HIPCHECK(hipStreamSynchronize((*stream)[idx]));
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
if (C_h[idx][idy] != A_h[idx][idy] * A_h[idx][idy]) {
UNSCOPED_INFO("Data mismatch at idx:" << idx << " idy:" << idy);
isPassed = false;
break;
}
}
if (false == isPassed) break;
}
// Deallocate memory
for (int idx = 0; idx < arrsize; idx++) {
HIPCHECK(hipFree(reinterpret_cast<void*>(C_d[idx])));
HIPCHECK(hipFree(reinterpret_cast<void*>(A_d[idx])));
free(C_h[idx]);
free(A_h[idx]);
}
free(A_d);
free(C_d);
free(A_h);
free(C_h);
g_thTestPassed &= static_cast<int>(isPassed);
}
/**
* Scenario:
* Common streams used across multiple threads:Create a stream for each
* priority level (flag = hipStreamDefault/hipStreamNonBlocking)
* and 1 default stream.
* Launch memcpy and kernel tasks on these streams from multiple threads
* (use 16 threads). Validate all the results.
*/
bool runFuncTestsForAllPriorityLevelsMultThread(unsigned int flags) {
bool TestPassed = true;
std::thread T[TOTALTHREADS];
int priority;
int priority_low;
int priority_high;
// Test is to get the Stream Priority Range
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
// Check if priorities are indeed supported
if (priority_low == priority_high) {
WARN("Stream priority range not supported. Skipping test.");
return true;
}
int numOfPriorities = priority_low - priority_high;
INFO("numOfPriorities : " << numOfPriorities);
// 0 idx is for default stream
std::vector<hipStream_t> stream(numOfPriorities + 1);
stream[0] = 0;
// Create a stream for each of the priority levels
int count = 1;
for (priority = priority_high; priority < priority_low; priority++) {
HIP_CHECK(hipStreamCreateWithPriority(&stream[count++], flags,
priority));
}
for (int i = 0; i < TOTALTHREADS; i++) {
T[i] = std::thread(queueTasksInStreams,
&stream, numOfPriorities + 1);
}
for (int i=0; i < TOTALTHREADS; i++) {
T[i].join();
}
if (g_thTestPassed) {
TestPassed = true;
} else {
TestPassed = false;
}
// Destroy the stream for each of the priority levels
count = 1;
for (priority = priority_high; priority < priority_low; priority++) {
HIP_CHECK(hipStreamDestroy(stream[count++]));
}
return TestPassed;
}
template <typename T>
bool validateStreamPrioritiesWithEvents() {
size_t size = NUMITERS*MEMCPYSIZE;
// get the range of priorities available
#define OP(x) \
int priority_##x; \
bool enable_priority_##x = false;
OP(low)
OP(normal)
OP(high)
#undef OP
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
INFO("HIP stream priority range - low: " << priority_low << ",high: "
<< priority_high << ",normal: "
<< (priority_low + priority_high)/2);
// Check if priorities are indeed supported
if (priority_low == priority_high) {
WARN("Stream priority range not supported. Skipping test.");
return true;
}
// Enable/disable priorities based on number of available priority levels
enable_priority_low = true;
enable_priority_high = true;
if ((priority_low - priority_high) > 1) {
enable_priority_normal = true;
}
if (enable_priority_normal) {
priority_normal = ((priority_low + priority_high) / 2);
}
// create streams with highest and lowest available priorities
#define OP(x)\
hipStream_t stream_##x;\
if (enable_priority_##x) {\
HIP_CHECK(hipStreamCreateWithPriority(&stream_##x, \
hipStreamDefault, priority_##x));\
}
OP(low)
OP(normal)
OP(high)
#undef OP
// allocate and initialise host source and destination buffers
#define OP(x) \
T* src_h_##x; \
T* dst_h_##x; \
if (enable_priority_##x) { \
src_h_##x = reinterpret_cast<T*>(malloc(size)); \
REQUIRE(src_h_##x != nullptr); \
mem_init<T>(src_h_##x, (size / sizeof(T))); \
dst_h_##x = reinterpret_cast<T*>(malloc(size)); \
REQUIRE(dst_h_##x != nullptr); \
memset(dst_h_##x, 0, size); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// allocate and initialize device source and destination buffers
#define OP(x) \
T* src_d_##x; \
T* dst_d_##x; \
if (enable_priority_##x) { \
HIP_CHECK(hipMalloc(&src_d_##x, size)); \
HIP_CHECK( \
hipMemcpy(src_d_##x, src_h_##x, size, hipMemcpyHostToDevice)); \
HIP_CHECK(hipMalloc(&dst_d_##x, size)); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// create events for measuring time spent in kernel execution
#define OP(x) \
hipEvent_t event_start_##x; \
hipEvent_t event_end_##x; \
if (enable_priority_##x) { \
HIP_CHECK(hipEventCreate(&event_start_##x)); \
HIP_CHECK(hipEventCreate(&event_end_##x)); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// record start events for each of the priority streams
#define OP(x) \
if (enable_priority_##x) { \
HIP_CHECK(\
hipEventRecord(event_start_##x, stream_##x)); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// launch kernels repeatedly on each of the prioritiy streams
for (int i = 0; i < static_cast<int>(size); i += MEMCPYSIZE) {
int j = i / sizeof(T);
#define OP(x) \
if (enable_priority_##x) { \
hipLaunchKernelGGL((memcpy_kernel<T>), dim3(GRIDSIZE), \
dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, \
(MEMCPYSIZE / sizeof(T))); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
}
// record end events for each of the priority streams
#define OP(x) \
if (enable_priority_##x) { \
HIP_CHECK(hipEventRecord(event_end_##x, stream_##x)); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// synchronize events for each of the priority streams
#define OP(x) \
if (enable_priority_##x) { \
HIP_CHECK(hipEventSynchronize(event_end_##x)); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// compute time spent for memcpy in each stream
#define OP(x) \
float time_spent_##x; \
if (enable_priority_##x) { \
HIP_CHECK(hipEventElapsedTime(&time_spent_##x, \
event_start_##x, event_end_##x)); \
INFO("time spent for memcpy in " << #x << \
" priority stream: " << time_spent_##x << " ms"); \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// sanity check
#define OP(x) \
if (enable_priority_##x) { \
HIP_CHECK(hipMemcpy(dst_h_##x, dst_d_##x, size, \
hipMemcpyDeviceToHost)); \
if (memcmp(dst_h_##x, src_h_##x, size) != 0) { \
REQUIRE(false); \
} \
}
OP(low)
OP(normal)
OP(high)
#undef OP
// validate that stream priorities are working as expected
#define OP(x, y) \
if (enable_priority_##x && enable_priority_##y) { \
if ((1.05f * time_spent_##x) < time_spent_##y) { \
INFO("time_spent_##x : " << time_spent_##x << \
"time_spent_##y : " << time_spent_##y); \
REQUIRE(false); \
} \
}
OP(low, normal)
OP(normal, high)
OP(low, high)
#undef OP
return true;
}
} // namespace hipStreamCreateWithPriorityTest
/**
Tests following scenarios.
1)Create streams with default flag for all available priority levels and
queue tasks in each of these streams, perform device synchronize and validate behavior.
2)Create streams with non-blocking flag for all available priority levels and
queue tasks in each of these streams, perform stream synchronize and validate behavior.
3)Create streams with default flag for all available priority levels and
queue tasks in each of these streams, perform stream synchronize and validate behavior.
4)Create streams with non-blocking flag for all available priority levels and
queue tasks in each of these streams, perform device synchronize and validate behavior.
*/
TEST_CASE("Unit_hipStreamCreateWithPriority_FunctionalForAllPriorities") {
SECTION("Default flag and device synchronize") {
hipStreamCreateWithPriorityTest::
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamDefault, true);
}
SECTION("Stream non-blocking flag and stream synchronize") {
hipStreamCreateWithPriorityTest::
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamNonBlocking, false);
}
SECTION("Default flag and stream synchronize") {
hipStreamCreateWithPriorityTest::
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamDefault, false);
}
SECTION("Stream non-blocking flag and device synchronize") {
hipStreamCreateWithPriorityTest::
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamNonBlocking, true);
}
}
/**
* Create a stream for each priority level with default flag, Launch memcpy and kernel
* tasks on these streams from multiple threads. Validate all the results.
*/
TEST_CASE("Unit_hipStreamCreateWithPriority_MulthreadDefaultflag") {
bool TestPassed = true;
TestPassed = hipStreamCreateWithPriorityTest::
runFuncTestsForAllPriorityLevelsMultThread(hipStreamDefault);
REQUIRE(TestPassed);
}
/**
* Create a stream for each priority level with non-blocking flag, Launch memcpy and
* kernel tasks on these streams from multiple threads. Validate all the results.
*/
TEST_CASE("Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag") {
bool TestPassed = true;
TestPassed = hipStreamCreateWithPriorityTest::
runFuncTestsForAllPriorityLevelsMultThread(hipStreamNonBlocking);
REQUIRE(TestPassed);
}
/**
* Scenario1: Validates functionality of hipStreamCreateWithPriority when
stream = nullptr.
* Scenario2: Validates functionality of hipStreamCreateWithPriority when
flag = 0xffffffff.
*/
TEST_CASE("Unit_hipStreamCreateWithPriority_NegTst") {
hipStream_t stream;
int priority_low;
int priority_high;
hipError_t ret;
// Test is to get the Stream Priority Range
HIP_CHECK(
hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
// Check if priorities are indeed supported
if (priority_low == priority_high) {
WARN("Stream priority range not supported. Skipping test.");
return; // exit the test since priorities are not supported
}
SECTION("stream = nullptr test") {
ret = hipStreamCreateWithPriority(nullptr, hipStreamDefault,
priority_low);
REQUIRE(ret != hipSuccess);
}
SECTION("flag value invalid test") {
ret = hipStreamCreateWithPriority(&stream, 0xffffffff,
priority_low);
REQUIRE(ret != hipSuccess);
}
}
/**
* Validate stream priorities with event after classifying them as low, medium and high.
*/
TEST_CASE("Unit_hipStreamCreateWithPriority_ValidateWithEvents") {
bool TestPassed = true;
TestPassed = hipStreamCreateWithPriorityTest::
validateStreamPrioritiesWithEvents<int>();
REQUIRE(TestPassed);
}
+350
Просмотреть файл
@@ -0,0 +1,350 @@
/*
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/**
Testcase Scenarios :
1)Validates functionality of hipStreamAddCallback with created stream.
2)Validates functionality of stream with cu mask.
3)Create a stream with all CU masks disabled (0x00000000).
Verify that default CU mask is set for the stream.
4)Size is greater than physical CU number. In this case the extra elements
are ignored and hipExtStreamCreateWithCUMask must return hipSuccess.
5)Negative Testing of hipExtStreamCreateWithCUMask.
*/
#include <hip_test_common.hh>
#include <hip_test_kernels.hh>
#include <unistd.h>
#include <iostream>
#include <vector>
#define NUM_CU_PARTITIONS 4
#define CONSTANT 1.618f
#define SIZE_INBYTES_OF_MB (1024*1024)
#define GRIDSIZE 512
#define BLOCKSIZE 256
#define ZERO_MASK 0x00000000
namespace hipExtStreamCreateWithCUMaskTest {
float *A_h, *C_h;
bool cbDone = false;
bool isPassed = true;
size_t N = 4 * SIZE_INBYTES_OF_MB;
// Make a default CU mask bit-array where all CUs are active
// this default mask is expected to be returned when there is no
// custom or global CU mask defined.
void createDefaultCUMask(std::vector<uint32_t> *pdefaultCUMask,
int numOfCUs) {
uint32_t temp = 0;
uint32_t bit_index = 0;
for (int i = 0; i < numOfCUs; i++) {
temp |= 1UL << bit_index;
if (bit_index >= 32) {
(*pdefaultCUMask).push_back(temp);
temp = 0;
bit_index = 0;
temp |= 1UL << bit_index;
}
bit_index += 1;
}
if (bit_index != 0) {
(*pdefaultCUMask).push_back(temp);
}
}
// Create masks of disabled CU masks.
void createDisabledCUMask(std::vector<uint32_t> *pdisabledCUMask,
int numOfCUs) {
uint32_t temp = ZERO_MASK;
uint32_t bit_index = 0;
for (int i = 0; i < numOfCUs; i++) {
if (bit_index >= 32) {
(*pdisabledCUMask).push_back(temp);
temp = ZERO_MASK;
bit_index = 0;
}
bit_index += 1;
}
if (bit_index != 0) {
(*pdisabledCUMask).push_back(temp);
}
}
void Callback(hipStream_t stream, hipError_t status,
void* userData) {
isPassed = true;
stream = 0;
HIP_CHECK(status);
REQUIRE(userData == nullptr);
for (size_t i = 0; i < N; i++) {
if (C_h[i] != A_h[i] * A_h[i]) {
UNSCOPED_INFO("Data mismatch at index: " << i);
isPassed = false;
break;
}
}
cbDone = true;
}
} // namespace hipExtStreamCreateWithCUMaskTest
using hipExtStreamCreateWithCUMaskTest::A_h;
using hipExtStreamCreateWithCUMaskTest::C_h;
using hipExtStreamCreateWithCUMaskTest::cbDone;
using hipExtStreamCreateWithCUMaskTest::isPassed;
using hipExtStreamCreateWithCUMaskTest::N;
using hipExtStreamCreateWithCUMaskTest::createDefaultCUMask;
using hipExtStreamCreateWithCUMaskTest::createDisabledCUMask;
using hipExtStreamCreateWithCUMaskTest::Callback;
/**
* Scenario: Validates functionality of hipStreamAddCallback with created stream.
*/
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_ValidateCallbackFunc") {
float *A_d, *C_d;
size_t Nbytes = N * sizeof(float);
cbDone = false;
A_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(A_h != nullptr);
C_h = reinterpret_cast<float*>(malloc(Nbytes));
REQUIRE(C_h != nullptr);
// Fill with Phi + i
for (size_t i = 0; i < N; i++) {
A_h[i] = CONSTANT + i;
}
HIP_CHECK(hipMalloc(&A_d, Nbytes));
HIP_CHECK(hipMalloc(&C_d, Nbytes));
hipStream_t mystream;
std::vector<uint32_t> defaultCUMask;
HIP_CHECK(hipSetDevice(0));
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
hipExtStreamCreateWithCUMask(&mystream, defaultCUMask.size(),
defaultCUMask.data());
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
mystream));
const unsigned blocks = GRIDSIZE;
const unsigned threadsPerBlock = BLOCKSIZE;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, mystream, A_d, C_d, N);
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
mystream));
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
while (!cbDone) usleep(100000); // Sleep for 100 ms
HIP_CHECK(hipStreamDestroy(mystream));
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d)));
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d)));
free(C_h);
free(A_h);
REQUIRE(isPassed == true);
}
/**
* Scenario: Validates functionality of stream with cu mask.
*/
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
const int KNumPartition = NUM_CU_PARTITIONS;
float *dA[KNumPartition], *dC[KNumPartition];
float *hA, *hC;
size_t N = 25*SIZE_INBYTES_OF_MB;
size_t Nbytes = N * sizeof(float);
std::vector<hipStream_t> streams(KNumPartition);
std::vector<std::vector<uint32_t>> cuMasks(KNumPartition);
std::stringstream ss[KNumPartition];
int nGpu = 0;
HIP_CHECK(hipGetDeviceCount(&nGpu));
if (nGpu < 1) {
WARN("Didn't find any GPU! skipping the test!");
return;
}
static int device = 0;
HIP_CHECK(hipSetDevice(device));
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, device));
INFO("info: running on bus " << "0x" << props.pciBusID << " "
<< props.name << " with " << props.multiProcessorCount << " CUs");
hA = new float[Nbytes];
REQUIRE(hA != nullptr);
hC = new float[Nbytes];
REQUIRE(hC != nullptr);
for (size_t i = 0; i < N; i++) {
hA[i] = CONSTANT + i;
}
for (int np = 0; np < KNumPartition; np++) {
HIP_CHECK(hipMalloc(&dA[np], Nbytes));
HIP_CHECK(hipMalloc(&dC[np], Nbytes));
// make unique CU masks in the multiple of dwords for each stream
uint32_t temp = 0;
uint32_t bit_index = np;
for (int i = np; i < props.multiProcessorCount; i = i + 4) {
temp |= 1UL << bit_index;
if (bit_index >= 32) {
cuMasks[np].push_back(temp);
temp = 0;
bit_index = np;
temp |= 1UL << bit_index;
}
bit_index += 4;
}
if (bit_index != 0) {
cuMasks[np].push_back(temp);
}
HIP_CHECK(hipExtStreamCreateWithCUMask(&streams[np], cuMasks[np].size(),
cuMasks[np].data()));
HIP_CHECK(hipMemcpy(dA[np], hA, Nbytes, hipMemcpyHostToDevice));
ss[np] << std::hex;
for (int i = cuMasks[np].size() - 1; i >= 0; i--) {
ss[np] << cuMasks[np][i];
}
}
const unsigned blocks = GRIDSIZE;
const unsigned threadsPerBlock = BLOCKSIZE;
auto single_start = std::chrono::steady_clock::now();
INFO("info: launch 'vector_square' kernel on one stream " <<
streams[0] << " with CU mask: 0x" << ss[0].str().c_str());
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, streams[0], dA[0], dC[0], N);
hipDeviceSynchronize();
auto single_end = std::chrono::steady_clock::now();
std::chrono::duration<double> single_kernel_time = single_end - single_start;
HIP_CHECK(hipMemcpy(hC, dC[0], Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(hC[i] == (hA[i] * hA[i]));
}
INFO("info: launch 'vector_square' kernel on "
<< KNumPartition << " streams:");
auto all_start = std::chrono::steady_clock::now();
for (int np = 0; np < KNumPartition; np++) {
INFO("info: launch 'vector_square' kernel on the stream "
<< streams[np] << " with CU mask: 0x" << ss[np].str().c_str());
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, streams[np], dA[np], dC[np], N);
}
hipDeviceSynchronize();
auto all_end = std::chrono::steady_clock::now();
std::chrono::duration<double> all_kernel_time = all_end - all_start;
for (int np = 0; np < KNumPartition; np++) {
HIP_CHECK(hipMemcpy(hC, dC[np], Nbytes, hipMemcpyDeviceToHost));
for (size_t i = 0; i < N; i++) {
REQUIRE(hC[i] == (hA[i] * hA[i]));
}
}
INFO("info: kernel launched on one stream took: " <<
single_kernel_time.count() << " seconds");
INFO("info: kernels launched on " << KNumPartition <<
" streams took: " << all_kernel_time.count() << " seconds");
INFO("info: launching kernels on " << KNumPartition <<
" streams asynchronously is " <<
single_kernel_time.count() / (all_kernel_time.count() / KNumPartition)
<< " times faster per stream than launching on one stream alone");
delete [] hA;
delete [] hC;
for (int np = 0; np < KNumPartition; np++) {
hipFree(dC[np]);
hipFree(dA[np]);
HIP_CHECK(hipStreamDestroy(streams[np]));
}
}
/**
* Scenario: Create a stream with all CU masks disabled (0x00000000).
* Verify that default CU mask is set for the stream.
*/
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_AllCUsMasked") {
HIP_CHECK(hipSetDevice(0));
hipDeviceProp_t props;
HIP_CHECK(hipGetDeviceProperties(&props, 0));
// make a CU mask with all CUs disabled.
std::vector<uint32_t> allCUDisabled;
createDisabledCUMask(&allCUDisabled, props.multiProcessorCount);
hipStream_t stream;
HIP_CHECK(hipExtStreamCreateWithCUMask(&stream, allCUDisabled.size(),
allCUDisabled.data()));
// Verify whether default CU mask is set for the stream.
uint32_t size = (props.multiProcessorCount / 32) + 1;
std::vector<uint32_t> cuMask(size);
std::vector<uint32_t> defaultCUMask;
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
HIP_CHECK(hipExtStreamGetCUMask(stream, cuMask.size(), &cuMask[0]));
for (int i = 0; i < static_cast<int>(defaultCUMask.size()); i++) {
REQUIRE(defaultCUMask[i] == cuMask[i]);
}
HIP_CHECK(hipStreamDestroy(stream));
}
/**
* Scenario: Negative Testing of hipExtStreamCreateWithCUMask.
*/
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_NegTst") {
std::vector<uint32_t> defaultCUMask;
REQUIRE(hipSuccess == hipSetDevice(0));
hipDeviceProp_t props;
REQUIRE(hipSuccess == hipGetDeviceProperties(&props, 0));
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
hipStream_t stream;
// Negative Scenario 1: stream = nullptr
SECTION("stream is nullptr") {
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(nullptr,
defaultCUMask.size(),
defaultCUMask.data()) == hipSuccess);
}
// Negative Scenario 2: cuMaskSize = 0
SECTION("cuMaskSize is 0") {
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(&stream, 0,
defaultCUMask.data()) == hipSuccess);
}
// Negative Scenario 3: cuMask = nullptr
SECTION("cuMask is nullptr") {
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(&stream,
defaultCUMask.size(),
nullptr) == hipSuccess);
}
}