SWDEV-292643 - [dtest] Catch2 unit tests for stream management apis. (#2343)
Catch2 tests for : hipStreamAddCallback, hipStreamCreateWithFlags, hipStreamCreateWithPriority, hipExtStreamCreateWithCUMask
Change-Id: Ia99c06b1e97fc945f1a740e47710f4dcd70f38cd
[ROCm/hip commit: 2a17c5662d]
Этот коммит содержится в:
@@ -1,9 +1,27 @@
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC
|
||||
hipStreamCreate.cc
|
||||
hipStreamGetFlags.cc
|
||||
hipStreamGetPriority.cc
|
||||
hipMultiStream.cc
|
||||
hipStreamACb_MultiThread.cc
|
||||
hipStreamAddCallback.cc
|
||||
hipStreamCreateWithFlags.cc
|
||||
hipStreamCreateWithPriority.cc
|
||||
hipStreamWithCUMask.cc
|
||||
)
|
||||
else()
|
||||
set(TEST_SRC
|
||||
hipStreamCreate.cc
|
||||
hipStreamGetFlags.cc
|
||||
hipStreamGetPriority.cc
|
||||
hipMultiStream.cc
|
||||
hipStreamACb_MultiThread.cc
|
||||
hipStreamAddCallback.cc
|
||||
hipStreamCreateWithFlags.cc
|
||||
hipStreamCreateWithPriority.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
# Create shared lib of all tests
|
||||
add_library(StreamTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenario :
|
||||
Validate behaviour of HIP when multiple hipStreaAddCallback() are called over
|
||||
multiple Threads.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <atomic>
|
||||
|
||||
static constexpr size_t N = 4096;
|
||||
static constexpr int numThreads = 1000;
|
||||
static std::atomic<int> Cb_count{0}, Data_mismatch{0};
|
||||
static hipStream_t mystream;
|
||||
static float *A1_h, *C1_h;
|
||||
|
||||
#if HT_AMD
|
||||
#define HIPRT_CB
|
||||
#endif
|
||||
|
||||
static __global__ void device_function(float* C_d, float* A_d, size_t Num) {
|
||||
size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = gputhread; i < Num; i += stride) {
|
||||
C_d[i] = A_d[i] * A_d[i];
|
||||
}
|
||||
|
||||
// Delay thread 1 only in the GPU
|
||||
if (gputhread == 1) {
|
||||
uint64_t wait_t = 3200000000, start = clock64(), cur;
|
||||
do {
|
||||
cur = clock64() - start;
|
||||
} while (cur < wait_t);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void HIPRT_CB Thread1_Callback(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
HIPASSERT(stream == mystream);
|
||||
HIPASSERT(userData == nullptr);
|
||||
HIPCHECK(status);
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Validate the data and update Data_mismatch
|
||||
if (C1_h[i] != A1_h[i] * A1_h[i]) {
|
||||
Data_mismatch++;
|
||||
}
|
||||
}
|
||||
|
||||
// Increment the Cb_count to indicate that the callback is processed.
|
||||
++Cb_count;
|
||||
}
|
||||
|
||||
static void HIPRT_CB Thread2_Callback(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
HIPASSERT(stream == mystream);
|
||||
HIPASSERT(userData == nullptr);
|
||||
HIPCHECK(status);
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
// Validate the data and update Data_mismatch
|
||||
if (C1_h[i] != A1_h[i] * A1_h[i]) {
|
||||
Data_mismatch++;
|
||||
}
|
||||
}
|
||||
|
||||
// Increment the Cb_count to indicate that the callback is processed.
|
||||
++Cb_count;
|
||||
}
|
||||
|
||||
void Thread1_func() {
|
||||
HIPCHECK(hipStreamAddCallback(mystream, Thread1_Callback, nullptr, 0));
|
||||
}
|
||||
|
||||
void Thread2_func() {
|
||||
HIPCHECK(hipStreamAddCallback(mystream, Thread2_Callback, nullptr, 0));
|
||||
}
|
||||
|
||||
/**
|
||||
Test multiple hipStreamAddCallback() called over
|
||||
multiple Threads.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamAddCallback_MultipleThreads") {
|
||||
float *A_d, *C_d;
|
||||
size_t Nbytes = (N) * sizeof(float);
|
||||
constexpr float Phi = 1.618f;
|
||||
|
||||
A1_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A1_h != nullptr);
|
||||
C1_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(C1_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A1_h[i] = Phi + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
HIP_CHECK(
|
||||
hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking));
|
||||
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(A_d, A1_h, Nbytes, hipMemcpyHostToDevice,
|
||||
mystream));
|
||||
|
||||
constexpr unsigned threadsPerBlock = 256;
|
||||
constexpr unsigned blocks = (N + 255)/threadsPerBlock;
|
||||
|
||||
hipLaunchKernelGGL((device_function), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0,
|
||||
mystream, C_d, A_d, N);
|
||||
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(C1_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, mystream));
|
||||
|
||||
std::thread *T = new std::thread[numThreads];
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
// Use different callback for every even thread
|
||||
// The callbacks will be added to same stream from different threads
|
||||
if ((i%2) == 0)
|
||||
T[i] = std::thread(Thread1_func);
|
||||
else
|
||||
T[i] = std::thread(Thread2_func);
|
||||
}
|
||||
|
||||
// Wait until all the threads finish their execution
|
||||
for (int i = 0; i < numThreads; i++) {
|
||||
T[i].join();
|
||||
}
|
||||
|
||||
HIP_CHECK(hipStreamSynchronize(mystream));
|
||||
HIP_CHECK(hipStreamDestroy(mystream));
|
||||
|
||||
HIP_CHECK(hipFree(A_d));
|
||||
HIP_CHECK(hipFree(C_d));
|
||||
|
||||
free(A1_h);
|
||||
free(C1_h);
|
||||
|
||||
// Cb_count should match total number of callbacks added from both threads
|
||||
// Data_mismatch will be updated if there is problem in data validation
|
||||
REQUIRE(Cb_count.load() == numThreads);
|
||||
REQUIRE(Data_mismatch.load() == 0);
|
||||
delete[] T;
|
||||
}
|
||||
@@ -0,0 +1,231 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
1) Validates parameter list of hipStreamAddCallback.
|
||||
2) Validates hipStreamAddCallback functionality with default stream.
|
||||
3) Validates hipStreamAddCallback functionality with defined stream.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <unistd.h>
|
||||
|
||||
#define UNUSED(expr) do { (void)(expr); } while (0)
|
||||
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
#define HIPRT_CB
|
||||
#endif
|
||||
|
||||
namespace hipStreaAddCallbackTest {
|
||||
size_t NSize = 4 * 1024 * 1024;
|
||||
float *A_h, *C_h;
|
||||
bool gcbDone = false;
|
||||
bool gPassed = true;
|
||||
void *ptr0xff = reinterpret_cast<void *>(0xffffffff);
|
||||
void *gusrptr;
|
||||
hipStream_t gstream;
|
||||
|
||||
void HIPRT_CB Callback(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
UNUSED(stream);
|
||||
HIP_CHECK(status);
|
||||
REQUIRE(userData == NULL);
|
||||
gPassed = true;
|
||||
for (size_t i = 0; i < NSize; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
UNSCOPED_INFO("Data mismatch :" << i);
|
||||
gPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
gcbDone = true;
|
||||
}
|
||||
/**
|
||||
* Validates functionality of hipStreamAddCallback with default/created stream.
|
||||
*/
|
||||
bool testStreamCallbackFunctionality(bool isDefault) {
|
||||
float *A_d, *C_d;
|
||||
size_t Nbytes = NSize * sizeof(float);
|
||||
gcbDone = false;
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < NSize; i++) {
|
||||
A_h[i] = 1.618f + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
if (isDefault) {
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
|
||||
0));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, 0, A_d, C_d, NSize);
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
0));
|
||||
HIP_CHECK(hipStreamAddCallback(0, Callback, nullptr, 0));
|
||||
while (!gcbDone) usleep(100000); // Sleep for 100 ms
|
||||
} else {
|
||||
hipStream_t mystream;
|
||||
HIP_CHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
|
||||
mystream));
|
||||
|
||||
const unsigned blocks = 512;
|
||||
const unsigned threadsPerBlock = 256;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, mystream, A_d, C_d, NSize);
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
mystream));
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
|
||||
while (!gcbDone) usleep(100000); // Sleep for 100 ms
|
||||
HIP_CHECK(hipStreamDestroy(mystream));
|
||||
}
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d)));
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d)));
|
||||
free(C_h);
|
||||
free(A_h);
|
||||
return gPassed;
|
||||
}
|
||||
/**
|
||||
* Scenario1: Validates if callback = nullptr returns error code for created stream.
|
||||
* Scenario2: Validates if callback = nullptr returns error code for default stream.
|
||||
* Scenario3: Validates if flag != 0 returns error code for created stream.
|
||||
* Scenario4: Validates if flag != 0 returns error code for default stream.
|
||||
* Scenario5: Validates if userData pointer is passed properly to callback.
|
||||
* Scenario6: Validates if stream value is passed properly to callback.
|
||||
*/
|
||||
void Callback_ChkUsrdataPtr(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
REQUIRE(stream == gstream);
|
||||
HIP_CHECK(status);
|
||||
gPassed = true;
|
||||
if (gusrptr != userData) {
|
||||
gPassed = false;
|
||||
}
|
||||
gcbDone = true;
|
||||
}
|
||||
|
||||
void Callback_ChkStreamValue(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
REQUIRE(userData == nullptr);
|
||||
HIP_CHECK(status);
|
||||
gPassed = true;
|
||||
if (stream != gstream) {
|
||||
gPassed = false;
|
||||
}
|
||||
gcbDone = true;
|
||||
}
|
||||
} // namespace hipStreaAddCallbackTest
|
||||
|
||||
|
||||
using hipStreaAddCallbackTest::gcbDone;
|
||||
using hipStreaAddCallbackTest::gPassed;
|
||||
using hipStreaAddCallbackTest::ptr0xff;
|
||||
using hipStreaAddCallbackTest::gusrptr;
|
||||
using hipStreaAddCallbackTest::gstream;
|
||||
using hipStreaAddCallbackTest::testStreamCallbackFunctionality;
|
||||
using hipStreaAddCallbackTest::Callback;
|
||||
using hipStreaAddCallbackTest::Callback_ChkUsrdataPtr;
|
||||
using hipStreaAddCallbackTest::Callback_ChkStreamValue;
|
||||
|
||||
|
||||
/*
|
||||
* Validates parameter list of hipStreamAddCallback.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamAddCallback_ParamTst") {
|
||||
hipStream_t mystream;
|
||||
HIP_CHECK(hipStreamCreate(&mystream));
|
||||
// Scenario1
|
||||
SECTION("callback is nullptr") {
|
||||
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(mystream, nullptr,
|
||||
nullptr, 0));
|
||||
}
|
||||
// Scenario2
|
||||
SECTION("stream is default") {
|
||||
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(0, nullptr,
|
||||
nullptr, 0));
|
||||
}
|
||||
// Scenario3
|
||||
SECTION("flag is nonzero for non-default stream") {
|
||||
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(mystream, Callback,
|
||||
nullptr, 10));
|
||||
}
|
||||
// Scenario4
|
||||
SECTION("flag is nonzero for default stream") {
|
||||
REQUIRE_FALSE(hipSuccess == hipStreamAddCallback(0, Callback,
|
||||
nullptr, 10));
|
||||
}
|
||||
// Scenario5
|
||||
SECTION("userData pointer value validation") {
|
||||
gstream = mystream;
|
||||
gusrptr = ptr0xff;
|
||||
gPassed = true;
|
||||
gcbDone = false;
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback_ChkUsrdataPtr,
|
||||
gusrptr, 0));
|
||||
while (!gcbDone) {
|
||||
usleep(100000); // Sleep for 100 ms
|
||||
}
|
||||
REQUIRE_FALSE(!gPassed);
|
||||
}
|
||||
// Scenario6
|
||||
SECTION("stream value validation") {
|
||||
gstream = mystream;
|
||||
gPassed = true;
|
||||
gcbDone = false;
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback_ChkStreamValue,
|
||||
nullptr, 0));
|
||||
while (!gcbDone) {
|
||||
usleep(100000); // Sleep for 100 ms
|
||||
}
|
||||
REQUIRE_FALSE(!gPassed);
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(mystream));
|
||||
}
|
||||
|
||||
/*
|
||||
* Validates hipStreamAddCallback functionality with default stream.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamAddCallback_WithDefaultStream") {
|
||||
bool TestPassed = true;
|
||||
TestPassed = testStreamCallbackFunctionality(true);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validates hipStreamAddCallback functionality with defined stream.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamAddCallback_WithCreatedStream") {
|
||||
bool TestPassed = true;
|
||||
TestPassed = testStreamCallbackFunctionality(false);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
1) Validates functionality of hipStreamCreateWithFlags when stream = nullptr.
|
||||
2) Validates functionality of hipStreamCreateWithFlags when flag = 0xffffffff.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
|
||||
TEST_CASE("Unit_hipStreamCreateWithFlags_ArgValidation") {
|
||||
// stream = nullptr test
|
||||
SECTION("stream is nullptr") {
|
||||
REQUIRE(hipStreamCreateWithFlags(nullptr, hipStreamDefault) != hipSuccess);
|
||||
}
|
||||
// flag value invalid test
|
||||
SECTION("flag value invalid") {
|
||||
hipStream_t stream;
|
||||
REQUIRE(hipStreamCreateWithFlags(&stream, 0xffffffff) != hipSuccess);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,605 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
|
||||
1)Create streams with default flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform device synchronize and validate behavior.
|
||||
|
||||
2)Create streams with non-blocking flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform stream synchronize and validate behavior.
|
||||
|
||||
3)Create streams with default flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform stream synchronize and validate behavior.
|
||||
|
||||
4)Create streams with non-blocking flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform device synchronize and validate behavior.
|
||||
|
||||
5)Create a stream for each priority level with default flag, Launch memcpy and kernel
|
||||
tasks on these streams from multiple threads. Validate all the results.
|
||||
|
||||
6)Create a stream for each priority level with non-blocking flag, Launch memcpy and
|
||||
kernel tasks on these streams from multiple threads. Validate all the results.
|
||||
|
||||
7) Validate negative scenarios for hipStreamCreateWithPriority api.
|
||||
|
||||
8) Validate stream priorities with event after classifying them as low, medium, high.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
|
||||
#define MEMCPYSIZE 64*1024*1024
|
||||
#define MEMCPYSIZE2 1024*1024
|
||||
#define NUMITERS 2
|
||||
#define GRIDSIZE 1024
|
||||
#define BLOCKSIZE 256
|
||||
#define TOTALTHREADS 16
|
||||
|
||||
namespace hipStreamCreateWithPriorityTest {
|
||||
|
||||
std::atomic<int> g_thTestPassed(1);
|
||||
// helper rountine to initialize memory
|
||||
template <typename T>
|
||||
void mem_init(T* buf, size_t n) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
buf[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// kernel to copy n elements from src to dst
|
||||
template <typename T>
|
||||
__global__ void memcpy_kernel(T* dst, T* src, size_t n) {
|
||||
int num = gridDim.x * blockDim.x;
|
||||
int id = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
for (size_t i = id; i < n; i += num) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Create a stream for all available priority levels
|
||||
* and queue tasks in each of these streams and default stream.
|
||||
* Validate the calculated results.
|
||||
*/
|
||||
void funcTestsForAllPriorityLevelsWrtNullStrm(unsigned int flags,
|
||||
bool deviceSynchronize) {
|
||||
int priority;
|
||||
int priority_low{};
|
||||
int priority_high{};
|
||||
size_t size = MEMCPYSIZE2*sizeof(int);
|
||||
// Test is to get the Stream Priority Range
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
|
||||
|
||||
// Check if priorities are indeed supported
|
||||
if (priority_low == priority_high) {
|
||||
WARN("Stream priority range not supported. Skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
int numOfPriorities = priority_low - priority_high;
|
||||
INFO("numOfPriorities = " << numOfPriorities);
|
||||
const int arr_size = numOfPriorities + 1;
|
||||
// 0 idx is for default stream
|
||||
hipStream_t *stream = reinterpret_cast<hipStream_t*>(
|
||||
malloc(arr_size*sizeof(hipStream_t)));
|
||||
REQUIRE(stream != nullptr);
|
||||
stream[0] = 0;
|
||||
int count = 1;
|
||||
// Create a stream for each of the priority levels
|
||||
for (priority = priority_high; priority < priority_low; priority++) {
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream[count++],
|
||||
flags, priority));
|
||||
}
|
||||
// Allocate memory
|
||||
int **A_d = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
|
||||
int **C_d = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
|
||||
int **A_h = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
|
||||
int **C_h = reinterpret_cast<int**>(malloc(arr_size*sizeof(int*)));
|
||||
|
||||
REQUIRE(A_d != nullptr);
|
||||
REQUIRE(C_d != nullptr);
|
||||
REQUIRE(A_h != nullptr);
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
for (int idx = 0; idx < arr_size; idx++) {
|
||||
A_h[idx] = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(A_h[idx] != nullptr);
|
||||
C_h[idx] = reinterpret_cast<int*>(malloc(size));
|
||||
REQUIRE(C_h[idx] != nullptr);
|
||||
HIP_CHECK(hipMalloc(&A_d[idx], size));
|
||||
HIP_CHECK(hipMalloc(&C_d[idx], size));
|
||||
}
|
||||
|
||||
// Initialize host memory
|
||||
constexpr int initVal = 2;
|
||||
for (int idx = 0; idx < arr_size; idx++) {
|
||||
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
|
||||
A_h[idx][idy] = initVal;
|
||||
}
|
||||
}
|
||||
|
||||
// Launch task on each stream
|
||||
for (int idx = 0; idx < arr_size; idx++) {
|
||||
HIP_CHECK(hipMemcpyAsync(A_d[idx], A_h[idx], size,
|
||||
hipMemcpyHostToDevice, stream[idx]));
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
|
||||
dim3(BLOCKSIZE), 0, stream[idx], A_d[idx],
|
||||
C_d[idx], MEMCPYSIZE2);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
|
||||
hipMemcpyDeviceToHost, stream[idx]));
|
||||
}
|
||||
|
||||
if (deviceSynchronize) {
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
// Validate the output of each queue
|
||||
for (int idx = 0; idx < arr_size; idx++) {
|
||||
if (!deviceSynchronize) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream[idx]));
|
||||
}
|
||||
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
|
||||
if (C_h[idx][idy] != A_h[idx][idy] * A_h[idx][idy]) {
|
||||
INFO("Data mismatch at idx:" << idx << " idy:" << idy);
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deallocate memory
|
||||
for (int idx = 0; idx < arr_size; idx++) {
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d[idx])));
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d[idx])));
|
||||
free(C_h[idx]);
|
||||
free(A_h[idx]);
|
||||
}
|
||||
|
||||
// Destroy the stream for each of the priority levels
|
||||
count = 1;
|
||||
for (priority = priority_high; priority < priority_low; priority++) {
|
||||
HIP_CHECK(hipStreamDestroy(stream[count++]));
|
||||
}
|
||||
free(stream);
|
||||
free(A_d);
|
||||
free(C_d);
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Queue tasks in each of these streams and default stream.
|
||||
* Validate the calculated results.
|
||||
*/
|
||||
void queueTasksInStreams(std::vector<hipStream_t> *stream,
|
||||
const int arrsize) {
|
||||
size_t size = MEMCPYSIZE2*sizeof(int);
|
||||
// Allocate memory
|
||||
int **A_d = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
|
||||
int **C_d = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
|
||||
int **A_h = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
|
||||
int **C_h = reinterpret_cast<int**>(malloc(arrsize*sizeof(int *)));
|
||||
|
||||
HIPASSERT(A_d != nullptr);
|
||||
HIPASSERT(C_d != nullptr);
|
||||
HIPASSERT(A_h != nullptr);
|
||||
HIPASSERT(C_h != nullptr);
|
||||
|
||||
for (int idx = 0; idx < arrsize; idx++) {
|
||||
A_h[idx] = reinterpret_cast<int*>(malloc(size));
|
||||
HIPASSERT(A_h[idx] != nullptr);
|
||||
C_h[idx] = reinterpret_cast<int*>(malloc(size));
|
||||
HIPASSERT(C_h[idx] != nullptr);
|
||||
HIPCHECK(hipMalloc(&A_d[idx], size));
|
||||
HIPCHECK(hipMalloc(&C_d[idx], size));
|
||||
}
|
||||
// Initialize host memory
|
||||
constexpr int initVal = 2;
|
||||
for (int idx = 0; idx < arrsize; idx++) {
|
||||
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
|
||||
A_h[idx][idy] = initVal;
|
||||
}
|
||||
}
|
||||
// Launch task on each stream
|
||||
for (int idx = 0; idx < arrsize; idx++) {
|
||||
HIPCHECK(hipMemcpyAsync(A_d[idx], A_h[idx], size,
|
||||
hipMemcpyHostToDevice, (*stream)[idx]));
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
|
||||
dim3(BLOCKSIZE), 0, (*stream)[idx], A_d[idx],
|
||||
C_d[idx], MEMCPYSIZE2);
|
||||
HIPCHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
|
||||
hipMemcpyDeviceToHost, (*stream)[idx]));
|
||||
}
|
||||
|
||||
bool isPassed = true;
|
||||
// Validate the output of each queue
|
||||
for (int idx = 0; idx < arrsize; idx++) {
|
||||
HIPCHECK(hipStreamSynchronize((*stream)[idx]));
|
||||
for (int idy = 0; idy < MEMCPYSIZE2; idy++) {
|
||||
if (C_h[idx][idy] != A_h[idx][idy] * A_h[idx][idy]) {
|
||||
UNSCOPED_INFO("Data mismatch at idx:" << idx << " idy:" << idy);
|
||||
isPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (false == isPassed) break;
|
||||
}
|
||||
// Deallocate memory
|
||||
for (int idx = 0; idx < arrsize; idx++) {
|
||||
HIPCHECK(hipFree(reinterpret_cast<void*>(C_d[idx])));
|
||||
HIPCHECK(hipFree(reinterpret_cast<void*>(A_d[idx])));
|
||||
free(C_h[idx]);
|
||||
free(A_h[idx]);
|
||||
}
|
||||
free(A_d);
|
||||
free(C_d);
|
||||
free(A_h);
|
||||
free(C_h);
|
||||
g_thTestPassed &= static_cast<int>(isPassed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario:
|
||||
* Common streams used across multiple threads:Create a stream for each
|
||||
* priority level (flag = hipStreamDefault/hipStreamNonBlocking)
|
||||
* and 1 default stream.
|
||||
* Launch memcpy and kernel tasks on these streams from multiple threads
|
||||
* (use 16 threads). Validate all the results.
|
||||
*/
|
||||
bool runFuncTestsForAllPriorityLevelsMultThread(unsigned int flags) {
|
||||
bool TestPassed = true;
|
||||
std::thread T[TOTALTHREADS];
|
||||
int priority;
|
||||
int priority_low;
|
||||
int priority_high;
|
||||
// Test is to get the Stream Priority Range
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
|
||||
|
||||
// Check if priorities are indeed supported
|
||||
if (priority_low == priority_high) {
|
||||
WARN("Stream priority range not supported. Skipping test.");
|
||||
return true;
|
||||
}
|
||||
|
||||
int numOfPriorities = priority_low - priority_high;
|
||||
INFO("numOfPriorities : " << numOfPriorities);
|
||||
|
||||
// 0 idx is for default stream
|
||||
std::vector<hipStream_t> stream(numOfPriorities + 1);
|
||||
stream[0] = 0;
|
||||
|
||||
// Create a stream for each of the priority levels
|
||||
int count = 1;
|
||||
for (priority = priority_high; priority < priority_low; priority++) {
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream[count++], flags,
|
||||
priority));
|
||||
}
|
||||
|
||||
for (int i = 0; i < TOTALTHREADS; i++) {
|
||||
T[i] = std::thread(queueTasksInStreams,
|
||||
&stream, numOfPriorities + 1);
|
||||
}
|
||||
|
||||
for (int i=0; i < TOTALTHREADS; i++) {
|
||||
T[i].join();
|
||||
}
|
||||
if (g_thTestPassed) {
|
||||
TestPassed = true;
|
||||
} else {
|
||||
TestPassed = false;
|
||||
}
|
||||
|
||||
// Destroy the stream for each of the priority levels
|
||||
count = 1;
|
||||
for (priority = priority_high; priority < priority_low; priority++) {
|
||||
HIP_CHECK(hipStreamDestroy(stream[count++]));
|
||||
}
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
bool validateStreamPrioritiesWithEvents() {
|
||||
size_t size = NUMITERS*MEMCPYSIZE;
|
||||
|
||||
// get the range of priorities available
|
||||
#define OP(x) \
|
||||
int priority_##x; \
|
||||
bool enable_priority_##x = false;
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
HIP_CHECK(hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
|
||||
|
||||
INFO("HIP stream priority range - low: " << priority_low << ",high: "
|
||||
<< priority_high << ",normal: "
|
||||
<< (priority_low + priority_high)/2);
|
||||
// Check if priorities are indeed supported
|
||||
if (priority_low == priority_high) {
|
||||
WARN("Stream priority range not supported. Skipping test.");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Enable/disable priorities based on number of available priority levels
|
||||
enable_priority_low = true;
|
||||
enable_priority_high = true;
|
||||
if ((priority_low - priority_high) > 1) {
|
||||
enable_priority_normal = true;
|
||||
}
|
||||
if (enable_priority_normal) {
|
||||
priority_normal = ((priority_low + priority_high) / 2);
|
||||
}
|
||||
// create streams with highest and lowest available priorities
|
||||
#define OP(x)\
|
||||
hipStream_t stream_##x;\
|
||||
if (enable_priority_##x) {\
|
||||
HIP_CHECK(hipStreamCreateWithPriority(&stream_##x, \
|
||||
hipStreamDefault, priority_##x));\
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// allocate and initialise host source and destination buffers
|
||||
#define OP(x) \
|
||||
T* src_h_##x; \
|
||||
T* dst_h_##x; \
|
||||
if (enable_priority_##x) { \
|
||||
src_h_##x = reinterpret_cast<T*>(malloc(size)); \
|
||||
REQUIRE(src_h_##x != nullptr); \
|
||||
mem_init<T>(src_h_##x, (size / sizeof(T))); \
|
||||
dst_h_##x = reinterpret_cast<T*>(malloc(size)); \
|
||||
REQUIRE(dst_h_##x != nullptr); \
|
||||
memset(dst_h_##x, 0, size); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// allocate and initialize device source and destination buffers
|
||||
#define OP(x) \
|
||||
T* src_d_##x; \
|
||||
T* dst_d_##x; \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipMalloc(&src_d_##x, size)); \
|
||||
HIP_CHECK( \
|
||||
hipMemcpy(src_d_##x, src_h_##x, size, hipMemcpyHostToDevice)); \
|
||||
HIP_CHECK(hipMalloc(&dst_d_##x, size)); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// create events for measuring time spent in kernel execution
|
||||
#define OP(x) \
|
||||
hipEvent_t event_start_##x; \
|
||||
hipEvent_t event_end_##x; \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipEventCreate(&event_start_##x)); \
|
||||
HIP_CHECK(hipEventCreate(&event_end_##x)); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// record start events for each of the priority streams
|
||||
#define OP(x) \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(\
|
||||
hipEventRecord(event_start_##x, stream_##x)); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// launch kernels repeatedly on each of the prioritiy streams
|
||||
for (int i = 0; i < static_cast<int>(size); i += MEMCPYSIZE) {
|
||||
int j = i / sizeof(T);
|
||||
#define OP(x) \
|
||||
if (enable_priority_##x) { \
|
||||
hipLaunchKernelGGL((memcpy_kernel<T>), dim3(GRIDSIZE), \
|
||||
dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, \
|
||||
(MEMCPYSIZE / sizeof(T))); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
}
|
||||
|
||||
// record end events for each of the priority streams
|
||||
#define OP(x) \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipEventRecord(event_end_##x, stream_##x)); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// synchronize events for each of the priority streams
|
||||
#define OP(x) \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipEventSynchronize(event_end_##x)); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// compute time spent for memcpy in each stream
|
||||
#define OP(x) \
|
||||
float time_spent_##x; \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipEventElapsedTime(&time_spent_##x, \
|
||||
event_start_##x, event_end_##x)); \
|
||||
INFO("time spent for memcpy in " << #x << \
|
||||
" priority stream: " << time_spent_##x << " ms"); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// sanity check
|
||||
#define OP(x) \
|
||||
if (enable_priority_##x) { \
|
||||
HIP_CHECK(hipMemcpy(dst_h_##x, dst_d_##x, size, \
|
||||
hipMemcpyDeviceToHost)); \
|
||||
if (memcmp(dst_h_##x, src_h_##x, size) != 0) { \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
OP(high)
|
||||
#undef OP
|
||||
|
||||
// validate that stream priorities are working as expected
|
||||
#define OP(x, y) \
|
||||
if (enable_priority_##x && enable_priority_##y) { \
|
||||
if ((1.05f * time_spent_##x) < time_spent_##y) { \
|
||||
INFO("time_spent_##x : " << time_spent_##x << \
|
||||
"time_spent_##y : " << time_spent_##y); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
OP(low, normal)
|
||||
OP(normal, high)
|
||||
OP(low, high)
|
||||
#undef OP
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace hipStreamCreateWithPriorityTest
|
||||
|
||||
|
||||
|
||||
/**
|
||||
Tests following scenarios.
|
||||
1)Create streams with default flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform device synchronize and validate behavior.
|
||||
2)Create streams with non-blocking flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform stream synchronize and validate behavior.
|
||||
3)Create streams with default flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform stream synchronize and validate behavior.
|
||||
4)Create streams with non-blocking flag for all available priority levels and
|
||||
queue tasks in each of these streams, perform device synchronize and validate behavior.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamCreateWithPriority_FunctionalForAllPriorities") {
|
||||
SECTION("Default flag and device synchronize") {
|
||||
hipStreamCreateWithPriorityTest::
|
||||
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamDefault, true);
|
||||
}
|
||||
|
||||
SECTION("Stream non-blocking flag and stream synchronize") {
|
||||
hipStreamCreateWithPriorityTest::
|
||||
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamNonBlocking, false);
|
||||
}
|
||||
|
||||
SECTION("Default flag and stream synchronize") {
|
||||
hipStreamCreateWithPriorityTest::
|
||||
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamDefault, false);
|
||||
}
|
||||
|
||||
SECTION("Stream non-blocking flag and device synchronize") {
|
||||
hipStreamCreateWithPriorityTest::
|
||||
funcTestsForAllPriorityLevelsWrtNullStrm(hipStreamNonBlocking, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a stream for each priority level with default flag, Launch memcpy and kernel
|
||||
* tasks on these streams from multiple threads. Validate all the results.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamCreateWithPriority_MulthreadDefaultflag") {
|
||||
bool TestPassed = true;
|
||||
TestPassed = hipStreamCreateWithPriorityTest::
|
||||
runFuncTestsForAllPriorityLevelsMultThread(hipStreamDefault);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a stream for each priority level with non-blocking flag, Launch memcpy and
|
||||
* kernel tasks on these streams from multiple threads. Validate all the results.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamCreateWithPriority_MulthreadNonblockingflag") {
|
||||
bool TestPassed = true;
|
||||
TestPassed = hipStreamCreateWithPriorityTest::
|
||||
runFuncTestsForAllPriorityLevelsMultThread(hipStreamNonBlocking);
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Scenario1: Validates functionality of hipStreamCreateWithPriority when
|
||||
stream = nullptr.
|
||||
* Scenario2: Validates functionality of hipStreamCreateWithPriority when
|
||||
flag = 0xffffffff.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamCreateWithPriority_NegTst") {
|
||||
hipStream_t stream;
|
||||
int priority_low;
|
||||
int priority_high;
|
||||
hipError_t ret;
|
||||
|
||||
// Test is to get the Stream Priority Range
|
||||
HIP_CHECK(
|
||||
hipDeviceGetStreamPriorityRange(&priority_low, &priority_high));
|
||||
// Check if priorities are indeed supported
|
||||
if (priority_low == priority_high) {
|
||||
WARN("Stream priority range not supported. Skipping test.");
|
||||
return; // exit the test since priorities are not supported
|
||||
}
|
||||
|
||||
SECTION("stream = nullptr test") {
|
||||
ret = hipStreamCreateWithPriority(nullptr, hipStreamDefault,
|
||||
priority_low);
|
||||
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
|
||||
SECTION("flag value invalid test") {
|
||||
ret = hipStreamCreateWithPriority(&stream, 0xffffffff,
|
||||
priority_low);
|
||||
|
||||
REQUIRE(ret != hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate stream priorities with event after classifying them as low, medium and high.
|
||||
*/
|
||||
TEST_CASE("Unit_hipStreamCreateWithPriority_ValidateWithEvents") {
|
||||
bool TestPassed = true;
|
||||
TestPassed = hipStreamCreateWithPriorityTest::
|
||||
validateStreamPrioritiesWithEvents<int>();
|
||||
REQUIRE(TestPassed);
|
||||
}
|
||||
@@ -0,0 +1,350 @@
|
||||
/*
|
||||
Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
Testcase Scenarios :
|
||||
|
||||
1)Validates functionality of hipStreamAddCallback with created stream.
|
||||
|
||||
2)Validates functionality of stream with cu mask.
|
||||
|
||||
3)Create a stream with all CU masks disabled (0x00000000).
|
||||
Verify that default CU mask is set for the stream.
|
||||
|
||||
4)Size is greater than physical CU number. In this case the extra elements
|
||||
are ignored and hipExtStreamCreateWithCUMask must return hipSuccess.
|
||||
|
||||
5)Negative Testing of hipExtStreamCreateWithCUMask.
|
||||
*/
|
||||
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#define NUM_CU_PARTITIONS 4
|
||||
#define CONSTANT 1.618f
|
||||
#define SIZE_INBYTES_OF_MB (1024*1024)
|
||||
#define GRIDSIZE 512
|
||||
#define BLOCKSIZE 256
|
||||
#define ZERO_MASK 0x00000000
|
||||
|
||||
|
||||
namespace hipExtStreamCreateWithCUMaskTest {
|
||||
|
||||
float *A_h, *C_h;
|
||||
bool cbDone = false;
|
||||
bool isPassed = true;
|
||||
size_t N = 4 * SIZE_INBYTES_OF_MB;
|
||||
|
||||
|
||||
// Make a default CU mask bit-array where all CUs are active
|
||||
// this default mask is expected to be returned when there is no
|
||||
// custom or global CU mask defined.
|
||||
void createDefaultCUMask(std::vector<uint32_t> *pdefaultCUMask,
|
||||
int numOfCUs) {
|
||||
uint32_t temp = 0;
|
||||
uint32_t bit_index = 0;
|
||||
for (int i = 0; i < numOfCUs; i++) {
|
||||
temp |= 1UL << bit_index;
|
||||
if (bit_index >= 32) {
|
||||
(*pdefaultCUMask).push_back(temp);
|
||||
temp = 0;
|
||||
bit_index = 0;
|
||||
temp |= 1UL << bit_index;
|
||||
}
|
||||
bit_index += 1;
|
||||
}
|
||||
if (bit_index != 0) {
|
||||
(*pdefaultCUMask).push_back(temp);
|
||||
}
|
||||
}
|
||||
// Create masks of disabled CU masks.
|
||||
void createDisabledCUMask(std::vector<uint32_t> *pdisabledCUMask,
|
||||
int numOfCUs) {
|
||||
uint32_t temp = ZERO_MASK;
|
||||
uint32_t bit_index = 0;
|
||||
for (int i = 0; i < numOfCUs; i++) {
|
||||
if (bit_index >= 32) {
|
||||
(*pdisabledCUMask).push_back(temp);
|
||||
temp = ZERO_MASK;
|
||||
bit_index = 0;
|
||||
}
|
||||
bit_index += 1;
|
||||
}
|
||||
if (bit_index != 0) {
|
||||
(*pdisabledCUMask).push_back(temp);
|
||||
}
|
||||
}
|
||||
|
||||
void Callback(hipStream_t stream, hipError_t status,
|
||||
void* userData) {
|
||||
isPassed = true;
|
||||
stream = 0;
|
||||
HIP_CHECK(status);
|
||||
REQUIRE(userData == nullptr);
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (C_h[i] != A_h[i] * A_h[i]) {
|
||||
UNSCOPED_INFO("Data mismatch at index: " << i);
|
||||
isPassed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
cbDone = true;
|
||||
}
|
||||
} // namespace hipExtStreamCreateWithCUMaskTest
|
||||
|
||||
using hipExtStreamCreateWithCUMaskTest::A_h;
|
||||
using hipExtStreamCreateWithCUMaskTest::C_h;
|
||||
using hipExtStreamCreateWithCUMaskTest::cbDone;
|
||||
using hipExtStreamCreateWithCUMaskTest::isPassed;
|
||||
using hipExtStreamCreateWithCUMaskTest::N;
|
||||
using hipExtStreamCreateWithCUMaskTest::createDefaultCUMask;
|
||||
using hipExtStreamCreateWithCUMaskTest::createDisabledCUMask;
|
||||
using hipExtStreamCreateWithCUMaskTest::Callback;
|
||||
|
||||
|
||||
/**
|
||||
* Scenario: Validates functionality of hipStreamAddCallback with created stream.
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_ValidateCallbackFunc") {
|
||||
float *A_d, *C_d;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
cbDone = false;
|
||||
A_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(A_h != nullptr);
|
||||
C_h = reinterpret_cast<float*>(malloc(Nbytes));
|
||||
REQUIRE(C_h != nullptr);
|
||||
|
||||
// Fill with Phi + i
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
A_h[i] = CONSTANT + i;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc(&A_d, Nbytes));
|
||||
HIP_CHECK(hipMalloc(&C_d, Nbytes));
|
||||
|
||||
hipStream_t mystream;
|
||||
std::vector<uint32_t> defaultCUMask;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, 0));
|
||||
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
|
||||
|
||||
hipExtStreamCreateWithCUMask(&mystream, defaultCUMask.size(),
|
||||
defaultCUMask.data());
|
||||
HIP_CHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice,
|
||||
mystream));
|
||||
const unsigned blocks = GRIDSIZE;
|
||||
const unsigned threadsPerBlock = BLOCKSIZE;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, mystream, A_d, C_d, N);
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
mystream));
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
|
||||
while (!cbDone) usleep(100000); // Sleep for 100 ms
|
||||
HIP_CHECK(hipStreamDestroy(mystream));
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(C_d)));
|
||||
HIP_CHECK(hipFree(reinterpret_cast<void*>(A_d)));
|
||||
free(C_h);
|
||||
free(A_h);
|
||||
REQUIRE(isPassed == true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Validates functionality of stream with cu mask.
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
|
||||
const int KNumPartition = NUM_CU_PARTITIONS;
|
||||
float *dA[KNumPartition], *dC[KNumPartition];
|
||||
float *hA, *hC;
|
||||
size_t N = 25*SIZE_INBYTES_OF_MB;
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
std::vector<hipStream_t> streams(KNumPartition);
|
||||
std::vector<std::vector<uint32_t>> cuMasks(KNumPartition);
|
||||
std::stringstream ss[KNumPartition];
|
||||
|
||||
int nGpu = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&nGpu));
|
||||
if (nGpu < 1) {
|
||||
WARN("Didn't find any GPU! skipping the test!");
|
||||
return;
|
||||
}
|
||||
|
||||
static int device = 0;
|
||||
HIP_CHECK(hipSetDevice(device));
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, device));
|
||||
INFO("info: running on bus " << "0x" << props.pciBusID << " "
|
||||
<< props.name << " with " << props.multiProcessorCount << " CUs");
|
||||
|
||||
hA = new float[Nbytes];
|
||||
REQUIRE(hA != nullptr);
|
||||
hC = new float[Nbytes];
|
||||
REQUIRE(hC != nullptr);
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
hA[i] = CONSTANT + i;
|
||||
}
|
||||
|
||||
for (int np = 0; np < KNumPartition; np++) {
|
||||
HIP_CHECK(hipMalloc(&dA[np], Nbytes));
|
||||
HIP_CHECK(hipMalloc(&dC[np], Nbytes));
|
||||
// make unique CU masks in the multiple of dwords for each stream
|
||||
uint32_t temp = 0;
|
||||
uint32_t bit_index = np;
|
||||
for (int i = np; i < props.multiProcessorCount; i = i + 4) {
|
||||
temp |= 1UL << bit_index;
|
||||
if (bit_index >= 32) {
|
||||
cuMasks[np].push_back(temp);
|
||||
temp = 0;
|
||||
bit_index = np;
|
||||
temp |= 1UL << bit_index;
|
||||
}
|
||||
bit_index += 4;
|
||||
}
|
||||
if (bit_index != 0) {
|
||||
cuMasks[np].push_back(temp);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipExtStreamCreateWithCUMask(&streams[np], cuMasks[np].size(),
|
||||
cuMasks[np].data()));
|
||||
|
||||
HIP_CHECK(hipMemcpy(dA[np], hA, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
ss[np] << std::hex;
|
||||
for (int i = cuMasks[np].size() - 1; i >= 0; i--) {
|
||||
ss[np] << cuMasks[np][i];
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned blocks = GRIDSIZE;
|
||||
const unsigned threadsPerBlock = BLOCKSIZE;
|
||||
|
||||
auto single_start = std::chrono::steady_clock::now();
|
||||
INFO("info: launch 'vector_square' kernel on one stream " <<
|
||||
streams[0] << " with CU mask: 0x" << ss[0].str().c_str());
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, streams[0], dA[0], dC[0], N);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
auto single_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> single_kernel_time = single_end - single_start;
|
||||
|
||||
HIP_CHECK(hipMemcpy(hC, dC[0], Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(hC[i] == (hA[i] * hA[i]));
|
||||
}
|
||||
|
||||
INFO("info: launch 'vector_square' kernel on "
|
||||
<< KNumPartition << " streams:");
|
||||
auto all_start = std::chrono::steady_clock::now();
|
||||
for (int np = 0; np < KNumPartition; np++) {
|
||||
INFO("info: launch 'vector_square' kernel on the stream "
|
||||
<< streams[np] << " with CU mask: 0x" << ss[np].str().c_str());
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, streams[np], dA[np], dC[np], N);
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
|
||||
auto all_end = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double> all_kernel_time = all_end - all_start;
|
||||
|
||||
for (int np = 0; np < KNumPartition; np++) {
|
||||
HIP_CHECK(hipMemcpy(hC, dC[np], Nbytes, hipMemcpyDeviceToHost));
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
REQUIRE(hC[i] == (hA[i] * hA[i]));
|
||||
}
|
||||
}
|
||||
|
||||
INFO("info: kernel launched on one stream took: " <<
|
||||
single_kernel_time.count() << " seconds");
|
||||
INFO("info: kernels launched on " << KNumPartition <<
|
||||
" streams took: " << all_kernel_time.count() << " seconds");
|
||||
INFO("info: launching kernels on " << KNumPartition <<
|
||||
" streams asynchronously is " <<
|
||||
single_kernel_time.count() / (all_kernel_time.count() / KNumPartition)
|
||||
<< " times faster per stream than launching on one stream alone");
|
||||
|
||||
delete [] hA;
|
||||
delete [] hC;
|
||||
for (int np = 0; np < KNumPartition; np++) {
|
||||
hipFree(dC[np]);
|
||||
hipFree(dA[np]);
|
||||
HIP_CHECK(hipStreamDestroy(streams[np]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Create a stream with all CU masks disabled (0x00000000).
|
||||
* Verify that default CU mask is set for the stream.
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_AllCUsMasked") {
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipDeviceProp_t props;
|
||||
HIP_CHECK(hipGetDeviceProperties(&props, 0));
|
||||
// make a CU mask with all CUs disabled.
|
||||
std::vector<uint32_t> allCUDisabled;
|
||||
createDisabledCUMask(&allCUDisabled, props.multiProcessorCount);
|
||||
hipStream_t stream;
|
||||
HIP_CHECK(hipExtStreamCreateWithCUMask(&stream, allCUDisabled.size(),
|
||||
allCUDisabled.data()));
|
||||
// Verify whether default CU mask is set for the stream.
|
||||
uint32_t size = (props.multiProcessorCount / 32) + 1;
|
||||
std::vector<uint32_t> cuMask(size);
|
||||
std::vector<uint32_t> defaultCUMask;
|
||||
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
|
||||
HIP_CHECK(hipExtStreamGetCUMask(stream, cuMask.size(), &cuMask[0]));
|
||||
for (int i = 0; i < static_cast<int>(defaultCUMask.size()); i++) {
|
||||
REQUIRE(defaultCUMask[i] == cuMask[i]);
|
||||
}
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* Scenario: Negative Testing of hipExtStreamCreateWithCUMask.
|
||||
*/
|
||||
TEST_CASE("Unit_hipExtStreamCreateWithCUMask_NegTst") {
|
||||
std::vector<uint32_t> defaultCUMask;
|
||||
REQUIRE(hipSuccess == hipSetDevice(0));
|
||||
hipDeviceProp_t props;
|
||||
REQUIRE(hipSuccess == hipGetDeviceProperties(&props, 0));
|
||||
createDefaultCUMask(&defaultCUMask, props.multiProcessorCount);
|
||||
hipStream_t stream;
|
||||
// Negative Scenario 1: stream = nullptr
|
||||
SECTION("stream is nullptr") {
|
||||
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(nullptr,
|
||||
defaultCUMask.size(),
|
||||
defaultCUMask.data()) == hipSuccess);
|
||||
}
|
||||
// Negative Scenario 2: cuMaskSize = 0
|
||||
SECTION("cuMaskSize is 0") {
|
||||
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(&stream, 0,
|
||||
defaultCUMask.data()) == hipSuccess);
|
||||
}
|
||||
// Negative Scenario 3: cuMask = nullptr
|
||||
SECTION("cuMask is nullptr") {
|
||||
REQUIRE_FALSE(hipExtStreamCreateWithCUMask(&stream,
|
||||
defaultCUMask.size(),
|
||||
nullptr) == hipSuccess);
|
||||
}
|
||||
}
|
||||
Ссылка в новой задаче
Block a user