SWDEV-388834 - [catch2][dtest] Module tests migrated from direct to catch2
Change-Id: I9a3fbdd4e52bb69ab428b7cfcd478fa0382e7cc9
Этот коммит содержится в:
коммит произвёл
Srinivasarao Gollamandala
родитель
2d2e90a405
Коммит
304b0ac90b
@@ -1,22 +1,19 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -42,24 +39,23 @@ THE SOFTWARE.
|
||||
* Launches kernel with parameters and shared memory on stream with arguments
|
||||
* passed to kernel params or extra arguments.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include <hip_test_defgroups.hh>
|
||||
#include <math.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "hip/hip_ext.h"
|
||||
#include <regex> // NOLINT
|
||||
#include <string>
|
||||
#include "hip_module_launch_kernel_common.hh" // NOLINT
|
||||
#include "hip/hip_ext.h"
|
||||
|
||||
#include "hip_module_launch_kernel_common.hh"
|
||||
|
||||
constexpr auto fileName = "copyKernel.code";
|
||||
constexpr auto kernel_name = "copy_ker";
|
||||
static constexpr auto totalWorkGroups{1024};
|
||||
static constexpr auto localWorkSize{512};
|
||||
static constexpr auto lastWorkSizeEven{256};
|
||||
static constexpr auto lastWorkSizeOdd{257};
|
||||
|
||||
#define fileName "copyKernel.code"
|
||||
#define kernel_name "copy_ker"
|
||||
|
||||
/**
|
||||
Local Function to search a string in file.
|
||||
*/
|
||||
@@ -156,12 +152,14 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_NonUniformWorkGroup") {
|
||||
args.buffersize = arraylength;
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
// Memcpy from A to Ad
|
||||
HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault));
|
||||
REQUIRE(hipErrorInvalidValue ==
|
||||
hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL,
|
||||
hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize,
|
||||
1, 1, 0, 0, NULL,
|
||||
reinterpret_cast<void**>(&config), 0));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
@@ -216,11 +214,13 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_UniformWorkGroup") {
|
||||
args.buffersize = arraylength;
|
||||
size_t size = sizeof(args);
|
||||
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
// Memcpy from A to Ad
|
||||
HIP_CHECK(hipMemcpy(Ad, A, sizeBytes, hipMemcpyDefault));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize, 1, 1, 0, 0, NULL,
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(Function, arraylength, 1, 1, localWorkSize,
|
||||
1, 1, 0, 0, NULL,
|
||||
reinterpret_cast<void**>(&config), 0));
|
||||
// Memcpy results back to host
|
||||
HIP_CHECK(hipMemcpy(B, Bd, sizeBytes, hipMemcpyDefault));
|
||||
@@ -243,7 +243,8 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") {
|
||||
hipEvent_t start_event = nullptr;
|
||||
HIP_CHECK(hipEventCreate(&start_event));
|
||||
const auto kernel = GetKernel(mg.module(), "NOPKernel");
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr,
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr,
|
||||
nullptr, nullptr,
|
||||
start_event, nullptr));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipEventQuery(start_event));
|
||||
@@ -253,7 +254,8 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") {
|
||||
hipEvent_t stop_event = nullptr;
|
||||
HIP_CHECK(hipEventCreate(&stop_event));
|
||||
const auto kernel = GetKernel(mg.module(), "NOPKernel");
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr, nullptr,
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(kernel, 1, 1, 1, 1, 1, 1, 0, nullptr,
|
||||
nullptr, nullptr,
|
||||
nullptr, stop_event));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipEventQuery(stop_event));
|
||||
@@ -263,7 +265,569 @@ TEST_CASE("Unit_hipExtModuleLaunchKernel_Positive_Parameters") {
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_Negative_Parameters") {
|
||||
ModuleLaunchKernelNegativeParameters<hipExtModuleLaunchKernel>();
|
||||
}
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Test case to verify Negative tests of hipExtModuleLaunchKernel API.
|
||||
* - Test case to verify kernel execution time of the particular kernel by using hipExtModuleLaunchKernel.
|
||||
* - Test case to verify hipExtModuleLaunchKernel API by disabling time flag in event creation.
|
||||
* - Test case to verify hipExtModuleLaunchKernel API's Corner Scenarios for Grid and Block dimensions.
|
||||
* - Test case to verify different work groups of hipExtModuleLaunchKernel API.
|
||||
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - catch/unit/module/hipExtModuleLaunchKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.6
|
||||
*/
|
||||
|
||||
struct gridblockDim {
|
||||
unsigned int gridX;
|
||||
unsigned int gridY;
|
||||
unsigned int gridZ;
|
||||
unsigned int blockX;
|
||||
unsigned int blockY;
|
||||
unsigned int blockZ;
|
||||
};
|
||||
class ModuleLaunchKernel {
|
||||
int N = 64;
|
||||
int SIZE = N*N;
|
||||
int *A, *B, *C;
|
||||
hipDeviceptr_t *Ad, *Bd;
|
||||
hipStream_t stream1, stream2;
|
||||
hipEvent_t start_event1, end_event1, start_event2, end_event2,
|
||||
start_timingDisabled, end_timingDisabled;
|
||||
hipModule_t Module;
|
||||
hipDeviceptr_t deviceGlobal;
|
||||
hipFunction_t MultKernel, SixteenSecKernel, FourSecKernel,
|
||||
TwoSecKernel, KernelandExtraParamKernel, DummyKernel;
|
||||
struct {
|
||||
int clockRate;
|
||||
void* _Ad;
|
||||
void* _Bd;
|
||||
void* _Cd;
|
||||
int _n;
|
||||
} args1, args2;
|
||||
struct {
|
||||
} args3;
|
||||
size_t size1;
|
||||
size_t size2;
|
||||
size_t size3;
|
||||
size_t deviceGlobalSize;
|
||||
public :
|
||||
void AllocateMemory();
|
||||
void DeAllocateMemory();
|
||||
void ModuleLoad();
|
||||
bool Module_Negative_tests();
|
||||
bool ExtModule_Negative_tests();
|
||||
bool ExtModule_Corner_tests();
|
||||
bool Module_WorkGroup_Test();
|
||||
bool ExtModule_KernelExecutionTime();
|
||||
bool ExtModule_ConcurencyCheck_GlobalVar(int conc_flag);
|
||||
bool ExtModule_ConcurrencyCheck_TimeVer();
|
||||
bool ExtModule_Disabled_Timingflag();
|
||||
};
|
||||
|
||||
void ModuleLaunchKernel::AllocateMemory() {
|
||||
A = new int[N*N*sizeof(int)];
|
||||
B = new int[N*N*sizeof(int)];
|
||||
for (int i=0; i < N; i++) {
|
||||
for (int j=0; j < N; j++) {
|
||||
A[i*N +j] = 1;
|
||||
B[i*N +j] = 1;
|
||||
}
|
||||
}
|
||||
HIP_CHECK(hipStreamCreate(&stream1));
|
||||
HIP_CHECK(hipStreamCreate(&stream2));
|
||||
HIP_CHECK(hipMalloc(&Ad, SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipMalloc(&Bd, SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipHostMalloc(reinterpret_cast<void**>(&C), SIZE*sizeof(int)));
|
||||
HIP_CHECK(hipMemcpy(Ad, A, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd, B, SIZE*sizeof(int), hipMemcpyHostToDevice));
|
||||
int clkRate = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0));
|
||||
args1._Ad = Ad;
|
||||
args1._Bd = Bd;
|
||||
args1._Cd = C;
|
||||
args1._n = N;
|
||||
args1.clockRate = clkRate;
|
||||
args2._Ad = NULL;
|
||||
args2._Bd = NULL;
|
||||
args2._Cd = NULL;
|
||||
args2._n = 0;
|
||||
args2.clockRate = clkRate;
|
||||
size1 = sizeof(args1);
|
||||
size2 = sizeof(args2);
|
||||
size3 = sizeof(args3);
|
||||
HIP_CHECK(hipEventCreate(&start_event1));
|
||||
HIP_CHECK(hipEventCreate(&end_event1));
|
||||
HIP_CHECK(hipEventCreate(&start_event2));
|
||||
HIP_CHECK(hipEventCreate(&end_event2));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&start_timingDisabled,
|
||||
hipEventDisableTiming));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&end_timingDisabled,
|
||||
hipEventDisableTiming));
|
||||
}
|
||||
|
||||
void ModuleLaunchKernel::ModuleLoad() {
|
||||
constexpr auto matmulName = "matmul.code";
|
||||
constexpr auto matmulK = "matmulK";
|
||||
constexpr auto SixteenSec = "SixteenSecKernel";
|
||||
constexpr auto KernelandExtra = "KernelandExtraParams";
|
||||
constexpr auto FourSec = "FourSecKernel";
|
||||
constexpr auto TwoSec = "TwoSecKernel";
|
||||
constexpr auto globalDevVar = "deviceGlobal";
|
||||
constexpr auto dummyKernel = "dummyKernel";
|
||||
|
||||
HIP_CHECK(hipModuleLoad(&Module, matmulName));
|
||||
HIP_CHECK(hipModuleGetFunction(&MultKernel, Module, matmulK));
|
||||
HIP_CHECK(hipModuleGetFunction(&SixteenSecKernel, Module, SixteenSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&KernelandExtraParamKernel,
|
||||
Module, KernelandExtra));
|
||||
HIP_CHECK(hipModuleGetFunction(&FourSecKernel, Module, FourSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&TwoSecKernel, Module, TwoSec));
|
||||
HIP_CHECK(hipModuleGetFunction(&DummyKernel, Module, dummyKernel));
|
||||
HIP_CHECK(hipModuleGetGlobal(&deviceGlobal, &deviceGlobalSize,
|
||||
Module, globalDevVar));
|
||||
}
|
||||
|
||||
void ModuleLaunchKernel::DeAllocateMemory() {
|
||||
HIP_CHECK(hipEventDestroy(start_event1));
|
||||
HIP_CHECK(hipEventDestroy(end_event1));
|
||||
HIP_CHECK(hipEventDestroy(start_event2));
|
||||
HIP_CHECK(hipEventDestroy(end_event2));
|
||||
HIP_CHECK(hipEventDestroy(start_timingDisabled));
|
||||
HIP_CHECK(hipEventDestroy(end_timingDisabled));
|
||||
HIP_CHECK(hipStreamDestroy(stream1));
|
||||
HIP_CHECK(hipStreamDestroy(stream2));
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
HIP_CHECK(hipFree(Ad));
|
||||
HIP_CHECK(hipFree(Bd));
|
||||
HIP_CHECK(hipHostFree(C));
|
||||
HIP_CHECK(hipModuleUnload(Module));
|
||||
}
|
||||
/*
|
||||
* In this scenario,We launch the 4 sec kernel and 2 sec kernel
|
||||
* and we fetch the event execution time of each kernel and it
|
||||
* should not exceed the execution time of that particular kernel
|
||||
*/
|
||||
bool ModuleLaunchKernel::ExtModule_KernelExecutionTime() {
|
||||
constexpr auto FOURSEC_KERNEL{4999};
|
||||
constexpr auto TWOSEC_KERNEL{2999};
|
||||
bool testStatus = true;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
float time_4sec, time_2sec;
|
||||
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event1, end_event1, 0));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event2, end_event2, 0));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
HIP_CHECK(hipEventElapsedTime(&time_4sec, start_event1, end_event1));
|
||||
HIP_CHECK(hipEventElapsedTime(&time_2sec, start_event2, end_event2));
|
||||
if (time_4sec < FOURSEC_KERNEL && time_2sec < TWOSEC_KERNEL) {
|
||||
testStatus = true;
|
||||
} else {
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
/*
|
||||
* In this Scenario, we create events by disabling the timing flag
|
||||
* We then Launch the kernel using hipExtModuleLaunchKernel by passing
|
||||
* disabled events and try to fetch kernel execution time using
|
||||
* hipEventElapsedTime API which would fail as the flag is disabled.
|
||||
*/
|
||||
bool ModuleLaunchKernel::ExtModule_Disabled_Timingflag() {
|
||||
bool testStatus = true;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
hipError_t e;
|
||||
float time_2sec;
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_timingDisabled, end_timingDisabled, 0));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
e = hipEventElapsedTime(&time_2sec, start_timingDisabled, end_timingDisabled);
|
||||
if (e == hipErrorInvalidHandle) {
|
||||
testStatus = true;
|
||||
} else {
|
||||
INFO("Event elapsed time is success when time flag is disabled \n");
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
/*
|
||||
* In this scenario , we initially create a global device variable in matmul.cpp
|
||||
* with initial value as 1 We then launch the four sec and two sec kernels and
|
||||
* try to modify the variable.
|
||||
* In case of concurrency,the variable gets updated in four sec kernel to 0x2222
|
||||
* and then the two sec kernel would be launched parallely which would again
|
||||
* modify the global variable to 0x3333
|
||||
* In case of non concurrency,the variale gets updated in four sec kernel
|
||||
* and then in two sec kernel and the value of global variable would be 0x5555
|
||||
*/
|
||||
bool ModuleLaunchKernel::ExtModule_ConcurencyCheck_GlobalVar(int conc_flag) {
|
||||
bool testStatus = true;
|
||||
int deviceGlobal_h = 0;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(FourSecKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event1, end_event1, conc_flag));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(TwoSecKernel, 1, 1, 1, 1, 1, 1, 0, stream1,
|
||||
NULL, reinterpret_cast<void**>(&config2),
|
||||
start_event2, end_event2, conc_flag));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
HIP_CHECK(hipMemcpyDtoH(&deviceGlobal_h, hipDeviceptr_t(deviceGlobal),
|
||||
deviceGlobalSize));
|
||||
if (conc_flag && deviceGlobal_h != 0x5555) {
|
||||
testStatus = true;
|
||||
} else if (!conc_flag && deviceGlobal_h == 0x5555) {
|
||||
testStatus = true;
|
||||
} else {
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
/* In this scenario,we initially launch 2 kernels,one is sixteen sec kernel
|
||||
* and other is matrix multiplication with non-concurrency (flag 0)
|
||||
* and we launch the same 2 kernels with concurrency flag 1. We then compare
|
||||
* the time difference between the concurrency and non currency kernels.
|
||||
* The concurrency kernel duration should be less than the non concurrency
|
||||
* duration kernels
|
||||
*/
|
||||
bool ModuleLaunchKernel::ExtModule_ConcurrencyCheck_TimeVer() {
|
||||
bool testStatus = true;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
int mismatch = 0;
|
||||
void* config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
void* config2[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args2,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size2,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config2),
|
||||
NULL, NULL, 0));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32 , 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
NULL, NULL, 0));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
auto duration1 = std::chrono::duration_cast<std::chrono::microseconds>
|
||||
(stop-start);
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(SixteenSecKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config2),
|
||||
NULL, NULL, 1));
|
||||
HIP_CHECK(hipExtModuleLaunchKernel(MultKernel, N, N, 1, 32, 32, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
NULL, NULL, 1));
|
||||
HIP_CHECK(hipStreamSynchronize(stream1));
|
||||
stop = std::chrono::high_resolution_clock::now();
|
||||
auto duration2 = std::chrono::duration_cast<std::chrono::microseconds>
|
||||
(stop-start);
|
||||
if (!(duration2.count() < duration1.count())) {
|
||||
testStatus = false;
|
||||
}
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < N; j++) {
|
||||
if (C[i*N + j] != N)
|
||||
mismatch++;
|
||||
}
|
||||
}
|
||||
if (mismatch) {
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
bool ModuleLaunchKernel::ExtModule_Negative_tests() {
|
||||
bool testStatus = true;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipError_t err;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args1,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
void *params[] = {Ad};
|
||||
// Passing nullptr to kernel function in hipExtModuleLaunchKernel API
|
||||
err = hipExtModuleLaunchKernel(nullptr, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed nullptr to kernel function");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing Max int value to block dimensions
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
std::numeric_limits<uint32_t>::max(),
|
||||
std::numeric_limits<uint32_t>::max(),
|
||||
std::numeric_limits<uint32_t>::max(), 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for max values to block dimension");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing 0 as value for all dimensions
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 0, 0, 0,
|
||||
0,
|
||||
0,
|
||||
0, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for 0 as value for all dimensions");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing 0 as value for x dimension
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 0, 1, 1,
|
||||
0,
|
||||
1,
|
||||
1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for 0 as value for x dimension");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing 0 as value for y dimension
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 0, 1,
|
||||
1,
|
||||
0,
|
||||
1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for 0 as value for y dimension");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing 0 as value for z dimension
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 0,
|
||||
1,
|
||||
1,
|
||||
0, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for 0 as value for z dimension");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing both kernel and extra params
|
||||
err = hipExtModuleLaunchKernel(KernelandExtraParamKernel, 1, 1, 1, 1, 1, 1, 0,
|
||||
stream1, reinterpret_cast<void**>(¶ms),
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel fail when we pass both kernel,extra args");
|
||||
testStatus = false;
|
||||
}
|
||||
// Passing more than maxthreadsperblock to block dimensions
|
||||
hipDeviceProp_t deviceProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
deviceProp.maxThreadsPerBlock+1,
|
||||
deviceProp.maxThreadsPerBlock+1,
|
||||
deviceProp.maxThreadsPerBlock+1, 0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for max group size");
|
||||
testStatus = false;
|
||||
}
|
||||
// Block dimension X = Max Allowed + 1
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
deviceProp.maxThreadsDim[0]+1,
|
||||
1,
|
||||
1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimX + 1)");
|
||||
testStatus = false;
|
||||
}
|
||||
// Block dimension Y = Max Allowed + 1
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
1,
|
||||
deviceProp.maxThreadsDim[1]+1,
|
||||
1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimY + 1)");
|
||||
testStatus = false;
|
||||
}
|
||||
// Block dimension Z = Max Allowed + 1
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1,
|
||||
1,
|
||||
1,
|
||||
deviceProp.maxThreadsDim[2]+1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for (MaxBlockDimZ + 1)");
|
||||
testStatus = false;
|
||||
}
|
||||
|
||||
// Passing invalid config data in extra params
|
||||
void *config3[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size1,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
err = hipExtModuleLaunchKernel(MultKernel, 1, 1, 1, 1, 1, 1, 0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config3),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
INFO("hipExtModuleLaunchKernel failed for invalid conf");
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
|
||||
bool ModuleLaunchKernel::ExtModule_Corner_tests() {
|
||||
bool testStatus = true;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipError_t err;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
hipDeviceProp_t deviceProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
|
||||
unsigned int maxblockX = deviceProp.maxThreadsDim[0];
|
||||
unsigned int maxblockY = deviceProp.maxThreadsDim[1];
|
||||
unsigned int maxblockZ = deviceProp.maxThreadsDim[2];
|
||||
struct gridblockDim test[6] = {{1, 1, 1, maxblockX, 1, 1},
|
||||
{1, 1, 1, 1, maxblockY, 1},
|
||||
{1, 1, 1, 1, 1, maxblockZ},
|
||||
{UINT32_MAX, 1, 1, 1, 1, 1},
|
||||
{1, UINT32_MAX, 1, 1, 1, 1},
|
||||
{1, 1, UINT32_MAX, 1, 1, 1}};
|
||||
|
||||
for (int i = 0; i < 6; i++) {
|
||||
err = hipExtModuleLaunchKernel(DummyKernel,
|
||||
test[i].gridX,
|
||||
test[i].gridY,
|
||||
test[i].gridZ,
|
||||
test[i].blockX,
|
||||
test[i].blockY,
|
||||
test[i].blockZ,
|
||||
0,
|
||||
stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err != hipSuccess) {
|
||||
testStatus = false;
|
||||
}
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
|
||||
bool ModuleLaunchKernel::Module_WorkGroup_Test() {
|
||||
bool testStatus = true;
|
||||
HIP_CHECK(hipSetDevice(0));
|
||||
hipError_t err;
|
||||
AllocateMemory();
|
||||
ModuleLoad();
|
||||
void *config1[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args3,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size3,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
hipDeviceProp_t deviceProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&deviceProp, 0));
|
||||
double cuberootVal =
|
||||
cbrt(static_cast<double>(deviceProp.maxThreadsPerBlock));
|
||||
uint32_t cuberoot_floor = floor(cuberootVal);
|
||||
uint32_t cuberoot_ceil = ceil(cuberootVal);
|
||||
// Scenario: (block.x * block.y * block.z) <= Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
err = hipExtModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_floor, cuberoot_floor, cuberoot_floor,
|
||||
0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err != hipSuccess) {
|
||||
testStatus = false;
|
||||
}
|
||||
// Scenario: (block.x * block.y * block.z) > Work Group Size where
|
||||
// block.x < MaxBlockDimX , block.y < MaxBlockDimY and block.z < MaxBlockDimZ
|
||||
err = hipExtModuleLaunchKernel(DummyKernel,
|
||||
1, 1, 1,
|
||||
cuberoot_ceil, cuberoot_ceil, cuberoot_ceil + 1,
|
||||
0, stream1, NULL,
|
||||
reinterpret_cast<void**>(&config1),
|
||||
nullptr, nullptr, 0);
|
||||
if (err == hipSuccess) {
|
||||
testStatus = false;
|
||||
}
|
||||
DeAllocateMemory();
|
||||
return testStatus;
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipExtModuleLaunchKernel_Functional") {
|
||||
bool testStatus = true;
|
||||
ModuleLaunchKernel kernelLaunch;
|
||||
testStatus &= kernelLaunch.ExtModule_Negative_tests();
|
||||
// Disabled below test cases as firmware currently does not support the
|
||||
// concurrency in the same stream based on the flag
|
||||
#if 0
|
||||
testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(1);
|
||||
testStatus &= kernelLaunch.ExtModule_ConcurencyCheck_GlobalVar(0);
|
||||
testStatus &= kernelLaunch.ExtModule_ConcurrencyCheck_TimeVer();
|
||||
#endif
|
||||
SECTION("Kernel Execution Time") {
|
||||
testStatus &= kernelLaunch.ExtModule_KernelExecutionTime();
|
||||
REQUIRE(testStatus == true);
|
||||
}
|
||||
SECTION("Disable Time Flag") {
|
||||
testStatus &= kernelLaunch.ExtModule_Disabled_Timingflag();
|
||||
REQUIRE(testStatus == true);
|
||||
}
|
||||
SECTION("Corner Tests") {
|
||||
testStatus &= kernelLaunch.ExtModule_Corner_tests();
|
||||
REQUIRE(testStatus == true);
|
||||
}
|
||||
SECTION("WorkGroup Test") {
|
||||
testStatus &= kernelLaunch.Module_WorkGroup_Test();
|
||||
REQUIRE(testStatus == true);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* End doxygen group KernelTest.
|
||||
* @}
|
||||
|
||||
Ссылка в новой задаче
Block a user