Catch2 Test Fixes (#2961)
* test fixes * address PR comment * PR comment fixing Nvidia pass * add additional kernel launch checks * pr comments
Bu işleme şunda yer alıyor:
işlemeyi yapan:
GitHub
ebeveyn
609edf1c9e
işleme
da453211ed
@@ -29,6 +29,7 @@ TEMPLATE_TEST_CASE("ABM_AddKernel_MultiTypeMultiSize", "", int, long, float, lon
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
res = hipMemcpy(a.data(), d_c, sizeof(TestType) * size, hipMemcpyDeviceToHost);
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
@@ -53,6 +53,7 @@ TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
|
||||
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast<const float*>(A),
|
||||
static_cast<float*>(B));
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
float maxError = 0.0f;
|
||||
|
||||
@@ -26,7 +26,7 @@ THE SOFTWARE.
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <type_traits>
|
||||
|
||||
#define TOL 0.001
|
||||
#define guarantee(cond, str) \
|
||||
{ \
|
||||
if (!(cond)) { \
|
||||
@@ -45,7 +45,7 @@ size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectM
|
||||
size_t mismatchesToPrint = 10;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
T expected = F(A[i], B[i]);
|
||||
if (Out[i] != expected) {
|
||||
if (std::fabs(Out[i] - expected) > TOL) {
|
||||
if (mismatchCount == 0) {
|
||||
firstMismatch = i;
|
||||
}
|
||||
|
||||
@@ -270,6 +270,7 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB
|
||||
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
|
||||
std::forward<Args>(packedArgs)...);
|
||||
#endif
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
//---
|
||||
|
||||
@@ -221,6 +221,7 @@ void runMultiProcKernel(ipcEventInfo_t *shmEventInfo, int index) {
|
||||
const dim3 blocks(BUF_SIZE / threads.x, 1);
|
||||
hipLaunchKernelGGL(computeKernel, dim3(blocks), dim3(threads), 0, 0,
|
||||
d_ptr + index *BUF_SIZE, d_ptr, index + 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipEventRecord(event));
|
||||
|
||||
// Barrier 2 : Signals that event is recorded
|
||||
|
||||
@@ -120,7 +120,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
|
||||
|
||||
@@ -276,12 +276,15 @@ bool test_printf_multistream(uint32_t num_blocks,
|
||||
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
|
||||
dim3(1, threads_per_block, 1),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
|
||||
dim3(1, 1, threads_per_block),
|
||||
0, stream[i], Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
for (int i = 0; i < NUM_STREAM; i++) {
|
||||
@@ -368,12 +371,15 @@ bool test_printf_multigpu(int gpu,
|
||||
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
|
||||
dim3(1, threads_per_block, 1),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
|
||||
dim3(1, 1, threads_per_block),
|
||||
0, 0, Ad, Bd, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
|
||||
@@ -246,6 +246,7 @@ bool test_printf_conststr(uint32_t num_blocks, uint32_t threads_per_block,
|
||||
hipLaunchKernelGGL(kernel_printf_conststr, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
@@ -308,6 +309,7 @@ bool test_printf_two_conditionalstr(uint32_t num_blocks,
|
||||
dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
@@ -370,6 +372,7 @@ bool test_printf_single_conditionalstr(uint32_t num_blocks,
|
||||
dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
@@ -427,6 +430,7 @@ bool test_variable_str(uint32_t print_limit,
|
||||
hipLaunchKernelGGL(func, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount, Ad);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
HIP_CHECK(hipMemcpy(Ah, Ad, buffsize*sizeof(int32_t),
|
||||
hipMemcpyDeviceToHost));
|
||||
@@ -483,6 +487,7 @@ bool test_decimal_str(uint32_t num_blocks, uint32_t threads_per_block,
|
||||
hipLaunchKernelGGL(kernel_decimal_calculation, dim3(num_blocks, 1, 1),
|
||||
dim3(threads_per_block, 1, 1),
|
||||
0, 0, iterCount, maxPrecision);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(0));
|
||||
std::ifstream CapturedData = captured.getCapturedData();
|
||||
char *buffer = new char[CHUNK_SIZE];
|
||||
|
||||
@@ -157,6 +157,7 @@ static void test_group_partition(unsigned int tileSz) {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, dResult, tileSz, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err));
|
||||
@@ -217,6 +218,7 @@ static void test_shfl_down() {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_shfl_down, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
|
||||
@@ -138,6 +138,7 @@ static void test_group_partition(unsigned tileSz) {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, dPtr, tileSz, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipMemcpy(hPtr, dPtr, arrSize, hipMemcpyDeviceToHost);
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
@@ -207,6 +208,7 @@ static void test_shfl_up() {
|
||||
hipLaunchKernelGGL(kernel_shfl_up, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i);
|
||||
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err));
|
||||
|
||||
@@ -244,6 +244,7 @@ static void test_active_threads_grouping() {
|
||||
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_coalesced_active_groups, blockSize, threadsPerBlock, 0, 0);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
@@ -308,6 +309,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) {
|
||||
if (useGlobalMem) {
|
||||
hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, 0, 0, tileSz,
|
||||
dResult, useGlobalMem, globalMem, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
@@ -316,6 +318,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) {
|
||||
} else {
|
||||
hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, tileSz, dResult, useGlobalMem, globalMem, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
@@ -390,6 +393,7 @@ static void test_shfl_any_to_any() {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_shfl_any_to_any, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0 , dPtr, dsrcArr, dResults, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
@@ -459,6 +463,7 @@ static void test_shfl_broadcast() {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_shfl, blockSize, threadsPerBlock,
|
||||
threadsPerBlock * sizeof(int), 0, dPtr, dResults, srcLane, i);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
|
||||
err = hipDeviceSynchronize();
|
||||
if (err != hipSuccess) {
|
||||
|
||||
@@ -27,6 +27,7 @@ THE SOFTWARE.
|
||||
|
||||
#define _SIZE sizeof(int) * 1024 * 1024
|
||||
#define NUM_STREAMS 2
|
||||
#define NUM_ITERS 1 << 30
|
||||
|
||||
static __global__ void Iter(int* Ad, int num) {
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
@@ -57,7 +58,8 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") {
|
||||
}
|
||||
for (int i = 0; i < NUM_STREAMS; i++) {
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0,
|
||||
stream[i], Ad[i], 1 << 30);
|
||||
stream[i], Ad[i], NUM_ITERS);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
for (int i = 0; i < NUM_STREAMS; i++) {
|
||||
HIP_CHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost,
|
||||
@@ -71,7 +73,7 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") {
|
||||
// Conservative implementations which synchronize the hipMemcpyAsync will
|
||||
// fail, ie if HIP_LAUNCH_BLOCKING=true.
|
||||
|
||||
CHECK(1 << 30 != A[NUM_STREAMS - 1][0] - 1);
|
||||
CHECK(NUM_ITERS != A[NUM_STREAMS - 1][0] - 1);
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
CHECK(1 << 30 == A[NUM_STREAMS - 1][0] - 1);
|
||||
CHECK(NUM_ITERS == A[NUM_STREAMS - 1][0] - 1);
|
||||
}
|
||||
|
||||
@@ -163,6 +163,7 @@ TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst") {
|
||||
hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(mykernel, dim3(1), dim3(1),
|
||||
0, 0, archProp_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(archProp_h, archProp_d,
|
||||
NUM_OF_ARCHPROP*sizeof(int), hipMemcpyDeviceToHost));
|
||||
// Validate the host architecture property with device
|
||||
|
||||
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithnounsafeflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
testResult = HipTest::assemblyFile_Verification<TestType>(
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithoutflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
testResult = HipTest::assemblyFile_Verification<TestType>(
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_CoherentwithUnsafeflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithnounsafeflag", "",
|
||||
dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithoutflag", "",
|
||||
dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithUnsafeflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -76,6 +76,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_CoherentGlobalMem") {
|
||||
hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
std::cout << "test 1" << std::endl;
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
REQUIRE(A_h[0] == INITIAL_VAL);
|
||||
@@ -118,6 +119,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMem") {
|
||||
hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d),
|
||||
static_cast<double* >(result));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxCoherentGlobalMem") {
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&result), sizeof(double)));
|
||||
hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(*B_h == 0);
|
||||
@@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxNonCoherentGlobalFlatMem") {
|
||||
if (mem_type) {
|
||||
hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
} else {
|
||||
hipLaunchKernelGGL(unsafeAtomicMax_FlatMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fminCoherentGlobalMem") {
|
||||
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&result), sizeof(double)));
|
||||
hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
|
||||
REQUIRE(*B_h == 0);
|
||||
@@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fminNonCoherentGlobalFlatMem") {
|
||||
if (mem_type) {
|
||||
hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
} else {
|
||||
hipLaunchKernelGGL(unsafeAtomicMin_FlatMem, dim3(1), dim3(1),
|
||||
0, 0, static_cast<double* >(A_d), result);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -65,6 +65,7 @@ TEST_CASE("Unit_AnyAll_CompileTest") {
|
||||
hipLaunchKernelGGL(warpvote, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0,
|
||||
device_any, device_all, pshift);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(
|
||||
hipMemcpy(host_any, device_any, Num_Warps_per_Grid * sizeof(int), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(
|
||||
|
||||
@@ -66,6 +66,7 @@ TEST_CASE("Unit_ballot") {
|
||||
hipLaunchKernelGGL(gpu_ballot, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0,
|
||||
device_ballot, Num_Warps_per_Block, pshift);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid * sizeof(unsigned int),
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
@@ -146,6 +146,7 @@ TEST_CASE("Unit_bitExtract") {
|
||||
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32,
|
||||
deviceSrc032, deviceSrc132, deviceSrc232, deviceOut64, deviceSrc064,
|
||||
deviceSrc164, deviceSrc264);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -158,6 +158,7 @@ TEST_CASE("Unit_bitInsert") {
|
||||
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32,
|
||||
deviceSrc032, deviceSrc132, deviceSrc232, deviceSrc332, deviceOut64,
|
||||
deviceSrc064, deviceSrc164, deviceSrc264, deviceSrc364);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -115,6 +115,7 @@ TEST_CASE("Unit_brev") {
|
||||
hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
|
||||
deviceC, deviceD, WIDTH, HEIGHT);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -136,6 +136,7 @@ TEST_CASE("Unit_clz") {
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
|
||||
deviceC, deviceD, WIDTH, HEIGHT);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -110,6 +110,7 @@ TEST_CASE("Unit_ffs") {
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
|
||||
deviceC, deviceD, WIDTH, HEIGHT);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -56,6 +56,7 @@ TEST_CASE("Unit_deviceFunctions_CompileTest") {
|
||||
res = hipMalloc((void**)&Outd, SIZE);
|
||||
REQUIRE(res == hipSuccess);
|
||||
hipLaunchKernelGGL(floatMath, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, Ind, Outd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
res = hipDeviceSynchronize();
|
||||
REQUIRE(res == hipSuccess);
|
||||
res = hipGetLastError();
|
||||
|
||||
@@ -129,6 +129,7 @@ template <typename T, typename D> void testType(int msize) {
|
||||
|
||||
auto kernel = testOperationsGPU<T, D>;
|
||||
hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
@@ -145,7 +145,7 @@ TEST_CASE("Unit_funnelshift") {
|
||||
|
||||
hipLaunchKernelGGL(funnelshift_kernel, dim3(1), dim3(1), 0, 0, device_l_output, device_lc_output,
|
||||
device_r_output, device_rc_output);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(host_l_output, device_l_output, NUM_TESTS * sizeof(unsigned int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(host_lc_output, device_lc_output, NUM_TESTS * sizeof(unsigned int),
|
||||
|
||||
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
|
||||
hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost,
|
||||
stream));
|
||||
@@ -83,6 +84,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
|
||||
SECTION("Calling hipMemcpyTo/FromSymbol - validate value in host memory") {
|
||||
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice));
|
||||
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -99,6 +101,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
|
||||
hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(
|
||||
hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream));
|
||||
@@ -116,6 +119,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
|
||||
hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost,
|
||||
hipStreamPerThread));
|
||||
@@ -139,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
|
||||
HIP_CHECK(hipGetSymbolAddress((void**)&symbolAddress, HIP_SYMBOL(globalConst)));
|
||||
HIP_CHECK(hipMalloc((void**)&checkOkD, sizeof(bool)));
|
||||
hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipFree(checkOkD));
|
||||
HIP_ASSERT(checkOk);
|
||||
@@ -171,6 +176,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") {
|
||||
HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool)));
|
||||
hipLaunchKernelGGL(checkGlobalConstAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
|
||||
symbolArrAddress, checkOkD);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipFree(checkOkD));
|
||||
HIP_ASSERT(checkOk);
|
||||
@@ -184,6 +190,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") {
|
||||
HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool)));
|
||||
hipLaunchKernelGGL(checkStaticConstVarAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
|
||||
symbolVarAddress, checkOkD);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipFree(checkOkD));
|
||||
HIP_ASSERT(checkOk);
|
||||
|
||||
@@ -100,6 +100,7 @@ template <typename T, typename U> int dataTypesRun() {
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
|
||||
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
@@ -148,7 +149,7 @@ template <typename T, typename U> int dataTypesRun2() {
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
|
||||
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -198,7 +199,7 @@ template <typename T, typename U> int dataTypesRun4() {
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
|
||||
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ TEST_CASE("Unit_mbcnt") {
|
||||
|
||||
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0,
|
||||
device_mbcnt_lo, device_mbcnt_hi, device_lane_id);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
unsigned int* host_mbcnt_lo = (unsigned int*)malloc(buffer_size);
|
||||
unsigned int* host_mbcnt_hi = (unsigned int*)malloc(buffer_size);
|
||||
unsigned int* host_lane_id = (unsigned int*)malloc(buffer_size);
|
||||
|
||||
@@ -100,7 +100,7 @@ TEST_CASE("Unit_popc") {
|
||||
hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
|
||||
deviceC, deviceD, WIDTH, HEIGHT);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -88,7 +88,7 @@ static void test_syncthreads_and(int blockSize) {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_syncthreads_and, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
|
||||
allThreadsOneD, oneThreadZeroD, allThreadsMinusOneD);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -95,7 +95,7 @@ void test_syncthreads_count(int blockSize) {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_syncthreads_count, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
|
||||
allThreadsOneD, oddThreadsOneD, allThreadsMinusOneD, allThreadsIdD);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -88,7 +88,7 @@ static void test_syncthreads_or(int blockSize) {
|
||||
// Launch Kernel
|
||||
hipLaunchKernelGGL(kernel_syncthreads_or, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
|
||||
allThreadsOneD, oneThreadOneD, allThreadsMinusOneD);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
// Copy result from device to host
|
||||
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -92,6 +92,7 @@ TEST_CASE("Unit_threadfence_system") {
|
||||
HIP_CHECK(hipSetDevice(next_id - 1));
|
||||
hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0, next_id, num_dev, num_iter,
|
||||
data, flag);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithnoUnsafeflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Coherentwithoutflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
|
||||
|
||||
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithUnsafeflag", "",
|
||||
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentnounsafeatomicsflag", "",
|
||||
dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithoutflag", "",
|
||||
dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithunsafeatomicsflag", "",
|
||||
dim3(1), dim3(1),
|
||||
0, 0, A_d,
|
||||
result_d);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
bool testResult;
|
||||
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
|
||||
|
||||
@@ -224,6 +224,7 @@ template <typename V> bool run_CheckSharedVectorType() {
|
||||
if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return false;
|
||||
unique_ptr<bool, decltype(hipFree)*> correct{ptr, hipFree};
|
||||
hipLaunchKernelGGL((CheckSharedVectorType<V>), dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get());
|
||||
HIP_CHECK(hipGetLastError());
|
||||
bool passed = true;
|
||||
if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) {
|
||||
return false;
|
||||
@@ -252,7 +253,7 @@ TEST_CASE("Unit_vectorTypes_CompileTest") {
|
||||
|
||||
unique_ptr<bool, decltype(hipFree)*> correct{ptr, hipFree};
|
||||
hipLaunchKernelGGL(CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get());
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
bool passed = true;
|
||||
res = hipMemcpyDtoH(&passed, correct.get(), sizeof(bool));
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
@@ -80,6 +80,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
|
||||
HIP_CHECK(hipEventRecord(start, stream));
|
||||
hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, stream,
|
||||
static_cast<const int*>(C_d), C_h, numElements, count);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipEventRecord(stop, stream));
|
||||
|
||||
if (waitStart) {
|
||||
@@ -111,7 +112,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
|
||||
REQUIRE(false);
|
||||
}
|
||||
|
||||
if (e == hipSuccess) assert(t == 0.0f);
|
||||
if (e == hipSuccess) HIP_ASSERT(t == 0.0f);
|
||||
|
||||
// stop usually ready unless we skipped the synchronization (syncNone)
|
||||
e = hipEventElapsedTime(&t, stop, stop);
|
||||
@@ -137,7 +138,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
|
||||
HIP_ASSERT(hipEventElapsedTime(&t, start, neverCreated) == hipErrorInvalidHandle);
|
||||
|
||||
HIP_ASSERT(hipEventElapsedTime(&t, neverRecorded, stop) == hipErrorInvalidHandle);
|
||||
HIP_ASSERT(hipEventElapsedTime(&t, start, neverRecorded) == hipErrorInvalidHandle);
|
||||
HIP_ASSERT(hipGetLastError() == hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipEventDestroy(neverRecorded));
|
||||
|
||||
@@ -65,7 +65,7 @@ TEST_CASE("Unit_hipEventIpc") {
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
|
||||
@@ -88,7 +88,7 @@ TEST_CASE("Unit_hipEventRecord") {
|
||||
HipTest::launchKernel<float>(HipTest::vectorADD<float>, blocks, 1, 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d),
|
||||
C_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
|
||||
@@ -101,7 +101,8 @@ TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", "", i
|
||||
// Reference the registered device pointer Ad from inside the kernel:
|
||||
for (int i = 0; i < num_devices; i++) {
|
||||
HIP_CHECK(hipSetDevice(i));
|
||||
hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]);
|
||||
hipLaunchKernelGGL(Inc, dim3(LEN / 32), dim3(32), 0, 0, Ad[i]);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
}
|
||||
REQUIRE(A[10] == 1 + static_cast<TestType>(num_devices));
|
||||
|
||||
@@ -111,7 +111,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
|
||||
@@ -219,7 +219,7 @@ static bool validateMemoryOnGpuMThread(int gpu, bool concurOnOneGPU = false) {
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
|
||||
|
||||
@@ -114,6 +114,7 @@ TEST_CASE("Unit_hipMallocManaged_Advanced") {
|
||||
HIP_CHECK(hipEventRecord(event0, 0));
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const float*>(A), static_cast<const float*>(B), C, numElements);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipEventRecord(event1, 0));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
float time = 0.0f;
|
||||
|
||||
@@ -536,6 +536,7 @@ TEMPLATE_TEST_CASE("Unit_hipMallocPitch_KernelLaunch", ""
|
||||
hipLaunchKernelGGL(copy_var<TestType>, dim3(1), dim3(1),
|
||||
0, 0, static_cast<TestType*>(A_d),
|
||||
static_cast<TestType*>(B_d), ROWS, pitch_A);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
|
||||
// hipMemcpy2D Device to Host
|
||||
|
||||
@@ -241,6 +241,7 @@ void memcpytest2(DeviceMemory<T>* dmem, HostMemory<T>* hmem,
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
|
||||
static_cast<const T*>(dmem->A_d()), static_cast<const T*>(dmem->B_d()),
|
||||
dmem->C_d(), numElements);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
if (useDeviceToDevice) {
|
||||
// Do an extra device-to-device copy here to mix things up:
|
||||
@@ -402,7 +403,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_KernelLaunch", "", int, float,
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
|
||||
static_cast<const TestType*>(A_d),
|
||||
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
@@ -567,7 +568,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch",
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const TestType*>(A_d),
|
||||
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
|
||||
|
||||
@@ -595,7 +596,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch",
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const TestType*>(X_d),
|
||||
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
|
||||
|
||||
@@ -319,6 +319,7 @@ TEST_CASE("Unit_hipMemcpy_HalfMemCopy") {
|
||||
HIP_CHECK(hipMemcpyAsync(B_h, A_d,
|
||||
(NUM_ELM/2)*sizeof(float),
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkTest(A_h, B_h, NUM_ELM/2);
|
||||
}
|
||||
HipTest::freeArrays<float>(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
|
||||
@@ -50,6 +50,7 @@ void Thread_func(T *A_d, T *B_d, T* C_d, T* C_h, size_t Nbytes,
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0,
|
||||
mystream, A_d, C_d, N_ELMTS);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
|
||||
// The following two MemcpyAsync calls are for sole
|
||||
// purpose of loading stream with multiple async calls
|
||||
@@ -76,6 +77,7 @@ void Thread_func_MultiStream() {
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0,
|
||||
mystream, A_d, C_d, N_ELMTS);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
|
||||
// The following hipMemcpyAsync() is called only to
|
||||
// load stream with multiple Async calls
|
||||
@@ -121,7 +123,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_KernelLaunch", "", int, float,
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
|
||||
static_cast<const TestType*>(A_d),
|
||||
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipStreamDestroy(stream));
|
||||
@@ -348,7 +350,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const TestType*>(A_d),
|
||||
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
|
||||
|
||||
@@ -379,7 +381,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, 0, static_cast<const TestType*>(X_d),
|
||||
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, Z_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, gpu1Stream));
|
||||
HIP_CHECK(hipStreamSynchronize(gpu1Stream));
|
||||
|
||||
@@ -66,6 +66,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "",
|
||||
dim3(1), 0, 0,
|
||||
static_cast<const TestType *>(A_d),
|
||||
static_cast<const TestType *>(B_d), C_d, NUM_ELM);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
|
||||
@@ -80,6 +81,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "",
|
||||
dim3(1), 0, 0,
|
||||
static_cast<const TestType*>(X_d),
|
||||
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyDtoH(C_h, (hipDeviceptr_t)Z_d, Nbytes));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
|
||||
|
||||
@@ -68,6 +68,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "",
|
||||
dim3(1), 0, 0,
|
||||
static_cast<const TestType *>(A_d),
|
||||
static_cast<const TestType *>(B_d), C_d, NUM_ELM);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
|
||||
@@ -84,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "",
|
||||
dim3(1), 0, 0,
|
||||
static_cast<const TestType*>(X_d),
|
||||
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
@@ -127,6 +127,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
|
||||
@@ -138,6 +139,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(X_d),
|
||||
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
|
||||
|
||||
@@ -143,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
|
||||
@@ -166,6 +167,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(X_d),
|
||||
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
|
||||
@@ -227,6 +229,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
|
||||
@@ -240,6 +243,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
|
||||
0, 0, static_cast<const int*>(X_d),
|
||||
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
|
||||
@@ -99,6 +99,7 @@ void TestwithOnestream(void) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
@@ -136,6 +137,7 @@ void TestwithTwoStream(void) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NUM_STREAMS; ++i) {
|
||||
@@ -191,6 +193,7 @@ void TestDtoDonSameDevice(void) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NUM_STREAMS; ++i) {
|
||||
@@ -262,6 +265,7 @@ void TestOnMultiGPUwithOneStream(void) {
|
||||
dim3(threadsPerBlock), 0, stream[i],
|
||||
static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NumDevices; ++i) {
|
||||
@@ -298,6 +302,7 @@ void TestkindDtoH(void) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
@@ -365,6 +370,7 @@ void TestkindDtoD(void) {
|
||||
dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NumDevices; ++i) {
|
||||
@@ -411,6 +417,7 @@ void TestkindDefault(void) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream));
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
@@ -472,6 +479,7 @@ void TestkindDefaultForDtoD(void) {
|
||||
dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NumDevices; ++i) {
|
||||
|
||||
@@ -167,6 +167,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithOnestream(bool &val_res) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
val_res = ValidateResult(A_h, B_h, C_h);
|
||||
@@ -203,6 +204,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithTwoStream(bool &val_res) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NoofStreams; ++i) {
|
||||
@@ -258,6 +260,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestDtoDonSameDevice(bool &val_res) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < NoofStreams; ++i) {
|
||||
@@ -329,6 +332,7 @@ void HipMemcpyWithStreamMultiThreadtests::
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < numDevices; ++i) {
|
||||
@@ -363,6 +367,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoH(bool &val_res) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, stream));
|
||||
@@ -434,6 +439,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoD(bool &val_res) {
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < numDevices; ++i) {
|
||||
@@ -481,6 +487,7 @@ void HipMemcpyWithStreamMultiThreadtests::
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream, static_cast<const int*>(A_d),
|
||||
static_cast<const int*>(B_d), C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream));
|
||||
val_res = ValidateResult(A_h, B_h, C_h);
|
||||
@@ -544,6 +551,7 @@ void HipMemcpyWithStreamMultiThreadtests::
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
|
||||
0, stream[i], static_cast<const int*>(A_d[i]),
|
||||
static_cast<const int*>(B_d[i]), C_d[i], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
for (int i=0; i < numDevices; ++i) {
|
||||
|
||||
@@ -53,6 +53,7 @@ TEST_CASE("Unit_hipHostMalloc_CoherentAccess") {
|
||||
std::cout << clkRate << std::endl;
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks),
|
||||
0, 0, hostRes, clkRate);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
int eleCounter = 0;
|
||||
while (eleCounter < blocks) {
|
||||
// blocks until the value changes
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
|
||||
/* Defines */
|
||||
#define NUM_THREADS 1000
|
||||
#define ITER 100
|
||||
#define ITER 10
|
||||
#define NUM_H 256
|
||||
#define NUM_W 256
|
||||
|
||||
@@ -105,6 +105,7 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
|
||||
for (size_t k = 0; k < ITER; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, hipStreamPerThread, B_d, C_d, elements);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H,
|
||||
hipStreamPerThread));
|
||||
|
||||
@@ -369,7 +369,7 @@ static void seekAndSet3DArrayPortion(bool bAsync) {
|
||||
myparms.srcPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPos = make_hipPos(0, 0, 0);
|
||||
myparms.dstPtr = make_hipPitchedPtr(array3D, sizeof(char) * arr_dimensions.x,
|
||||
arr_dimensions.x, arr_dimensions.y);
|
||||
arr_dimensions.y, arr_dimensions.z);
|
||||
myparms.srcPtr = devicePitchedPointer;
|
||||
myparms.extent = extent;
|
||||
#if HT_NVIDIA
|
||||
|
||||
@@ -84,6 +84,7 @@ static void threadFunc(hipStream_t stream, hipPitchedPtr devpPtr,
|
||||
|
||||
hipLaunchKernelGGL(func_set_value, dim3(blocks), dim3(threadsPerBlock), 0,
|
||||
stream, devpPtr, extent, memsetval);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipMemset3DAsync(devpPtr, testval, extent, stream));
|
||||
HIPCHECK(hipMemcpy3DAsync(&myparms, stream));
|
||||
}
|
||||
|
||||
@@ -92,6 +92,7 @@ static bool testhipMemsetAsyncWithKernel(bool UseStrmPerThrd) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, hipStreamPerThread, obj.B_d,
|
||||
obj.C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal, N, hipStreamPerThread));
|
||||
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
|
||||
@@ -117,6 +118,7 @@ static bool testhipMemsetD32AsyncWithKernel() {
|
||||
for (int k = 0 ; k < ITER ; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal,
|
||||
N, obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
@@ -135,6 +137,7 @@ static bool testhipMemsetD16AsyncWithKernel() {
|
||||
for (int k = 0 ; k < ITER ; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemsetD16Async((hipDeviceptr_t)obj.C_d , obj.memSetVal,
|
||||
N, obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
@@ -153,6 +156,7 @@ static bool testhipMemsetD8AsyncWithKernel() {
|
||||
for (int k = 0; k < ITER; k++) {
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemsetD8Async((hipDeviceptr_t)obj.C_d, obj.memSetVal,
|
||||
N, obj.stream));
|
||||
HIP_CHECK(hipStreamSynchronize(obj.stream));
|
||||
|
||||
@@ -138,6 +138,7 @@ TEST_CASE("Unit_hipPointerGetAttribute_KernelUpdation") {
|
||||
reinterpret_cast<hipDeviceptr_t>(A_d)));
|
||||
hipLaunchKernelGGL(var_update, dim3(1), dim3(1), 0, 0,
|
||||
reinterpret_cast<int *>(data));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
for (unsigned int i = 0; i < N; i++) {
|
||||
|
||||
@@ -74,6 +74,7 @@ void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) {
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const T*>(A_d), static_cast<const T*>(B_d), C_d, numElements);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
|
||||
|
||||
|
||||
@@ -56,6 +56,7 @@ void run1(size_t size, hipStream_t stream) {
|
||||
HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream));
|
||||
HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream));
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream, Cd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream));
|
||||
HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
@@ -97,7 +98,9 @@ void run(size_t size, hipStream_t stream1, hipStream_t stream2) {
|
||||
HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1));
|
||||
HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2));
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream1, Cd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream2, Cdd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1));
|
||||
HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2));
|
||||
HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1));
|
||||
|
||||
@@ -59,8 +59,10 @@ TEST_CASE("Unit_hipStreamCreate_MultistreamBasicFunctionalities") {
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::kernel),
|
||||
dim3(1), dim3(1), 0, streams[i], data[i], xd,
|
||||
hipAPIStreamDisableTest::NN);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::nKernel),
|
||||
dim3(1), dim3(1), 0, 0, yd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipStreamDestroy(streams[i]));
|
||||
}
|
||||
HIP_CHECK(hipMemcpy(&x, xd, sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -49,7 +49,9 @@ TEST_CASE("Unit_hipMultiStream_sameDevice") {
|
||||
HIP_CHECK(hipStreamCreate(&streams[i]));
|
||||
HIP_CHECK(hipMalloc(&data[i], NN * sizeof(float)));
|
||||
hipLaunchKernelGGL(kernel, dim3(1), dim3(1), 0, streams[i], data[i], xd, NN);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipLaunchKernelGGL(HIP_KERNEL_NAME(nKernel), dim3(1), dim3(1), 0, 0, yd);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipFree(data[i]));
|
||||
HIP_CHECK(hipStreamDestroy(streams[i]));
|
||||
}
|
||||
@@ -80,9 +82,11 @@ TEST_CASE("Unit_hipMultiStream_multimeDevice") {
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
// Launch kernel with default stream
|
||||
hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, 0, 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
// Launch kernel on all streams
|
||||
for (int i = 0; i < nStreams; i++) {
|
||||
hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, streams[i], 1);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
// Sync stream 1
|
||||
HIP_CHECK(hipStreamSynchronize(streams[0]));
|
||||
|
||||
@@ -131,7 +131,7 @@ TEST_CASE("Unit_hipStreamAddCallback_MultipleThreads") {
|
||||
hipLaunchKernelGGL((device_function), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0,
|
||||
mystream, C_d, A_d, N);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(
|
||||
hipMemcpyAsync(C1_h, C_d, Nbytes,
|
||||
hipMemcpyDeviceToHost, mystream));
|
||||
|
||||
@@ -86,7 +86,7 @@ bool testStreamCallbackFunctionality(bool isDefault) {
|
||||
const unsigned threadsPerBlock = 256;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, 0, A_d, C_d, NSize);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
0));
|
||||
HIP_CHECK(hipStreamAddCallback(0, Callback, nullptr, 0));
|
||||
@@ -102,7 +102,7 @@ bool testStreamCallbackFunctionality(bool isDefault) {
|
||||
const unsigned threadsPerBlock = 256;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, mystream, A_d, C_d, NSize);
|
||||
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
mystream));
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
|
||||
|
||||
@@ -146,6 +146,7 @@ void funcTestsForAllPriorityLevelsWrtNullStrm(unsigned int flags,
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
|
||||
dim3(BLOCKSIZE), 0, stream[idx], A_d[idx],
|
||||
C_d[idx], MEMCPYSIZE2);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
|
||||
hipMemcpyDeviceToHost, stream[idx]));
|
||||
}
|
||||
@@ -227,6 +228,7 @@ void queueTasksInStreams(std::vector<hipStream_t> *stream,
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
|
||||
dim3(BLOCKSIZE), 0, (*stream)[idx], A_d[idx],
|
||||
C_d[idx], MEMCPYSIZE2);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIPCHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
|
||||
hipMemcpyDeviceToHost, (*stream)[idx]));
|
||||
}
|
||||
@@ -426,6 +428,7 @@ bool validateStreamPrioritiesWithEvents() {
|
||||
hipLaunchKernelGGL((memcpy_kernel<T>), dim3(GRIDSIZE), \
|
||||
dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, \
|
||||
(MEMCPYSIZE / sizeof(T))); \
|
||||
HIP_CHECK(hipGetLastError()); \
|
||||
}
|
||||
OP(low)
|
||||
OP(normal)
|
||||
|
||||
@@ -157,6 +157,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_ValidateCallbackFunc") {
|
||||
const unsigned threadsPerBlock = BLOCKSIZE;
|
||||
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, mystream, A_d, C_d, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
|
||||
mystream));
|
||||
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
|
||||
@@ -250,6 +251,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, streams[0], dA[0], dC[0], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
auto single_end = std::chrono::steady_clock::now();
|
||||
@@ -269,6 +271,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
|
||||
<< streams[np] << " with CU mask: 0x" << ss[np].str().c_str());
|
||||
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
|
||||
dim3(threadsPerBlock), 0, streams[np], dA[np], dC[np], N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@ TEST_CASE("Unit_hipBindTexture2D_Pitch") {
|
||||
|
||||
hipLaunchKernelGGL(texture2dCopyKernel, dim3(4, 4, 1), dim3(32, 32, 1),
|
||||
0, 0, devPtrB);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TYPE_t), devPtrB,
|
||||
SIZE_W*sizeof(TYPE_t), SIZE_W*sizeof(TYPE_t),
|
||||
|
||||
@@ -63,6 +63,7 @@ TEST_CASE("Unit_hipBindTexture_tex1DfetchVerification") {
|
||||
dim3 dimGrid(N / dimBlock.x, 1, 1);
|
||||
|
||||
hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, devBuf);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(output, devBuf, N * sizeof(float),
|
||||
hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -118,6 +118,7 @@ static void textureTest(texture<T, hipTextureType1D,
|
||||
|
||||
hipLaunchKernelGGL(normalizedValTextureTest<T>, dim3(1, 1, 1),
|
||||
dim3(SIZE, 1, 1), 0, 0, SIZE, dOutputData);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
float *hOutputData = new float[SIZE];
|
||||
REQUIRE(hOutputData != nullptr);
|
||||
|
||||
@@ -86,9 +86,11 @@ TEST_CASE("Unit_hipSimpleTexture2DLayered_Check") {
|
||||
|
||||
dim3 dimBlock(8, 8, 1);
|
||||
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
|
||||
for (unsigned int layer = 0; layer < num_layers; layer++)
|
||||
for (unsigned int layer = 0; layer < num_layers; layer++) {
|
||||
hipLaunchKernelGGL(simpleKernelLayeredArray, dimGrid, dimBlock, 0, 0,
|
||||
dData, width, height, layer);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Allocate mem for the result on host side
|
||||
|
||||
@@ -92,6 +92,7 @@ static void runSimpleTexture3D_Check(int width, int height, int depth,
|
||||
|
||||
hipLaunchKernelGGL(simpleKernel3DArray, dim3(1, 1, 1), dim3(1, 1, 1),
|
||||
0, 0, dData, width, height, depth);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
// Allocate mem for the result on host side
|
||||
|
||||
@@ -79,6 +79,7 @@ static void runTest(hipTextureAddressMode addressMode,
|
||||
|
||||
hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
|
||||
texBufOut, texObj);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float),
|
||||
|
||||
@@ -85,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipTexObjPitch_texture2D", "", float, int,
|
||||
|
||||
hipLaunchKernelGGL(texture2dCopyKernel, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
|
||||
texObj, devPtrB);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TestType), devPtrB,
|
||||
SIZE_W*sizeof(TestType), SIZE_W*sizeof(TestType),
|
||||
|
||||
@@ -97,6 +97,7 @@ static void runMipMapTest(unsigned int width, unsigned int height, unsigned int
|
||||
|
||||
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, width,
|
||||
(2 * mipmap_level));
|
||||
HIP_CHECK(hipGetLastError());
|
||||
hipDeviceSynchronize();
|
||||
|
||||
float* hOutputData = reinterpret_cast<float*>(malloc(size));
|
||||
|
||||
@@ -73,6 +73,7 @@ static void runTest(const int width, const float offsetX) {
|
||||
|
||||
hipLaunchKernelGGL(tex1DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
|
||||
textureObject, width, offsetX);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -113,9 +113,11 @@ static void runTest(const int width, const float offsetX = 0) {
|
||||
if (resType == hipResourceTypeArray) {
|
||||
hipLaunchKernelGGL(tex1DRGBAKernel<normalizedCoords>, dimGrid, dimBlock,
|
||||
0, 0, dData, textureObject, width, offsetX);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
} else {
|
||||
hipLaunchKernelGGL(tex1DRGBAKernelFetch, dimGrid, dimBlock,
|
||||
0, 0, dData, textureObject, offsetX);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipCreateTextureObject_tex1DfetchVerification") {
|
||||
|
||||
hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
|
||||
texBufOut, texObj);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float),
|
||||
|
||||
@@ -85,6 +85,7 @@ TEST_CASE("Unit_hipTextureObj2D_Check") {
|
||||
|
||||
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock),
|
||||
0, 0, dData, textureObject, width);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -81,6 +81,7 @@ static void runTest(const int width, const int height, const float offsetX, cons
|
||||
|
||||
hipLaunchKernelGGL(tex2DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
|
||||
textureObject, width, height, offsetX, offsetY);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -86,6 +86,7 @@ static void runTest(const int width, const int height, const float offsetX, cons
|
||||
|
||||
hipLaunchKernelGGL(tex2DRGBAKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
|
||||
textureObject, width, height, offsetX, offsetY);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -113,6 +113,7 @@ static void runTest(const int width, const int height, const int depth, const fl
|
||||
|
||||
hipLaunchKernelGGL(tex3DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
|
||||
textureObject, width, height, depth, offsetX, offsetY, offsetZ);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -178,6 +178,7 @@ bool runTest() {
|
||||
|
||||
hipLaunchKernelGGL(tex1dKernelFetch<T>, dimGrid, dimBlock, 0, 0, texBufOut,
|
||||
texObj, N);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(T), hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipTextureRef2D_Check") {
|
||||
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
|
||||
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
|
||||
dData, width);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
HIP_CHECK(hipDeviceSynchronize());
|
||||
|
||||
float* hOutputData = reinterpret_cast<float*>(malloc(size));
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle