Catch2 Test Fixes (#2961)

* test fixes

* address PR comment

* PR comment fixing Nvidia pass

* add additional kernel launch checks

* pr comments
Bu işleme şunda yer alıyor:
Paulius Velesko
2022-10-01 01:07:25 +03:00
işlemeyi yapan: GitHub
ebeveyn 609edf1c9e
işleme da453211ed
94 değiştirilmiş dosya ile 177 ekleme ve 34 silme
+1
Dosyayı Görüntüle
@@ -29,6 +29,7 @@ TEMPLATE_TEST_CASE("ABM_AddKernel_MultiTypeMultiSize", "", int, long, float, lon
REQUIRE(res == hipSuccess);
hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
HIP_CHECK(hipGetLastError());
res = hipMemcpy(a.data(), d_c, sizeof(TestType) * size, hipMemcpyDeviceToHost);
REQUIRE(res == hipSuccess);
+1
Dosyayı Görüntüle
@@ -53,6 +53,7 @@ TEST_CASE("Unit_hipManagedKeyword_SingleGpu") {
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast<const float*>(A),
static_cast<float*>(B));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
float maxError = 0.0f;
+2 -2
Dosyayı Görüntüle
@@ -26,7 +26,7 @@ THE SOFTWARE.
#include <fstream>
#include <regex>
#include <type_traits>
#define TOL 0.001
#define guarantee(cond, str) \
{ \
if (!(cond)) { \
@@ -45,7 +45,7 @@ size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectM
size_t mismatchesToPrint = 10;
for (size_t i = 0; i < N; i++) {
T expected = F(A[i], B[i]);
if (Out[i] != expected) {
if (std::fabs(Out[i] - expected) > TOL) {
if (mismatchCount == 0) {
firstMismatch = i;
}
+1
Dosyayı Görüntüle
@@ -270,6 +270,7 @@ void launchKernel(K kernel, Dim numBlocks, Dim numThreads, std::uint32_t memPerB
launchRTCKernel<Typenames...>(kernel, numBlocks, numThreads, memPerBlock, stream,
std::forward<Args>(packedArgs)...);
#endif
HIP_CHECK(hipGetLastError());
}
//---
+1
Dosyayı Görüntüle
@@ -221,6 +221,7 @@ void runMultiProcKernel(ipcEventInfo_t *shmEventInfo, int index) {
const dim3 blocks(BUF_SIZE / threads.x, 1);
hipLaunchKernelGGL(computeKernel, dim3(blocks), dim3(threads), 0, 0,
d_ptr + index *BUF_SIZE, d_ptr, index + 1);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(event));
// Barrier 2 : Signals that event is recorded
+1 -1
Dosyayı Görüntüle
@@ -120,7 +120,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, 0, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
+6
Dosyayı Görüntüle
@@ -276,12 +276,15 @@ bool test_printf_multistream(uint32_t num_blocks,
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, stream[i], Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
dim3(1, threads_per_block, 1),
0, stream[i], Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
dim3(1, 1, threads_per_block),
0, stream[i], Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
for (int i = 0; i < NUM_STREAM; i++) {
@@ -368,12 +371,15 @@ bool test_printf_multigpu(int gpu,
hipLaunchKernelGGL(kernel_complex_opX, dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(kernel_complex_opY, dim3(1, num_blocks, 1),
dim3(1, threads_per_block, 1),
0, 0, Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(kernel_complex_opZ, dim3(1, 1, num_blocks),
dim3(1, 1, threads_per_block),
0, 0, Ad, Bd, iterCount);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
std::ifstream CapturedData = captured.getCapturedData();
char *buffer = new char[CHUNK_SIZE];
+5
Dosyayı Görüntüle
@@ -246,6 +246,7 @@ bool test_printf_conststr(uint32_t num_blocks, uint32_t threads_per_block,
hipLaunchKernelGGL(kernel_printf_conststr, dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, iterCount);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(0));
std::ifstream CapturedData = captured.getCapturedData();
char *buffer = new char[CHUNK_SIZE];
@@ -308,6 +309,7 @@ bool test_printf_two_conditionalstr(uint32_t num_blocks,
dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, iterCount);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(0));
std::ifstream CapturedData = captured.getCapturedData();
char *buffer = new char[CHUNK_SIZE];
@@ -370,6 +372,7 @@ bool test_printf_single_conditionalstr(uint32_t num_blocks,
dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, iterCount);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(0));
std::ifstream CapturedData = captured.getCapturedData();
char *buffer = new char[CHUNK_SIZE];
@@ -427,6 +430,7 @@ bool test_variable_str(uint32_t print_limit,
hipLaunchKernelGGL(func, dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, iterCount, Ad);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(0));
HIP_CHECK(hipMemcpy(Ah, Ad, buffsize*sizeof(int32_t),
hipMemcpyDeviceToHost));
@@ -483,6 +487,7 @@ bool test_decimal_str(uint32_t num_blocks, uint32_t threads_per_block,
hipLaunchKernelGGL(kernel_decimal_calculation, dim3(num_blocks, 1, 1),
dim3(threads_per_block, 1, 1),
0, 0, iterCount, maxPrecision);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(0));
std::ifstream CapturedData = captured.getCapturedData();
char *buffer = new char[CHUNK_SIZE];
+2
Dosyayı Görüntüle
@@ -157,6 +157,7 @@ static void test_group_partition(unsigned int tileSz) {
// Launch Kernel
hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, dResult, tileSz, i);
HIP_CHECK(hipGetLastError());
err = hipDeviceSynchronize();
if (err != hipSuccess) {
fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err));
@@ -217,6 +218,7 @@ static void test_shfl_down() {
// Launch Kernel
hipLaunchKernelGGL(kernel_shfl_down, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i);
HIP_CHECK(hipGetLastError());
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
err = hipDeviceSynchronize();
if (err != hipSuccess) {
+2
Dosyayı Görüntüle
@@ -138,6 +138,7 @@ static void test_group_partition(unsigned tileSz) {
// Launch Kernel
hipLaunchKernelGGL(kernel_cg_group_partition, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, dPtr, tileSz, i);
HIP_CHECK(hipGetLastError());
hipMemcpy(hPtr, dPtr, arrSize, hipMemcpyDeviceToHost);
err = hipDeviceSynchronize();
if (err != hipSuccess) {
@@ -207,6 +208,7 @@ static void test_shfl_up() {
hipLaunchKernelGGL(kernel_shfl_up, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, dPtr, dResults, lane_delta, i);
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
HIP_CHECK(hipGetLastError());
err = hipDeviceSynchronize();
if (err != hipSuccess) {
fprintf(stderr, "Failed to launch kernel (error code %s)!\n", hipGetErrorString(err));
+5
Dosyayı Görüntüle
@@ -244,6 +244,7 @@ static void test_active_threads_grouping() {
// Launch Kernel
hipLaunchKernelGGL(kernel_coalesced_active_groups, blockSize, threadsPerBlock, 0, 0);
HIP_CHECK(hipGetLastError());
err = hipDeviceSynchronize();
if (err != hipSuccess) {
@@ -308,6 +309,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) {
if (useGlobalMem) {
hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock, 0, 0, tileSz,
dResult, useGlobalMem, globalMem, i);
HIP_CHECK(hipGetLastError());
err = hipDeviceSynchronize();
if (err != hipSuccess) {
@@ -316,6 +318,7 @@ static void test_group_partition(unsigned int tileSz, bool useGlobalMem) {
} else {
hipLaunchKernelGGL(kernel_cg_coalesced_group_partition, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, tileSz, dResult, useGlobalMem, globalMem, i);
HIP_CHECK(hipGetLastError());
err = hipDeviceSynchronize();
if (err != hipSuccess) {
@@ -390,6 +393,7 @@ static void test_shfl_any_to_any() {
// Launch Kernel
hipLaunchKernelGGL(kernel_shfl_any_to_any, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0 , dPtr, dsrcArr, dResults, i);
HIP_CHECK(hipGetLastError());
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
err = hipDeviceSynchronize();
if (err != hipSuccess) {
@@ -459,6 +463,7 @@ static void test_shfl_broadcast() {
// Launch Kernel
hipLaunchKernelGGL(kernel_shfl, blockSize, threadsPerBlock,
threadsPerBlock * sizeof(int), 0, dPtr, dResults, srcLane, i);
HIP_CHECK(hipGetLastError());
hipMemcpy(hPtr, dResults, group_size_in_bytes, hipMemcpyDeviceToHost);
err = hipDeviceSynchronize();
if (err != hipSuccess) {
+5 -3
Dosyayı Görüntüle
@@ -27,6 +27,7 @@ THE SOFTWARE.
#define _SIZE sizeof(int) * 1024 * 1024
#define NUM_STREAMS 2
#define NUM_ITERS 1 << 30
static __global__ void Iter(int* Ad, int num) {
int tx = threadIdx.x + blockIdx.x * blockDim.x;
@@ -57,7 +58,8 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") {
}
for (int i = 0; i < NUM_STREAMS; i++) {
hipLaunchKernelGGL(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0,
stream[i], Ad[i], 1 << 30);
stream[i], Ad[i], NUM_ITERS);
HIP_CHECK(hipGetLastError());
}
for (int i = 0; i < NUM_STREAMS; i++) {
HIP_CHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost,
@@ -71,7 +73,7 @@ TEST_CASE("Unit_hipDeviceSynchronize_Functional") {
// Conservative implementations which synchronize the hipMemcpyAsync will
// fail, ie if HIP_LAUNCH_BLOCKING=true.
CHECK(1 << 30 != A[NUM_STREAMS - 1][0] - 1);
CHECK(NUM_ITERS != A[NUM_STREAMS - 1][0] - 1);
HIP_CHECK(hipDeviceSynchronize());
CHECK(1 << 30 == A[NUM_STREAMS - 1][0] - 1);
CHECK(NUM_ITERS == A[NUM_STREAMS - 1][0] - 1);
}
+1
Dosyayı Görüntüle
@@ -163,6 +163,7 @@ TEST_CASE("Unit_hipGetDeviceProperties_ArchPropertiesTst") {
hipMemcpyHostToDevice));
hipLaunchKernelGGL(mykernel, dim3(1), dim3(1),
0, 0, archProp_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(archProp_h, archProp_d,
NUM_OF_ARCHPROP*sizeof(int), hipMemcpyDeviceToHost));
// Validate the host architecture property with device
+1
Dosyayı Görüntüle
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithnounsafeflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
testResult = HipTest::assemblyFile_Verification<TestType>(
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_Coherentwithoutflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
testResult = HipTest::assemblyFile_Verification<TestType>(
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_CoherentwithUnsafeflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
+1
Dosyayı Görüntüle
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithnounsafeflag", "",
dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+1
Dosyayı Görüntüle
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithoutflag", "",
dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_AtomicAdd_NonCoherentwithUnsafeflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+2
Dosyayı Görüntüle
@@ -76,6 +76,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_CoherentGlobalMem") {
hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1),
0, 0, A_d,
result);
HIP_CHECK(hipGetLastError());
std::cout << "test 1" << std::endl;
HIP_CHECK(hipDeviceSynchronize());
REQUIRE(A_h[0] == INITIAL_VAL);
@@ -118,6 +119,7 @@ TEST_CASE("Unit_BuiltInAtomicAdd_NonCoherentGlobalMem") {
hipLaunchKernelGGL(AtomicAdd_GlobalMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d),
static_cast<double* >(result));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+3
Dosyayı Görüntüle
@@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxCoherentGlobalMem") {
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&result), sizeof(double)));
hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(*B_h == 0);
@@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fmaxNonCoherentGlobalFlatMem") {
if (mem_type) {
hipLaunchKernelGGL(unsafeAtomicMax_GlobalMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
} else {
hipLaunchKernelGGL(unsafeAtomicMax_FlatMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
+3
Dosyayı Görüntüle
@@ -112,6 +112,7 @@ TEST_CASE("Unit_BuiltinAtomics_fminCoherentGlobalMem") {
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&result), sizeof(double)));
hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
REQUIRE(*B_h == 0);
@@ -159,9 +160,11 @@ TEST_CASE("Unit_BuiltinAtomics_fminNonCoherentGlobalFlatMem") {
if (mem_type) {
hipLaunchKernelGGL(unsafeAtomicMin_GlobalMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
} else {
hipLaunchKernelGGL(unsafeAtomicMin_FlatMem, dim3(1), dim3(1),
0, 0, static_cast<double* >(A_d), result);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(B_h, result, sizeof(double), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -65,6 +65,7 @@ TEST_CASE("Unit_AnyAll_CompileTest") {
hipLaunchKernelGGL(warpvote, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0,
device_any, device_all, pshift);
HIP_CHECK(hipGetLastError());
HIP_CHECK(
hipMemcpy(host_any, device_any, Num_Warps_per_Grid * sizeof(int), hipMemcpyDeviceToHost));
HIP_CHECK(
+1
Dosyayı Görüntüle
@@ -66,6 +66,7 @@ TEST_CASE("Unit_ballot") {
hipLaunchKernelGGL(gpu_ballot, dim3(Num_Blocks_per_Grid), dim3(Num_Threads_per_Block), 0, 0,
device_ballot, Num_Warps_per_Block, pshift);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(host_ballot, device_ballot, Num_Warps_per_Grid * sizeof(unsigned int),
hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -146,6 +146,7 @@ TEST_CASE("Unit_bitExtract") {
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32,
deviceSrc032, deviceSrc132, deviceSrc232, deviceOut64, deviceSrc064,
deviceSrc164, deviceSrc264);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -158,6 +158,7 @@ TEST_CASE("Unit_bitInsert") {
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0, deviceOut32,
deviceSrc032, deviceSrc132, deviceSrc232, deviceSrc332, deviceOut64,
deviceSrc064, deviceSrc164, deviceSrc264, deviceSrc364);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostOut32, deviceOut32, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -115,6 +115,7 @@ TEST_CASE("Unit_brev") {
hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
deviceC, deviceD, WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -136,6 +136,7 @@ TEST_CASE("Unit_clz") {
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
deviceC, deviceD, WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -110,6 +110,7 @@ TEST_CASE("Unit_ffs") {
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
deviceC, deviceD, WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -56,6 +56,7 @@ TEST_CASE("Unit_deviceFunctions_CompileTest") {
res = hipMalloc((void**)&Outd, SIZE);
REQUIRE(res == hipSuccess);
hipLaunchKernelGGL(floatMath, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, Ind, Outd);
HIP_CHECK(hipGetLastError());
res = hipDeviceSynchronize();
REQUIRE(res == hipSuccess);
res = hipGetLastError();
+1
Dosyayı Görüntüle
@@ -129,6 +129,7 @@ template <typename T, typename D> void testType(int msize) {
auto kernel = testOperationsGPU<T, D>;
hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost));
+1 -1
Dosyayı Görüntüle
@@ -145,7 +145,7 @@ TEST_CASE("Unit_funnelshift") {
hipLaunchKernelGGL(funnelshift_kernel, dim3(1), dim3(1), 0, 0, device_l_output, device_lc_output,
device_r_output, device_rc_output);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(host_l_output, device_l_output, NUM_TESTS * sizeof(unsigned int),
hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(host_lc_output, device_lc_output, NUM_TESTS * sizeof(unsigned int),
+7
Dosyayı Görüntüle
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipStreamSynchronize(stream));
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost,
stream));
@@ -83,6 +84,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
SECTION("Calling hipMemcpyTo/FromSymbol - validate value in host memory") {
HIP_CHECK(hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice));
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost));
@@ -99,6 +101,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream));
HIP_CHECK(hipStreamSynchronize(stream));
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
HIP_CHECK(
hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream));
@@ -116,6 +119,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost,
hipStreamPerThread));
@@ -139,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyToSymbolAsync_ToNFrom") {
HIP_CHECK(hipGetSymbolAddress((void**)&symbolAddress, HIP_SYMBOL(globalConst)));
HIP_CHECK(hipMalloc((void**)&checkOkD, sizeof(bool)));
hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
HIP_CHECK(hipFree(checkOkD));
HIP_ASSERT(checkOk);
@@ -171,6 +176,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") {
HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool)));
hipLaunchKernelGGL(checkGlobalConstAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
symbolArrAddress, checkOkD);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
HIP_CHECK(hipFree(checkOkD));
HIP_ASSERT(checkOk);
@@ -184,6 +190,7 @@ TEST_CASE("Unit_hipGetSymbolAddressAndSize_Validation") {
HIP_CHECK(hipMalloc(&checkOkD, sizeof(bool)));
hipLaunchKernelGGL(checkStaticConstVarAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
symbolVarAddress, checkOkD);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost));
HIP_CHECK(hipFree(checkOkD));
HIP_ASSERT(checkOk);
+3 -2
Dosyayı Görüntüle
@@ -100,6 +100,7 @@ template <typename T, typename U> int dataTypesRun() {
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
@@ -148,7 +149,7 @@ template <typename T, typename U> int dataTypesRun2() {
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
@@ -198,7 +199,7 @@ template <typename T, typename U> int dataTypesRun4() {
dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA,
static_cast<const T*>(deviceB), WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(T), hipMemcpyDeviceToHost));
+1 -1
Dosyayı Görüntüle
@@ -63,7 +63,7 @@ TEST_CASE("Unit_mbcnt") {
hipLaunchKernelGGL(HIP_kernel, dim3(num_blocks), dim3(num_threads_per_block), 0, 0,
device_mbcnt_lo, device_mbcnt_hi, device_lane_id);
HIP_CHECK(hipGetLastError());
unsigned int* host_mbcnt_lo = (unsigned int*)malloc(buffer_size);
unsigned int* host_mbcnt_hi = (unsigned int*)malloc(buffer_size);
unsigned int* host_lane_id = (unsigned int*)malloc(buffer_size);
+1 -1
Dosyayı Görüntüle
@@ -100,7 +100,7 @@ TEST_CASE("Unit_popc") {
hipLaunchKernelGGL(HIP_kernel, dim3(WIDTH / THREADS_PER_BLOCK_X, HEIGHT / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, deviceA, deviceB,
deviceC, deviceD, WIDTH, HEIGHT);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(hostA, deviceA, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(hostC, deviceC, NUM * sizeof(unsigned int), hipMemcpyDeviceToHost));
+1 -1
Dosyayı Görüntüle
@@ -88,7 +88,7 @@ static void test_syncthreads_and(int blockSize) {
// Launch Kernel
hipLaunchKernelGGL(kernel_syncthreads_and, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
allThreadsOneD, oneThreadZeroD, allThreadsMinusOneD);
HIP_CHECK(hipGetLastError());
// Copy result from device to host
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
+1 -1
Dosyayı Görüntüle
@@ -95,7 +95,7 @@ void test_syncthreads_count(int blockSize) {
// Launch Kernel
hipLaunchKernelGGL(kernel_syncthreads_count, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
allThreadsOneD, oddThreadsOneD, allThreadsMinusOneD, allThreadsIdD);
HIP_CHECK(hipGetLastError());
// Copy result from device to host
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
+1 -1
Dosyayı Görüntüle
@@ -88,7 +88,7 @@ static void test_syncthreads_or(int blockSize) {
// Launch Kernel
hipLaunchKernelGGL(kernel_syncthreads_or, 2, blockSize, 0, 0, syncTestD, allThreadsZeroD,
allThreadsOneD, oneThreadOneD, allThreadsMinusOneD);
HIP_CHECK(hipGetLastError());
// Copy result from device to host
HIP_CHECK(hipMemcpy(syncTestH, syncTestD, nBytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipMemcpy(allThreadsZeroH, allThreadsZeroD, nBytes, hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -92,6 +92,7 @@ TEST_CASE("Unit_threadfence_system") {
HIP_CHECK(hipSetDevice(next_id - 1));
hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0, next_id, num_dev, num_iter,
data, flag);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
}));
}
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithnoUnsafeflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Coherentwithoutflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
+1
Dosyayı Görüntüle
@@ -73,6 +73,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_CoherentwithUnsafeflag", "",
hipLaunchKernelGGL(AtomicCheck<TestType>, dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentnounsafeatomicsflag", "",
dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithoutflag", "",
dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+1
Dosyayı Görüntüle
@@ -72,6 +72,7 @@ TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_NonCoherentwithunsafeatomicsflag", "",
dim3(1), dim3(1),
0, 0, A_d,
result_d);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
bool testResult;
REQUIRE(A_h[0] == INITIAL_VAL + INC_VAL);
+2 -1
Dosyayı Görüntüle
@@ -224,6 +224,7 @@ template <typename V> bool run_CheckSharedVectorType() {
if (hipMalloc(&ptr, sizeof(bool)) != HIP_SUCCESS) return false;
unique_ptr<bool, decltype(hipFree)*> correct{ptr, hipFree};
hipLaunchKernelGGL((CheckSharedVectorType<V>), dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get());
HIP_CHECK(hipGetLastError());
bool passed = true;
if (hipMemcpyDtoH(&passed, correct.get(), sizeof(bool)) != HIP_SUCCESS) {
return false;
@@ -252,7 +253,7 @@ TEST_CASE("Unit_vectorTypes_CompileTest") {
unique_ptr<bool, decltype(hipFree)*> correct{ptr, hipFree};
hipLaunchKernelGGL(CheckVectorTypes, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, correct.get());
HIP_CHECK(hipGetLastError());
bool passed = true;
res = hipMemcpyDtoH(&passed, correct.get(), sizeof(bool));
REQUIRE(res == hipSuccess);
+3 -2
Dosyayı Görüntüle
@@ -80,6 +80,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
HIP_CHECK(hipEventRecord(start, stream));
hipLaunchKernelGGL(HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, stream,
static_cast<const int*>(C_d), C_h, numElements, count);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(stop, stream));
if (waitStart) {
@@ -111,7 +112,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
REQUIRE(false);
}
if (e == hipSuccess) assert(t == 0.0f);
if (e == hipSuccess) HIP_ASSERT(t == 0.0f);
// stop usually ready unless we skipped the synchronization (syncNone)
e = hipEventElapsedTime(&t, stop, stop);
@@ -137,7 +138,7 @@ void test(unsigned testMask, int* C_d, int* C_h, int64_t numElements, hipStream_
HIP_ASSERT(hipEventElapsedTime(&t, start, neverCreated) == hipErrorInvalidHandle);
HIP_ASSERT(hipEventElapsedTime(&t, neverRecorded, stop) == hipErrorInvalidHandle);
HIP_ASSERT(hipEventElapsedTime(&t, start, neverRecorded) == hipErrorInvalidHandle);
HIP_ASSERT(hipGetLastError() == hipErrorInvalidHandle);
}
HIP_CHECK(hipEventDestroy(neverRecorded));
+1 -1
Dosyayı Görüntüle
@@ -65,7 +65,7 @@ TEST_CASE("Unit_hipEventIpc") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(stop, NULL));
HIP_CHECK(hipEventSynchronize(stop));
+1 -1
Dosyayı Görüntüle
@@ -88,7 +88,7 @@ TEST_CASE("Unit_hipEventRecord") {
HipTest::launchKernel<float>(HipTest::vectorADD<float>, blocks, 1, 0, 0,
static_cast<const float*>(A_d), static_cast<const float*>(B_d),
C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(stop, NULL));
HIP_CHECK(hipEventSynchronize(stop));
long long hostStop = HipTest::get_time();
+2 -1
Dosyayı Görüntüle
@@ -101,7 +101,8 @@ TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", "", i
// Reference the registered device pointer Ad from inside the kernel:
for (int i = 0; i < num_devices; i++) {
HIP_CHECK(hipSetDevice(i));
hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]);
hipLaunchKernelGGL(Inc, dim3(LEN / 32), dim3(32), 0, 0, Ad[i]);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
}
REQUIRE(A[10] == 1 + static_cast<TestType>(num_devices));
+2 -2
Dosyayı Görüntüle
@@ -111,7 +111,7 @@ static bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
@@ -219,7 +219,7 @@ static bool validateMemoryOnGpuMThread(int gpu, bool concurOnOneGPU = false) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
+1
Dosyayı Görüntüle
@@ -114,6 +114,7 @@ TEST_CASE("Unit_hipMallocManaged_Advanced") {
HIP_CHECK(hipEventRecord(event0, 0));
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const float*>(A), static_cast<const float*>(B), C, numElements);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipEventRecord(event1, 0));
HIP_CHECK(hipDeviceSynchronize());
float time = 0.0f;
+1
Dosyayı Görüntüle
@@ -536,6 +536,7 @@ TEMPLATE_TEST_CASE("Unit_hipMallocPitch_KernelLaunch", ""
hipLaunchKernelGGL(copy_var<TestType>, dim3(1), dim3(1),
0, 0, static_cast<TestType*>(A_d),
static_cast<TestType*>(B_d), ROWS, pitch_A);
HIP_CHECK(hipGetLastError());
// hipMemcpy2D Device to Host
+4 -3
Dosyayı Görüntüle
@@ -241,6 +241,7 @@ void memcpytest2(DeviceMemory<T>* dmem, HostMemory<T>* hmem,
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
static_cast<const T*>(dmem->A_d()), static_cast<const T*>(dmem->B_d()),
dmem->C_d(), numElements);
HIP_CHECK(hipGetLastError());
if (useDeviceToDevice) {
// Do an extra device-to-device copy here to mix things up:
@@ -402,7 +403,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_KernelLaunch", "", int, float,
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
static_cast<const TestType*>(A_d),
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
@@ -567,7 +568,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch",
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, 0, static_cast<const TestType*>(A_d),
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
@@ -595,7 +596,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_PinnedRegMemWithKernelLaunch",
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, 0, static_cast<const TestType*>(X_d),
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
+1
Dosyayı Görüntüle
@@ -319,6 +319,7 @@ TEST_CASE("Unit_hipMemcpy_HalfMemCopy") {
HIP_CHECK(hipMemcpyAsync(B_h, A_d,
(NUM_ELM/2)*sizeof(float),
hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkTest(A_h, B_h, NUM_ELM/2);
}
HipTest::freeArrays<float>(A_d, B_d, C_d, A_h, B_h, C_h, false);
+5 -3
Dosyayı Görüntüle
@@ -50,6 +50,7 @@ void Thread_func(T *A_d, T *B_d, T* C_d, T* C_h, size_t Nbytes,
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0,
mystream, A_d, C_d, N_ELMTS);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
// The following two MemcpyAsync calls are for sole
// purpose of loading stream with multiple async calls
@@ -76,6 +77,7 @@ void Thread_func_MultiStream() {
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0,
mystream, A_d, C_d, N_ELMTS);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream));
// The following hipMemcpyAsync() is called only to
// load stream with multiple Async calls
@@ -121,7 +123,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_KernelLaunch", "", int, float,
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1), 0, 0,
static_cast<const TestType*>(A_d),
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipStreamDestroy(stream));
@@ -348,7 +350,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, 0, static_cast<const TestType*>(A_d),
static_cast<const TestType*>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkVectorADD(A_h, B_h, C_h, NUM_ELM);
@@ -379,7 +381,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyAsync_PinnedRegMemWithKernelLaunch",
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, 0, static_cast<const TestType*>(X_d),
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, Z_d, Nbytes,
hipMemcpyDeviceToHost, gpu1Stream));
HIP_CHECK(hipStreamSynchronize(gpu1Stream));
+2
Dosyayı Görüntüle
@@ -66,6 +66,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "",
dim3(1), 0, 0,
static_cast<const TestType *>(A_d),
static_cast<const TestType *>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
@@ -80,6 +81,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoD_Basic", "",
dim3(1), 0, 0,
static_cast<const TestType*>(X_d),
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyDtoH(C_h, (hipDeviceptr_t)Z_d, Nbytes));
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
+2
Dosyayı Görüntüle
@@ -68,6 +68,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "",
dim3(1), 0, 0,
static_cast<const TestType *>(A_d),
static_cast<const TestType *>(B_d), C_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HIP_CHECK(hipDeviceSynchronize());
HipTest::checkVectorADD<TestType>(A_h, B_h, C_h, NUM_ELM);
@@ -84,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyDtoDAsync_Basic", "",
dim3(1), 0, 0,
static_cast<const TestType*>(X_d),
static_cast<const TestType*>(Y_d), Z_d, NUM_ELM);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipDeviceSynchronize());
+2
Dosyayı Görüntüle
@@ -127,6 +127,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
@@ -138,6 +139,7 @@ TEST_CASE("Unit_hipMemcpyPeer_Basic") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(X_d),
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
+4
Dosyayı Görüntüle
@@ -143,6 +143,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
@@ -166,6 +167,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Basic") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(X_d),
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
@@ -227,6 +229,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, C_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
HipTest::checkVectorADD<int>(A_h, B_h, C_h, numElements);
@@ -240,6 +243,7 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_StreamOnDiffDevice") {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(1), dim3(1),
0, 0, static_cast<const int*>(X_d),
static_cast<const int*>(Y_d), Z_d, numElements*sizeof(int));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy(C_h, Z_d, numElements*sizeof(int),
hipMemcpyDeviceToHost));
+8
Dosyayı Görüntüle
@@ -99,6 +99,7 @@ void TestwithOnestream(void) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
HipTest::checkVectorADD(A_h, B_h, C_h, N);
@@ -136,6 +137,7 @@ void TestwithTwoStream(void) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NUM_STREAMS; ++i) {
@@ -191,6 +193,7 @@ void TestDtoDonSameDevice(void) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NUM_STREAMS; ++i) {
@@ -262,6 +265,7 @@ void TestOnMultiGPUwithOneStream(void) {
dim3(threadsPerBlock), 0, stream[i],
static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NumDevices; ++i) {
@@ -298,6 +302,7 @@ void TestkindDtoH(void) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes,
hipMemcpyDeviceToHost, stream));
@@ -365,6 +370,7 @@ void TestkindDtoD(void) {
dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NumDevices; ++i) {
@@ -411,6 +417,7 @@ void TestkindDefault(void) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream));
HipTest::checkVectorADD(A_h, B_h, C_h, N);
@@ -472,6 +479,7 @@ void TestkindDefaultForDtoD(void) {
dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NumDevices; ++i) {
+8
Dosyayı Görüntüle
@@ -167,6 +167,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithOnestream(bool &val_res) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipStreamSynchronize(stream));
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
val_res = ValidateResult(A_h, B_h, C_h);
@@ -203,6 +204,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestwithTwoStream(bool &val_res) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NoofStreams; ++i) {
@@ -258,6 +260,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestDtoDonSameDevice(bool &val_res) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < NoofStreams; ++i) {
@@ -329,6 +332,7 @@ void HipMemcpyWithStreamMultiThreadtests::
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < numDevices; ++i) {
@@ -363,6 +367,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoH(bool &val_res) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipStreamSynchronize(stream));
HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes,
hipMemcpyDeviceToHost, stream));
@@ -434,6 +439,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDtoD(bool &val_res) {
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < numDevices; ++i) {
@@ -481,6 +487,7 @@ void HipMemcpyWithStreamMultiThreadtests::
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream, static_cast<const int*>(A_d),
static_cast<const int*>(B_d), C_d, N);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipStreamSynchronize(stream));
HIPCHECK(hipMemcpyWithStream(C_h, C_d, Nbytes, hipMemcpyDefault, stream));
val_res = ValidateResult(A_h, B_h, C_h);
@@ -544,6 +551,7 @@ void HipMemcpyWithStreamMultiThreadtests::
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock),
0, stream[i], static_cast<const int*>(A_d[i]),
static_cast<const int*>(B_d[i]), C_d[i], N);
HIP_CHECK(hipGetLastError());
}
for (int i=0; i < numDevices; ++i) {
+1
Dosyayı Görüntüle
@@ -53,6 +53,7 @@ TEST_CASE("Unit_hipHostMalloc_CoherentAccess") {
std::cout << clkRate << std::endl;
hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks),
0, 0, hostRes, clkRate);
HIP_CHECK(hipGetLastError());
int eleCounter = 0;
while (eleCounter < blocks) {
// blocks until the value changes
+2 -1
Dosyayı Görüntüle
@@ -31,7 +31,7 @@
/* Defines */
#define NUM_THREADS 1000
#define ITER 100
#define ITER 10
#define NUM_H 256
#define NUM_W 256
@@ -105,6 +105,7 @@ TEST_CASE("Unit_hipMemset2DAsync_WithKernel") {
for (size_t k = 0; k < ITER; k++) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, hipStreamPerThread, B_d, C_d, elements);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipMemset2DAsync(C_d, pitch_C, memsetval, NUM_W, NUM_H,
hipStreamPerThread));
+1 -1
Dosyayı Görüntüle
@@ -369,7 +369,7 @@ static void seekAndSet3DArrayPortion(bool bAsync) {
myparms.srcPos = make_hipPos(0, 0, 0);
myparms.dstPos = make_hipPos(0, 0, 0);
myparms.dstPtr = make_hipPitchedPtr(array3D, sizeof(char) * arr_dimensions.x,
arr_dimensions.x, arr_dimensions.y);
arr_dimensions.y, arr_dimensions.z);
myparms.srcPtr = devicePitchedPointer;
myparms.extent = extent;
#if HT_NVIDIA
+1
Dosyayı Görüntüle
@@ -84,6 +84,7 @@ static void threadFunc(hipStream_t stream, hipPitchedPtr devpPtr,
hipLaunchKernelGGL(func_set_value, dim3(blocks), dim3(threadsPerBlock), 0,
stream, devpPtr, extent, memsetval);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipMemset3DAsync(devpPtr, testval, extent, stream));
HIPCHECK(hipMemcpy3DAsync(&myparms, stream));
}
+4
Dosyayı Görüntüle
@@ -92,6 +92,7 @@ static bool testhipMemsetAsyncWithKernel(bool UseStrmPerThrd) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, hipStreamPerThread, obj.B_d,
obj.C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
HIP_CHECK(hipMemsetAsync(obj.C_d , obj.memSetVal, N, hipStreamPerThread));
HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
@@ -117,6 +118,7 @@ static bool testhipMemsetD32AsyncWithKernel() {
for (int k = 0 ; k < ITER ; k++) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemsetD32Async((hipDeviceptr_t)obj.C_d , obj.memSetVal,
N, obj.stream));
HIP_CHECK(hipStreamSynchronize(obj.stream));
@@ -135,6 +137,7 @@ static bool testhipMemsetD16AsyncWithKernel() {
for (int k = 0 ; k < ITER ; k++) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemsetD16Async((hipDeviceptr_t)obj.C_d , obj.memSetVal,
N, obj.stream));
HIP_CHECK(hipStreamSynchronize(obj.stream));
@@ -153,6 +156,7 @@ static bool testhipMemsetD8AsyncWithKernel() {
for (int k = 0; k < ITER; k++) {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, obj.stream, obj.B_d, obj.C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemsetD8Async((hipDeviceptr_t)obj.C_d, obj.memSetVal,
N, obj.stream));
HIP_CHECK(hipStreamSynchronize(obj.stream));
+1
Dosyayı Görüntüle
@@ -138,6 +138,7 @@ TEST_CASE("Unit_hipPointerGetAttribute_KernelUpdation") {
reinterpret_cast<hipDeviceptr_t>(A_d)));
hipLaunchKernelGGL(var_update, dim3(1), dim3(1), 0, 0,
reinterpret_cast<int *>(data));
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
for (unsigned int i = 0; i < N; i++) {
+1
Dosyayı Görüntüle
@@ -74,6 +74,7 @@ void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream) {
hipLaunchKernelGGL(HipTest::vectorADDReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0,
static_cast<const T*>(A_d), static_cast<const T*>(B_d), C_d, numElements);
HIP_CHECK(hipGetLastError());
MemTraits<C>::Copy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, stream);
+3
Dosyayı Görüntüle
@@ -56,6 +56,7 @@ void run1(size_t size, hipStream_t stream) {
HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream));
HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream));
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream, Cd);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream));
HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream));
HIPCHECK(hipDeviceSynchronize());
@@ -97,7 +98,9 @@ void run(size_t size, hipStream_t stream1, hipStream_t stream2) {
HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1));
HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2));
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream1, Cd);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(HIP_KERNEL_NAME(Inc), dim3(N / 500), dim3(500), 0, stream2, Cdd);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1));
HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2));
HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1));
+2
Dosyayı Görüntüle
@@ -59,8 +59,10 @@ TEST_CASE("Unit_hipStreamCreate_MultistreamBasicFunctionalities") {
hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::kernel),
dim3(1), dim3(1), 0, streams[i], data[i], xd,
hipAPIStreamDisableTest::NN);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(HIP_KERNEL_NAME(hipAPIStreamDisableTest::nKernel),
dim3(1), dim3(1), 0, 0, yd);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipStreamDestroy(streams[i]));
}
HIP_CHECK(hipMemcpy(&x, xd, sizeof(float), hipMemcpyDeviceToHost));
+4
Dosyayı Görüntüle
@@ -49,7 +49,9 @@ TEST_CASE("Unit_hipMultiStream_sameDevice") {
HIP_CHECK(hipStreamCreate(&streams[i]));
HIP_CHECK(hipMalloc(&data[i], NN * sizeof(float)));
hipLaunchKernelGGL(kernel, dim3(1), dim3(1), 0, streams[i], data[i], xd, NN);
HIP_CHECK(hipGetLastError());
hipLaunchKernelGGL(HIP_KERNEL_NAME(nKernel), dim3(1), dim3(1), 0, 0, yd);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipFree(data[i]));
HIP_CHECK(hipStreamDestroy(streams[i]));
}
@@ -80,9 +82,11 @@ TEST_CASE("Unit_hipMultiStream_multimeDevice") {
HIP_CHECK(hipDeviceSynchronize());
// Launch kernel with default stream
hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, 0, 1);
HIP_CHECK(hipGetLastError());
// Launch kernel on all streams
for (int i = 0; i < nStreams; i++) {
hipLaunchKernelGGL(kernel_do_nothing, dim3(1), dim3(1), 0, streams[i], 1);
HIP_CHECK(hipGetLastError());
}
// Sync stream 1
HIP_CHECK(hipStreamSynchronize(streams[0]));
+1 -1
Dosyayı Görüntüle
@@ -131,7 +131,7 @@ TEST_CASE("Unit_hipStreamAddCallback_MultipleThreads") {
hipLaunchKernelGGL((device_function), dim3(blocks),
dim3(threadsPerBlock), 0,
mystream, C_d, A_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(
hipMemcpyAsync(C1_h, C_d, Nbytes,
hipMemcpyDeviceToHost, mystream));
+2 -2
Dosyayı Görüntüle
@@ -86,7 +86,7 @@ bool testStreamCallbackFunctionality(bool isDefault) {
const unsigned threadsPerBlock = 256;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, 0, A_d, C_d, NSize);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
0));
HIP_CHECK(hipStreamAddCallback(0, Callback, nullptr, 0));
@@ -102,7 +102,7 @@ bool testStreamCallbackFunctionality(bool isDefault) {
const unsigned threadsPerBlock = 256;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, mystream, A_d, C_d, NSize);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
mystream));
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
+3
Dosyayı Görüntüle
@@ -146,6 +146,7 @@ void funcTestsForAllPriorityLevelsWrtNullStrm(unsigned int flags,
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
dim3(BLOCKSIZE), 0, stream[idx], A_d[idx],
C_d[idx], MEMCPYSIZE2);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
hipMemcpyDeviceToHost, stream[idx]));
}
@@ -227,6 +228,7 @@ void queueTasksInStreams(std::vector<hipStream_t> *stream,
hipLaunchKernelGGL((HipTest::vector_square), dim3(GRIDSIZE),
dim3(BLOCKSIZE), 0, (*stream)[idx], A_d[idx],
C_d[idx], MEMCPYSIZE2);
HIP_CHECK(hipGetLastError());
HIPCHECK(hipMemcpyAsync(C_h[idx], C_d[idx], size,
hipMemcpyDeviceToHost, (*stream)[idx]));
}
@@ -426,6 +428,7 @@ bool validateStreamPrioritiesWithEvents() {
hipLaunchKernelGGL((memcpy_kernel<T>), dim3(GRIDSIZE), \
dim3(BLOCKSIZE), 0, stream_##x, dst_d_##x + j, src_d_##x + j, \
(MEMCPYSIZE / sizeof(T))); \
HIP_CHECK(hipGetLastError()); \
}
OP(low)
OP(normal)
+3
Dosyayı Görüntüle
@@ -157,6 +157,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_ValidateCallbackFunc") {
const unsigned threadsPerBlock = BLOCKSIZE;
hipLaunchKernelGGL((HipTest::vector_square), dim3(blocks),
dim3(threadsPerBlock), 0, mystream, A_d, C_d, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost,
mystream));
HIP_CHECK(hipStreamAddCallback(mystream, Callback, nullptr, 0));
@@ -250,6 +251,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, streams[0], dA[0], dC[0], N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
auto single_end = std::chrono::steady_clock::now();
@@ -269,6 +271,7 @@ TEST_CASE("Unit_hipExtStreamCreateWithCUMask_Functionality") {
<< streams[np] << " with CU mask: 0x" << ss[np].str().c_str());
hipLaunchKernelGGL(HipTest::vector_square, dim3(blocks),
dim3(threadsPerBlock), 0, streams[np], dA[np], dC[np], N);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
+1
Dosyayı Görüntüle
@@ -66,6 +66,7 @@ TEST_CASE("Unit_hipBindTexture2D_Pitch") {
hipLaunchKernelGGL(texture2dCopyKernel, dim3(4, 4, 1), dim3(32, 32, 1),
0, 0, devPtrB);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TYPE_t), devPtrB,
SIZE_W*sizeof(TYPE_t), SIZE_W*sizeof(TYPE_t),
+1
Dosyayı Görüntüle
@@ -63,6 +63,7 @@ TEST_CASE("Unit_hipBindTexture_tex1DfetchVerification") {
dim3 dimGrid(N / dimBlock.x, 1, 1);
hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, devBuf);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(output, devBuf, N * sizeof(float),
hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -118,6 +118,7 @@ static void textureTest(texture<T, hipTextureType1D,
hipLaunchKernelGGL(normalizedValTextureTest<T>, dim3(1, 1, 1),
dim3(SIZE, 1, 1), 0, 0, SIZE, dOutputData);
HIP_CHECK(hipGetLastError());
float *hOutputData = new float[SIZE];
REQUIRE(hOutputData != nullptr);
+3 -1
Dosyayı Görüntüle
@@ -86,9 +86,11 @@ TEST_CASE("Unit_hipSimpleTexture2DLayered_Check") {
dim3 dimBlock(8, 8, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
for (unsigned int layer = 0; layer < num_layers; layer++)
for (unsigned int layer = 0; layer < num_layers; layer++) {
hipLaunchKernelGGL(simpleKernelLayeredArray, dimGrid, dimBlock, 0, 0,
dData, width, height, layer);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
// Allocate mem for the result on host side
+1
Dosyayı Görüntüle
@@ -92,6 +92,7 @@ static void runSimpleTexture3D_Check(int width, int height, int depth,
hipLaunchKernelGGL(simpleKernel3DArray, dim3(1, 1, 1), dim3(1, 1, 1),
0, 0, dData, width, height, depth);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
// Allocate mem for the result on host side
+1
Dosyayı Görüntüle
@@ -79,6 +79,7 @@ static void runTest(hipTextureAddressMode addressMode,
hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
texBufOut, texObj);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float),
+1
Dosyayı Görüntüle
@@ -85,6 +85,7 @@ TEMPLATE_TEST_CASE("Unit_hipTexObjPitch_texture2D", "", float, int,
hipLaunchKernelGGL(texture2dCopyKernel, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0,
texObj, devPtrB);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipMemcpy2D(B, SIZE_W*sizeof(TestType), devPtrB,
SIZE_W*sizeof(TestType), SIZE_W*sizeof(TestType),
+1
Dosyayı Görüntüle
@@ -97,6 +97,7 @@ static void runMipMapTest(unsigned int width, unsigned int height, unsigned int
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject, width,
(2 * mipmap_level));
HIP_CHECK(hipGetLastError());
hipDeviceSynchronize();
float* hOutputData = reinterpret_cast<float*>(malloc(size));
+1
Dosyayı Görüntüle
@@ -73,6 +73,7 @@ static void runTest(const int width, const float offsetX) {
hipLaunchKernelGGL(tex1DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
textureObject, width, offsetX);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
+2
Dosyayı Görüntüle
@@ -113,9 +113,11 @@ static void runTest(const int width, const float offsetX = 0) {
if (resType == hipResourceTypeArray) {
hipLaunchKernelGGL(tex1DRGBAKernel<normalizedCoords>, dimGrid, dimBlock,
0, 0, dData, textureObject, width, offsetX);
HIP_CHECK(hipGetLastError());
} else {
hipLaunchKernelGGL(tex1DRGBAKernelFetch, dimGrid, dimBlock,
0, 0, dData, textureObject, offsetX);
HIP_CHECK(hipGetLastError());
}
HIP_CHECK(hipDeviceSynchronize());
+1
Dosyayı Görüntüle
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipCreateTextureObject_tex1DfetchVerification") {
hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
texBufOut, texObj);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(float),
+1
Dosyayı Görüntüle
@@ -85,6 +85,7 @@ TEST_CASE("Unit_hipTextureObj2D_Check") {
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock),
0, 0, dData, textureObject, width);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -81,6 +81,7 @@ static void runTest(const int width, const int height, const float offsetX, cons
hipLaunchKernelGGL(tex2DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
textureObject, width, height, offsetX, offsetY);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
+1
Dosyayı Görüntüle
@@ -86,6 +86,7 @@ static void runTest(const int width, const int height, const float offsetX, cons
hipLaunchKernelGGL(tex2DRGBAKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
textureObject, width, height, offsetX, offsetY);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
+1
Dosyayı Görüntüle
@@ -113,6 +113,7 @@ static void runTest(const int width, const int height, const int depth, const fl
hipLaunchKernelGGL(tex3DKernel<normalizedCoords>, dimGrid, dimBlock, 0, 0, dData,
textureObject, width, height, depth, offsetX, offsetY, offsetZ);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
+1
Dosyayı Görüntüle
@@ -178,6 +178,7 @@ bool runTest() {
hipLaunchKernelGGL(tex1dKernelFetch<T>, dimGrid, dimBlock, 0, 0, texBufOut,
texObj, N);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
HIP_CHECK(hipMemcpy(output, texBufOut, N * sizeof(T), hipMemcpyDeviceToHost));
+1
Dosyayı Görüntüle
@@ -69,6 +69,7 @@ TEST_CASE("Unit_hipTextureRef2D_Check") {
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
dData, width);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());
float* hOutputData = reinterpret_cast<float*>(malloc(size));