[SWDEV-454178] - Fix for Atomic Arithmetic testcases

Change-Id: I8c8a7026bfe4906108c7c90907ffe130cd993ab4
This commit is contained in:
Rahul Manocha
2024-03-28 17:22:07 -07:00
committed by Rakesh Roy
parent 38be7ae377
commit ba23adcded
3 changed files with 45 additions and 253 deletions
-111
View File
@@ -931,122 +931,11 @@
"Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - float",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - double",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - float",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double",
"Unit_atomicAdd_Positive - int",
"Unit_atomicAdd_Positive - unsigned int",
"Unit_atomicAdd_Positive - unsigned long",
"Unit_atomicAdd_Positive - unsigned long long",
"Unit_atomicAdd_Positive - float",
"Unit_atomicAdd_Positive - double",
"Unit_atomicAdd_Positive_Multi_Kernel - int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicAdd_Positive_Multi_Kernel - float",
"Unit_atomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicAdd_Negative_Parameters_RTC",
"Unit_atomicAdd_system_Positive_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Peer_GPUs - double",
"Unit_atomicAdd_system_Positive_Host_And_GPU - int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - float",
"Unit_atomicAdd_system_Positive_Host_And_GPU - double",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double",
"Unit_unsafeAtomicAdd_Positive - float",
"Unit_unsafeAtomicAdd_Positive - double",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_safeAtomicAdd_Positive - float",
"Unit_safeAtomicAdd_Positive - double",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicSub_Positive - int",
"Unit_atomicSub_Positive - unsigned int",
"Unit_atomicSub_Positive - unsigned long",
"Unit_atomicSub_Positive - unsigned long long",
"Unit_atomicSub_Positive - float",
"Unit_atomicSub_Positive - double",
"Unit_atomicSub_Positive_Multi_Kernel - int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicSub_Positive_Multi_Kernel - float",
"Unit_atomicSub_Positive_Multi_Kernel - double",
"Unit_atomicSub_Negative_Parameters_RTC",
"Unit_atomicSub_system_Positive_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Peer_GPUs - double",
"Unit_atomicSub_system_Positive_Host_And_GPU - int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_GPU - float",
"Unit_atomicSub_system_Positive_Host_And_GPU - double",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double",
"Unit_atomicInc_Positive - unsigned int",
"Unit_atomicInc_Positive_Multi_Kernel - unsigned int",
"Unit_atomicInc_Negative_Parameters_RTC",
"Unit_atomicDec_Positive - unsigned int",
"Unit_atomicDec_Positive_Multi_Kernel - unsigned int",
"Unit_atomicDec_Negative_Parameters_RTC",
"Unit_atomicCAS_Positive - int",
"Unit_atomicCAS_Positive - unsigned int",
"Unit_atomicCAS_Positive - unsigned long long",
"Unit_atomicCAS_Positive_Multi_Kernel - int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicCAS_Negative_Parameters_RTC",
"Unit_atomicCAS_system_Positive_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_GPU - int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102",
"Unit_hipFreeAsync_Negative_Parameters",
"Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Reused_Memory",
-111
View File
@@ -1114,122 +1114,11 @@
"Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - float",
"Unit___hip_atomic_fetch_add_Positive_Wavefront - double",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - float",
"Unit___hip_atomic_fetch_add_Positive_Workgroup - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float",
"Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double",
"Unit_atomicAdd_Positive - int",
"Unit_atomicAdd_Positive - unsigned int",
"Unit_atomicAdd_Positive - unsigned long",
"Unit_atomicAdd_Positive - unsigned long long",
"Unit_atomicAdd_Positive - float",
"Unit_atomicAdd_Positive - double",
"Unit_atomicAdd_Positive_Multi_Kernel - int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned int",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long",
"Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicAdd_Positive_Multi_Kernel - float",
"Unit_atomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicAdd_Negative_Parameters_RTC",
"Unit_atomicAdd_system_Positive_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Peer_GPUs - double",
"Unit_atomicAdd_system_Positive_Host_And_GPU - int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_GPU - float",
"Unit_atomicAdd_system_Positive_Host_And_GPU - double",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double",
"Unit_unsafeAtomicAdd_Positive - float",
"Unit_unsafeAtomicAdd_Positive - double",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_safeAtomicAdd_Positive - float",
"Unit_safeAtomicAdd_Positive - double",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - float",
"Unit_safeAtomicAdd_Positive_Multi_Kernel - double",
"Unit_atomicSub_Positive - int",
"Unit_atomicSub_Positive - unsigned int",
"Unit_atomicSub_Positive - unsigned long",
"Unit_atomicSub_Positive - unsigned long long",
"Unit_atomicSub_Positive - float",
"Unit_atomicSub_Positive - double",
"Unit_atomicSub_Positive_Multi_Kernel - int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned int",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long",
"Unit_atomicSub_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicSub_Positive_Multi_Kernel - float",
"Unit_atomicSub_Positive_Multi_Kernel - double",
"Unit_atomicSub_Negative_Parameters_RTC",
"Unit_atomicSub_system_Positive_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Peer_GPUs - double",
"Unit_atomicSub_system_Positive_Host_And_GPU - int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_GPU - float",
"Unit_atomicSub_system_Positive_Host_And_GPU - double",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float",
"Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double",
"Unit_atomicInc_Positive - unsigned int",
"Unit_atomicInc_Positive_Multi_Kernel - unsigned int",
"Unit_atomicInc_Negative_Parameters_RTC",
"Unit_atomicDec_Positive - unsigned int",
"Unit_atomicDec_Positive_Multi_Kernel - unsigned int",
"Unit_atomicDec_Negative_Parameters_RTC",
"Unit_atomicCAS_Positive - int",
"Unit_atomicCAS_Positive - unsigned int",
"Unit_atomicCAS_Positive - unsigned long long",
"Unit_atomicCAS_Positive_Multi_Kernel - int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned int",
"Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long",
"Unit_atomicCAS_Negative_Parameters_RTC",
"Unit_atomicCAS_system_Positive_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_GPU - int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int",
"Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long",
"SWDEV-450909: Test failed in stress testing",
"Unit_RTC_LinkDestroy_Default",
"=== SWDEV-453453 : Below tests failed in stress test on 22/03/24 ===",
+45 -31
View File
@@ -159,7 +159,8 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals)
__syncthreads();
}
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
if constexpr (use_shared_mem) {
__syncthreads();
@@ -224,7 +225,7 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
__syncthreads();
}
const auto n = cooperative_groups::this_grid().size() - width;
const auto n = cooperative_groups::this_grid().size();
TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);
@@ -272,14 +273,14 @@ std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const
const auto val = GetTestValue<TestType, operation>();
const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() +
p.host_thread_count * p.HostIterationsPerThread();
p.host_thread_count * p.HostIterationsPerThread();
std::vector<TestType> res_vals(p.width);
std::vector<TestType> res_vals((p.num_devices + 1)* p.width);
std::vector<TestType> old_vals;
old_vals.reserve(total_thread_count);
auto perform_op = [&](unsigned id) {
auto& res = res_vals[id % p.width];
auto perform_op = [&](unsigned id, unsigned dev) {
auto& res = res_vals[id % p.width + (dev*p.width)];
old_vals.push_back(res);
if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem ||
@@ -301,15 +302,15 @@ std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) {
perform_op(tid);
for (auto tid = 0u; tid < p.ThreadCount(); ++tid) {
perform_op(tid, i);
}
}
}
for (auto i = 0u; i < p.host_thread_count; ++i) {
for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) {
perform_op(j);
perform_op(j, p.num_devices);
}
}
@@ -398,11 +399,13 @@ void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* co
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
void TestCore(const TestParams& p) {
const unsigned int flags =
p.alloc_type == LinearAllocs::mallocAndRegister ? hipHostRegisterMapped : 0u;
// Device Memory Allocation
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
const auto mem_alloc_size = p.width * p.pitch;
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
std::vector<LinearAllocGuard<TestType>> mem_devs;
std::vector<StreamGuard> streams;
for (auto i = 0; i < p.num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
@@ -410,48 +413,58 @@ void TestCore(const TestParams& p) {
for (auto j = 0; j < p.kernel_count; ++j) {
streams.emplace_back(Streams::created);
}
mem_devs.emplace_back(p.alloc_type, mem_alloc_size);
}
const auto mem_alloc_size = p.width * p.pitch;
LinearAllocGuard<TestType> mem_dev(p.alloc_type, mem_alloc_size, flags);
// Host Memory
std::vector<TestType> old_vals(p.num_devices * p.kernel_count * p.ThreadCount() +
p.host_thread_count * p.HostIterationsPerThread());
std::vector<TestType> res_vals(p.width);
std::vector<TestType> res_vals((p.num_devices + 1) * p.width);
TestType* const mem_ptr =
p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr();
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
// Initialize device memory
for (auto i = 0u; i < p.num_devices; ++i) {
HIP_CHECK(hipSetDevice(i));
TestType* const mem_ptr =
p.alloc_type == LinearAllocs::hipMalloc ? mem_devs[i].ptr() : mem_devs[i].host_ptr();
HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size));
}
// Launch Kernel
for (auto i = 0u; i < p.num_devices; ++i) {
for (auto j = 0u; j < p.kernel_count; ++j) {
const auto& stream = streams[i * p.kernel_count + j].stream();
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_devs[i].ptr(),
old_vals);
}
}
PerformHostAtomicOperation<TestType, operation>(p, mem_dev.host_ptr(), old_vals.data());
// Launch Host Threads
mem_devs.emplace_back(LinearAllocs::hipHostMalloc, mem_alloc_size);
PerformHostAtomicOperation<TestType, operation>(p, mem_devs[p.num_devices].host_ptr(), old_vals.data());
// Copy results back to Host
for (auto i = 0u; i < p.num_devices; ++i) {
const auto device_offset = i * p.kernel_count * p.ThreadCount();
HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(),
old_vals_alloc_size, hipMemcpyDeviceToHost));
}
HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType),
HIP_CHECK(hipMemcpy2D(res_vals.data() + i*p.width, sizeof(TestType), mem_devs[i].ptr(), p.pitch, sizeof(TestType),
p.width, hipMemcpyDeviceToHost));
}
HIP_CHECK(hipMemcpy2D(res_vals.data() + p.num_devices*p.width, sizeof(TestType), mem_devs[p.num_devices].host_ptr(), p.pitch, sizeof(TestType),
p.width, hipMemcpyHostToHost));
Verify<TestType, operation>(p, res_vals, old_vals);
}
inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); }
inline dim3 GenerateThreadDimensions() { return dim3(16); }
inline dim3 GenerateBlockDimensions() {
int sm_count = 0;
HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0));
return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2));
return dim3(sm_count);
}
// Configures and creates the TestCore for a single device, and a single kernel launch
@@ -488,19 +501,20 @@ void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int p
}
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
{LA::hipMalloc}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false, memory_scope>(params);
}
}
}
SECTION("Shared memory") {
#ifdef __linux__
SECTION("Shared memory") {
params.blocks = dim3(1);
params.alloc_type = LinearAllocs::hipMalloc;
TestCore<TestType, operation, true, memory_scope>(params);
}
#endif
}
// Configures and creates the TestCore for a single device, and multiple kernel launches
@@ -524,7 +538,7 @@ void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsig
using LA = LinearAllocs;
for (const auto alloc_type :
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
{LA::hipMalloc}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false>(params);
@@ -568,10 +582,10 @@ void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices,
params.host_thread_count = host_thread_count;
using LA = LinearAllocs;
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
for (const auto alloc_type : {LA::hipMalloc}) {
params.alloc_type = alloc_type;
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
}
}
}
}