diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index cb8d43b009..70c6f42dd2 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -931,122 +931,11 @@ "Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency", "Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency", "Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - int", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - float", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - double", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - int", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - float", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - double", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double", - "Unit_atomicAdd_Positive - int", - "Unit_atomicAdd_Positive - unsigned int", - "Unit_atomicAdd_Positive - unsigned long", - "Unit_atomicAdd_Positive - unsigned long long", - "Unit_atomicAdd_Positive - float", - "Unit_atomicAdd_Positive - double", - "Unit_atomicAdd_Positive_Multi_Kernel - int", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned int", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long", - "Unit_atomicAdd_Positive_Multi_Kernel - float", - "Unit_atomicAdd_Positive_Multi_Kernel - double", "Unit_atomicAdd_Negative_Parameters_RTC", - "Unit_atomicAdd_system_Positive_Peer_GPUs - int", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicAdd_system_Positive_Peer_GPUs - float", - "Unit_atomicAdd_system_Positive_Peer_GPUs - double", - "Unit_atomicAdd_system_Positive_Host_And_GPU - int", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicAdd_system_Positive_Host_And_GPU - float", - "Unit_atomicAdd_system_Positive_Host_And_GPU - double", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double", - "Unit_unsafeAtomicAdd_Positive - float", - "Unit_unsafeAtomicAdd_Positive - double", - "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float", - "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double", - "Unit_safeAtomicAdd_Positive - float", - "Unit_safeAtomicAdd_Positive - double", - "Unit_safeAtomicAdd_Positive_Multi_Kernel - float", - "Unit_safeAtomicAdd_Positive_Multi_Kernel - double", - "Unit_atomicSub_Positive - int", - "Unit_atomicSub_Positive - unsigned int", - "Unit_atomicSub_Positive - unsigned long", - "Unit_atomicSub_Positive - unsigned long long", - "Unit_atomicSub_Positive - float", - "Unit_atomicSub_Positive - double", - "Unit_atomicSub_Positive_Multi_Kernel - int", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned int", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned long", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned long long", - "Unit_atomicSub_Positive_Multi_Kernel - float", - "Unit_atomicSub_Positive_Multi_Kernel - double", "Unit_atomicSub_Negative_Parameters_RTC", - "Unit_atomicSub_system_Positive_Peer_GPUs - int", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicSub_system_Positive_Peer_GPUs - float", - "Unit_atomicSub_system_Positive_Peer_GPUs - double", - "Unit_atomicSub_system_Positive_Host_And_GPU - int", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicSub_system_Positive_Host_And_GPU - float", - "Unit_atomicSub_system_Positive_Host_And_GPU - double", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double", - "Unit_atomicInc_Positive - unsigned int", - "Unit_atomicInc_Positive_Multi_Kernel - unsigned int", "Unit_atomicInc_Negative_Parameters_RTC", - "Unit_atomicDec_Positive - unsigned int", - "Unit_atomicDec_Positive_Multi_Kernel - unsigned int", "Unit_atomicDec_Negative_Parameters_RTC", - "Unit_atomicCAS_Positive - int", - "Unit_atomicCAS_Positive - unsigned int", - "Unit_atomicCAS_Positive - unsigned long long", - "Unit_atomicCAS_Positive_Multi_Kernel - int", - "Unit_atomicCAS_Positive_Multi_Kernel - unsigned int", - "Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long", "Unit_atomicCAS_Negative_Parameters_RTC", - "Unit_atomicCAS_system_Positive_Peer_GPUs - int", - "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicCAS_system_Positive_Host_And_GPU - int", - "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long", "SWDEV-447384, SWDEV-447932: These tests fail in gfx1100, gfx1101 & gfx1102", "Unit_hipFreeAsync_Negative_Parameters", "Unit_hipIpcGetMemHandle_Positive_Unique_Handles_Reused_Memory", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 28cf736f54..b24c176d69 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -1114,122 +1114,11 @@ "Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency", "Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency", "Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - int", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned int", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - unsigned long long", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - float", - "Unit___hip_atomic_fetch_add_Positive_Wavefront - double", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - int", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned int", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - unsigned long long", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - float", - "Unit___hip_atomic_fetch_add_Positive_Workgroup - double", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - unsigned long long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - float", - "Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront - double", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned int", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - unsigned long long", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - float", - "Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup - double", - "Unit_atomicAdd_Positive - int", - "Unit_atomicAdd_Positive - unsigned int", - "Unit_atomicAdd_Positive - unsigned long", - "Unit_atomicAdd_Positive - unsigned long long", - "Unit_atomicAdd_Positive - float", - "Unit_atomicAdd_Positive - double", - "Unit_atomicAdd_Positive_Multi_Kernel - int", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned int", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long", - "Unit_atomicAdd_Positive_Multi_Kernel - unsigned long long", - "Unit_atomicAdd_Positive_Multi_Kernel - float", - "Unit_atomicAdd_Positive_Multi_Kernel - double", "Unit_atomicAdd_Negative_Parameters_RTC", - "Unit_atomicAdd_system_Positive_Peer_GPUs - int", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long", - "Unit_atomicAdd_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicAdd_system_Positive_Peer_GPUs - float", - "Unit_atomicAdd_system_Positive_Peer_GPUs - double", - "Unit_atomicAdd_system_Positive_Host_And_GPU - int", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long", - "Unit_atomicAdd_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicAdd_system_Positive_Host_And_GPU - float", - "Unit_atomicAdd_system_Positive_Host_And_GPU - double", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - unsigned long long", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - float", - "Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs - double", - "Unit_unsafeAtomicAdd_Positive - float", - "Unit_unsafeAtomicAdd_Positive - double", - "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - float", - "Unit_unsafeAtomicAdd_Positive_Multi_Kernel - double", - "Unit_safeAtomicAdd_Positive - float", - "Unit_safeAtomicAdd_Positive - double", - "Unit_safeAtomicAdd_Positive_Multi_Kernel - float", - "Unit_safeAtomicAdd_Positive_Multi_Kernel - double", - "Unit_atomicSub_Positive - int", - "Unit_atomicSub_Positive - unsigned int", - "Unit_atomicSub_Positive - unsigned long", - "Unit_atomicSub_Positive - unsigned long long", - "Unit_atomicSub_Positive - float", - "Unit_atomicSub_Positive - double", - "Unit_atomicSub_Positive_Multi_Kernel - int", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned int", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned long", - "Unit_atomicSub_Positive_Multi_Kernel - unsigned long long", - "Unit_atomicSub_Positive_Multi_Kernel - float", - "Unit_atomicSub_Positive_Multi_Kernel - double", "Unit_atomicSub_Negative_Parameters_RTC", - "Unit_atomicSub_system_Positive_Peer_GPUs - int", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long", - "Unit_atomicSub_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicSub_system_Positive_Peer_GPUs - float", - "Unit_atomicSub_system_Positive_Peer_GPUs - double", - "Unit_atomicSub_system_Positive_Host_And_GPU - int", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long", - "Unit_atomicSub_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicSub_system_Positive_Host_And_GPU - float", - "Unit_atomicSub_system_Positive_Host_And_GPU - double", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - unsigned long long", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - float", - "Unit_atomicSub_system_Positive_Host_And_Peer_GPUs - double", - "Unit_atomicInc_Positive - unsigned int", - "Unit_atomicInc_Positive_Multi_Kernel - unsigned int", "Unit_atomicInc_Negative_Parameters_RTC", - "Unit_atomicDec_Positive - unsigned int", - "Unit_atomicDec_Positive_Multi_Kernel - unsigned int", "Unit_atomicDec_Negative_Parameters_RTC", - "Unit_atomicCAS_Positive - int", - "Unit_atomicCAS_Positive - unsigned int", - "Unit_atomicCAS_Positive - unsigned long long", - "Unit_atomicCAS_Positive_Multi_Kernel - int", - "Unit_atomicCAS_Positive_Multi_Kernel - unsigned int", - "Unit_atomicCAS_Positive_Multi_Kernel - unsigned long long", "Unit_atomicCAS_Negative_Parameters_RTC", - "Unit_atomicCAS_system_Positive_Peer_GPUs - int", - "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned int", - "Unit_atomicCAS_system_Positive_Peer_GPUs - unsigned long long", - "Unit_atomicCAS_system_Positive_Host_And_GPU - int", - "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned int", - "Unit_atomicCAS_system_Positive_Host_And_GPU - unsigned long long", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - int", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned int", - "Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs - unsigned long long", "SWDEV-450909: Test failed in stress testing", "Unit_RTC_LinkDestroy_Default", "=== SWDEV-453453 : Below tests failed in stress test on 22/03/24 ===", diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh index 0c142c4506..ff6096d18f 100644 --- a/catch/unit/atomics/arithmetic_common.hh +++ b/catch/unit/atomics/arithmetic_common.hh @@ -159,7 +159,8 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) __syncthreads(); } - old_vals[tid] = PerformAtomicOperation(mem); + old_vals[tid] = PerformAtomicOperation(mem); + if constexpr (use_shared_mem) { __syncthreads(); @@ -224,7 +225,7 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, __syncthreads(); } - const auto n = cooperative_groups::this_grid().size() - width; + const auto n = cooperative_groups::this_grid().size(); TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); @@ -272,14 +273,14 @@ std::tuple, std::vector> TestKernelHostRef(const const auto val = GetTestValue(); const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() + - p.host_thread_count * p.HostIterationsPerThread(); + p.host_thread_count * p.HostIterationsPerThread(); - std::vector res_vals(p.width); + std::vector res_vals((p.num_devices + 1)* p.width); std::vector old_vals; old_vals.reserve(total_thread_count); - auto perform_op = [&](unsigned id) { - auto& res = res_vals[id % p.width]; + auto perform_op = [&](unsigned id, unsigned dev) { + auto& res = res_vals[id % p.width + (dev*p.width)]; old_vals.push_back(res); if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || @@ -301,15 +302,15 @@ std::tuple, std::vector> TestKernelHostRef(const for (auto i = 0u; i < p.num_devices; ++i) { for (auto j = 0u; j < p.kernel_count; ++j) { - for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) { - perform_op(tid); + for (auto tid = 0u; tid < p.ThreadCount(); ++tid) { + perform_op(tid, i); } } } for (auto i = 0u; i < p.host_thread_count; ++i) { for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) { - perform_op(j); + perform_op(j, p.num_devices); } } @@ -398,11 +399,13 @@ void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* co template void TestCore(const TestParams& p) { - const unsigned int flags = - p.alloc_type == LinearAllocs::mallocAndRegister ? hipHostRegisterMapped : 0u; + // Device Memory Allocation const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + const auto mem_alloc_size = p.width * p.pitch; + std::vector> old_vals_devs; + std::vector> mem_devs; std::vector streams; for (auto i = 0; i < p.num_devices; ++i) { HIP_CHECK(hipSetDevice(i)); @@ -410,48 +413,58 @@ void TestCore(const TestParams& p) { for (auto j = 0; j < p.kernel_count; ++j) { streams.emplace_back(Streams::created); } + mem_devs.emplace_back(p.alloc_type, mem_alloc_size); } - const auto mem_alloc_size = p.width * p.pitch; - LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size, flags); - + // Host Memory std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount() + p.host_thread_count * p.HostIterationsPerThread()); - std::vector res_vals(p.width); + std::vector res_vals((p.num_devices + 1) * p.width); - TestType* const mem_ptr = - p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); - - HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + // Initialize device memory + for (auto i = 0u; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_devs[i].ptr() : mem_devs[i].host_ptr(); + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + } + // Launch Kernel for (auto i = 0u; i < p.num_devices; ++i) { for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); - LaunchKernel(p, stream, mem_dev.ptr(), + LaunchKernel(p, stream, mem_devs[i].ptr(), old_vals); } } - PerformHostAtomicOperation(p, mem_dev.host_ptr(), old_vals.data()); + // Launch Host Threads + mem_devs.emplace_back(LinearAllocs::hipHostMalloc, mem_alloc_size); + PerformHostAtomicOperation(p, mem_devs[p.num_devices].host_ptr(), old_vals.data()); + + // Copy results back to Host for (auto i = 0u; i < p.num_devices; ++i) { const auto device_offset = i * p.kernel_count * p.ThreadCount(); HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), old_vals_alloc_size, hipMemcpyDeviceToHost)); - } - HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + HIP_CHECK(hipMemcpy2D(res_vals.data() + i*p.width, sizeof(TestType), mem_devs[i].ptr(), p.pitch, sizeof(TestType), p.width, hipMemcpyDeviceToHost)); + } + + HIP_CHECK(hipMemcpy2D(res_vals.data() + p.num_devices*p.width, sizeof(TestType), mem_devs[p.num_devices].host_ptr(), p.pitch, sizeof(TestType), + p.width, hipMemcpyHostToHost)); Verify(p, res_vals, old_vals); } -inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } +inline dim3 GenerateThreadDimensions() { return dim3(16); } inline dim3 GenerateBlockDimensions() { int sm_count = 0; HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); - return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); + return dim3(sm_count); } // Configures and creates the TestCore for a single device, and a single kernel launch @@ -488,19 +501,20 @@ void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int p } using LA = LinearAllocs; for (const auto alloc_type : - {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + {LA::hipMalloc}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { TestCore(params); } } } - - SECTION("Shared memory") { +#ifdef __linux__ + SECTION("Shared memory") { params.blocks = dim3(1); params.alloc_type = LinearAllocs::hipMalloc; TestCore(params); } +#endif } // Configures and creates the TestCore for a single device, and multiple kernel launches @@ -524,7 +538,7 @@ void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsig using LA = LinearAllocs; for (const auto alloc_type : - {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + {LA::hipMalloc}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { TestCore(params); @@ -568,10 +582,10 @@ void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices, params.host_thread_count = host_thread_count; using LA = LinearAllocs; - for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + for (const auto alloc_type : {LA::hipMalloc}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { TestCore(params); } } -} \ No newline at end of file +}