diff --git a/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh b/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh index 85f4e25ab0..2b4db75dd8 100644 --- a/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh +++ b/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh @@ -446,6 +446,7 @@ void TestCore(const TestParams& p) { // Launch Kernel for (auto i = 0u; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); diff --git a/projects/hip-tests/catch/unit/atomics/bitwise_common.hh b/projects/hip-tests/catch/unit/atomics/bitwise_common.hh index b611679683..9c7bf0f5da 100644 --- a/projects/hip-tests/catch/unit/atomics/bitwise_common.hh +++ b/projects/hip-tests/catch/unit/atomics/bitwise_common.hh @@ -272,6 +272,7 @@ void TestCore(const TestParams& p) { } // Launch Kernel and get back old vals for (auto i = 0u; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); diff --git a/projects/hip-tests/catch/unit/atomics/min_max_common.hh b/projects/hip-tests/catch/unit/atomics/min_max_common.hh index f6a075680e..1f6d180f83 100644 --- a/projects/hip-tests/catch/unit/atomics/min_max_common.hh +++ b/projects/hip-tests/catch/unit/atomics/min_max_common.hh @@ -302,6 +302,7 @@ void TestCore(const TestParams& p) { // Launch kernel for (auto i = 0u; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); diff --git a/projects/hip-tests/catch/unit/graph/hipGetProcAddressGraphApis.cc b/projects/hip-tests/catch/unit/graph/hipGetProcAddressGraphApis.cc index 25fbc6cd2d..63e50979a3 100644 --- a/projects/hip-tests/catch/unit/graph/hipGetProcAddressGraphApis.cc +++ b/projects/hip-tests/catch/unit/graph/hipGetProcAddressGraphApis.cc @@ -375,9 +375,7 @@ TEST_CASE("Unit_hipGetProcAddress_GraphAPIs_AddMemsetMemcpyNodes") { hipGraphExec_t graphExec; HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, 0)); - #ifdef _WIN32 HIP_CHECK(hipStreamSynchronize(0)); - #endif REQUIRE(validateArrayT(hostMemDst, N, value) == true); diff --git a/projects/hip-tests/catch/unit/memory/hipMallocAsync.cc b/projects/hip-tests/catch/unit/memory/hipMallocAsync.cc index 3a5baf4f17..6adb1088a1 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocAsync.cc @@ -319,8 +319,8 @@ TEST_CASE("Unit_hipMallocAsync_Multidevice") { * - HIP_VERSION >= 6.2 */ #if HT_AMD -static void threadQAsyncCommands(streamMemAllocTest* testObj, - hipStream_t strm) { +static void threadQAsyncCommands(streamMemAllocTest* testObj, hipStream_t strm, int idx) { + HIP_CHECK(hipSetDevice(idx)); // Create host buffer with test data. testObj->createHostBufferWithData(); // Allocate device memory and transfer data to it asyncronously on stream. @@ -350,7 +350,7 @@ TEST_CASE("Unit_hipMallocAsync_Multidevice_Concurrent") { // Queue commands in each device for (int idx = 0; idx < num_devices; idx++) { HIP_CHECK(hipSetDevice(idx)); - std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]); + std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx], idx); test.join(); } // Wait for the streams @@ -405,10 +405,10 @@ TEST_CASE("Unit_hipMallocAsync_Multidevice_MultiStream") { // Queue commands in each device for (int idx = 0; idx < num_devices; idx++) { HIP_CHECK(hipSetDevice(idx)); - std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx], - stream_buf[streamPerAsic*idx]); - std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1], - stream_buf[streamPerAsic*idx + 1]); + std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx], + stream_buf[streamPerAsic * idx], idx); + std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx + 1], + stream_buf[streamPerAsic * idx + 1], idx); test1.join(); test2.join(); } diff --git a/projects/hip-tests/catch/unit/memory/hipMallocFromPoolAsync.cc b/projects/hip-tests/catch/unit/memory/hipMallocFromPoolAsync.cc index 6993dfa87a..ca9f41f076 100644 --- a/projects/hip-tests/catch/unit/memory/hipMallocFromPoolAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMallocFromPoolAsync.cc @@ -371,8 +371,8 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_ReleaseThreshold_Mgpu") { /** * Local Thread Functions */ -static void threadQAsyncCommands(streamMemAllocTest* testObj, - hipStream_t strm) { +static void threadQAsyncCommands(streamMemAllocTest* testObj, hipStream_t strm, int idx) { + HIP_CHECK(hipSetDevice(idx)); // Create host buffer with test data. testObj->createHostBufferWithData(); // Allocate device memory and transfer data to it asyncronously on stream. @@ -616,7 +616,7 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_Concurrent") { // Queue commands in each device for (int idx = 0; idx < num_devices; idx++) { HIP_CHECK(hipSetDevice(idx)); - std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx]); + std::thread test(threadQAsyncCommands, tesObjBuf[idx], stream_buf[idx], idx); test.join(); } // Wait for the streams @@ -675,10 +675,10 @@ TEST_CASE("Unit_hipMallocFromPoolAsync_Multidevice_MultiStream") { // Queue commands in each device for (int idx = 0; idx < num_devices; idx++) { HIP_CHECK(hipSetDevice(idx)); - std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx], - stream_buf[streamPerAsic*idx]); - std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic*idx + 1], - stream_buf[streamPerAsic*idx + 1]); + std::thread test1(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx], + stream_buf[streamPerAsic * idx], idx); + std::thread test2(threadQAsyncCommands, tesObjBuf[streamPerAsic * idx + 1], + stream_buf[streamPerAsic * idx + 1], idx); test1.join(); test2.join(); } diff --git a/projects/hip-tests/catch/unit/memory/hipMemAdvise_old.cc b/projects/hip-tests/catch/unit/memory/hipMemAdvise_old.cc index 9b785aa61a..6116aa34b2 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemAdvise_old.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemAdvise_old.cc @@ -836,7 +836,6 @@ TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { int *Hmm = NULL, NumElms = (1024 * 1024), InitVal = 123, blockSize = 64; int *Hmm1 = NULL, DataMismatch = 0; hipStream_t strm; - HIP_CHECK(hipStreamCreate(&strm)); HIP_CHECK(hipMallocManaged(&Hmm, (NumElms * sizeof(int)))); // Initializing memory for (int i = 0; i < NumElms; ++i) { @@ -852,6 +851,7 @@ TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { for (int i = 1; i < Ngpus; ++i) { DataMismatch = 0; HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreate(&strm)); HIP_CHECK(hipMallocManaged(&Hmm1, (NumElms * sizeof(int)))); MemAdvise3<<>>(Hmm, Hmm1, NumElms); HIP_CHECK(hipStreamSynchronize(strm)); @@ -865,6 +865,7 @@ TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { WARN("DataMismatch is observed with the gpu: " << i); REQUIRE(false); } + HIP_CHECK(hipStreamDestroy(strm)); HIP_CHECK(hipFree(Hmm1)); } } @@ -873,10 +874,12 @@ TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { for (int i = 0; i < Ngpus; ++i) { DataMismatch = 0; HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreate(&strm)); HIP_CHECK(hipMemAdvise(Hmm, (NumElms * sizeof(int)), hipMemAdviseSetReadMostly, i)); MemAdvise2<<>>(Hmm, NumElms); HIP_CHECK(hipStreamSynchronize(strm)); + HIP_CHECK(hipStreamDestroy(strm)); } // verifying the final result for (int i = 0; i < NumElms; ++i) { @@ -892,7 +895,7 @@ TEST_CASE("Unit_hipMemAdvise_ReadMosltyMgpuTst") { } #endif HIP_CHECK(hipFree(Hmm)); - HIP_CHECK(hipStreamDestroy(strm)); + } else { SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " "attribute. Hence skipping the testing with Pass result.\n"); diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc index 5e1b384ff6..c27fa9c0ba 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyPeerAsync.cc @@ -51,9 +51,6 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Positive_Default") { HipTest::HIP_SKIP_TEST("Skipping because devices < 2"); return; } - const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); - const StreamGuard stream_guard(stream_type); - const hipStream_t stream = stream_guard.stream(); const auto allocation_size = GENERATE(kPageSize / 2, kPageSize, kPageSize * 2); @@ -64,6 +61,11 @@ TEST_CASE("Unit_hipMemcpyPeerAsync_Positive_Default") { INFO("Src device: " << src_device << ", Dst device: " << dst_device); HIP_CHECK(hipSetDevice(src_device)); + + const auto stream_type = GENERATE(Streams::nullstream, Streams::perThread, Streams::created); + const StreamGuard stream_guard(stream_type); + const hipStream_t stream = stream_guard.stream(); + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); if (can_access_peer) { HIP_CHECK(hipDeviceEnablePeerAccess(dst_device, 0)); diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc index 6e906d6ab4..45d455e5c2 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc @@ -510,6 +510,7 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindDefaultForDtoD(bool& val_res) } for (int i = 0; i < numDevices; ++i) { + HIP_CHECK_THREAD(hipSetDevice(i)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), static_cast(B_d[i]), C_d[i], N); HIP_CHECK_THREAD(hipGetLastError()); diff --git a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream_old.cc b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream_old.cc index 2b9bba9660..b0f7309943 100644 --- a/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream_old.cc +++ b/projects/hip-tests/catch/unit/memory/hipMemcpyWithStream_old.cc @@ -475,6 +475,7 @@ void TestkindDefaultForDtoD(void) { } for (int i=0; i < NumDevices; ++i) { + HIP_CHECK(hipSetDevice(i)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i], static_cast(A_d[i]), diff --git a/projects/hip-tests/catch/unit/memory/hipSVMTestByteGranularity.cpp b/projects/hip-tests/catch/unit/memory/hipSVMTestByteGranularity.cpp index 9f3a896140..207bf7d1b3 100644 --- a/projects/hip-tests/catch/unit/memory/hipSVMTestByteGranularity.cpp +++ b/projects/hip-tests/catch/unit/memory/hipSVMTestByteGranularity.cpp @@ -109,6 +109,7 @@ TEST_CASE("test_svm_byte_granularity") { // get all the devices going simultaneously for(unsigned int d = 0; d < num_devices; d++) // device ids starting at 1. { + HIP_CHECK(hipSetDevice(d)); write_owned_locations<<>>(pA, num_devices_plus_host, d); HIP_CHECK(hipGetLastError()); } @@ -125,6 +126,7 @@ TEST_CASE("test_svm_byte_granularity") { size_t adjusted_num_elements = num_elements - num_devices; for(unsigned int d = 0; d < num_devices; d++) { + HIP_CHECK(hipSetDevice(d)); sum_neighbor_locations<<>>(pA, num_devices_plus_host, error_counts[d]); HIP_CHECK(hipGetLastError()); diff --git a/projects/hip-tests/catch/unit/memory/hipSVMTestFineGrainMemoryConsistency.cpp b/projects/hip-tests/catch/unit/memory/hipSVMTestFineGrainMemoryConsistency.cpp index cd5dd8fa6a..655327ba50 100644 --- a/projects/hip-tests/catch/unit/memory/hipSVMTestFineGrainMemoryConsistency.cpp +++ b/projects/hip-tests/catch/unit/memory/hipSVMTestFineGrainMemoryConsistency.cpp @@ -129,6 +129,7 @@ void launch_kernels_and_verify(std::vector &streams, unsigned int n // all the pixels. for(unsigned int d=0; d < num_devices; d++) { + HIP_CHECK(hipSetDevice(d)); build_hash_table_on_device<<<(num_pixels + 255) / 256, 256, 0, streams[d]>>>( pInputImage, num_pixels, pNodes, pNumNodes, numBins, d); HIP_CHECK(hipGetLastError()); diff --git a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp index 83dc5b8709..ee7944fc97 100644 --- a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp +++ b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp @@ -208,6 +208,7 @@ TEST_CASE("test_svm_shared_address_space_fine_grain_buffers") { } else { + HIP_CHECK(hipSetDevice(ci)); create_linked_lists_on_device(streams[ci], pNodes, pAllocator, numLists, ListLength); } @@ -218,6 +219,7 @@ TEST_CASE("test_svm_shared_address_space_fine_grain_buffers") { } else { + HIP_CHECK(hipSetDevice(vi)); verify_linked_lists_on_device(streams[vi], pNodes, pNumCorrect, numLists, ListLength); } diff --git a/projects/hip-tests/catch/unit/memory/hipStreamAttachMemAsync.cc b/projects/hip-tests/catch/unit/memory/hipStreamAttachMemAsync.cc index df9c5895b7..f2b0a9a0ea 100644 --- a/projects/hip-tests/catch/unit/memory/hipStreamAttachMemAsync.cc +++ b/projects/hip-tests/catch/unit/memory/hipStreamAttachMemAsync.cc @@ -87,6 +87,9 @@ TEST_CASE("Unit_hipStreamAttachMemAsync_Positive_AttachGlobal") { HIP_CHECK(hipStreamSynchronize(nullptr)); for (int i = 0; i < stream_count; ++i) { + if (device_count > 1) { + HIP_CHECK(hipSetDevice(i)); + } HipTest::launchKernel(Set, 1, 1, 0, streams.at(i)->stream(), managed_global.ptr() + i, i); } diff --git a/projects/hip-tests/catch/unit/memory/mempool_common.hh b/projects/hip-tests/catch/unit/memory/mempool_common.hh index 50d0a2f569..1a01f567b9 100644 --- a/projects/hip-tests/catch/unit/memory/mempool_common.hh +++ b/projects/hip-tests/catch/unit/memory/mempool_common.hh @@ -407,6 +407,7 @@ class streamMemAllocTest { dim3(THREADS_PER_BLOCK), 0, stream, static_cast(A_d), static_cast(B_d), C_d, size); + HIP_CHECK(hipGetLastError()); } // Transfer data from device to host asynchronously. void transferFromMempool(hipStream_t stream) {