diff --git a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc index 1ad0a2ddbd..e83bb06cf6 100755 --- a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc +++ b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc @@ -214,6 +214,10 @@ void MemoryAsyncCopy::PrintTransactionType(Transaction *t) { printf("(Remote Device To Host)\n"); break; + case P2PRemote: + printf("(Peer To Remote Peer)\n"); + break; + default: printf("**Unexpected path**\n"); return; @@ -288,6 +292,8 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) { if (cpy_ag == nullptr) { std::cout << "Agents " << t->src << " and " << t->dst << "cannot access each other's pool." << std::endl; + std::cout << "Skipping..." << std::endl; + return; } ASSERT_NE(cpy_ag, nullptr); @@ -309,8 +315,9 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) { if (cpy_ag == nullptr) { std::cout << "Owner agents for pools" << t->src << " and " << t->dst << " cannot access each other's pool." << std::endl; + std::cout << "Skipping..." 
<< std::endl; + return; } - ASSERT_NE(cpy_ag, nullptr); for (int i = 0; i < kNumGranularity; i++) { if (Size[i] > size) { @@ -434,8 +441,26 @@ void MemoryAsyncCopy::DisplayBenchmark(Transaction *t) const { printf("Peer-To-Peer) =============================\n"); break; + case P2PRemote: + printf("(Peer-To-Remote-Peer) =====================\n"); + break; + + case H2DRemote: + printf("(Host-To-Remote-Device) ===================\n"); + break; + + case D2HRemote: + printf("(Device-To-Remote-Host) ===================\n"); + break; + default: - ASSERT_EQ(t->type == H2D || t->type == D2H || t->type == P2P, true); + ASSERT_TRUE(false) << "Unexpected Transaction value:" << t->type << + std::endl; + } + + if ((*t->benchmark_copy_time).size() == 0) { + printf("Skipped...\n"); + return; } if (verified_) { std::cout << "Verification: Pass" << std::endl; diff --git a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy_numa.cc b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy_numa.cc index bfd83527f0..cfa8f7e4a4 100755 --- a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy_numa.cc +++ b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy_numa.cc @@ -170,21 +170,23 @@ void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) { err = hsa_amd_memory_lock(local_alloc, size, &gpu_agent, 1, &locked_mem); ASSERT_EQ(HSA_STATUS_SUCCESS, err); - if (t->type == D2H) { + if (t->type == D2H || t->type == D2HRemote) { /* compare t->type against each enumerator: a bare enumerator operand of || is only tested for non-zero; here the source is allocated from src_pool and the destination is the locked buffer */ err = hsa_amd_memory_pool_allocate(src_pool, size, 0, &ptr_src); ASSERT_EQ(HSA_STATUS_SUCCESS, err); ptr_dst = locked_mem; - } else { - ASSERT_EQ(H2D, t->type); + } else if (t->type == H2D || t->type == H2DRemote) { /* mirror case: the destination is allocated from dst_pool and the source is the locked buffer */ err = hsa_amd_memory_pool_allocate(dst_pool, size, 0, &ptr_dst); ASSERT_EQ(HSA_STATUS_SUCCESS, err); ptr_src = locked_mem; + } else { + ASSERT_TRUE(t->type == P2P || t->type == P2PRemote); /* only the peer-to-peer variants may reach this branch; any other value is an unexpected transaction type */ + std::cout << "Skipping P2P for NUMA test" << std::endl; + return; } ASSERT_EQ(HSA_STATUS_SUCCESS, err); - void* host_ptr_src = NULL; void*
host_ptr_dst = NULL; err = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, @@ -242,6 +244,8 @@ void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) { if (cpy_ag == nullptr) { std::cout << "Agents " << t->src << " and " << t->dst << "cannot access each other's pool." << std::endl; + std::cout << "Skipping..." << std::endl; + return; } ASSERT_NE(cpy_ag, nullptr); @@ -260,10 +264,12 @@ void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) { if (cpy_ag == nullptr) { std::cout << "Agents " << t->src << " and " << t->dst << "cannot access each other's pool." << std::endl; + std::cout << "Skipping..." << std::endl; + return; } ASSERT_NE(cpy_ag, nullptr); - err = hsa_amd_memory_async_copy(ptr_dst, *cpy_ag, host_ptr_dst, *cpy_ag, + err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag, size, 0, NULL, s); ASSERT_EQ(HSA_STATUS_SUCCESS, err); @@ -281,6 +287,16 @@ void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) { ASSERT_NE(cpy_ag, nullptr); + cpy_ag = AcquireAsyncCopyAccess(ptr_dst, dst_pool, &dst_agent, + ptr_src, src_pool, &src_agent); + if (cpy_ag == nullptr) { + std::cout << "Agents " << t->src << " and " << t->dst << + "cannot access each other's pool." << std::endl; + std::cout << "Skipping..." 
<< std::endl; + return; + } + ASSERT_NE(cpy_ag, nullptr); + for (int i = 0; i < kNumGranularity; i++) { if (Size[i] > size) { break; @@ -300,7 +316,7 @@ void MemoryAsyncCopyNUMA::RunBenchmarkWithVerification(Transaction *t) { int index = copy_timer.CreateTimer(); copy_timer.StartTimer(index); - err = hsa_amd_memory_async_copy(ptr_dst, gpu_agent, ptr_src, gpu_agent, + err = hsa_amd_memory_async_copy(ptr_dst, *cpy_ag, ptr_src, *cpy_ag, Size[i], 0, NULL, t->signal); ASSERT_EQ(HSA_STATUS_SUCCESS, err); diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc index c4a142272e..1314683af9 100755 --- a/projects/rocr-runtime/rocrtst/suites/test_common/main.cc +++ b/projects/rocr-runtime/rocrtst/suites/test_common/main.cc @@ -449,7 +449,7 @@ TEST(rocrtstPerf, ENQUEUE_LATENCY) { RunGenericTest(&multiPacketequeue); } -TEST(rocrtstPerf, Memory_Async_Copy_NUMA) { +TEST(rocrtstPerf, DISABLED_Memory_Async_Copy_NUMA) { MemoryAsyncCopyNUMA numa; RunGenericTest(&numa); } diff --git a/projects/rocr-runtime/rocrtst/suites/test_common/test_common.cc b/projects/rocr-runtime/rocrtst/suites/test_common/test_common.cc index d9da7ddebc..3c8a1ebbc0 100755 --- a/projects/rocr-runtime/rocrtst/suites/test_common/test_common.cc +++ b/projects/rocr-runtime/rocrtst/suites/test_common/test_common.cc @@ -170,6 +170,12 @@ int DumpMonitorInfo() { for (uint32_t dindx = 0; dindx < num_mon_devices; ++dindx) { auto print_frequencies = [&](rsmi_frequencies *freqs, std::string label) { + if (rsmi_ret != RSMI_STATUS_SUCCESS) { + std::cout << "get frequency call returned " << rsmi_ret << std::endl; + dump_ret = 1; + return; + } + if (print_attr_label(label)) { for (uint32_t i = 0; i < freqs->num_supported; ++i) { std::cout << "\t** " << i << ": " << @@ -214,6 +220,7 @@ int DumpMonitorInfo() { rsmi_frequencies freqs; rsmi_ret = rsmi_dev_gpu_clk_freq_get(dindx, RSMI_CLK_TYPE_SYS, &freqs); + print_frequencies(&freqs, "Supported GPU clock 
frequencies:\n"); rsmi_ret = rsmi_dev_gpu_clk_freq_get(dindx, RSMI_CLK_TYPE_MEM, &freqs);