From d44cbfd4adaba9f03caa8da2a122c20ca6c31cfe Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Fri, 10 Feb 2023 13:07:42 +0800 Subject: [PATCH] Fix memory async copy test performance issue Copying memory from device to host with a CPU agent causes poor performance because the CPU reads uncached device memory. Fix it by using a GPU agent. Signed-off-by: Lang Yu Change-Id: Ia3b562758fe73ef9efaa284f47e67bf569cc7b7b [ROCm/ROCR-Runtime commit: 8501c0bcb1ede048e4df16b5477f1137b2f96845] --- .../suites/performance/memory_async_copy.cc | 23 ++++++++++--------- .../suites/performance/memory_async_copy.h | 10 ++++---- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc index 4f6c1123c7..193d6546bc 100755 --- a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc +++ b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.cc @@ -175,18 +175,20 @@ hsa_status_t AcquireAccess(hsa_agent_t agent, // succeeds a pointer to the second agent will be returned. If it fails, a // nullptr will be returned. 
hsa_agent_t * -AcquireAsyncCopyAccess( +MemoryAsyncCopy::AcquireAsyncCopyAccess( void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag, void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag) { - if (AcquireAccess(*dst_ag, src_pool, src_ptr) != HSA_STATUS_SUCCESS) { - if (AcquireAccess(*src_ag, dst_pool, dst_ptr) == HSA_STATUS_SUCCESS) { - return src_ag; - } else { - return nullptr; - } - } else { + hsa_status_t err; + + err = AcquireAccess(*src_ag, dst_pool, dst_ptr); + if (err == HSA_STATUS_SUCCESS && src_ag->handle != cpu_agent_.handle) + return src_ag; + + err = AcquireAccess(*dst_ag, src_pool, src_ptr); + if (err == HSA_STATUS_SUCCESS && dst_ag->handle != cpu_agent_.handle) return dst_ag; - } + + return &cpu_agent_; } void MemoryAsyncCopy::PrintTransactionType(Transaction *t) { @@ -312,7 +314,6 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) { std::cout << "Skipping..." << std::endl; return; } - ASSERT_NE(cpy_ag, nullptr); err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag, size, 0, NULL, s); @@ -375,7 +376,7 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) { err = hsa_amd_memory_async_copy(host_ptr_dst, cpu_agent_, ptr_dst, - dst_agent, size, 0, NULL, s); + dst_agent, Size[i], 0, NULL, s); ASSERT_EQ(HSA_STATUS_SUCCESS, err); while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1, diff --git a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.h b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.h index 5ea8f393f5..aa04be8951 100755 --- a/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.h +++ b/projects/rocr-runtime/rocrtst/suites/performance/memory_async_copy.h @@ -56,11 +56,6 @@ #include "hsa/hsa_ext_amd.h" #include "suites/test_common/test_base.h" -hsa_agent_t * -AcquireAsyncCopyAccess( - void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag, - void *src_ptr, hsa_amd_memory_pool_t src_pool, 
hsa_agent_t *src_ag); - hsa_status_t AcquireAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool, void* ptr); typedef enum TransType @@ -195,6 +190,11 @@ class MemoryAsyncCopy : public TestBase { hsa_agent_t cpu_agent() const {return cpu_agent_;} void set_cpu_agent(hsa_agent_t a) {cpu_agent_ = a;} + hsa_agent_t * + AcquireAsyncCopyAccess( + void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag, + void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag); + protected: void PrintTransactionType(Transaction *t); #if ROCRTST_EMULATOR_BUILD