Fix memory async copy test performance issue

Copying memory from device to host with a CPU agent
would cause a poor performance due to the reading of
uncahced device memory by CPU.

Fix it by using a GPU agent.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Change-Id: Ia3b562758fe73ef9efaa284f47e67bf569cc7b7b


[ROCm/ROCR-Runtime commit: 8501c0bcb1]
Cette révision appartient à :
Lang Yu
2023-02-10 13:07:42 +08:00
Parent 1ff8eae7be
révision d44cbfd4ad
2 fichiers modifiés avec 17 ajouts et 16 suppressions
+12 -11
Voir le fichier
@@ -175,18 +175,20 @@ hsa_status_t AcquireAccess(hsa_agent_t agent,
// succeeds a pointer to the second agent will be returned. If it fails, a
// nullptr will be returned.
hsa_agent_t *
AcquireAsyncCopyAccess(
MemoryAsyncCopy::AcquireAsyncCopyAccess(
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag) {
if (AcquireAccess(*dst_ag, src_pool, src_ptr) != HSA_STATUS_SUCCESS) {
if (AcquireAccess(*src_ag, dst_pool, dst_ptr) == HSA_STATUS_SUCCESS) {
return src_ag;
} else {
return nullptr;
}
} else {
hsa_status_t err;
err = AcquireAccess(*src_ag, dst_pool, dst_ptr);
if (err == HSA_STATUS_SUCCESS && src_ag->handle != cpu_agent_.handle)
return src_ag;
err = AcquireAccess(*dst_ag, src_pool, src_ptr);
if (err == HSA_STATUS_SUCCESS && dst_ag->handle != cpu_agent_.handle)
return dst_ag;
}
return &cpu_agent_;
}
void MemoryAsyncCopy::PrintTransactionType(Transaction *t) {
@@ -312,7 +314,6 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) {
std::cout << "Skipping..." << std::endl;
return;
}
ASSERT_NE(cpy_ag, nullptr);
err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag,
size, 0, NULL, s);
@@ -375,7 +376,7 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) {
err = hsa_amd_memory_async_copy(host_ptr_dst, cpu_agent_, ptr_dst,
dst_agent, size, 0, NULL, s);
dst_agent, Size[i], 0, NULL, s);
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
+5 -5
Voir le fichier
@@ -56,11 +56,6 @@
#include "hsa/hsa_ext_amd.h"
#include "suites/test_common/test_base.h"
hsa_agent_t *
AcquireAsyncCopyAccess(
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag);
hsa_status_t AcquireAccess(hsa_agent_t agent,
hsa_amd_memory_pool_t pool, void* ptr);
typedef enum TransType
@@ -195,6 +190,11 @@ class MemoryAsyncCopy : public TestBase {
hsa_agent_t cpu_agent() const {return cpu_agent_;}
void set_cpu_agent(hsa_agent_t a) {cpu_agent_ = a;}
hsa_agent_t *
AcquireAsyncCopyAccess(
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag);
protected:
void PrintTransactionType(Transaction *t);
#if ROCRTST_EMULATOR_BUILD