Fix memory async copy test performance issue
Copying memory from device to host with a CPU agent
results in poor performance because the CPU has to read
uncached device memory.
Fix it by using a GPU agent.
Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Change-Id: Ia3b562758fe73ef9efaa284f47e67bf569cc7b7b
[ROCm/ROCR-Runtime commit: 8501c0bcb1]
Cette révision appartient à :
@@ -175,18 +175,20 @@ hsa_status_t AcquireAccess(hsa_agent_t agent,
|
||||
// succeeds a pointer to the second agent will be returned. If it fails, a
|
||||
// nullptr will be returned.
|
||||
hsa_agent_t *
|
||||
AcquireAsyncCopyAccess(
|
||||
MemoryAsyncCopy::AcquireAsyncCopyAccess(
|
||||
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
|
||||
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag) {
|
||||
if (AcquireAccess(*dst_ag, src_pool, src_ptr) != HSA_STATUS_SUCCESS) {
|
||||
if (AcquireAccess(*src_ag, dst_pool, dst_ptr) == HSA_STATUS_SUCCESS) {
|
||||
return src_ag;
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
hsa_status_t err;
|
||||
|
||||
err = AcquireAccess(*src_ag, dst_pool, dst_ptr);
|
||||
if (err == HSA_STATUS_SUCCESS && src_ag->handle != cpu_agent_.handle)
|
||||
return src_ag;
|
||||
|
||||
err = AcquireAccess(*dst_ag, src_pool, src_ptr);
|
||||
if (err == HSA_STATUS_SUCCESS && dst_ag->handle != cpu_agent_.handle)
|
||||
return dst_ag;
|
||||
}
|
||||
|
||||
return &cpu_agent_;
|
||||
}
|
||||
|
||||
void MemoryAsyncCopy::PrintTransactionType(Transaction *t) {
|
||||
@@ -312,7 +314,6 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) {
|
||||
std::cout << "Skipping..." << std::endl;
|
||||
return;
|
||||
}
|
||||
ASSERT_NE(cpy_ag, nullptr);
|
||||
|
||||
err = hsa_amd_memory_async_copy(ptr_src, *cpy_ag, host_ptr_src, *cpy_ag,
|
||||
size, 0, NULL, s);
|
||||
@@ -375,7 +376,7 @@ void MemoryAsyncCopy::RunBenchmarkWithVerification(Transaction *t) {
|
||||
|
||||
|
||||
err = hsa_amd_memory_async_copy(host_ptr_dst, cpu_agent_, ptr_dst,
|
||||
dst_agent, size, 0, NULL, s);
|
||||
dst_agent, Size[i], 0, NULL, s);
|
||||
ASSERT_EQ(HSA_STATUS_SUCCESS, err);
|
||||
|
||||
while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
|
||||
@@ -56,11 +56,6 @@
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "suites/test_common/test_base.h"
|
||||
|
||||
hsa_agent_t *
|
||||
AcquireAsyncCopyAccess(
|
||||
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
|
||||
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag);
|
||||
|
||||
hsa_status_t AcquireAccess(hsa_agent_t agent,
|
||||
hsa_amd_memory_pool_t pool, void* ptr);
|
||||
typedef enum TransType
|
||||
@@ -195,6 +190,11 @@ class MemoryAsyncCopy : public TestBase {
|
||||
hsa_agent_t cpu_agent() const {return cpu_agent_;}
|
||||
void set_cpu_agent(hsa_agent_t a) {cpu_agent_ = a;}
|
||||
|
||||
hsa_agent_t *
|
||||
AcquireAsyncCopyAccess(
|
||||
void *dst_ptr, hsa_amd_memory_pool_t dst_pool, hsa_agent_t *dst_ag,
|
||||
void *src_ptr, hsa_amd_memory_pool_t src_pool, hsa_agent_t *src_ag);
|
||||
|
||||
protected:
|
||||
void PrintTransactionType(Transaction *t);
|
||||
#if ROCRTST_EMULATOR_BUILD
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur