From bfbc8cd09b897d3430007442682f707f2eced009 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Tue, 5 May 2020 15:44:59 -0400 Subject: [PATCH] SWDEV-234684 - hipmemcpy optimization does not work in tests Change-Id: I899d172c5b2af88c796fe9a36f97d15ac45caf94 --- rocclr/device/rocm/rocblit.cpp | 8 ++++---- rocclr/device/rocm/rocdevice.cpp | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 3a7285c2ad..7d12d50dc4 100755 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -1658,8 +1658,8 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost, if ((srcMemory.owner()->getHostMem() == nullptr) && (srcMemory.owner()->getSvmPtr() != nullptr)) { // CPU read ahead, hence release GPU memory gpu().releaseGpuMemoryFence(); - void* src = srcMemory.owner()->getSvmPtr(); - std::memcpy(dstHost, src, size[0]); + char* src = reinterpret_cast<char*>(srcMemory.owner()->getSvmPtr()); + std::memcpy(dstHost, src + origin[0], size[0]); // Set HASPENDINGDISPATCH_ FLAG. That will force L2 invalidation on flush gpu().hasPendingDispatch(); return true; @@ -1763,8 +1763,8 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo if ((dstMemory.owner()->getHostMem() == nullptr) && (dstMemory.owner()->getSvmPtr() != nullptr)) { // CPU read ahead, hence release GPU memory gpu().releaseGpuMemoryFence(); - void* dst = dstMemory.owner()->getSvmPtr(); - std::memcpy(dst, srcHost, size[0]); + char* dst = reinterpret_cast<char*>(dstMemory.owner()->getSvmPtr()); + std::memcpy(dst + origin[0], srcHost, size[0]); // Set HASPENDINGDISPATCH_ FLAG. 
Then releaseGpuMemoryFence() will use barrier to invalidate cache gpu().hasPendingDispatch(); gpu().releaseGpuMemoryFence(); diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index f15fc0449f..aea8c403c7 100755 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -906,8 +906,7 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo if (tmp == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) { dev->info_.largeBar_ = false; } else { - // Disable smallCopy optimization for now - dev->info_.largeBar_ = false; + dev->info_.largeBar_ = true; } }