From 4312ca9e32fbb0d3a1646cd0eda8563caf4cfe0d Mon Sep 17 00:00:00 2001 From: victzhan Date: Tue, 15 Aug 2023 15:54:31 -0400 Subject: [PATCH] SWDEV-416580 - Add condition when memory has direct access, only use host fill if image is small Change-Id: I3509c4aa21f6413adad3b46273ec650f5c577ddd [ROCm/clr commit: cb426df1bd2cf2f2b2aacde95c73be04e65e0675] --- projects/clr/rocclr/device/pal/palblit.cpp | 9 ++++++--- projects/clr/rocclr/device/rocm/rocblit.cpp | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palblit.cpp b/projects/clr/rocclr/device/pal/palblit.cpp index 3074e11d19..9d88b5e84a 100644 --- a/projects/clr/rocclr/device/pal/palblit.cpp +++ b/projects/clr/rocclr/device/pal/palblit.cpp @@ -2325,9 +2325,12 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern, bool entire) const { amd::ScopedLock k(lockXferOps_); bool result = false; - - // Use host fill if memory has direct access - if (setup_.disableFillImage_ || gpuMem(memory).isHostMemDirectAccess()) { + constexpr size_t kFillImageThreshold = 256 * 256; + + // Use host fill if memory has direct access and image is small + if (setup_.disableFillImage_ || + (gpuMem(memory).isHostMemDirectAccess() && + (size.c[0] * size.c[1] * size.c[2]) <= kFillImageThreshold)) { gpu().releaseGpuMemoryFence(); result = HostBlitManager::fillImage(memory, pattern, origin, size, entire); diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index c1033007e4..befb7b6226 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -2352,9 +2352,12 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern, amd::ScopedLock k(lockXferOps_); bool result = false; - - // Use host fill if memory has direct access - if (setup_.disableFillImage_ || memory.isHostMemDirectAccess()) { + constexpr size_t kFillImageThreshold = 256 * 256; + + // Use host fill if memory has direct access and image is small + if (setup_.disableFillImage_ || + (gpuMem(memory).isHostMemDirectAccess() && + (size.c[0] * size.c[1] * size.c[2]) <= kFillImageThreshold)) { // Stall GPU before CPU access gpu().releaseGpuMemoryFence(); result = HostBlitManager::fillImage(memory, pattern, origin, size, entire);