From cd7727d007cc6b17e7bfedcfdf2c7330b56ff450 Mon Sep 17 00:00:00 2001 From: foreman Date: Mon, 29 Aug 2016 18:31:20 -0400 Subject: [PATCH] P4 to Git Change 1308294 by gandryey@gera-w8 on 2016/08/29 18:22:03 SWDEV-101206 - [CQE OCL][Perf][G][QR] Up to ~9% Performance drop observed while running Video Composition subtest of Compubench; Faulty CL#1306133 - Use the original logic without DMA flush. Flush on staging write helps with a blocking op only, but currently VDI doesn't have that information. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#122 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#7 edit --- rocclr/runtime/device/gpu/gpublit.cpp | 6 +++++- rocclr/runtime/device/pal/palblit.cpp | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/rocclr/runtime/device/gpu/gpublit.cpp b/rocclr/runtime/device/gpu/gpublit.cpp index 8375388911..9783b6cc38 100644 --- a/rocclr/runtime/device/gpu/gpublit.cpp +++ b/rocclr/runtime/device/gpu/gpublit.cpp @@ -60,6 +60,7 @@ DmaBlitManager::readMemoryStaged( else if (xferSize > 256 * Ki) { chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferRead().bufSize()); + chunkSize = std::max(chunkSize, 128 * Ki); } else { chunkSize = xferSize; @@ -319,7 +320,9 @@ DmaBlitManager::writeMemoryStaged( size_t chunkSize; static const bool CopyRect = false; // Flush DMA for ASYNC copy - static const bool FlushDMA = true; + // @todo Blocking write requires a flush to start earlier, + // but currently VDI doesn't provide that info + static const bool FlushDMA = false; if (dev().xferRead().bufSize() < 128 * Ki) { chunkSize = dev().xferWrite().bufSize(); @@ -327,6 +330,7 @@ DmaBlitManager::writeMemoryStaged( else if (xferSize > 256 * Ki) { chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferWrite().bufSize()); + chunkSize = std::max(chunkSize, 128 * Ki); } else { chunkSize = xferSize; diff --git a/rocclr/runtime/device/pal/palblit.cpp 
b/rocclr/runtime/device/pal/palblit.cpp index 8e210acb21..5befc73ec8 100644 --- a/rocclr/runtime/device/pal/palblit.cpp +++ b/rocclr/runtime/device/pal/palblit.cpp @@ -59,6 +59,7 @@ DmaBlitManager::readMemoryStaged( else if (xferSize > 256 * Ki) { chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferRead().bufSize()); + chunkSize = std::max(chunkSize, 128 * Ki); } else { chunkSize = xferSize; @@ -317,7 +318,9 @@ DmaBlitManager::writeMemoryStaged( size_t chunkSize; static const bool CopyRect = false; // Flush DMA for ASYNC copy - static const bool FlushDMA = true; + // @todo Blocking write requires a flush to start earlier, + // but currently VDI doesn't provide that info + static const bool FlushDMA = false; if (dev().xferRead().bufSize() < 128 * Ki) { chunkSize = dev().xferWrite().bufSize(); @@ -325,6 +328,7 @@ DmaBlitManager::writeMemoryStaged( else if (xferSize > 256 * Ki) { chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferWrite().bufSize()); + chunkSize = std::max(chunkSize, 128 * Ki); } else { chunkSize = xferSize;