From cd7727d007cc6b17e7bfedcfdf2c7330b56ff450 Mon Sep 17 00:00:00 2001
From: foreman
Date: Mon, 29 Aug 2016 18:31:20 -0400
Subject: [PATCH] P4 to Git Change 1308294 by gandryey@gera-w8 on 2016/08/29
18:22:03
SWDEV-101206 - [CQE OCL][Perf][G][QR] Up to ~9% Performance drop observed while running Video Composition subtest of Compubench; Faulty CL#1306133
- Restore the original logic without a DMA flush. Flushing on a staging write helps only with a blocking operation, but VDI currently doesn't have the information to tell whether an operation is blocking.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpublit.cpp#122 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#7 edit
---
rocclr/runtime/device/gpu/gpublit.cpp | 6 +++++-
rocclr/runtime/device/pal/palblit.cpp | 6 +++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/rocclr/runtime/device/gpu/gpublit.cpp b/rocclr/runtime/device/gpu/gpublit.cpp
index 8375388911..9783b6cc38 100644
--- a/rocclr/runtime/device/gpu/gpublit.cpp
+++ b/rocclr/runtime/device/gpu/gpublit.cpp
@@ -60,6 +60,7 @@ DmaBlitManager::readMemoryStaged(
else if (xferSize > 256 * Ki) {
chunkSize = std::min(amd::alignUp(xferSize / 4, 256),
dev().xferRead().bufSize());
+ chunkSize = std::max(chunkSize, 128 * Ki);
}
else {
chunkSize = xferSize;
@@ -319,7 +320,9 @@ DmaBlitManager::writeMemoryStaged(
size_t chunkSize;
static const bool CopyRect = false;
// Flush DMA for ASYNC copy
- static const bool FlushDMA = true;
+ // @todo Blocking write requires a flush to start earlier,
+ // but currently VDI doesn't provide that info
+ static const bool FlushDMA = false;
if (dev().xferRead().bufSize() < 128 * Ki) {
chunkSize = dev().xferWrite().bufSize();
@@ -327,6 +330,7 @@ DmaBlitManager::writeMemoryStaged(
else if (xferSize > 256 * Ki) {
chunkSize = std::min(amd::alignUp(xferSize / 4, 256),
dev().xferWrite().bufSize());
+ chunkSize = std::max(chunkSize, 128 * Ki);
}
else {
chunkSize = xferSize;
diff --git a/rocclr/runtime/device/pal/palblit.cpp b/rocclr/runtime/device/pal/palblit.cpp
index 8e210acb21..5befc73ec8 100644
--- a/rocclr/runtime/device/pal/palblit.cpp
+++ b/rocclr/runtime/device/pal/palblit.cpp
@@ -59,6 +59,7 @@ DmaBlitManager::readMemoryStaged(
else if (xferSize > 256 * Ki) {
chunkSize = std::min(amd::alignUp(xferSize / 4, 256),
dev().xferRead().bufSize());
+ chunkSize = std::max(chunkSize, 128 * Ki);
}
else {
chunkSize = xferSize;
@@ -317,7 +318,9 @@ DmaBlitManager::writeMemoryStaged(
size_t chunkSize;
static const bool CopyRect = false;
// Flush DMA for ASYNC copy
- static const bool FlushDMA = true;
+ // @todo Blocking write requires a flush to start earlier,
+ // but currently VDI doesn't provide that info
+ static const bool FlushDMA = false;
if (dev().xferRead().bufSize() < 128 * Ki) {
chunkSize = dev().xferWrite().bufSize();
@@ -325,6 +328,7 @@ DmaBlitManager::writeMemoryStaged(
else if (xferSize > 256 * Ki) {
chunkSize = std::min(amd::alignUp(xferSize / 4, 256),
dev().xferWrite().bufSize());
+ chunkSize = std::max(chunkSize, 128 * Ki);
}
else {
chunkSize = xferSize;