From cfec8e3e2a6f5013ebea79894d38eabc408b5390 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 12 Jan 2017 17:49:08 -0500
Subject: [PATCH] P4 to Git Change 1361404 by asalmanp@asalmanp-opencl-stg on
2017/01/12 17:37:52
SWDEV-107075 - [OCL][PAL] Change the partialMemCopyTo function to move the sDMA restriction checks out of the main if condition.
ReviewBoardURL = http://ocltc.amd.com/reviews/r/12159/
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#21 edit
[ROCm/clr commit: cff6f0b79ae47511918f50ca655c2bc6133245b0]
---
.../rocclr/runtime/device/pal/palresource.cpp | 71 +++++++++++--------
1 file changed, 40 insertions(+), 31 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index f72fc39d83..598ba7bcac 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -1245,11 +1245,6 @@ Resource::partialMemCopyTo(
assert(!(desc().cardMemory_ && dstResource.desc().cardMemory_) &&
"Unsupported configuraiton!");
- gpu.engineID_ = SdmaEngine;
-
- // Wait for the resources, since runtime may use async transfers
- wait(gpu, waitOnBusyEngine);
- dstResource.wait(gpu, waitOnBusyEngine);
size_t calSrcOrigin[3], calDstOrigin[3], calSize[3];
calSrcOrigin[0] = srcOrigin[0] + pinOffset();
@@ -1262,6 +1257,42 @@ Resource::partialMemCopyTo(
calSize[1] = size[1];
calSize[2] = size[2];
+ uint64_t gpuMemoryOffset, gpuMemoryRowPitch, imageOffsetx;
+
+ if (desc().buffer_ && !dstResource.desc().buffer_) {
+ imageOffsetx = calDstOrigin[0] % dstResource.elementSize();
+ gpuMemoryOffset = calSrcOrigin[0] + offset();
+ gpuMemoryRowPitch = (calSrcOrigin[1]) ? calSrcOrigin[1] :
+ calSize[0] * dstResource.elementSize();
+ }
+ else if (!desc().buffer_ && dstResource.desc().buffer_) {
+ imageOffsetx = calSrcOrigin[0] % elementSize();
+ gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
+ gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] :
+ calSize[0] * elementSize();
+ }
+
+ if ((desc().buffer_ && !dstResource.desc().buffer_) ||
+ (!desc().buffer_ && dstResource.desc().buffer_)) {
+
+ //sDMA cannot be used for the below conditions
+ // Make sure linear pitch in bytes is 4 bytes aligned
+ if (((gpuMemoryRowPitch % 4) != 0) ||
+ // another DRM restriction... SI has 4 pixels
+ (gpuMemoryOffset % 4 != 0) ||
+ (dev().settings().sdamPageFaultWar_ &&
+ (imageOffsetx != 0))) {
+ return false;
+ }
+
+ }
+
+ gpu.engineID_ = SdmaEngine;
+
+ // Wait for the resources, since runtime may use async transfers
+ wait(gpu, waitOnBusyEngine);
+ dstResource.wait(gpu, waitOnBusyEngine);
+
if (gpu.validateSdmaOverlap(*this, dstResource)) {
gpu.flushDMA(SdmaEngine);
}
@@ -1281,23 +1312,12 @@ Resource::partialMemCopyTo(
copyRegion.imageExtent.height = calSize[1];
copyRegion.imageExtent.depth = calSize[2];
copyRegion.numSlices = 1;
- copyRegion.gpuMemoryOffset = calSrcOrigin[0] + offset();
- copyRegion.gpuMemoryRowPitch = (calSrcOrigin[1]) ? calSrcOrigin[1] :
- calSize[0] * dstResource.elementSize();
+ copyRegion.gpuMemoryOffset = gpuMemoryOffset;
+ copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
copyRegion.gpuMemoryDepthPitch = (calSrcOrigin[2]) ? calSrcOrigin[2] :
copyRegion.gpuMemoryRowPitch * calSize[1];
- // Make sure linear pitch in bytes is 4 bytes aligned
- if (((copyRegion.gpuMemoryRowPitch % 4) != 0) ||
- // another DRM restriciton... SI has 4 pixels
- (copyRegion.gpuMemoryOffset % 4 != 0) ||
- (dev().settings().sdamPageFaultWar_ &&
- (copyRegion.imageOffset.x % dstResource.elementSize() != 0))) {
- result = false;
- }
- else {
gpu.iCmd()->CmdCopyMemoryToImage(*iMem(), *dstResource.image_,
imgLayout, 1, &copyRegion);
- }
}
else if (!desc().buffer_ && dstResource.desc().buffer_) {
Pal::MemoryImageCopyRegion copyRegion = {};
@@ -1310,23 +1330,12 @@ Resource::partialMemCopyTo(
copyRegion.imageExtent.height = calSize[1];
copyRegion.imageExtent.depth = calSize[2];
copyRegion.numSlices = 1;
- copyRegion.gpuMemoryOffset = calDstOrigin[0] + dstResource.offset();
- copyRegion.gpuMemoryRowPitch = (calDstOrigin[1]) ? calDstOrigin[1] :
- calSize[0] * elementSize();
+ copyRegion.gpuMemoryOffset = gpuMemoryOffset;
+ copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
copyRegion.gpuMemoryDepthPitch = (calDstOrigin[2]) ? calDstOrigin[2] :
copyRegion.gpuMemoryRowPitch * calSize[1];
- // Make sure linear pitch in bytes is 4 bytes aligned
- if (((copyRegion.gpuMemoryRowPitch % 4) != 0) ||
- // another DRM restriciton... SI has 4 pixels
- (copyRegion.gpuMemoryOffset % 4 != 0) ||
- (dev().settings().sdamPageFaultWar_ &&
- (copyRegion.imageOffset.x % elementSize() != 0))) {
- result = false;
- }
- else {
gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout,
*dstResource.iMem(), 1, &copyRegion);
- }
}
else {
if (enableCopyRect) {