From 2d0ee10d7bfa4cc1f136ca799fbd5b84e6e97fc1 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 6 Nov 2015 16:37:38 -0500
Subject: [PATCH] P4 to Git Change 1208596 by smekhano@stas-rampitec-hsa on
2015/11/06 16:26:28
SWDEV-80874 - fixed staging buffer overflow with HSA printf
The staging buffer is ~2 times smaller than the allocated printf buffer, so if the amount of data in the printf buffer exceeds the size of the staging buffer
we hit an assertion in the memory copy. To hit the assertion, it is enough to print 2 integers with 64K workitems.
Added a loop to read the printf buffer into the staging buffer in portions.
Testing: smoke, precheckin, conformance printf with HSAIL forced, custom tests
Reviewed by German Andreev
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprintf.cpp#41 edit
[ROCm/clr commit: e18cd1d76e8dac3d65d9e4845a0f211c31a90326]
---
.../rocclr/runtime/device/gpu/gpuprintf.cpp | 81 +++++++++++--------
1 file changed, 46 insertions(+), 35 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp
index 57893512be..ee4d41bf02 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuprintf.cpp
@@ -662,46 +662,57 @@ PrintfDbgHSA::output(
return false;
}
- // Copy the buffer data (i.e., the printfID followed by the
- //argument data for each printf call in th kernel) to the staged buffer
- if (!dbgBuffer_->partialMemCopyTo(gpu,
- amd::Coord3D(2*sizeof(uint32_t), 0, 0), amd::Coord3D(0, 0, 0),
- offsetSize,*xferBufRead_)) {
- return false;
- }
-
- // Get a pointer to the buffer data
- dbgBufferPtr = reinterpret_cast(xferBufRead_->map(&gpu));
- if (NULL == dbgBufferPtr) {
- return false;
- }
-
-
- std::vector::const_iterator ita;
- uint sb = 0;
- uint sbt = 0;
-
- // parse the debug buffer
- while (sbt < offsetSize) {
- assert(((*dbgBufferPtr) < printfInfo.size()) &&
- "Cound't find the reported PrintfID!");
- const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
- sb += sizeof(uint32_t);
- for (ita = info.arguments_.begin();
- ita != info.arguments_.end(); ++ita){
- sb += *ita;
+ size_t bufSize = dev().xferRead().bufSize();
+ size_t copySize = offsetSize;
+ while (copySize != 0) {
+ // Copy the buffer data (i.e., the printfID followed by the
+ //argument data for each printf call in th kernel) to the staged buffer
+ if (!dbgBuffer_->partialMemCopyTo(gpu,
+ amd::Coord3D(2*sizeof(uint32_t) + offsetSize - copySize, 0, 0),
+ amd::Coord3D(0, 0, 0),
+ std::min(copySize, bufSize), *xferBufRead_)) {
+ return false;
}
- size_t idx = 1;
- // There's something in the debug buffer
- outputDbgBuffer(info, dbgBufferPtr, idx);
+ // Get a pointer to the buffer data
+ dbgBufferPtr = reinterpret_cast(xferBufRead_->map(&gpu));
+ if (NULL == dbgBufferPtr) {
+ return false;
+ }
- sbt += sb;
- dbgBufferPtr += sb/sizeof(uint32_t);
- sb = 0;
+
+ std::vector::const_iterator ita;
+ uint sb = 0;
+ uint sbt = 0;
+
+ // parse the debug buffer
+ while (sbt < copySize) {
+ assert(((*dbgBufferPtr) < printfInfo.size()) &&
+ "Cound't find the reported PrintfID!");
+ const PrintfInfo& info = printfInfo[(*dbgBufferPtr)];
+ sb += sizeof(uint32_t);
+ for (ita = info.arguments_.begin();
+ ita != info.arguments_.end(); ++ita){
+ sb += *ita;
+ }
+
+ if (sbt + sb > bufSize) {
+ break; // Need new portion of data in staging buffer
+ }
+
+ size_t idx = 1;
+ // There's something in the debug buffer
+ outputDbgBuffer(info, dbgBufferPtr, idx);
+
+ sbt += sb;
+ dbgBufferPtr += sb/sizeof(uint32_t);
+ sb = 0;
+ }
+
+ copySize -= sbt;
+ xferBufRead_->unmap(&gpu);
}
- xferBufRead_->unmap(&gpu);
dev().xferRead().release(gpu, *xferBufRead_);
}