From 621f70b3724654e1f810de44a080c60cc4e9012a Mon Sep 17 00:00:00 2001
From: foreman
Date: Wed, 9 Nov 2016 10:55:17 -0500
Subject: [PATCH] P4 to Git Change 1338735 by gandryey@gera-w8 on 2016/11/09
10:46:19
SWDEV-86035 - Add PAL backend to OpenCL
- Enable split logic in PAL
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#38 edit
---
rocclr/runtime/device/pal/palsettings.cpp | 38 +++++++++++++++++++++--
rocclr/runtime/device/pal/palvirtual.cpp | 12 +++++--
2 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp
index 12ff991110..c76d835ded 100644
--- a/rocclr/runtime/device/pal/palsettings.cpp
+++ b/rocclr/runtime/device/pal/palsettings.cpp
@@ -9,6 +9,11 @@
#include
+#if defined(_WIN32)
+#include "Windows.h"
+#include "VersionHelpers.h"
+#endif
+
namespace pal {
/*! \brief information for adjusting maximum workload time
@@ -20,6 +25,9 @@ struct ModifyMaxWorkload
{
uint32_t time; //!< max work load time (10x ms)
uint32_t minorVersion; //!< OS minor version
+#if defined(_WIN32)
+ BYTE comparisonOps; //!< Condition (e.g. VER_EQUAL) used for both major and minor version in VerifyVersionInfo
+#endif
};
@@ -104,7 +112,7 @@ Settings::Settings()
numComputeRings_ = 0;
minWorkloadTime_ = 1; // 0.1 ms
- maxWorkloadTime_ = 5000; // 500 ms
+ maxWorkloadTime_ = 500000; // 500 ms (value is in us)
// Controls tiled images in persistent
//!@note IOL for Linux doesn't setup tiling aperture in CMM/QS
@@ -171,6 +179,13 @@ Settings::create(
if (!aiPlus_) {
// APU systems for VI
apuSystem_ = true;
+ // Fix BSOD/TDR issues observed on Stoney Win7/8.1/10
+ minWorkloadTime_ = 1000;
+ modifyMaxWorkload.time = 1000; // Decided by experiment
+ modifyMaxWorkload.minorVersion = 1; // Win 7
+#if defined(_WIN32)
+ modifyMaxWorkload.comparisonOps = VER_EQUAL; // Limit to Win 7 only
+#endif
}
case Pal::AsicRevision::Iceland:
case Pal::AsicRevision::Tonga:
@@ -192,8 +207,11 @@ Settings::create(
// APU systems for CI
apuSystem_ = true;
// Fix BSOD/TDR issues observed on Kaveri Win7 (EPR#416903)
- modifyMaxWorkload.time = 2500; // 250ms
+ modifyMaxWorkload.time = 250000; // 250 ms (value is in us)
modifyMaxWorkload.minorVersion = 1; // Win 7
+#if defined(_WIN32)
+ modifyMaxWorkload.comparisonOps = VER_EQUAL; // limit to Win 7
+#endif
}
// Fall through ...
case Pal::AsicRevision::Bonaire:
@@ -258,6 +276,22 @@ Settings::create(
return false;
}
+#if defined(_WIN32)
+ if (modifyMaxWorkload.time > 0) {
+ OSVERSIONINFOEX versionInfo = { 0 };
+ versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
+ versionInfo.dwMajorVersion = 6;
+ versionInfo.dwMinorVersion = modifyMaxWorkload.minorVersion;
+
+ DWORDLONG conditionMask = 0;
+ VER_SET_CONDITION(conditionMask, VER_MAJORVERSION, modifyMaxWorkload.comparisonOps);
+ VER_SET_CONDITION(conditionMask, VER_MINORVERSION, modifyMaxWorkload.comparisonOps);
+ if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION | VER_MINORVERSION, conditionMask)) {
+ maxWorkloadTime_ = modifyMaxWorkload.time;
+ }
+ }
+#endif // defined(_WIN32)
+
// Enable atomics support
enableExtension(ClKhrInt64BaseAtomics);
enableExtension(ClKhrInt64ExtendedAtomics);
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index fc8b368131..917e7ecc64 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -468,7 +468,7 @@ VirtualGPU::DmaFlushMgmt::DmaFlushMgmt(const Device& dev)
aluCnt_ = dev.info().simdPerCU_ * dev.info().simdWidth_ * dev.info().maxComputeUnits_;
maxDispatchWorkload_ = static_cast(dev.info().maxClockFrequency_) *
// find time in us
- 100 * dev.settings().maxWorkloadTime_ *
+ dev.settings().maxWorkloadTime_ *
aluCnt_;
resetCbWorkload(dev);
}
@@ -479,7 +479,7 @@ VirtualGPU::DmaFlushMgmt::resetCbWorkload(const Device& dev)
cbWorkload_ = 0;
maxCbWorkload_ = static_cast(dev.info().maxClockFrequency_) *
// find time in us
- 100 * dev.settings().minWorkloadTime_ * aluCnt_;
+ dev.settings().minWorkloadTime_ * aluCnt_;
}
void
@@ -1995,6 +1995,12 @@ VirtualGPU::submitKernelInternal(
dbgManager->allocParamMemList(numParams);
}
+ bool needFlush = false;
+ dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
+ if (dmaFlushMgmt().dispatchSplitSize() != 0) {
+ needFlush = true;
+ }
+
size_t newOffset[3] = {0, 0, 0};
size_t newGlobalSize[3] = {0, 0, 0};
@@ -2296,7 +2302,7 @@ VirtualGPU::submitKernelInternal(
}
// Update the global GPU event
- setGpuEvent(gpuEvent);
+ setGpuEvent(gpuEvent, needFlush);
if (!printfDbgHSA().output(*this, printfEnabled, hsaKernel.printfInfo())) {
LogError("Couldn't read printf data from the buffer!\n");