From 621f70b3724654e1f810de44a080c60cc4e9012a Mon Sep 17 00:00:00 2001 From: foreman Date: Wed, 9 Nov 2016 10:55:17 -0500 Subject: [PATCH] P4 to Git Change 1338735 by gandryey@gera-w8 on 2016/11/09 10:46:19 SWDEV-86035 - Add PAL backend to OpenCL - Enable split logic in PAL Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#38 edit --- rocclr/runtime/device/pal/palsettings.cpp | 38 +++++++++++++++++++++-- rocclr/runtime/device/pal/palvirtual.cpp | 12 +++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/rocclr/runtime/device/pal/palsettings.cpp b/rocclr/runtime/device/pal/palsettings.cpp index 12ff991110..c76d835ded 100644 --- a/rocclr/runtime/device/pal/palsettings.cpp +++ b/rocclr/runtime/device/pal/palsettings.cpp @@ -9,6 +9,11 @@ #include +#if defined(_WIN32) +#include "Windows.h" +#include "VersionHelpers.h" +#endif + namespace pal { /*! \brief information for adjusting maximum workload time @@ -20,6 +25,9 @@ struct ModifyMaxWorkload { uint32_t time; //!< max work load time (10x ms) uint32_t minorVersion; //!< OS minor version +#if defined(_WIN32) + BYTE comparisonOps; //!< Comparison option +#endif }; @@ -104,7 +112,7 @@ Settings::Settings() numComputeRings_ = 0; minWorkloadTime_ = 1; // 0.1 ms - maxWorkloadTime_ = 5000; // 500 ms + maxWorkloadTime_ = 500000; // 500 ms // Controls tiled images in persistent //!@note IOL for Linux doesn't setup tiling aperture in CMM/QS @@ -171,6 +179,13 @@ Settings::create( if (!aiPlus_) { // APU systems for VI apuSystem_ = true; + // Fix BSOD/TDR issues observed on Stoney Win7/8.1/10 + minWorkloadTime_ = 1000; + modifyMaxWorkload.time = 1000; // Decided by experiment + modifyMaxWorkload.minorVersion = 1; // Win 7 +#if defined(_WIN32) + modifyMaxWorkload.comparisonOps = VER_EQUAL; // Limit to Win 7 only +#endif } case Pal::AsicRevision::Iceland: case Pal::AsicRevision::Tonga: @@ -192,8 +207,11 @@ Settings::create( // APU systems for CI apuSystem_ = true; // Fix BSOD/TDR issues observed on Kaveri Win7 (EPR#416903) - modifyMaxWorkload.time = 2500; // 250ms + modifyMaxWorkload.time = 250000; // 250ms modifyMaxWorkload.minorVersion = 1; // Win 7 +#if defined(_WIN32) + modifyMaxWorkload.comparisonOps = VER_EQUAL; // limit to Win 7 +#endif } // Fall through ... case Pal::AsicRevision::Bonaire: @@ -258,6 +276,22 @@ Settings::create( return false; } +#if defined(_WIN32) + if (modifyMaxWorkload.time > 0) { + OSVERSIONINFOEX versionInfo = { 0 }; + versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX); + versionInfo.dwMajorVersion = 6; + versionInfo.dwMinorVersion = modifyMaxWorkload.minorVersion; + + DWORDLONG conditionMask = 0; + VER_SET_CONDITION(conditionMask, VER_MAJORVERSION, modifyMaxWorkload.comparisonOps); + VER_SET_CONDITION(conditionMask, VER_MINORVERSION, modifyMaxWorkload.comparisonOps); + if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION | VER_MINORVERSION, conditionMask)) { + maxWorkloadTime_ = modifyMaxWorkload.time; + } + } +#endif // defined(_WIN32) + // Enable atomics support enableExtension(ClKhrInt64BaseAtomics); enableExtension(ClKhrInt64ExtendedAtomics); diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp index fc8b368131..917e7ecc64 100644 --- a/rocclr/runtime/device/pal/palvirtual.cpp +++ b/rocclr/runtime/device/pal/palvirtual.cpp @@ -468,7 +468,7 @@ VirtualGPU::DmaFlushMgmt::DmaFlushMgmt(const Device& dev) aluCnt_ = dev.info().simdPerCU_ * dev.info().simdWidth_ * dev.info().maxComputeUnits_; maxDispatchWorkload_ = static_cast(dev.info().maxClockFrequency_) * // find time in us - 100 * dev.settings().maxWorkloadTime_ * + dev.settings().maxWorkloadTime_ * aluCnt_; resetCbWorkload(dev); } @@ -479,7 +479,7 @@ VirtualGPU::DmaFlushMgmt::resetCbWorkload(const Device& dev) cbWorkload_ = 0; maxCbWorkload_ = static_cast(dev.info().maxClockFrequency_) * // find time in us - 100 * dev.settings().minWorkloadTime_ * aluCnt_; + dev.settings().minWorkloadTime_ * aluCnt_; } void @@ -1995,6 +1995,12 @@ VirtualGPU::submitKernelInternal( dbgManager->allocParamMemList(numParams); } + bool needFlush = false; + dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize()); + if (dmaFlushMgmt().dispatchSplitSize() != 0) { + needFlush = true; + } + size_t newOffset[3] = {0, 0, 0}; size_t newGlobalSize[3] = {0, 0, 0}; @@ -2296,7 +2302,7 @@ VirtualGPU::submitKernelInternal( } // Update the global GPU event - setGpuEvent(gpuEvent); + setGpuEvent(gpuEvent, needFlush); if (!printfDbgHSA().output(*this, printfEnabled, hsaKernel.printfInfo())) { LogError("Couldn't read printf data from the buffer!\n");