P4 to Git Change 1338735 by gandryey@gera-w8 on 2016/11/09 10:46:19
SWDEV-86035 - Add PAL backend to OpenCL - Enable split logic in PAL Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#38 edit
This commit is contained in:
@@ -9,6 +9,11 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "Windows.h"
|
||||
#include "VersionHelpers.h"
|
||||
#endif
|
||||
|
||||
namespace pal {
|
||||
|
||||
/*! \brief information for adjusting maximum workload time
|
||||
@@ -20,6 +25,9 @@ struct ModifyMaxWorkload
|
||||
{
|
||||
uint32_t time; //!< max work load time (10x ms)
|
||||
uint32_t minorVersion; //!< OS minor version
|
||||
#if defined(_WIN32)
|
||||
BYTE comparisonOps; //!< Comparison option
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -104,7 +112,7 @@ Settings::Settings()
|
||||
numComputeRings_ = 0;
|
||||
|
||||
minWorkloadTime_ = 1; // 0.1 ms
|
||||
maxWorkloadTime_ = 5000; // 500 ms
|
||||
maxWorkloadTime_ = 500000; // 500 ms
|
||||
|
||||
// Controls tiled images in persistent
|
||||
//!@note IOL for Linux doesn't setup tiling aperture in CMM/QS
|
||||
@@ -171,6 +179,13 @@ Settings::create(
|
||||
if (!aiPlus_) {
|
||||
// APU systems for VI
|
||||
apuSystem_ = true;
|
||||
// Fix BSOD/TDR issues observed on Stoney Win7/8.1/10
|
||||
minWorkloadTime_ = 1000;
|
||||
modifyMaxWorkload.time = 1000; // Decided by experiment
|
||||
modifyMaxWorkload.minorVersion = 1; // Win 7
|
||||
#if defined(_WIN32)
|
||||
modifyMaxWorkload.comparisonOps = VER_EQUAL; // Limit to Win 7 only
|
||||
#endif
|
||||
}
|
||||
case Pal::AsicRevision::Iceland:
|
||||
case Pal::AsicRevision::Tonga:
|
||||
@@ -192,8 +207,11 @@ Settings::create(
|
||||
// APU systems for CI
|
||||
apuSystem_ = true;
|
||||
// Fix BSOD/TDR issues observed on Kaveri Win7 (EPR#416903)
|
||||
modifyMaxWorkload.time = 2500; // 250ms
|
||||
modifyMaxWorkload.time = 250000; // 250ms
|
||||
modifyMaxWorkload.minorVersion = 1; // Win 7
|
||||
#if defined(_WIN32)
|
||||
modifyMaxWorkload.comparisonOps = VER_EQUAL; // limit to Win 7
|
||||
#endif
|
||||
}
|
||||
// Fall through ...
|
||||
case Pal::AsicRevision::Bonaire:
|
||||
@@ -258,6 +276,22 @@ Settings::create(
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
if (modifyMaxWorkload.time > 0) {
|
||||
OSVERSIONINFOEX versionInfo = { 0 };
|
||||
versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
|
||||
versionInfo.dwMajorVersion = 6;
|
||||
versionInfo.dwMinorVersion = modifyMaxWorkload.minorVersion;
|
||||
|
||||
DWORDLONG conditionMask = 0;
|
||||
VER_SET_CONDITION(conditionMask, VER_MAJORVERSION, modifyMaxWorkload.comparisonOps);
|
||||
VER_SET_CONDITION(conditionMask, VER_MINORVERSION, modifyMaxWorkload.comparisonOps);
|
||||
if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION | VER_MINORVERSION, conditionMask)) {
|
||||
maxWorkloadTime_ = modifyMaxWorkload.time;
|
||||
}
|
||||
}
|
||||
#endif // defined(_WIN32)
|
||||
|
||||
// Enable atomics support
|
||||
enableExtension(ClKhrInt64BaseAtomics);
|
||||
enableExtension(ClKhrInt64ExtendedAtomics);
|
||||
|
||||
@@ -468,7 +468,7 @@ VirtualGPU::DmaFlushMgmt::DmaFlushMgmt(const Device& dev)
|
||||
aluCnt_ = dev.info().simdPerCU_ * dev.info().simdWidth_ * dev.info().maxComputeUnits_;
|
||||
maxDispatchWorkload_ = static_cast<uint64_t>(dev.info().maxClockFrequency_) *
|
||||
// find time in us
|
||||
100 * dev.settings().maxWorkloadTime_ *
|
||||
dev.settings().maxWorkloadTime_ *
|
||||
aluCnt_;
|
||||
resetCbWorkload(dev);
|
||||
}
|
||||
@@ -479,7 +479,7 @@ VirtualGPU::DmaFlushMgmt::resetCbWorkload(const Device& dev)
|
||||
cbWorkload_ = 0;
|
||||
maxCbWorkload_ = static_cast<uint64_t>(dev.info().maxClockFrequency_) *
|
||||
// find time in us
|
||||
100 * dev.settings().minWorkloadTime_ * aluCnt_;
|
||||
dev.settings().minWorkloadTime_ * aluCnt_;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1995,6 +1995,12 @@ VirtualGPU::submitKernelInternal(
|
||||
dbgManager->allocParamMemList(numParams);
|
||||
}
|
||||
|
||||
bool needFlush = false;
|
||||
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
|
||||
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
|
||||
needFlush = true;
|
||||
}
|
||||
|
||||
size_t newOffset[3] = {0, 0, 0};
|
||||
size_t newGlobalSize[3] = {0, 0, 0};
|
||||
|
||||
@@ -2296,7 +2302,7 @@ VirtualGPU::submitKernelInternal(
|
||||
}
|
||||
|
||||
// Update the global GPU event
|
||||
setGpuEvent(gpuEvent);
|
||||
setGpuEvent(gpuEvent, needFlush);
|
||||
|
||||
if (!printfDbgHSA().output(*this, printfEnabled, hsaKernel.printfInfo())) {
|
||||
LogError("Couldn't read printf data from the buffer!\n");
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user