P4 to Git Change 1338735 by gandryey@gera-w8 on 2016/11/09 10:46:19

SWDEV-86035 - Add PAL backend to OpenCL
	- Enable split logic in PAL

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#38 edit
This commit is contained in:
foreman
2016-11-09 10:55:17 -05:00
parent 0428dca9f2
commit 621f70b372
2 ha cambiato i file con 45 aggiunte e 5 eliminazioni
+36 -2
Vedi File
@@ -9,6 +9,11 @@
#include <algorithm>
#if defined(_WIN32)
#include "Windows.h"
#include "VersionHelpers.h"
#endif
namespace pal {
/*! \brief information for adjusting maximum workload time
@@ -20,6 +25,9 @@ struct ModifyMaxWorkload
{
// Per-ASIC override for Settings::maxWorkloadTime_. Settings::create() fills
// these fields for selected APU parts and, on Windows only, applies `time`
// when VerifyVersionInfo() matches the requested OS version.
// NOTE(review): the "(10x ms)" unit below looks stale - other assignments
// annotate 250000 as 250ms and 500000 as 500 ms, which implies microseconds;
// confirm and update.
uint32_t time; //!< max work load time (10x ms); 0 means no override is applied
uint32_t minorVersion; //!< OS minor version to compare against (the major version is fixed to 6 by the check)
#if defined(_WIN32)
BYTE comparisonOps; //!< Comparison option passed to VER_SET_CONDITION for both major and minor version (e.g. VER_EQUAL)
#endif
};
@@ -104,7 +112,7 @@ Settings::Settings()
numComputeRings_ = 0;
minWorkloadTime_ = 1; // 0.1 ms
maxWorkloadTime_ = 5000; // 500 ms
maxWorkloadTime_ = 500000; // 500 ms
// Controls tiled images in persistent
//!@note IOL for Linux doesn't setup tiling aperture in CMM/QS
@@ -171,6 +179,13 @@ Settings::create(
if (!aiPlus_) {
// APU systems for VI
apuSystem_ = true;
// Fix BSOD/TDR issues observed on Stoney Win7/8.1/10
minWorkloadTime_ = 1000;
modifyMaxWorkload.time = 1000; // Decided by experiment
modifyMaxWorkload.minorVersion = 1; // Win 7
#if defined(_WIN32)
modifyMaxWorkload.comparisonOps = VER_EQUAL; // Limit to Win 7 only
#endif
}
case Pal::AsicRevision::Iceland:
case Pal::AsicRevision::Tonga:
@@ -192,8 +207,11 @@ Settings::create(
// APU systems for CI
apuSystem_ = true;
// Fix BSOD/TDR issues observed on Kaveri Win7 (EPR#416903)
modifyMaxWorkload.time = 2500; // 250ms
modifyMaxWorkload.time = 250000; // 250ms
modifyMaxWorkload.minorVersion = 1; // Win 7
#if defined(_WIN32)
modifyMaxWorkload.comparisonOps = VER_EQUAL; // limit to Win 7
#endif
}
// Fall through ...
case Pal::AsicRevision::Bonaire:
@@ -258,6 +276,22 @@ Settings::create(
return false;
}
#if defined(_WIN32)
if (modifyMaxWorkload.time > 0) {
OSVERSIONINFOEX versionInfo = { 0 };
versionInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
versionInfo.dwMajorVersion = 6;
versionInfo.dwMinorVersion = modifyMaxWorkload.minorVersion;
DWORDLONG conditionMask = 0;
VER_SET_CONDITION(conditionMask, VER_MAJORVERSION, modifyMaxWorkload.comparisonOps);
VER_SET_CONDITION(conditionMask, VER_MINORVERSION, modifyMaxWorkload.comparisonOps);
if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION | VER_MINORVERSION, conditionMask)) {
maxWorkloadTime_ = modifyMaxWorkload.time;
}
}
#endif // defined(_WIN32)
// Enable atomics support
enableExtension(ClKhrInt64BaseAtomics);
enableExtension(ClKhrInt64ExtendedAtomics);
+9 -3
Vedi File
@@ -468,7 +468,7 @@ VirtualGPU::DmaFlushMgmt::DmaFlushMgmt(const Device& dev)
aluCnt_ = dev.info().simdPerCU_ * dev.info().simdWidth_ * dev.info().maxComputeUnits_;
maxDispatchWorkload_ = static_cast<uint64_t>(dev.info().maxClockFrequency_) *
// find time in us
100 * dev.settings().maxWorkloadTime_ *
dev.settings().maxWorkloadTime_ *
aluCnt_;
resetCbWorkload(dev);
}
@@ -479,7 +479,7 @@ VirtualGPU::DmaFlushMgmt::resetCbWorkload(const Device& dev)
cbWorkload_ = 0;
maxCbWorkload_ = static_cast<uint64_t>(dev.info().maxClockFrequency_) *
// find time in us
100 * dev.settings().minWorkloadTime_ * aluCnt_;
dev.settings().minWorkloadTime_ * aluCnt_;
}
void
@@ -1995,6 +1995,12 @@ VirtualGPU::submitKernelInternal(
dbgManager->allocParamMemList(numParams);
}
bool needFlush = false;
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
needFlush = true;
}
size_t newOffset[3] = {0, 0, 0};
size_t newGlobalSize[3] = {0, 0, 0};
@@ -2296,7 +2302,7 @@ VirtualGPU::submitKernelInternal(
}
// Update the global GPU event
setGpuEvent(gpuEvent);
setGpuEvent(gpuEvent, needFlush);
if (!printfDbgHSA().output(*this, printfEnabled, hsaKernel.printfInfo())) {
LogError("Couldn't read printf data from the buffer!\n");