P4 to Git Change 1271191 by gandryey@gera-w8 on 2016/05/19 18:42:50
SWDEV-86035 - Add PAL backend to OpenCL - Adds SDMA workaround for pagefault - Removes directSRD and hsail flags - Fixes a BSOD with the latest PAL on Fiji. KMD expects a valid UMD client. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#315 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#225 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#345 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.hpp#96 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palbe/src/core/os/win/winPlatform.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#7 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#6 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#3 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#254 edit
Этот коммит содержится в:
@@ -3889,30 +3889,23 @@ HSAILKernel::loadArguments(
|
||||
//! \note syncCache may call DRM transfer
|
||||
image->wait(gpu, WaitOnBusyEngine);
|
||||
|
||||
if (dev().settings().hsailDirectSRD_) {
|
||||
// Image arguments are of size 48 bytes and aligned to 16 bytes
|
||||
WriteAqlArg(&aqlArgBuf, image->hwState(),
|
||||
HsaImageObjectSize, HsaImageObjectAlignment);
|
||||
//! \note Special case for the image views.
|
||||
//! Copy SRD to CB1, so blit manager will be able to release
|
||||
//! this view without a wait for SRD resource.
|
||||
if (image->memoryType() == Resource::ImageView) {
|
||||
// Copy the current structre into CB1
|
||||
memcpy(aqlStruct, image->hwState(), HsaImageObjectSize);
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(HsaImageObjectSize);
|
||||
// Then use a pointer in aqlArgBuffer to CB1
|
||||
uint64_t srd = cb->vmAddress() + cb->wrtOffset();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
memList.push_back(cb);
|
||||
}
|
||||
else {
|
||||
//! \note Special case for the image views.
|
||||
//! Copy SRD to CB1, so blit manager will be able to release
|
||||
//! this view without a wait for SRD resource.
|
||||
if (image->memoryType() == Resource::ImageView) {
|
||||
// Copy the current structre into CB1
|
||||
memcpy(aqlStruct, image->hwState(), HsaImageObjectSize);
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(HsaImageObjectSize);
|
||||
// Then use a pointer in aqlArgBuffer to CB1
|
||||
uint64_t srd = cb->vmAddress() + cb->wrtOffset();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
memList.push_back(cb);
|
||||
}
|
||||
else {
|
||||
uint64_t srd = image->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
uint64_t srd = image->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
|
||||
//! @todo Compiler has to return read/write attributes
|
||||
@@ -3929,15 +3922,9 @@ HSAILKernel::loadArguments(
|
||||
*reinterpret_cast<amd::Sampler* const*>(paramaddr);
|
||||
const Sampler* gpuSampler = static_cast<Sampler*>
|
||||
(sampler->getDeviceSampler(dev()));
|
||||
if (dev().settings().hsailDirectSRD_) {
|
||||
WriteAqlArg(&aqlArgBuf, gpuSampler->hwState(),
|
||||
HsaSamplerObjectSize, HsaSamplerObjectAlignment);
|
||||
}
|
||||
else {
|
||||
uint64_t srd = gpuSampler->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
uint64_t srd = gpuSampler->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
break;
|
||||
}
|
||||
case HSAIL_ARGTYPE_QUEUE: {
|
||||
|
||||
@@ -2494,7 +2494,6 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
|
||||
assert(false);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
assert(!program_->dev().settings().hsailDirectSRD_);
|
||||
gpu::Sampler* sampler = new gpu::Sampler(program_->dev());
|
||||
if (!sampler || !sampler->create(state)) {
|
||||
delete sampler;
|
||||
|
||||
@@ -135,9 +135,6 @@ Settings::Settings()
|
||||
// Don't support platform atomics by default.
|
||||
svmAtomics_ = false;
|
||||
|
||||
// Use direct SRD by default
|
||||
hsailDirectSRD_ = GPU_DIRECT_SRD;
|
||||
|
||||
// Use host queue for device enqueuing by default
|
||||
useDeviceQueue_ = GPU_USE_DEVICE_QUEUE;
|
||||
|
||||
|
||||
@@ -69,10 +69,9 @@ public:
|
||||
uint svmFineGrainSystem_: 1; //!< SVM fine grain system support
|
||||
uint apuSystem_: 1; //!< Device is APU system with shared memory
|
||||
uint asyncMemCopy_: 1; //!< Use async memory transfers
|
||||
uint hsailDirectSRD_: 1; //!< Controls direct SRD for HSAIL
|
||||
uint useDeviceQueue_: 1; //!< Submit to separate device queue
|
||||
uint singleFpDenorm_: 1; //!< Support Single FP Denorm
|
||||
uint reserved_: 3;
|
||||
uint reserved_: 4;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
@@ -149,19 +149,14 @@ NullDevice::create(Pal::GfxIpLevel ipLevel)
|
||||
device::Program*
|
||||
NullDevice::createProgram(amd::option::Options* options)
|
||||
{
|
||||
device::Program* nullProgram;
|
||||
if (settings().hsail_) {
|
||||
nullProgram = new HSAILProgram(*this);
|
||||
}
|
||||
else {
|
||||
// AMDIL path
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
if (nullProgram == nullptr) {
|
||||
device::Program* program;
|
||||
program = new HSAILProgram(*this);
|
||||
|
||||
if (program == nullptr) {
|
||||
LogError("Memory allocation has failed!");
|
||||
}
|
||||
|
||||
return nullProgram;
|
||||
return program;
|
||||
}
|
||||
|
||||
void NullDevice::fillDeviceInfo(
|
||||
@@ -643,19 +638,26 @@ Device::create(Pal::IDevice* device)
|
||||
// Update HW info for the device
|
||||
hwInfo_ = &DeviceInfo[static_cast<uint>(properties().revision)];
|
||||
|
||||
// Find the number of available engines
|
||||
numComputeEngines_ =
|
||||
properties().engineProperties[Pal::QueueTypeCompute].engineCount;
|
||||
numDmaEngines_ =
|
||||
properties().engineProperties[Pal::QueueTypeDma].engineCount;
|
||||
|
||||
Pal::PalPublicSettings*const palSettings = iDev()->GetPublicSettings();
|
||||
// Modify settings here
|
||||
// palSettings ...
|
||||
palSettings->textureOptLevel = Pal::TextureFilterOptimizationsDisabled;
|
||||
|
||||
// Commit the new settings for the device
|
||||
result = iDev()->CommitSettingsAndInit();
|
||||
if (result == Pal::Result::Success) {
|
||||
Pal::DeviceFinalizeInfo finalizeInfo = {};
|
||||
|
||||
// Request 2 compute engines
|
||||
finalizeInfo.engineCounts[Pal::QueueTypeCompute] = 2;
|
||||
// Request 2 SDMA engines
|
||||
finalizeInfo.engineCounts[Pal::QueueTypeDma] = 2;
|
||||
// Request all compute engines
|
||||
finalizeInfo.engineCounts[Pal::QueueTypeCompute] = numComputeEngines_;
|
||||
// Request all SDMA engines
|
||||
finalizeInfo.engineCounts[Pal::QueueTypeDma] = numDmaEngines_;
|
||||
|
||||
result = iDev()->Finalize(finalizeInfo);
|
||||
}
|
||||
@@ -670,12 +672,6 @@ Device::create(Pal::IDevice* device)
|
||||
appProfile_.reportAsOCL12Device())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Find the number of available engines
|
||||
numComputeEngines_ =
|
||||
properties().engineProperties[Pal::QueueTypeCompute].engineCount;
|
||||
numDmaEngines_ =
|
||||
properties().engineProperties[Pal::QueueTypeDma].engineCount;
|
||||
numComputeEngines_ = std::min(numComputeEngines_, settings().numComputeRings_);
|
||||
|
||||
amd::Context::Info info = {0};
|
||||
@@ -816,22 +812,20 @@ Device::initializeHeapResources()
|
||||
}
|
||||
|
||||
// Delay compilation due to brig_loader memory allocation
|
||||
if (settings().hsail_ || (settings().oclVersion_ == OpenCL20)) {
|
||||
const char* scheduler = nullptr;
|
||||
const char* ocl20 = nullptr;
|
||||
if (settings().oclVersion_ == OpenCL20) {
|
||||
scheduler = SchedulerSourceCode;
|
||||
ocl20 = "-cl-std=CL2.0";
|
||||
}
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
// Create blit programs
|
||||
if (blitProgram_ == nullptr ||
|
||||
!blitProgram_->create(this, scheduler, ocl20)) {
|
||||
delete blitProgram_;
|
||||
blitProgram_ = nullptr;
|
||||
LogError("Couldn't create blit kernels!");
|
||||
return false;
|
||||
}
|
||||
const char* scheduler = nullptr;
|
||||
const char* ocl20 = nullptr;
|
||||
if (settings().oclVersion_ == OpenCL20) {
|
||||
scheduler = SchedulerSourceCode;
|
||||
ocl20 = "-cl-std=CL2.0";
|
||||
}
|
||||
blitProgram_ = new BlitProgram(context_);
|
||||
// Create blit programs
|
||||
if (blitProgram_ == nullptr ||
|
||||
!blitProgram_->create(this, scheduler, ocl20)) {
|
||||
delete blitProgram_;
|
||||
blitProgram_ = nullptr;
|
||||
LogError("Couldn't create blit kernels!");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create a synchronized transfer queue
|
||||
@@ -900,20 +894,13 @@ Device::createVirtualDevice(
|
||||
device::Program*
|
||||
Device::createProgram(amd::option::Options* options)
|
||||
{
|
||||
device::Program* gpuProgram;
|
||||
if (settings().hsail_) {
|
||||
gpuProgram = new HSAILProgram(*this);
|
||||
}
|
||||
else {
|
||||
ShouldNotReachHere();
|
||||
//AMDIL
|
||||
//gpuProgram = new Program(*this);
|
||||
}
|
||||
if (gpuProgram == nullptr) {
|
||||
device::Program* program;
|
||||
program = new HSAILProgram(*this);
|
||||
if (program == nullptr) {
|
||||
LogError("We failed memory allocation for program!");
|
||||
}
|
||||
|
||||
return gpuProgram;
|
||||
return program;
|
||||
}
|
||||
|
||||
//! Requested devices list as configured by the GPU_DEVICE_ORDINAL
|
||||
@@ -1410,14 +1397,12 @@ bool
|
||||
Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler) const
|
||||
{
|
||||
*sampler = nullptr;
|
||||
if (settings().hsail_ || (settings().oclVersion_ >= OpenCL20)) {
|
||||
Sampler* gpuSampler = new Sampler(*this);
|
||||
if ((nullptr == gpuSampler) || !gpuSampler->create(owner)) {
|
||||
delete gpuSampler;
|
||||
return false;
|
||||
}
|
||||
*sampler = gpuSampler;
|
||||
Sampler* gpuSampler = new Sampler(*this);
|
||||
if ((nullptr == gpuSampler) || !gpuSampler->create(owner)) {
|
||||
delete gpuSampler;
|
||||
return false;
|
||||
}
|
||||
*sampler = gpuSampler;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -1063,30 +1063,23 @@ HSAILKernel::loadArguments(
|
||||
//! \note syncCache may call DRM transfer
|
||||
image->wait(gpu, WaitOnBusyEngine);
|
||||
|
||||
if (dev().settings().hsailDirectSRD_) {
|
||||
// Image arguments are of size 48 bytes and aligned to 16 bytes
|
||||
WriteAqlArg(&aqlArgBuf, image->hwState(),
|
||||
HsaImageObjectSize, HsaImageObjectAlignment);
|
||||
//! \note Special case for the image views.
|
||||
//! Copy SRD to CB1, so blit manager will be able to release
|
||||
//! this view without a wait for SRD resource.
|
||||
if (image->memoryType() == Resource::ImageView) {
|
||||
// Copy the current structre into CB1
|
||||
memcpy(aqlStruct, image->hwState(), HsaImageObjectSize);
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(HsaImageObjectSize);
|
||||
// Then use a pointer in aqlArgBuffer to CB1
|
||||
uint64_t srd = cb->vmAddress() + cb->wrtOffset();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
memList.push_back(cb);
|
||||
}
|
||||
else {
|
||||
//! \note Special case for the image views.
|
||||
//! Copy SRD to CB1, so blit manager will be able to release
|
||||
//! this view without a wait for SRD resource.
|
||||
if (image->memoryType() == Resource::ImageView) {
|
||||
// Copy the current structre into CB1
|
||||
memcpy(aqlStruct, image->hwState(), HsaImageObjectSize);
|
||||
ConstBuffer* cb = gpu.constBufs_[1];
|
||||
cb->uploadDataToHw(HsaImageObjectSize);
|
||||
// Then use a pointer in aqlArgBuffer to CB1
|
||||
uint64_t srd = cb->vmAddress() + cb->wrtOffset();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
memList.push_back(cb);
|
||||
}
|
||||
else {
|
||||
uint64_t srd = image->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
uint64_t srd = image->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
|
||||
//! @todo Compiler has to return read/write attributes
|
||||
@@ -1103,15 +1096,9 @@ HSAILKernel::loadArguments(
|
||||
*reinterpret_cast<amd::Sampler* const*>(paramaddr);
|
||||
const Sampler* gpuSampler = static_cast<Sampler*>
|
||||
(sampler->getDeviceSampler(dev()));
|
||||
if (dev().settings().hsailDirectSRD_) {
|
||||
WriteAqlArg(&aqlArgBuf, gpuSampler->hwState(),
|
||||
HsaSamplerObjectSize, HsaSamplerObjectAlignment);
|
||||
}
|
||||
else {
|
||||
uint64_t srd = gpuSampler->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
}
|
||||
uint64_t srd = gpuSampler->hwSrd();
|
||||
WriteAqlArg(&aqlArgBuf, &srd, sizeof(srd));
|
||||
srdResource = true;
|
||||
break;
|
||||
}
|
||||
case HSAIL_ARGTYPE_QUEUE: {
|
||||
|
||||
@@ -844,7 +844,6 @@ hsa_status_t ORCAHSALoaderContext::SamplerCreate(
|
||||
assert(false);
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
assert(!program_->dev().settings().hsailDirectSRD_);
|
||||
pal::Sampler* sampler = new pal::Sampler(program_->dev());
|
||||
if (!sampler || !sampler->create(state)) {
|
||||
delete sampler;
|
||||
|
||||
@@ -1178,7 +1178,9 @@ Resource::partialMemCopyTo(
|
||||
// Make sure linear pitch in bytes is 4 bytes aligned
|
||||
if (((copyRegion.gpuMemoryRowPitch % 4) != 0) ||
|
||||
// another DRM restriciton... SI has 4 pixels
|
||||
(copyRegion.gpuMemoryOffset % 4 != 0)) {
|
||||
(copyRegion.gpuMemoryOffset % 4 != 0) ||
|
||||
(dev().settings().sdamPageFaultWar_ &&
|
||||
(copyRegion.imageOffset.x % dstResource.elementSize() != 0))) {
|
||||
result = false;
|
||||
}
|
||||
else {
|
||||
@@ -1204,7 +1206,9 @@ Resource::partialMemCopyTo(
|
||||
// Make sure linear pitch in bytes is 4 bytes aligned
|
||||
if (((copyRegion.gpuMemoryRowPitch % 4) != 0) ||
|
||||
// another DRM restriciton... SI has 4 pixels
|
||||
(copyRegion.gpuMemoryOffset % 4 != 0)) {
|
||||
(copyRegion.gpuMemoryOffset % 4 != 0) ||
|
||||
(dev().settings().sdamPageFaultWar_ &&
|
||||
(copyRegion.imageOffset.x % elementSize() != 0))) {
|
||||
result = false;
|
||||
}
|
||||
else {
|
||||
|
||||
@@ -115,20 +115,17 @@ Settings::Settings()
|
||||
numDeviceEvents_ = 1024;
|
||||
numWaitEvents_ = 8;
|
||||
|
||||
// Disable HSAIL by default
|
||||
hsail_ = false;
|
||||
|
||||
// Don't support platform atomics by default.
|
||||
svmAtomics_ = false;
|
||||
|
||||
// Use direct SRD by default
|
||||
hsailDirectSRD_ = GPU_DIRECT_SRD;
|
||||
|
||||
// Use host queue for device enqueuing by default
|
||||
useDeviceQueue_ = GPU_USE_DEVICE_QUEUE;
|
||||
|
||||
// Don't support Denormals for single precision by default
|
||||
singleFpDenorm_ = false;
|
||||
|
||||
// Disable SDMA workaround by default
|
||||
sdamPageFaultWar_ = false;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -179,6 +176,9 @@ Settings::create(
|
||||
// Keep this false even though we have support
|
||||
// singleFpDenorm_ = true;
|
||||
viPlus_ = true;
|
||||
// SDMA may have memory access outside of
|
||||
// the valid buffer range and cause a page fault
|
||||
sdamPageFaultWar_ = true;
|
||||
// Fall through to CI ...
|
||||
case Pal::AsicRevision::Kalindi:
|
||||
case Pal::AsicRevision::Spectre:
|
||||
@@ -193,7 +193,6 @@ Settings::create(
|
||||
case Pal::AsicRevision::Bonaire:
|
||||
case Pal::AsicRevision::Hawaii:
|
||||
ciPlus_ = true;
|
||||
hsail_ = true;
|
||||
threadTraceEnable_ = AMD_THREAD_TRACE_ENABLE;
|
||||
reportFMAF_ = false;
|
||||
if (palProp.revision == Pal::AsicRevision::Hawaii) {
|
||||
@@ -228,11 +227,10 @@ Settings::create(
|
||||
// This needs to be cleaned once 64bit addressing is stable
|
||||
if (oclVersion_ < OpenCL20) {
|
||||
use64BitPtr_ = flagIsDefault(GPU_FORCE_64BIT_PTR) ? LP64_SWITCH(false,
|
||||
/*calAttr.isWorkstation ||*/ hsail_) : GPU_FORCE_64BIT_PTR;
|
||||
/*calAttr.isWorkstation ||*/ true) : GPU_FORCE_64BIT_PTR;
|
||||
}
|
||||
else {
|
||||
if (GPU_FORCE_64BIT_PTR || LP64_SWITCH(false, (hsail_
|
||||
|| (oclVersion_ >= OpenCL20)))) {
|
||||
if (GPU_FORCE_64BIT_PTR || LP64_SWITCH(false, true)) {
|
||||
use64BitPtr_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,56 +50,55 @@ public:
|
||||
uint disablePersistent_: 1; //!< Disables using persistent memory for staging
|
||||
uint imageSupport_: 1; //!< Report images support
|
||||
uint doublePrecision_: 1; //!< Enables double precision support
|
||||
uint reportFMAF_: 1; //!< Report FP_FAST_FMAF define in CL program
|
||||
uint reportFMA_: 1; //!< Report FP_FAST_FMA define in CL program
|
||||
uint use64BitPtr_: 1; //!< Use 64bit pointers on GPU
|
||||
uint reportFMAF_: 1; //!< Report FP_FAST_FMAF define in CL program
|
||||
uint reportFMA_: 1; //!< Report FP_FAST_FMA define in CL program
|
||||
uint use64BitPtr_: 1; //!< Use 64bit pointers on GPU
|
||||
uint force32BitOcl20_: 1; //!< Force 32bit apps to take CLANG/HSAIL path on GPU
|
||||
uint imageDMA_: 1; //!< Enable direct image DMA transfers
|
||||
uint syncObject_: 1; //!< Enable syncobject
|
||||
uint ciPlus_: 1; //!< CI and post CI features
|
||||
uint viPlus_: 1; //!< VI and post VI features
|
||||
uint aiPlus_: 1; //!< AI and post AI features
|
||||
uint imageDMA_: 1; //!< Enable direct image DMA transfers
|
||||
uint syncObject_: 1; //!< Enable syncobject
|
||||
uint ciPlus_: 1; //!< CI and post CI features
|
||||
uint viPlus_: 1; //!< VI and post VI features
|
||||
uint aiPlus_: 1; //!< AI and post AI features
|
||||
uint threadTraceEnable_: 1; //!< Thread trace enable
|
||||
uint linearPersistentImage_: 1; //!< Allocates linear images in persistent
|
||||
uint useSingleScratch_: 1; //!< Allocates single scratch per device
|
||||
uint hsail_: 1; //!< Enables HSAIL compilation
|
||||
uint stagingWritePersistent_: 1; //!< Enables persistent writes
|
||||
uint svmAtomics_: 1; //!< SVM device atomics
|
||||
uint svmFineGrainSystem_: 1; //!< SVM fine grain system support
|
||||
uint apuSystem_: 1; //!< Device is APU system with shared memory
|
||||
uint hsailDirectSRD_: 1; //!< Controls direct SRD for HSAIL
|
||||
uint useDeviceQueue_: 1; //!< Submit to separate device queue
|
||||
uint singleFpDenorm_: 1; //!< Support Single FP Denorm
|
||||
uint reserved_: 5;
|
||||
uint svmAtomics_: 1; //!< SVM device atomics
|
||||
uint svmFineGrainSystem_: 1; //!< SVM fine grain system support
|
||||
uint apuSystem_: 1; //!< Device is APU system with shared memory
|
||||
uint useDeviceQueue_: 1; //!< Submit to separate device queue
|
||||
uint singleFpDenorm_: 1; //!< Support Single FP Denorm
|
||||
uint sdamPageFaultWar_: 1; //!< SDAM page fault workaround
|
||||
uint reserved_: 7;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
uint oclVersion_; //!< Reported OpenCL version support
|
||||
uint debugFlags_; //!< Debug GPU flags
|
||||
size_t stagedXferSize_; //!< Staged buffer size
|
||||
uint maxRenames_; //!< Maximum number of possible renames
|
||||
uint maxRenameSize_; //!< Maximum size for all renames
|
||||
uint hwLDSSize_; //!< HW local data store size
|
||||
uint maxWorkGroupSize_; //!< Requested workgroup size for this device
|
||||
uint hostMemDirectAccess_; //!< Enables direct access to the host memory
|
||||
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
|
||||
uint workloadSplitSize_; //!< Workload split size
|
||||
uint minWorkloadTime_; //!< Minimal workload time in 0.1 ms
|
||||
uint maxWorkloadTime_; //!< Maximum workload time in 0.1 ms
|
||||
uint blitEngine_; //!< Blit engine type
|
||||
size_t pinnedXferSize_; //!< Pinned buffer size for transfer
|
||||
size_t pinnedMinXferSize_; //!< Minimal buffer size for pinned transfer
|
||||
size_t resourceCacheSize_; //!< Resource cache size in MB
|
||||
uint64_t maxAllocSize_; //!< Maximum single allocation size
|
||||
size_t numMemDependencies_;//!< The array size for memory dependencies tracking
|
||||
uint cacheLineSize_; //!< Cache line size in bytes
|
||||
uint cacheSize_; //!< L1 cache size in bytes
|
||||
size_t xferBufSize_; //!< Transfer buffer size for image copy optimization
|
||||
uint numComputeRings_; //!< 0 - disabled, 1 , 2,.. - the number of compute rings
|
||||
uint numDeviceEvents_; //!< The number of device events
|
||||
uint numWaitEvents_; //!< The number of wait events for device enqueue
|
||||
uint hostMemDirectAccess_; //!< Enables direct access to the host memory
|
||||
size_t xferBufSize_; //!< Transfer buffer size for image copy optimization
|
||||
size_t stagedXferSize_; //!< Staged buffer size
|
||||
size_t pinnedXferSize_; //!< Pinned buffer size for transfer
|
||||
size_t pinnedMinXferSize_; //!< Minimal buffer size for pinned transfer
|
||||
size_t resourceCacheSize_; //!< Resource cache size in MB
|
||||
size_t numMemDependencies_;//!< The array size for memory dependencies tracking
|
||||
uint64_t maxAllocSize_; //!< Maximum single allocation size
|
||||
|
||||
amd::LibrarySelector libSelector_; //!< Select linking libraries for compiler
|
||||
|
||||
//! Default constructor
|
||||
Settings();
|
||||
|
||||
@@ -139,8 +139,6 @@ release(bool, GPU_HSAIL_ENABLE, LP64_SWITCH(LINUX_SWITCH(false,true),true), \
|
||||
"Enable HSAIL on dGPU stack (requires CI+ HW)") \
|
||||
release(uint, GPU_PRINT_CHILD_KERNEL, 0, \
|
||||
"Prints the specified number of the child kernels") \
|
||||
release(bool, GPU_DIRECT_SRD, false, \
|
||||
"Use indirect SRD access in HSAIL") \
|
||||
release(bool, GPU_USE_DEVICE_QUEUE, false, \
|
||||
"Use a dedicated device queue for the actual submissions") \
|
||||
release(bool, GPU_ENABLE_LARGE_ALLOCATION, true, \
|
||||
|
||||
Ссылка в новой задаче
Block a user