P4 to Git Change 1580311 by gandryey@gera-w8 on 2018/07/13 14:24:01
SWDEV-154474 - [Task]: TAN Dual Realtime Queue Enhancement For Radeon Rays
- Add support for the second RT queue
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#422 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#97 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#114 edit
[ROCm/clr commit: 6b409a027b]
Этот коммит содержится в:
@@ -422,10 +422,15 @@ bool VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize,
|
||||
: (dev().getFirstAvailableComputeEngineID() + idx)));
|
||||
|
||||
} else {
|
||||
if (priority == amd::CommandQueue::Priority::Medium) {
|
||||
if ((priority == amd::CommandQueue::Priority::Medium) &&
|
||||
(amd::CommandQueue::RealTimeDisabled == rtCUs)) {
|
||||
engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_MEDIUM_PRIORITY));
|
||||
} else {
|
||||
engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
|
||||
if (priority == amd::CommandQueue::Priority::Medium) {
|
||||
engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
|
||||
} else {
|
||||
engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
|
||||
}
|
||||
}
|
||||
//!@todo This is not a generic solution and
|
||||
// may have issues with > 8 queues
|
||||
|
||||
@@ -717,7 +717,6 @@ Device::Device()
|
||||
mapCache_(nullptr),
|
||||
resourceCache_(nullptr),
|
||||
numComputeEngines_(0),
|
||||
numExclusiveComputeEngines_(0),
|
||||
numDmaEngines_(0),
|
||||
heapInitComplete_(false),
|
||||
xferQueue_(nullptr),
|
||||
@@ -822,11 +821,18 @@ bool Device::create(Pal::IDevice* device) {
|
||||
if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].maxNumDedicatedCu > 0) {
|
||||
for (uint i = 0; i < properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineCount;
|
||||
++i) {
|
||||
if ((properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
|
||||
Pal::EngineSubType::RtCuHighCompute) ||
|
||||
(properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
|
||||
Pal::EngineSubType::RtCuMedCompute)) {
|
||||
numExclusiveComputeEngines_++;
|
||||
if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
|
||||
Pal::EngineSubType::RtCuHighCompute) {
|
||||
if (exclusiveComputeEnginesId_.find(ExclusiveQueueType::RealTime0) !=
|
||||
exclusiveComputeEnginesId_.end()) {
|
||||
exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime1, i});
|
||||
} else {
|
||||
exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime0, i});
|
||||
}
|
||||
}
|
||||
if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
|
||||
Pal::EngineSubType::RtCuMedCompute) {
|
||||
exclusiveComputeEnginesId_.insert({ExclusiveQueueType::Medium, i});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -976,9 +982,10 @@ bool Device::initializeHeapResources() {
|
||||
// Request all compute engines
|
||||
finalizeInfo.requestedEngineCounts[Pal::EngineTypeCompute].engines =
|
||||
((1 << numComputeEngines_) - 1);
|
||||
// Request real time compute engines
|
||||
finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines =
|
||||
((1 << numExclusiveComputeEngines_) - 1);
|
||||
for (const auto& it: exclusiveComputeEnginesId_) {
|
||||
// Request real time compute engines
|
||||
finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |= (1 << it.second);
|
||||
}
|
||||
// Request all SDMA engines
|
||||
finalizeInfo.requestedEngineCounts[Pal::EngineTypeDma].engines = (1 << numDmaEngines_) - 1;
|
||||
|
||||
|
||||
@@ -155,7 +155,11 @@ class ThreadTrace;
|
||||
#ifndef CL_FILTER_NONE
|
||||
#define CL_FILTER_NONE 0x1142
|
||||
#endif
|
||||
|
||||
//! Kinds of exclusive compute queues; used as the key of the device's
//! exclusive-engine map (queue type -> engine index).
enum class ExclusiveQueueType : uint32_t {
|
||||
RealTime0 = 0,  //!< Recorded for the first RtCuHighCompute engine found
|
||||
RealTime1,  //!< Recorded for an RtCuHighCompute engine found after RealTime0 is already taken
|
||||
Medium  //!< Recorded for an RtCuMedCompute engine
|
||||
};
|
||||
class Sampler : public device::Sampler {
|
||||
public:
|
||||
//! Constructor
|
||||
@@ -400,7 +404,10 @@ class Device : public NullDevice {
|
||||
uint numComputeEngines() const { return numComputeEngines_; }
|
||||
|
||||
//! Returns the number of available exclusive compute engines
|
||||
uint numExclusiveComputeEngines() const { return numExclusiveComputeEngines_; }
|
||||
uint numExclusiveComputeEngines() const { return exclusiveComputeEnginesId_.size(); }
|
||||
|
||||
//! Returns the map from exclusive queue type to its engine index
|
||||
const auto& exclusiveComputeEnginesId() const { return exclusiveComputeEnginesId_; }
|
||||
|
||||
//! Returns the number of available DMA engines
|
||||
uint numDMAEngines() const { return numDmaEngines_; }
|
||||
@@ -583,7 +590,7 @@ class Device : public NullDevice {
|
||||
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
|
||||
ResourceCache* resourceCache_; //!< Resource cache
|
||||
uint numComputeEngines_; //!< The number of available compute engines
|
||||
uint numExclusiveComputeEngines_; //!< The number of available compute engines
|
||||
std::map<ExclusiveQueueType, uint32_t> exclusiveComputeEnginesId_;//!< Engine index for each available exclusive queue type
|
||||
uint numDmaEngines_; //!< The number of available DMA engines
|
||||
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
|
||||
VirtualGPU* xferQueue_; //!< Transfer queue
|
||||
|
||||
@@ -50,17 +50,30 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy
|
||||
} else {
|
||||
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeCompute;
|
||||
}
|
||||
|
||||
if (priority == amd::CommandQueue::Priority::Medium) {
|
||||
qCreateInfo.engineIndex = 0x1;
|
||||
std::map<ExclusiveQueueType, uint32_t>::const_iterator it;
|
||||
if ((priority == amd::CommandQueue::Priority::Medium) &&
|
||||
(amd::CommandQueue::RealTimeDisabled == rtCU)) {
|
||||
it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::Medium);
|
||||
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
|
||||
} else if (amd::CommandQueue::RealTimeDisabled != rtCU) {
|
||||
qCreateInfo.numReservedCu = rtCU;
|
||||
qCreateInfo.engineIndex = 0x0;
|
||||
if (priority == amd::CommandQueue::Priority::Medium) {
|
||||
it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime1);
|
||||
} else {
|
||||
it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime0);
|
||||
}
|
||||
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
|
||||
cmdCreateInfo.flags.realtimeComputeUnits = true;
|
||||
}
|
||||
|
||||
// If the app creates an exclusive compute queue, then look up its engine index;
// queue creation fails if no matching engine is available
|
||||
if (qCreateInfo.engineType == Pal::EngineTypeExclusiveCompute) {
|
||||
if (it != gpu.dev().exclusiveComputeEnginesId().end()) {
|
||||
qCreateInfo.engineIndex = it->second;
|
||||
}
|
||||
else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
// Find queue object size
|
||||
size_t qSize = palDev->GetQueueSize(qCreateInfo, &result);
|
||||
if (result != Pal::Result::Success) {
|
||||
|
||||
Ссылка в новой задаче
Block a user