P4 to Git Change 1561015 by gandryey@gera-w8 on 2018/05/29 13:10:06
SWDEV-79445 - OCL generic changes and code clean-up. Optimize memory dependency tracking logic: 1. Add a modified_ field to the event object to track memory writes into device memory. 2. Check memory dependency only if a write operation is currently requested or was previously performed. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#52 edit
Este commit está contenido en:
@@ -44,22 +44,27 @@ struct HwDbgKernelInfo {
|
||||
enum EngineType { MainEngine = 0, SdmaEngine, AllEngines };
|
||||
|
||||
struct GpuEvent {
|
||||
static const unsigned int InvalidID = ((1 << 30) - 1);
|
||||
static constexpr uint32_t InvalidID = ((1 << 30) - 1);
|
||||
|
||||
struct {
|
||||
uint32_t id : 31; ///< actual event id
|
||||
uint32_t engineId_ : 1; ///< type of the id
|
||||
uint32_t id_ : 30; ///< Actual event id
|
||||
uint32_t modified_ : 1; ///< Resource associated with the event was modified
|
||||
uint32_t engineId_ : 1; ///< Type of the id
|
||||
};
|
||||
//! GPU event default constructor
|
||||
GpuEvent() : id(InvalidID), engineId_(MainEngine) {}
|
||||
GpuEvent() : id_(InvalidID), engineId_(MainEngine), modified_(false) {}
|
||||
//! GPU event constructor
|
||||
GpuEvent(uint evt) : id(evt), engineId_(MainEngine) {}
|
||||
GpuEvent(uint evt) : id_(evt), engineId_(MainEngine), modified_(false) {}
|
||||
|
||||
//! Returns true if the current event is valid
|
||||
bool isValid() const { return (id != InvalidID) ? true : false; }
|
||||
bool isValid() const { return (id_ != InvalidID) ? true : false; }
|
||||
|
||||
//! Set invalid event id
|
||||
void invalidate() { id = InvalidID; }
|
||||
void invalidate() { id_ = InvalidID; }
|
||||
|
||||
// Override the default assignment operator so that the modified_ field is preserved
|
||||
GpuEvent& operator=(const GpuEvent& evt)
|
||||
{ id_ = evt.id_; engineId_ = evt.engineId_; return *this; }
|
||||
};
|
||||
|
||||
/*! \addtogroup PAL
|
||||
|
||||
@@ -1665,6 +1665,31 @@ GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
|
||||
return &events_[idx];
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Stores the \a modified flag in the event slot selected by gpu.index(),
//! then forwards the same flag to the view owner, if there is one.
void Resource::setModified(VirtualGPU& gpu, bool modified) const {
  const uint gpuIdx = gpu.index();
  assert(gpuIdx < events_.size());
  events_[gpuIdx].modified_ = modified;

  // A resource without a view owner has nothing else to update
  if (viewOwner_ == nullptr) {
    return;
  }

  // This resource is a view: keep the parent's state in sync
  viewOwner_->setModified(gpu, modified);
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Returns the modified flag from the event slot selected by gpu.index(),
//! OR'ed with the view owner's state when this resource is a view.
bool Resource::isModified(VirtualGPU& gpu) const {
  const uint gpuIdx = gpu.index();
  assert(gpuIdx < events_.size());
  const bool selfModified = events_[gpuIdx].modified_;

  // Not a view: only the resource's own state matters
  if (viewOwner_ == nullptr) {
    return selfModified;
  }

  // The parent is always queried, regardless of the local state
  const bool ownerModified = viewOwner_->isModified(gpu);
  return selfModified || ownerModified;
}
|
||||
|
||||
// ================================================================================================
|
||||
void Resource::palFree() const {
|
||||
if (desc().type_ == OGLInterop) {
|
||||
|
||||
@@ -382,6 +382,12 @@ class Resource : public amd::HeapObject {
|
||||
}
|
||||
}
|
||||
|
||||
//! Update the modified field of the event, meaning the resource was updated
|
||||
void setModified(VirtualGPU& gpu, bool modified) const;
|
||||
|
||||
//! Returns the modified field of the event (combined with the view owner's state, if any)
|
||||
bool isModified(VirtualGPU& gpu) const;
|
||||
|
||||
protected:
|
||||
/*! \brief Creates a PAL image object, associated with the resource
|
||||
*
|
||||
|
||||
@@ -427,24 +427,26 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
uint64_t curStart = memory->vmAddress();
|
||||
uint64_t curEnd = curStart + memory->size();
|
||||
|
||||
// Loop through all memory objects in the queue and find dependency
|
||||
// @note don't include objects from the current kernel
|
||||
for (size_t j = 0; j < endMemObjectsInQueue_; ++j) {
|
||||
// Check if the queue already contains this mem object and
|
||||
// GPU operations aren't readonly
|
||||
uint64_t busyStart = memObjectsInQueue_[j].start_;
|
||||
uint64_t busyEnd = memObjectsInQueue_[j].end_;
|
||||
if (memory->isModified(gpu) || !readOnly) {
|
||||
// Loop through all memory objects in the queue and find dependency
|
||||
// @note don't include objects from the current kernel
|
||||
for (size_t j = 0; j < endMemObjectsInQueue_; ++j) {
|
||||
// Check if the queue already contains this mem object and
|
||||
// GPU operations aren't readonly
|
||||
uint64_t busyStart = memObjectsInQueue_[j].start_;
|
||||
uint64_t busyEnd = memObjectsInQueue_[j].end_;
|
||||
|
||||
// Check if the start inside the busy region
|
||||
if ((((curStart >= busyStart) && (curStart < busyEnd)) ||
|
||||
// Check if the end inside the busy region
|
||||
((curEnd > busyStart) && (curEnd <= busyEnd)) ||
|
||||
// Check if the start/end cover the busy region
|
||||
((curStart <= busyStart) && (curEnd >= busyEnd))) &&
|
||||
// If the busy region was written or the current one is for write
|
||||
(!memObjectsInQueue_[j].readOnly_ || !readOnly)) {
|
||||
flushL1Cache = true;
|
||||
break;
|
||||
// Check if the start inside the busy region
|
||||
if ((((curStart >= busyStart) && (curStart < busyEnd)) ||
|
||||
// Check if the end inside the busy region
|
||||
((curEnd > busyStart) && (curEnd <= busyEnd)) ||
|
||||
// Check if the start/end cover the busy region
|
||||
((curStart <= busyStart) && (curEnd >= busyEnd))) &&
|
||||
// If the busy region was written or the current one is for write
|
||||
(!memObjectsInQueue_[j].readOnly_ || !readOnly)) {
|
||||
flushL1Cache = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -471,6 +473,8 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
memObjectsInQueue_[numMemObjectsInQueue_].end_ = curEnd;
|
||||
memObjectsInQueue_[numMemObjectsInQueue_].readOnly_ = readOnly;
|
||||
numMemObjectsInQueue_++;
|
||||
// Mark resource as modified
|
||||
memory->setModified(gpu, !readOnly);
|
||||
}
|
||||
|
||||
void VirtualGPU::MemoryDependency::clear(bool all) {
|
||||
@@ -1955,7 +1959,7 @@ void VirtualGPU::PostDeviceEnqueue(
|
||||
uint64_t vmParentWrap,
|
||||
GpuEvent* gpuEvent)
|
||||
{
|
||||
uint32_t id = gpuEvent->id;
|
||||
uint32_t id = gpuEvent->id_;
|
||||
amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
|
||||
|
||||
// Ensure exclusive access to the device queue
|
||||
@@ -2036,7 +2040,7 @@ void VirtualGPU::PostDeviceEnqueue(
|
||||
iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
|
||||
vmParentWrap + offsetof(AmdAqlWrap, child_counter),
|
||||
signalAddr, dev().settings().useDeviceQueue_);
|
||||
if (id != gpuEvent->id) {
|
||||
if (id != gpuEvent->id_) {
|
||||
LogError("Something is wrong. ID mismatch!\n");
|
||||
}
|
||||
eventEnd(MainEngine, *gpuEvent);
|
||||
@@ -2133,7 +2137,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
|
||||
for (int iter = 0; iter < iteration; ++iter) {
|
||||
GpuEvent gpuEvent(queues_[MainEngine]->cmdBufId());
|
||||
uint32_t id = gpuEvent.id;
|
||||
uint32_t id = gpuEvent.id_;
|
||||
// Reset global size for dimension dim if split is needed
|
||||
if (dim != -1) {
|
||||
newOffset[dim] = sizes.offset()[dim] + globalStep * iter;
|
||||
@@ -2184,7 +2188,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
if (profiling() || state_.profileEnabled_) {
|
||||
addBarrier();
|
||||
}
|
||||
if (id != gpuEvent.id) {
|
||||
if (id != gpuEvent.id_) {
|
||||
LogError("Something is wrong. ID mismatch!\n");
|
||||
}
|
||||
eventEnd(MainEngine, gpuEvent);
|
||||
|
||||
@@ -463,11 +463,11 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
constexpr bool End = false;
|
||||
if (forceExec) {
|
||||
constexpr bool ForceFlush = true;
|
||||
event.id = queues_[engId]->submit(ForceFlush);
|
||||
event.id_ = queues_[engId]->submit(ForceFlush);
|
||||
profileEvent(engId, End);
|
||||
} else {
|
||||
profileEvent(engId, End);
|
||||
event.id = queues_[engId]->submit(GPU_FLUSH_ON_EXECUTION);
|
||||
event.id_ = queues_[engId]->submit(GPU_FLUSH_ON_EXECUTION);
|
||||
}
|
||||
event.engineId_ = engId;
|
||||
}
|
||||
@@ -475,7 +475,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
void waitForEvent(GpuEvent* event) const {
|
||||
if (event->isValid()) {
|
||||
assert(event->engineId_ < AllEngines);
|
||||
queues_[event->engineId_]->waitForEvent(event->id);
|
||||
queues_[event->engineId_]->waitForEvent(event->id_);
|
||||
event->invalidate();
|
||||
}
|
||||
}
|
||||
@@ -483,7 +483,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
bool isDone(GpuEvent* event) {
|
||||
if (event->isValid()) {
|
||||
assert(event->engineId_ < AllEngines);
|
||||
if (queues_[event->engineId_]->isDone(event->id)) {
|
||||
if (queues_[event->engineId_]->isDone(event->id_)) {
|
||||
event->invalidate();
|
||||
return true;
|
||||
}
|
||||
@@ -623,9 +623,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
};
|
||||
|
||||
inline void VirtualGPU::addVmMemory(const Memory* memory) {
|
||||
GpuEvent event(queues_[MainEngine]->cmdBufId());
|
||||
queues_[MainEngine]->addCmdMemRef(memory->memRef());
|
||||
memory->setBusy(*this, event);
|
||||
memory->setBusy(*this, queues_[MainEngine]->cmdBufId());
|
||||
}
|
||||
|
||||
inline void VirtualGPU::AddKernel(const amd::Kernel& kernel) const {
|
||||
|
||||
Referencia en una nueva incidencia
Block a user