P4 to Git Change 1497344 by gandryey@gera-w8 on 2017/12/21 11:12:49
SWDEV-79445 - OCL generic changes and code clean-up
- Rename flushCUCaches() to addBarrier().
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#38 edit
[ROCm/clr commit: 013d49e0a6]
This commit is contained in:
@@ -646,7 +646,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
return false;
|
||||
}
|
||||
Pal::BufferViewInfo viewInfo = {};
|
||||
viewInfo.gpuAddr = memRef_->iMem()->Desc().gpuVirtAddr + offset();
|
||||
viewInfo.gpuAddr = vmAddress() + offset();
|
||||
viewInfo.range = memRef_->iMem()->Desc().size;
|
||||
viewInfo.stride = elementSize();
|
||||
viewInfo.swizzledFormat.format = format;
|
||||
@@ -760,7 +760,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
mapCount_++;
|
||||
}
|
||||
Pal::BufferViewInfo viewInfo = {};
|
||||
viewInfo.gpuAddr = memRef_->iMem()->Desc().gpuVirtAddr + offset();
|
||||
viewInfo.gpuAddr = vmAddress() + offset();
|
||||
viewInfo.range = memRef_->iMem()->Desc().size;
|
||||
viewInfo.stride = elementSize();
|
||||
viewInfo.swizzledFormat.format = format;
|
||||
@@ -1058,7 +1058,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
desc_.cardMemory_ = false;
|
||||
if ((nullptr != params) && (nullptr != params->owner_) &&
|
||||
(nullptr != params->owner_->getSvmPtr())) {
|
||||
params->owner_->setSvmPtr(reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr));
|
||||
params->owner_->setSvmPtr(reinterpret_cast<void*>(vmAddress()));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -1265,7 +1265,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
|
||||
|
||||
if (gpu.validateSdmaOverlap(*this, dstResource)) {
|
||||
// Note: PAL should insert a NOP into the command buffer for synchronization
|
||||
gpu.flushCUCaches();
|
||||
gpu.addBarrier();
|
||||
}
|
||||
|
||||
Pal::ImageLayout imgLayout = {};
|
||||
|
||||
@@ -416,7 +416,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
|
||||
if (maxMemObjectsInQueue_ == 0) {
|
||||
// Flush cache
|
||||
gpu.flushCUCaches();
|
||||
gpu.addBarrier();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -452,7 +452,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
|
||||
if (flushL1Cache) {
|
||||
// Flush cache
|
||||
if (!gpu.profiling()) {
|
||||
gpu.flushCUCaches();
|
||||
gpu.addBarrier();
|
||||
}
|
||||
|
||||
// Clear memory dependency state
|
||||
@@ -1985,7 +1985,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
// Note: This a workaround for incorrect results reported with release_mem packet,
|
||||
// when the packet can be processed later after this dispatch and including extra time
|
||||
if (profiling() || state_.profileEnabled_) {
|
||||
flushCUCaches();
|
||||
addBarrier();
|
||||
}
|
||||
eventEnd(MainEngine, gpuEvent);
|
||||
|
||||
@@ -2127,7 +2127,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
gpuDefQueue->schedParamIdx_,
|
||||
gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_));
|
||||
const static bool FlushL2 = true;
|
||||
gpuDefQueue->flushCUCaches(FlushL2);
|
||||
gpuDefQueue->addBarrier(FlushL2);
|
||||
|
||||
// Get the address of PM4 template and add write it to params
|
||||
//! @note DMA flush must not occur between patch and the scheduler
|
||||
@@ -2955,7 +2955,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
if (!supportFineGrainedSystem) {
|
||||
return false;
|
||||
} else if (sync) {
|
||||
flushCUCaches();
|
||||
addBarrier();
|
||||
// Clear memory dependency state
|
||||
const static bool All = true;
|
||||
memoryDependency().clear(!All);
|
||||
@@ -3001,7 +3001,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
svmMem =
|
||||
amd::SvmManager::FindSvmBuffer(*reinterpret_cast<void* const*>(params + desc.offset_));
|
||||
if (!svmMem) {
|
||||
flushCUCaches();
|
||||
addBarrier();
|
||||
// Clear memory dependency state
|
||||
const static bool All = true;
|
||||
memoryDependency().clear(!All);
|
||||
|
||||
@@ -433,7 +433,7 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Returns queue, associated with VirtualGPU
|
||||
Queue& queue(EngineType id) const { return *queues_[id]; }
|
||||
|
||||
void flushCUCaches(bool flushL2 = false) const {
|
||||
void addBarrier(bool flushL2 = false) const {
|
||||
Pal::BarrierInfo barrier = {};
|
||||
barrier.pipePointWaitCount = 1;
|
||||
Pal::HwPipePoint point = Pal::HwPipePostCs;
|
||||
|
||||
Reference in new issue
Block a user