diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp index f8933edd1a..5cae62145b 100644 --- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp @@ -646,7 +646,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) { return false; } Pal::BufferViewInfo viewInfo = {}; - viewInfo.gpuAddr = memRef_->iMem()->Desc().gpuVirtAddr + offset(); + viewInfo.gpuAddr = vmAddress() + offset(); viewInfo.range = memRef_->iMem()->Desc().size; viewInfo.stride = elementSize(); viewInfo.swizzledFormat.format = format; @@ -760,7 +760,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) { mapCount_++; } Pal::BufferViewInfo viewInfo = {}; - viewInfo.gpuAddr = memRef_->iMem()->Desc().gpuVirtAddr + offset(); + viewInfo.gpuAddr = vmAddress() + offset(); viewInfo.range = memRef_->iMem()->Desc().size; viewInfo.stride = elementSize(); viewInfo.swizzledFormat.format = format; @@ -1058,7 +1058,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) { desc_.cardMemory_ = false; if ((nullptr != params) && (nullptr != params->owner_) && (nullptr != params->owner_->getSvmPtr())) { - params->owner_->setSvmPtr(reinterpret_cast(memRef_->iMem()->Desc().gpuVirtAddr)); + params->owner_->setSvmPtr(reinterpret_cast(vmAddress())); } return true; } @@ -1265,7 +1265,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin, if (gpu.validateSdmaOverlap(*this, dstResource)) { // Note: PAL should insert a NOP into the command buffer for synchronization - gpu.flushCUCaches(); + gpu.addBarrier(); } Pal::ImageLayout imgLayout = {}; diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 0f2c685c51..d8db321394 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -416,7 +416,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor if (maxMemObjectsInQueue_ == 0) { // Flush cache - gpu.flushCUCaches(); + gpu.addBarrier(); return; } @@ -452,7 +452,7 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor if (flushL1Cache) { // Flush cache if (!gpu.profiling()) { - gpu.flushCUCaches(); + gpu.addBarrier(); } // Clear memory dependency state @@ -1985,7 +1985,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const // Note: This a workaround for incorrect results reported with release_mem packet, // when the packet can be processed later after this dispatch and including extra time if (profiling() || state_.profileEnabled_) { - flushCUCaches(); + addBarrier(); } eventEnd(MainEngine, gpuEvent); @@ -2127,7 +2127,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const gpuDefQueue->schedParamIdx_, gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_)); const static bool FlushL2 = true; - gpuDefQueue->flushCUCaches(FlushL2); + gpuDefQueue->addBarrier(FlushL2); // Get the address of PM4 template and add write it to params //! @note DMA flush must not occur between patch and the scheduler @@ -2955,7 +2955,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p if (!supportFineGrainedSystem) { return false; } else if (sync) { - flushCUCaches(); + addBarrier(); // Clear memory dependency state const static bool All = true; memoryDependency().clear(!All); @@ -3001,7 +3001,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p svmMem = amd::SvmManager::FindSvmBuffer(*reinterpret_cast(params + desc.offset_)); if (!svmMem) { - flushCUCaches(); + addBarrier(); // Clear memory dependency state const static bool All = true; memoryDependency().clear(!All); diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp index f12cb5d7e5..12ff774012 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp @@ -433,7 +433,7 @@ class VirtualGPU : public device::VirtualDevice { //! Returns queue, associated with VirtualGPU Queue& queue(EngineType id) const { return *queues_[id]; } - void flushCUCaches(bool flushL2 = false) const { + void addBarrier(bool flushL2 = false) const { Pal::BarrierInfo barrier = {}; barrier.pipePointWaitCount = 1; Pal::HwPipePoint point = Pal::HwPipePostCs;