diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 9532a3c134..d919ae973e 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -2454,8 +2454,12 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const iCmd()->CmdCommentString(buf); } + bool imageBufferWrtBack = false; // Image buffer write back is required + std::vector wrtBackImageBuffer; // Array of images for write back + // Check memory dependency and SVM objects - if (!processMemObjectsHSA(kernel, parameters, nativeMem, ldsSize)) { + if (!processMemObjectsHSA(kernel, parameters, nativeMem, ldsSize, + imageBufferWrtBack, wrtBackImageBuffer)) { LogError("Wrong memory objects!"); return false; } @@ -2573,12 +2577,10 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const } // Check if image buffer write back is required - if (state_.imageBufferWrtBack_) { - // Avoid recursive write back - state_.imageBufferWrtBack_ = false; + if (imageBufferWrtBack) { // Make sure the original kernel execution is done addBarrier(RgpSqqtBarrierReason::MemDependency); - for (const auto imageBuffer : wrtBackImageBuffer_) { + for (const auto imageBuffer : wrtBackImageBuffer) { Memory* buffer = dev().getGpuMemory(imageBuffer->owner()->parent()); amd::Image* image = imageBuffer->owner()->asImage(); amd::Coord3D offs(0); @@ -2587,7 +2589,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const offs, image->getRegion(), true, image->getRowPitch(), image->getSlicePitch()); } - wrtBackImageBuffer_.clear(); } // Perform post dispatch logic for RGP traces @@ -3294,7 +3295,9 @@ void VirtualGPU::profileEvent(EngineType engine, bool type) const { } bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address params, - bool nativeMem, size_t& ldsAddress) { + bool nativeMem, size_t& ldsAddress, + bool& imageBufferWrtBack, + std::vector& wrtBackImageBuffer) { const amd::KernelParameters& kernelParams = kernel.parameters(); // Mark the tracker with a new kernel, @@ -3470,8 +3473,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p addVmMemory(imageBuffer->CopyImageBuffer()); // If it's not a read only resource, then runtime has to write back if (!info.readOnly_) { - wrtBackImageBuffer_.push_back(imageBuffer); - state_.imageBufferWrtBack_ = true; + wrtBackImageBuffer.push_back(imageBuffer); + imageBufferWrtBack = true; } } } diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 0614f1721c..b7ab2df22a 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -226,7 +226,6 @@ class VirtualGPU : public device::VirtualDevice { uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled uint rgpCaptureEnabled_ : 1; //!< RGP capture is enabled in the runtime - uint imageBufferWrtBack_ : 1; //!< Enable image buffer write back }; uint value_; State() : value_(0) {} @@ -584,7 +583,9 @@ class VirtualGPU : public device::VirtualDevice { bool processMemObjectsHSA(const amd::Kernel& kernel, //!< AMD kernel object for execution const_address params, //!< Pointer to the param's store bool nativeMem, //!< Native memory objects - size_t& ldsAddess //!< Returns LDS size, used in the kernel + size_t& ldsAddess, //!< Returns LDS size, used in the kernel + bool& imageBufferWrtBack, //!< Image buffer write back is required + std::vector& wrtBackImageBuffer //!< images for write back ); //! Common function for fill memory used by both svm Fill and non-svm fill @@ -674,7 +675,6 @@ class VirtualGPU : public device::VirtualDevice { Pal::ICmdAllocator* cmdAllocator_; //!< Command buffer allocator Queue* queues_[AllEngines]; //!< HW queues for all engines MemoryRange sdmaRange_; //!< SDMA memory range for write access - std::vector wrtBackImageBuffer_; //!< Array of images for write back void* hostcallBuffer_; //!< Hostcall buffer };