SWDEV-436796 - Enable device memory for kernel arguments

An extra CPU read back is performed before every submission to make sure
previous writes over PCIE have reached the GPU. The HDP flush is done by the CP.

Change-Id: I402d28ca26c8cee4a3920feb3599af8c285d0889
This commit is contained in:
German
2023-12-08 19:02:50 -05:00
zatwierdzone przez German Andryeyev
rodzic c8b3253a24
commit cfc07c88ee
3 zmienionych plików z 17 dodań i 7 usunięć
+4
Wyświetl plik
@@ -65,6 +65,10 @@ class ManagedBuffer : public amd::EmbeddedObject {
//! Returns VirtualGPU object this managed resource associated
VirtualGPU& gpu() const { return gpu_; }
//! Issues a dummy CPU read of the first 8 bytes of the currently active buffer.
//! The value read is discarded; only the read access itself matters. The read back is
//! meant to be done before a submission so that earlier CPU writes over PCIE reach the GPU.
//! NOTE(review): assumes buf->data() is a CPU-visible mapping of at least 8 bytes - confirm.
void CpuReadBack() const {
  // The load itself has to be the volatile access. With volatile only on the destination
  // local, the compiler may still satisfy the read from a value it already knows
  // (e.g. by forwarding an earlier store) without touching the buffer memory.
  const volatile uint64_t* readBackPtr =
      reinterpret_cast<const volatile uint64_t*>(pool_[activeBuffer_].buf->data());
  const uint64_t readBackValue = *readBackPtr;
  static_cast<void>(readBackValue);  // silence unused-variable warnings
}
private:
struct TimeStampedBuffer {
Memory* buf;
+1 -1
Wyświetl plik
@@ -143,7 +143,7 @@ Settings::Settings() {
alwaysResident_ = amd::IS_HIP ? true : false;
prepinnedMinSize_ = 0;
cpDmaCopySizeMax_ = GPU_CP_DMA_COPY_SIZE * Ki;
useDeviceKernelArg_ = flagIsDefault(HIP_FORCE_DEV_KERNARG) ? false : HIP_FORCE_DEV_KERNARG;
useDeviceKernelArg_ = flagIsDefault(HIP_FORCE_DEV_KERNARG) ? true : HIP_FORCE_DEV_KERNARG;
limit_blit_wg_ = 16;
}
+12 -6
Wyświetl plik
@@ -370,8 +370,9 @@ void VirtualGPU::Queue::addCmdDoppRef(Pal::IGpuMemory* iMem, bool lastDoppCmd, b
// ================================================================================================
bool VirtualGPU::Queue::flush() {
amd::ScopedLock l(lock_);
const Settings& settings = gpu_.dev().settings();
if (!gpu_.dev().settings().alwaysResident_ && palMemRefs_.size() != 0) {
if (!settings.alwaysResident_ && palMemRefs_.size() != 0) {
if (Pal::Result::Success !=
iDev_->AddGpuMemoryReferences(palMemRefs_.size(), &palMemRefs_[0], iQueue_,
Pal::GpuMemoryRefCantTrim)) {
@@ -410,10 +411,15 @@ bool VirtualGPU::Queue::flush() {
submitInfo.fenceCount = 1;
submitInfo.ppFences = &iCmdFences_[cmdBufIdSlot_];
if (amd::IS_HIP) {
// HIP disables per resource tracking, because the app may embed SVM ptr into other buffers.
// Force CPU sync if there are pending operations on SDMA, until OS fences will be added
if (iQueue_->Type() == Pal::QueueTypeCompute) {
if (iQueue_->Type() == Pal::QueueTypeCompute) {
if (settings.useDeviceKernelArg_) {
// If runtime uses device memory for kernel arguments, then perform a CPU read back on
// submission. That will make sure NBIO pushes all previous CPU write requests through PCIE
gpu_.managedBuffer().CpuReadBack();
}
if (amd::IS_HIP) {
// HIP disables per resource tracking, because the app may embed SVM ptr into other buffers.
// Force CPU sync if there are pending operations on SDMA, until OS fences will be added
gpu_.WaitForIdleSdma();
}
}
@@ -487,7 +493,7 @@ bool VirtualGPU::Queue::flush() {
}
}
}
if (!gpu_.dev().settings().alwaysResident_ && palMems_.size() != 0) {
if (!settings.alwaysResident_ && palMems_.size() != 0) {
iDev_->RemoveGpuMemoryReferences(palMems_.size(), &palMems_[0], iQueue_);
palMems_.clear();
}