From c4567a9188c5b4cd7378d643f3f2d6f2e62cfc8d Mon Sep 17 00:00:00 2001 From: "GunaShekar, Ajay" Date: Mon, 28 Apr 2025 22:53:43 -0700 Subject: [PATCH] SWDEV-523028 - print PAL failure return values in logs (#81) * print PAL failure return values in logs * dump kernel info in case of PAL failure [ROCm/clr commit: 99ef573399af2245d47b7089ff48e5480c697790] --- projects/clr/rocclr/device/pal/palvirtual.cpp | 37 ++++++++++++------- projects/clr/rocclr/device/pal/palvirtual.hpp | 2 +- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 4a6dc96bbc..15b7bf220a 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -369,24 +369,29 @@ bool VirtualGPU::Queue::flush() { const Settings& settings = gpu_.dev().settings(); if (!settings.alwaysResident_ && palMemRefs_.size() != 0) { - if (Pal::Result::Success != - iDev_->AddGpuMemoryReferences(palMemRefs_.size(), &palMemRefs_[0], iQueue_, - Pal::GpuMemoryRefCantTrim)) { - LogError("PAL failed to make resident resources!"); + Pal::Result result = iDev_->AddGpuMemoryReferences( + palMemRefs_.size(), + &palMemRefs_[0], iQueue_, + Pal::GpuMemoryRefCantTrim); + if (Pal::Result::Success != result) { + LogPrintfError("PAL failed to make resident resources! result: %d", result); return false; } palMemRefs_.clear(); } // Stop commands building - if (Pal::Result::Success != iCmdBuffs_[cmdBufIdSlot_]->End()) { - LogError("PAL failed to finalize a command buffer!"); + Pal::Result result; + result = iCmdBuffs_[cmdBufIdSlot_]->End(); + if (Pal::Result::Success != result) { + LogPrintfError("PAL failed to finalize a command buffer! result: %d", result); return false; } // Reset the fence. 
PAL will reset OS event - if (Pal::Result::Success != iDev_->ResetFences(1, &iCmdFences_[cmdBufIdSlot_])) { - LogError("PAL failed to reset a fence!"); + result = iDev_->ResetFences(1, &iCmdFences_[cmdBufIdSlot_]); + if (Pal::Result::Success != result) { + LogPrintfError("PAL failed to reset a fence! result:%d", result); return false; } @@ -420,14 +425,16 @@ bool VirtualGPU::Queue::flush() { } } // Submit command buffer to OS - Pal::Result result; if (gpu_.rgpCaptureEna()) { result = gpu_.dev().captureMgr()->TimedQueueSubmit(iQueue_, cmdBufIdCurrent_, submitInfo); } else { result = iQueue_->Submit(submitInfo); } if (Pal::Result::Success != result) { - LogError("PAL failed to submit CMD!"); + LogPrintfError("PAL failed to submit CMD! result:%d", result); + if (GPU_ANALYZE_HANG) { + DumpMemoryReferences(); + } return false; } // Make sure the slot isn't busy @@ -461,15 +468,17 @@ bool VirtualGPU::Queue::flush() { } // Reset command buffer, so CB chunks could be reused - if (Pal::Result::Success != iCmdBuffs_[cmdBufIdSlot_]->Reset(nullptr, false)) { - LogError("PAL failed CB reset!"); + result = iCmdBuffs_[cmdBufIdSlot_]->Reset(nullptr, false); + if (Pal::Result::Success != result) { + LogPrintfError("PAL failed CB reset! result:%d", result); return false; } // Start command buffer building Pal::CmdBufferBuildInfo cmdBuildInfo = {}; cmdBuildInfo.pMemAllocator = &vlAlloc_; - if (Pal::Result::Success != iCmdBuffs_[cmdBufIdSlot_]->Begin(cmdBuildInfo)) { - LogError("PAL failed CB building initialization!"); + result = iCmdBuffs_[cmdBufIdSlot_]->Begin(cmdBuildInfo); + if (Pal::Result::Success != result) { + LogPrintfError("PAL failed CB building initialization! 
result:%d", result); return false; } diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index 8a21da8367..2d4a123296 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -161,7 +161,7 @@ class VirtualGPU : public device::VirtualDevice { if (Pal::Result::Success == result) { break; } else if ((Pal::Result::NotReady == result) || (Pal::Result::Timeout == result)) { - LogWarning("PAL fence isn't ready!"); + LogPrintfWarning("PAL fence isn't ready! result:%d", result); if (GPU_ANALYZE_HANG) { DumpMemoryReferences(); }