From 3f0bcf7834f0e89ce9fe901d357de950c32a4b01 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Fri, 5 Apr 2024 02:02:55 +0000 Subject: [PATCH] SWDEV-301667 - Fix SDMA mask reuse If we are using the mask returned by getLastUsedSdmaEngine(), then we need to apply the SDMA Read/Write mask to it before using it with the HSA copy_on_engine API. Change-Id: I6e5dc6c187eeb3c61ee159e9d2a0fa7b4737c06e --- rocclr/device/rocm/rocblit.cpp | 6 +++++- rocclr/device/rocm/rocblit.hpp | 2 ++ rocclr/device/rocm/rocdevice.cpp | 6 ++++++ rocclr/device/rocm/rocdevice.hpp | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 40de5dcd0c..a8f2f17d04 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -33,7 +33,9 @@ DmaBlitManager::DmaBlitManager(VirtualGPU& gpu, Setup setup) MinSizeForPinnedTransfer(dev().settings().pinnedMinXferSize_), completeOperation_(false), context_(nullptr), - sdmaEngineRetainCount_(0) {} + sdmaEngineRetainCount_(0) { + dev().getSdmaRWMasks(&sdmaEngineReadMask_, &sdmaEngineWriteMask_); + } inline void DmaBlitManager::synchronize() const { if (syncOperation_) { @@ -731,6 +733,8 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, // Check if there a recently used SDMA engine for the stream copyMask = gpu().getLastUsedSdmaEngine(); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask); + copyMask &= (engine == HwQueueEngine::SdmaRead ? 
+ sdmaEngineReadMask_ : sdmaEngineWriteMask_); } if (copyMask == 0) { // Check SDMA engine status diff --git a/rocclr/device/rocm/rocblit.hpp b/rocclr/device/rocm/rocblit.hpp index 0533a11f56..f8b4c1a28b 100644 --- a/rocclr/device/rocm/rocblit.hpp +++ b/rocclr/device/rocm/rocblit.hpp @@ -238,6 +238,8 @@ class DmaBlitManager : public device::HostBlitManager { amd::Context* context_; //!< A dummy context mutable size_t sdmaEngineRetainCount_; //!< Keeps track of memcopies to either get the last //!< used SDMA engine or fetch the new mask + uint32_t sdmaEngineReadMask_; //!< SDMA Engine Read Mask + uint32_t sdmaEngineWriteMask_; //!< SDMA Engine Write Mask private: //! Disable copy constructor diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 2dd8a7bb14..4b451136da 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -3473,6 +3473,12 @@ uint32_t Device::fetchSDMAMask(const device::BlitManager* handle, bool readEngin return (readEngine ? 
maxSdmaReadMask_ : maxSdmaWriteMask_) & engine; } +// ================================================================================================ +void Device::getSdmaRWMasks(uint32_t* readMask, uint32_t* writeMask) const { + *readMask = maxSdmaReadMask_; + *writeMask = maxSdmaWriteMask_; +} + // ================================================================================================ void Device::resetSDMAMask(const device::BlitManager* handle) const { amd::ScopedLock lock(vgpusAccess()); diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp index eef891f2e5..89dbfe9b60 100644 --- a/rocclr/device/rocm/rocdevice.hpp +++ b/rocclr/device/rocm/rocdevice.hpp @@ -583,7 +583,8 @@ class Device : public NullDevice { void HiddenHeapAlloc(const VirtualGPU& gpu); uint32_t fetchSDMAMask(const device::BlitManager* handle, bool readEngine = true) const; - void resetSDMAMask(const device::BlitManager* handle) const ; + void resetSDMAMask(const device::BlitManager* handle) const; + void getSdmaRWMasks(uint32_t* readMask, uint32_t* writeMask) const; bool isXgmi() const { return isXgmi_; } private: