# Patch summary: mask the last-used SDMA engine value with the device's read or write
# engine mask before it is reused for a copy.
#
# Files touched (all under rocclr/device/rocm/):
#   rocdevice.hpp/.cpp - adds Device::getSdmaRWMasks(), which copies maxSdmaReadMask_ and
#                        maxSdmaWriteMask_ into the two caller-supplied out-pointers (both
#                        are dereferenced without a null check). Also drops the stray space
#                        before ';' in the resetSDMAMask() declaration.
#   rocblit.hpp/.cpp   - adds sdmaEngineReadMask_ / sdmaEngineWriteMask_ to DmaBlitManager and
#                        fills them in the constructor body via dev().getSdmaRWMasks(). In
#                        hsaCopy(), the value returned by gpu().getLastUsedSdmaEngine() is
#                        ANDed with the read mask when engine == HwQueueEngine::SdmaRead,
#                        otherwise with the write mask; the pre-existing
#                        `if (copyMask == 0)` check follows that block.
#
# NOTE(review): the two masks are read once, in the DmaBlitManager constructor - confirm the
#               device-side masks are final by the time a blit manager is created.
# NOTE(review): the "Last copy mask" debug print runs before the new AND, so it reports the
#               unfiltered value.
# NOTE(review): line breaks, blank context lines and leading whitespace inside the hunks are
#               a best-effort reconstruction - verify against the target files (or apply with
#               an ignore-whitespace option).
diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp
index 40de5dcd0c..a8f2f17d04 100644
--- a/rocclr/device/rocm/rocblit.cpp
+++ b/rocclr/device/rocm/rocblit.cpp
@@ -33,7 +33,9 @@ DmaBlitManager::DmaBlitManager(VirtualGPU& gpu, Setup setup)
       MinSizeForPinnedTransfer(dev().settings().pinnedMinXferSize_),
       completeOperation_(false),
       context_(nullptr),
-      sdmaEngineRetainCount_(0) {}
+      sdmaEngineRetainCount_(0) {
+        dev().getSdmaRWMasks(&sdmaEngineReadMask_, &sdmaEngineWriteMask_);
+      }
 
 inline void DmaBlitManager::synchronize() const {
   if (syncOperation_) {
@@ -731,6 +733,8 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
     // Check if there a recently used SDMA engine for the stream
     copyMask = gpu().getLastUsedSdmaEngine();
     ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
+    copyMask &= (engine == HwQueueEngine::SdmaRead ?
+                 sdmaEngineReadMask_ : sdmaEngineWriteMask_);
   }
   if (copyMask == 0) {
     // Check SDMA engine status
diff --git a/rocclr/device/rocm/rocblit.hpp b/rocclr/device/rocm/rocblit.hpp
index 0533a11f56..f8b4c1a28b 100644
--- a/rocclr/device/rocm/rocblit.hpp
+++ b/rocclr/device/rocm/rocblit.hpp
@@ -238,6 +238,8 @@ class DmaBlitManager : public device::HostBlitManager {
   amd::Context* context_;                 //!< A dummy context
   mutable size_t sdmaEngineRetainCount_;  //!< Keeps track of memcopies to either get the last
                                           //!< used SDMA engine or fetch the new mask
+  uint32_t sdmaEngineReadMask_;           //!< SDMA Engine Read Mask
+  uint32_t sdmaEngineWriteMask_;          //!< SDMA Engine Write Mask
 
  private:
   //! Disable copy constructor
diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp
index 2dd8a7bb14..4b451136da 100644
--- a/rocclr/device/rocm/rocdevice.cpp
+++ b/rocclr/device/rocm/rocdevice.cpp
@@ -3473,6 +3473,12 @@ uint32_t Device::fetchSDMAMask(const device::BlitManager* handle, bool readEngin
   return (readEngine ? maxSdmaReadMask_ : maxSdmaWriteMask_) & engine;
 }
 
+// ================================================================================================
+void Device::getSdmaRWMasks(uint32_t* readMask, uint32_t* writeMask) const {
+  *readMask = maxSdmaReadMask_;
+  *writeMask = maxSdmaWriteMask_;
+}
+
 // ================================================================================================
 void Device::resetSDMAMask(const device::BlitManager* handle) const {
   amd::ScopedLock lock(vgpusAccess());
diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp
index eef891f2e5..89dbfe9b60 100644
--- a/rocclr/device/rocm/rocdevice.hpp
+++ b/rocclr/device/rocm/rocdevice.hpp
@@ -583,7 +583,8 @@ class Device : public NullDevice {
   void HiddenHeapAlloc(const VirtualGPU& gpu);
 
   uint32_t fetchSDMAMask(const device::BlitManager* handle, bool readEngine = true) const;
-  void resetSDMAMask(const device::BlitManager* handle) const ;
+  void resetSDMAMask(const device::BlitManager* handle) const;
+  void getSdmaRWMasks(uint32_t* readMask, uint32_t* writeMask) const;
   bool isXgmi() const { return isXgmi_; }
 
  private: