From 0141e6809f6cfbf3e803d321f341a8d13c4d148e Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Mon, 8 May 2023 16:43:29 -0700 Subject: [PATCH] SWDEV-398151 - Partly relax static engine allocation Change-Id: I4903b51a34b597a2e84d771b52cf629f877dba05 [ROCm/clr commit: 0b475284e93fae11979d892a18fee90e74f7131c] --- projects/clr/rocclr/device/rocm/rocblit.cpp | 9 ++------ projects/clr/rocclr/device/rocm/rocdevice.cpp | 21 +++++++++---------- projects/clr/rocclr/device/rocm/rocdevice.hpp | 5 ++--- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index 1c0f8741ef..7284ad1b09 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -692,15 +692,10 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, } if (engine != HwQueueEngine::Unknown) { - if (copyMask == Device::kSkipQueryStatus) { - // Do not query engine status or take copy_on_engine path - status = HSA_STATUS_ERROR_OUT_OF_RESOURCES; - } - if (copyMask == 0) { // Check SDMA engine status status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask); - ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask %x", + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask 0x%x", status, freeEngineMask); // Return a mask with the rightmost bit set copyMask = freeEngineMask - (freeEngineMask & (freeEngineMask - 1)); @@ -729,7 +724,7 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, auto wait_events = gpu().Barriers().WaitingSignal(engine); hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp()); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, - "HSA Async Blit Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, " + "HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, " "completion_signal=0x%zx", dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle); diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index bb791d4c6f..f4c2ebc766 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1243,17 +1243,17 @@ bool Device::populateOCLDeviceConstants() { // Find SDMA read mask if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(), - &maxSdmaReadMask)) { + &maxSdmaReadMask_)) { return false; } - assert(maxSdmaReadMask > 0 && "No SDMA engines available for Read"); + assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read"); // Find SDMA write mask if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(), - &maxSdmaWriteMask)) { + &maxSdmaWriteMask_)) { return false; } - assert(maxSdmaWriteMask > 0 && "No SDMA engines available for Write"); + assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write"); info_.localMemSizePerCU_ = group_segment_size; info_.localMemSize_ = group_segment_size; @@ -1657,8 +1657,12 @@ bool Device::populateOCLDeviceConstants() { LogError("HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query failed."); } - ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, xnack: %d, direct host access: %d", + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Gfx Major/Minor/Stepping: %d/%d/%d", isa().versionMajor(), + isa().versionMinor(), isa().versionStepping()); + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, XNACK: %d, Direct host access: %d", info_.hmmSupported_, info_.hmmCpuMemoryAccessible_, info_.hmmDirectHostAccess_); + ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Max SDMA Read Mask: 0x%x, Max SDMA Write Mask: 0x%x", + maxSdmaReadMask_, maxSdmaWriteMask_); info_.globalCUMask_ = {}; info_.virtualMemoryManagement_ = false; @@ -3338,12 +3342,7 @@ uint32_t Device::fetchSDMAMask(const device::BlitManager* handle, bool readEngin } } - uint32_t mask = (readEngine ? maxSdmaReadMask : maxSdmaWriteMask) & engine; - if (engine != 0 && mask == 0 ) { - return kSkipQueryStatus; - } else { - return mask; - } + return (readEngine ? maxSdmaReadMask_ : maxSdmaWriteMask_) & engine; } // ================================================================================================ diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 10a02b9b3d..e23c25a1e3 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -624,13 +624,12 @@ class Device : public NullDevice { std::vector> queueWithCUMaskPool_; //! Read and Write mask for device<->host - uint32_t maxSdmaReadMask; - uint32_t maxSdmaWriteMask; + uint32_t maxSdmaReadMask_; + uint32_t maxSdmaWriteMask_; //! Map of SDMA engineId<->stream mutable std::map engineAssignMap_; public: - constexpr static uint32_t kSkipQueryStatus = 1 << 31; std::atomic numOfVgpus_; //!< Virtual gpu unique index //! enum for keeping the total and available queue priorities