SWDEV-398151 - Partly relax static engine allocation

Change-Id: I4903b51a34b597a2e84d771b52cf629f877dba05


[ROCm/clr commit: 0b475284e9]
This commit is contained in:
Saleel Kudchadker
2023-05-08 16:43:29 -07:00
parent cb3642e26e
commit 0141e6809f
3 changed files with 14 additions and 21 deletions
@@ -692,15 +692,10 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
}
if (engine != HwQueueEngine::Unknown) {
if (copyMask == Device::kSkipQueryStatus) {
// Do not query engine status or take copy_on_engine path
status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
if (copyMask == 0) {
// Check SDMA engine status
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask %x",
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask 0x%x",
status, freeEngineMask);
// Return a mask with the rightmost bit set
copyMask = freeEngineMask - (freeEngineMask & (freeEngineMask - 1));
@@ -729,7 +724,7 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
auto wait_events = gpu().Barriers().WaitingSignal(engine);
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
"HSA Async Blit Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"completion_signal=0x%zx",
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
active.handle);
@@ -1243,17 +1243,17 @@ bool Device::populateOCLDeviceConstants() {
// Find SDMA read mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
&maxSdmaReadMask)) {
&maxSdmaReadMask_)) {
return false;
}
assert(maxSdmaReadMask > 0 && "No SDMA engines available for Read");
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
// Find SDMA write mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
&maxSdmaWriteMask)) {
&maxSdmaWriteMask_)) {
return false;
}
assert(maxSdmaWriteMask > 0 && "No SDMA engines available for Write");
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
info_.localMemSizePerCU_ = group_segment_size;
info_.localMemSize_ = group_segment_size;
@@ -1657,8 +1657,12 @@ bool Device::populateOCLDeviceConstants() {
LogError("HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query failed.");
}
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, xnack: %d, direct host access: %d",
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Gfx Major/Minor/Stepping: %d/%d/%d", isa().versionMajor(),
isa().versionMinor(), isa().versionStepping());
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, XNACK: %d, Direct host access: %d",
info_.hmmSupported_, info_.hmmCpuMemoryAccessible_, info_.hmmDirectHostAccess_);
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Max SDMA Read Mask: 0x%x, Max SDMA Write Mask: 0x%x",
maxSdmaReadMask_, maxSdmaWriteMask_);
info_.globalCUMask_ = {};
info_.virtualMemoryManagement_ = false;
@@ -3338,12 +3342,7 @@ uint32_t Device::fetchSDMAMask(const device::BlitManager* handle, bool readEngin
}
}
uint32_t mask = (readEngine ? maxSdmaReadMask : maxSdmaWriteMask) & engine;
if (engine != 0 && mask == 0 ) {
return kSkipQueryStatus;
} else {
return mask;
}
return (readEngine ? maxSdmaReadMask_ : maxSdmaWriteMask_) & engine;
}
// ================================================================================================
@@ -624,13 +624,12 @@ class Device : public NullDevice {
std::vector<std::map<hsa_queue_t*, QueueInfo>> queueWithCUMaskPool_;
//! Read and Write mask for device<->host
uint32_t maxSdmaReadMask;
uint32_t maxSdmaWriteMask;
uint32_t maxSdmaReadMask_;
uint32_t maxSdmaWriteMask_;
//! Map of SDMA engineId<->stream
mutable std::map<uint32_t, const device::BlitManager*> engineAssignMap_;
public:
constexpr static uint32_t kSkipQueryStatus = 1 << 31;
std::atomic<uint> numOfVgpus_; //!< Virtual gpu unique index
//! enum for keeping the total and available queue priorities