SWDEV-398151 - Partly relax static engine allocation
Change-Id: I4903b51a34b597a2e84d771b52cf629f877dba05
[ROCm/clr commit: 0b475284e9]
This commit is contained in:
@@ -692,15 +692,10 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
}
|
||||
|
||||
if (engine != HwQueueEngine::Unknown) {
|
||||
if (copyMask == Device::kSkipQueryStatus) {
|
||||
// Do not query engine status or take copy_on_engine path
|
||||
status = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
if (copyMask == 0) {
|
||||
// Check SDMA engine status
|
||||
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask %x",
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Query copy engine status %x, free_engine mask 0x%x",
|
||||
status, freeEngineMask);
|
||||
// Return a mask with the rightmost bit set
|
||||
copyMask = freeEngineMask - (freeEngineMask & (freeEngineMask - 1));
|
||||
@@ -729,7 +724,7 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
auto wait_events = gpu().Barriers().WaitingSignal(engine);
|
||||
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
|
||||
"HSA Async Blit Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
|
||||
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
|
||||
"completion_signal=0x%zx",
|
||||
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
|
||||
active.handle);
|
||||
|
||||
@@ -1243,17 +1243,17 @@ bool Device::populateOCLDeviceConstants() {
|
||||
|
||||
// Find SDMA read mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
|
||||
&maxSdmaReadMask)) {
|
||||
&maxSdmaReadMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaReadMask > 0 && "No SDMA engines available for Read");
|
||||
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
|
||||
|
||||
// Find SDMA write mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
|
||||
&maxSdmaWriteMask)) {
|
||||
&maxSdmaWriteMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaWriteMask > 0 && "No SDMA engines available for Write");
|
||||
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
|
||||
|
||||
info_.localMemSizePerCU_ = group_segment_size;
|
||||
info_.localMemSize_ = group_segment_size;
|
||||
@@ -1657,8 +1657,12 @@ bool Device::populateOCLDeviceConstants() {
|
||||
LogError("HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS query failed.");
|
||||
}
|
||||
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, xnack: %d, direct host access: %d",
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Gfx Major/Minor/Stepping: %d/%d/%d", isa().versionMajor(),
|
||||
isa().versionMinor(), isa().versionStepping());
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HMM support: %d, XNACK: %d, Direct host access: %d",
|
||||
info_.hmmSupported_, info_.hmmCpuMemoryAccessible_, info_.hmmDirectHostAccess_);
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Max SDMA Read Mask: 0x%x, Max SDMA Write Mask: 0x%x",
|
||||
maxSdmaReadMask_, maxSdmaWriteMask_);
|
||||
|
||||
info_.globalCUMask_ = {};
|
||||
info_.virtualMemoryManagement_ = false;
|
||||
@@ -3338,12 +3342,7 @@ uint32_t Device::fetchSDMAMask(const device::BlitManager* handle, bool readEngin
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t mask = (readEngine ? maxSdmaReadMask : maxSdmaWriteMask) & engine;
|
||||
if (engine != 0 && mask == 0 ) {
|
||||
return kSkipQueryStatus;
|
||||
} else {
|
||||
return mask;
|
||||
}
|
||||
return (readEngine ? maxSdmaReadMask_ : maxSdmaWriteMask_) & engine;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
|
||||
@@ -624,13 +624,12 @@ class Device : public NullDevice {
|
||||
std::vector<std::map<hsa_queue_t*, QueueInfo>> queueWithCUMaskPool_;
|
||||
|
||||
//! Read and Write mask for device<->host
|
||||
uint32_t maxSdmaReadMask;
|
||||
uint32_t maxSdmaWriteMask;
|
||||
uint32_t maxSdmaReadMask_;
|
||||
uint32_t maxSdmaWriteMask_;
|
||||
//! Map of SDMA engineId<->stream
|
||||
mutable std::map<uint32_t, const device::BlitManager*> engineAssignMap_;
|
||||
|
||||
public:
|
||||
constexpr static uint32_t kSkipQueryStatus = 1 << 31;
|
||||
std::atomic<uint> numOfVgpus_; //!< Virtual gpu unique index
|
||||
|
||||
//! enum for keeping the total and available queue priorities
|
||||
|
||||
Reference in New Issue
Block a user