SWDEV-384557 - Enable SDMA query
Change-Id: Ibb0a8d131f799985a4d4adbf753261e58c04157f
[ROCm/clr commit: aa6eb555e2]
This commit is contained in:
@@ -683,23 +683,23 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
|
||||
uint32_t copyMask = 0;
|
||||
uint32_t freeEngineMask = 0;
|
||||
-bool useRegularCopyApi = !DEBUG_CLR_USE_SDMA_QUERY;
|
||||
+bool kUseRegularCopyApi = 0;
|
||||
|
||||
HwQueueEngine engine = HwQueueEngine::Unknown;
|
||||
if ((srcAgent.handle == dev().getCpuAgent().handle) &&
|
||||
(dstAgent.handle != dev().getCpuAgent().handle)) {
|
||||
engine = HwQueueEngine::SdmaWrite;
|
||||
-copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
|
||||
+copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
|
||||
} else if ((srcAgent.handle != dev().getCpuAgent().handle) &&
|
||||
(dstAgent.handle == dev().getCpuAgent().handle)) {
|
||||
engine = HwQueueEngine::SdmaRead;
|
||||
-copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
|
||||
+copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
|
||||
}
|
||||
|
||||
auto wait_events = gpu().Barriers().WaitingSignal(engine);
|
||||
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
|
||||
|
||||
-if (!useRegularCopyApi && engine != HwQueueEngine::Unknown) {
|
||||
+if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) {
|
||||
if (copyMask == 0) {
|
||||
// Check SDMA engine status
|
||||
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
|
||||
@@ -723,11 +723,11 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
size[0], wait_events.size(),
|
||||
wait_events.data(), active, copyEngine, false);
|
||||
} else {
|
||||
-useRegularCopyApi = true;
|
||||
+kUseRegularCopyApi = true;
|
||||
}
|
||||
}
|
||||
|
||||
-if (engine == HwQueueEngine::Unknown || useRegularCopyApi) {
|
||||
+if (engine == HwQueueEngine::Unknown || kUseRegularCopyApi) {
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
|
||||
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
|
||||
"completion_signal=0x%zx",
|
||||
|
||||
@@ -1255,21 +1255,19 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
assert(group_segment_size > 0);
|
||||
|
||||
if (DEBUG_CLR_USE_SDMA_QUERY) {
|
||||
// Find SDMA read mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
|
||||
&maxSdmaReadMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
|
||||
|
||||
// Find SDMA write mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
|
||||
&maxSdmaWriteMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
|
||||
// Find SDMA read mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
|
||||
&maxSdmaReadMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
|
||||
|
||||
// Find SDMA write mask
|
||||
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
|
||||
&maxSdmaWriteMask_)) {
|
||||
return false;
|
||||
}
|
||||
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
|
||||
|
||||
info_.localMemSizePerCU_ = group_segment_size;
|
||||
info_.localMemSize_ = group_segment_size;
|
||||
|
||||
@@ -288,12 +288,11 @@ release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \
|
||||
"Set initial heap size for device malloc.") \
|
||||
release(bool, HIP_FORCE_DEV_KERNARG, 0, \
|
||||
"Force device mem for kernel args.") \
|
||||
release(bool, DEBUG_CLR_USE_SDMA_QUERY, 0, \
|
||||
"Use SDMA query API to make copy decisions.") \
|
||||
release(uint, DEBUG_CLR_GRAPH_MAX_AQL_BUFFER_SIZE, 32, \
|
||||
"Size of AQL buffering queue") \
|
||||
release(bool, DEBUG_CLR_GRAPH_ENABLE_BUFFERING, false, \
|
||||
"Enable/Disable graph AQL buffering")
|
||||
|
||||
namespace amd {
|
||||
|
||||
extern bool IS_HIP;
|
||||
|
||||
Reference in New Issue
Block a user