SWDEV-384557 - Enable SDMA query

Change-Id: Ibb0a8d131f799985a4d4adbf753261e58c04157f


[ROCm/clr commit: aa6eb555e2]
This commit is contained in:
Saleel Kudchadker
2023-07-13 09:16:42 -07:00
parent e45838a25c
commit 520b17fb07
3 changed files with 19 additions and 22 deletions
+6 -6
View File
@@ -683,23 +683,23 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
uint32_t copyMask = 0;
uint32_t freeEngineMask = 0;
bool useRegularCopyApi = !DEBUG_CLR_USE_SDMA_QUERY;
bool kUseRegularCopyApi = 0;
HwQueueEngine engine = HwQueueEngine::Unknown;
if ((srcAgent.handle == dev().getCpuAgent().handle) &&
(dstAgent.handle != dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaWrite;
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
} else if ((srcAgent.handle != dev().getCpuAgent().handle) &&
(dstAgent.handle == dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaRead;
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
}
auto wait_events = gpu().Barriers().WaitingSignal(engine);
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
if (!useRegularCopyApi && engine != HwQueueEngine::Unknown) {
if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) {
if (copyMask == 0) {
// Check SDMA engine status
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
@@ -723,11 +723,11 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
size[0], wait_events.size(),
wait_events.data(), active, copyEngine, false);
} else {
useRegularCopyApi = true;
kUseRegularCopyApi = true;
}
}
if (engine == HwQueueEngine::Unknown || useRegularCopyApi) {
if (engine == HwQueueEngine::Unknown || kUseRegularCopyApi) {
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"completion_signal=0x%zx",
+12 -14
View File
@@ -1255,21 +1255,19 @@ bool Device::populateOCLDeviceConstants() {
}
assert(group_segment_size > 0);
if (DEBUG_CLR_USE_SDMA_QUERY) {
// Find SDMA read mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
&maxSdmaReadMask_)) {
return false;
}
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
// Find SDMA write mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
&maxSdmaWriteMask_)) {
return false;
}
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
// Find SDMA read mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(),
&maxSdmaReadMask_)) {
return false;
}
assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read");
// Find SDMA write mask
if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(),
&maxSdmaWriteMask_)) {
return false;
}
assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write");
info_.localMemSizePerCU_ = group_segment_size;
info_.localMemSize_ = group_segment_size;
+1 -2
View File
@@ -288,12 +288,11 @@ release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \
"Set initial heap size for device malloc.") \
release(bool, HIP_FORCE_DEV_KERNARG, 0, \
"Force device mem for kernel args.") \
release(bool, DEBUG_CLR_USE_SDMA_QUERY, 0, \
"Use SDMA query API to make copy decisions.") \
release(uint, DEBUG_CLR_GRAPH_MAX_AQL_BUFFER_SIZE, 32, \
"Size of AQL buffering queue") \
release(bool, DEBUG_CLR_GRAPH_ENABLE_BUFFERING, false, \
"Enable/Disable graph AQL buffering")
namespace amd {
extern bool IS_HIP;