From aa6eb555e267fffdb81fac2ea16a103ab849f2b5 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 13 Jul 2023 09:16:42 -0700 Subject: [PATCH] SWDEV-384557 - Enable SDMA query Change-Id: Ibb0a8d131f799985a4d4adbf753261e58c04157f --- rocclr/device/rocm/rocblit.cpp | 12 ++++++------ rocclr/device/rocm/rocdevice.cpp | 26 ++++++++++++-------------- rocclr/utils/flags.hpp | 3 +-- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 1cdd232dac..caabbf7469 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -683,23 +683,23 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, uint32_t copyMask = 0; uint32_t freeEngineMask = 0; - bool useRegularCopyApi = !DEBUG_CLR_USE_SDMA_QUERY; + bool kUseRegularCopyApi = 0; HwQueueEngine engine = HwQueueEngine::Unknown; if ((srcAgent.handle == dev().getCpuAgent().handle) && (dstAgent.handle != dev().getCpuAgent().handle)) { engine = HwQueueEngine::SdmaWrite; - copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false); + copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false); } else if ((srcAgent.handle != dev().getCpuAgent().handle) && (dstAgent.handle == dev().getCpuAgent().handle)) { engine = HwQueueEngine::SdmaRead; - copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true); + copyMask = kUseRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true); } auto wait_events = gpu().Barriers().WaitingSignal(engine); hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp()); - if (!useRegularCopyApi && engine != HwQueueEngine::Unknown) { + if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) { if (copyMask == 0) { // Check SDMA engine status status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask); @@ -723,11 +723,11 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory, size[0], wait_events.size(), wait_events.data(), active, copyEngine, false); } else { - useRegularCopyApi = true; + kUseRegularCopyApi = true; } } - if (engine == HwQueueEngine::Unknown || useRegularCopyApi) { + if (engine == HwQueueEngine::Unknown || kUseRegularCopyApi) { ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, " "completion_signal=0x%zx", diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 8897edafff..3a9de22ac8 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -1255,21 +1255,19 @@ bool Device::populateOCLDeviceConstants() { } assert(group_segment_size > 0); - if (DEBUG_CLR_USE_SDMA_QUERY) { - // Find SDMA read mask - if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(), - &maxSdmaReadMask_)) { - return false; - } - assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read"); - - // Find SDMA write mask - if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(), - &maxSdmaWriteMask_)) { - return false; - } - assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write"); + // Find SDMA read mask + if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getCpuAgent(), getBackendDevice(), + &maxSdmaReadMask_)) { + return false; } + assert(maxSdmaReadMask_ > 0 && "No SDMA engines available for Read"); + + // Find SDMA write mask + if (HSA_STATUS_SUCCESS != hsa_amd_memory_copy_engine_status(getBackendDevice(), getCpuAgent(), + &maxSdmaWriteMask_)) { + return false; + } + assert(maxSdmaWriteMask_ > 0 && "No SDMA engines available for Write"); info_.localMemSizePerCU_ = group_segment_size; info_.localMemSize_ = group_segment_size; diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index 3fdd0b9765..3baae3e590 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -288,12 +288,11 @@ release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \ "Set initial heap size for device malloc.") \ release(bool, HIP_FORCE_DEV_KERNARG, 0, \ "Force device mem for kernel args.") \ -release(bool, DEBUG_CLR_USE_SDMA_QUERY, 0, \ - "Use SDMA query API to make copy decisions.") \ release(uint, DEBUG_CLR_GRAPH_MAX_AQL_BUFFER_SIZE, 32, \ "Size of AQL buffering queue") \ release(bool, DEBUG_CLR_GRAPH_ENABLE_BUFFERING, false, \ "Enable/Disable graph AQL buffering") + namespace amd { extern bool IS_HIP;