SWDEV-384557 - Use toggle for SDMA query

- Use HIP_USE_SDMA_QUERY env var toggle for new API use. Env var is 0 by
default

Change-Id: If725a0c41e15f78a1a6c3f47942954fe9240b4db
Этот коммит содержится в:
Saleel Kudchadker
2023-06-14 06:07:20 +00:00
коммит произвёл Maneesh Gupta
родитель 8047d8e3e8
Коммит 8d193c32bb
2 изменённых файлов: 15 добавлений и 19 удалений
+12 -18
Просмотреть файл
@@ -679,20 +679,23 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
uint32_t copyMask = 0;
uint32_t freeEngineMask = 0;
bool useRegularCopyApi = false;
bool useRegularCopyApi = !HIP_USE_SDMA_QUERY;
HwQueueEngine engine = HwQueueEngine::Unknown;
if ((srcAgent.handle == dev().getCpuAgent().handle) &&
(dstAgent.handle != dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaWrite;
copyMask = dev().fetchSDMAMask(this, false);
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
} else if ((srcAgent.handle != dev().getCpuAgent().handle) &&
(dstAgent.handle == dev().getCpuAgent().handle)) {
engine = HwQueueEngine::SdmaRead;
copyMask = dev().fetchSDMAMask(this, true);
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
}
if (engine != HwQueueEngine::Unknown) {
auto wait_events = gpu().Barriers().WaitingSignal(engine);
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
if (!useRegularCopyApi && engine != HwQueueEngine::Unknown) {
if (copyMask == 0) {
// Check SDMA engine status
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
@@ -703,8 +706,6 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
}
if (copyMask != 0 && status == HSA_STATUS_SUCCESS) {
auto wait_events = gpu().Barriers().WaitingSignal(engine);
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
// Copy on the first available free engine if ROCr returns a valid mask
hsa_amd_sdma_engine_id_t copyEngine = static_cast<hsa_amd_sdma_engine_id_t>(copyMask);
@@ -717,33 +718,26 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
status = hsa_amd_memory_async_copy_on_engine(dst, dstAgent, src, srcAgent,
size[0], wait_events.size(),
wait_events.data(), active, copyEngine, false);
if (status != HSA_STATUS_SUCCESS) {
gpu().Barriers().ResetCurrentSignal();
}
} else {
useRegularCopyApi = true;
}
}
if (engine == HwQueueEngine::Unknown || useRegularCopyApi) {
auto wait_events = gpu().Barriers().WaitingSignal(engine);
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"completion_signal=0x%zx",
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
active.handle);
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
"completion_signal=0x%zx",
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
active.handle);
status = hsa_amd_memory_async_copy(dst, dstAgent, src, srcAgent,
size[0], wait_events.size(), wait_events.data(), active);
if (status != HSA_STATUS_SUCCESS) {
gpu().Barriers().ResetCurrentSignal();
}
}
if (status == HSA_STATUS_SUCCESS) {
gpu().addSystemScope();
} else {
gpu().Barriers().ResetCurrentSignal();
LogPrintfError("HSA copy failed with code %d, falling to Blit copy", status);
}
+3 -1
Просмотреть файл
@@ -285,7 +285,9 @@ release(bool, HIPRTC_USE_RUNTIME_UNBUNDLER, false, \
release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \
"Set initial heap size for device malloc.") \
release(bool, HIP_FORCE_DEV_KERNARG, 0, \
"Force device mem for kernel args")
"Force device mem for kernel args") \
release(bool, HIP_USE_SDMA_QUERY, 0, \
"Use SDMA query API to make copy decisions")
namespace amd {