SWDEV-384557 - Use toggle for SDMA query
- Use HIP_USE_SDMA_QUERY env var toggle for new API use. Env var is 0 by default Change-Id: If725a0c41e15f78a1a6c3f47942954fe9240b4db
Этот коммит содержится в:
коммит произвёл
Maneesh Gupta
родитель
8047d8e3e8
Коммит
8d193c32bb
@@ -679,20 +679,23 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
|
||||
uint32_t copyMask = 0;
|
||||
uint32_t freeEngineMask = 0;
|
||||
bool useRegularCopyApi = false;
|
||||
bool useRegularCopyApi = !HIP_USE_SDMA_QUERY;
|
||||
|
||||
HwQueueEngine engine = HwQueueEngine::Unknown;
|
||||
if ((srcAgent.handle == dev().getCpuAgent().handle) &&
|
||||
(dstAgent.handle != dev().getCpuAgent().handle)) {
|
||||
engine = HwQueueEngine::SdmaWrite;
|
||||
copyMask = dev().fetchSDMAMask(this, false);
|
||||
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, false);
|
||||
} else if ((srcAgent.handle != dev().getCpuAgent().handle) &&
|
||||
(dstAgent.handle == dev().getCpuAgent().handle)) {
|
||||
engine = HwQueueEngine::SdmaRead;
|
||||
copyMask = dev().fetchSDMAMask(this, true);
|
||||
copyMask = useRegularCopyApi ? 0 : dev().fetchSDMAMask(this, true);
|
||||
}
|
||||
|
||||
if (engine != HwQueueEngine::Unknown) {
|
||||
auto wait_events = gpu().Barriers().WaitingSignal(engine);
|
||||
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
|
||||
|
||||
if (!useRegularCopyApi && engine != HwQueueEngine::Unknown) {
|
||||
if (copyMask == 0) {
|
||||
// Check SDMA engine status
|
||||
status = hsa_amd_memory_copy_engine_status(dstAgent, srcAgent, &freeEngineMask);
|
||||
@@ -703,8 +706,6 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
}
|
||||
|
||||
if (copyMask != 0 && status == HSA_STATUS_SUCCESS) {
|
||||
auto wait_events = gpu().Barriers().WaitingSignal(engine);
|
||||
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
|
||||
// Copy on the first available free engine if ROCr returns a valid mask
|
||||
hsa_amd_sdma_engine_id_t copyEngine = static_cast<hsa_amd_sdma_engine_id_t>(copyMask);
|
||||
|
||||
@@ -717,33 +718,26 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
|
||||
status = hsa_amd_memory_async_copy_on_engine(dst, dstAgent, src, srcAgent,
|
||||
size[0], wait_events.size(),
|
||||
wait_events.data(), active, copyEngine, false);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
gpu().Barriers().ResetCurrentSignal();
|
||||
}
|
||||
} else {
|
||||
useRegularCopyApi = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (engine == HwQueueEngine::Unknown || useRegularCopyApi) {
|
||||
auto wait_events = gpu().Barriers().WaitingSignal(engine);
|
||||
hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
|
||||
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
|
||||
"completion_signal=0x%zx",
|
||||
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
|
||||
active.handle);
|
||||
"HSA Async Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
|
||||
"completion_signal=0x%zx",
|
||||
dst, src, size[0], (wait_events.size() != 0) ? wait_events[0].handle : 0,
|
||||
active.handle);
|
||||
|
||||
status = hsa_amd_memory_async_copy(dst, dstAgent, src, srcAgent,
|
||||
size[0], wait_events.size(), wait_events.data(), active);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
gpu().Barriers().ResetCurrentSignal();
|
||||
}
|
||||
}
|
||||
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
gpu().addSystemScope();
|
||||
} else {
|
||||
gpu().Barriers().ResetCurrentSignal();
|
||||
LogPrintfError("HSA copy failed with code %d, falling to Blit copy", status);
|
||||
}
|
||||
|
||||
|
||||
@@ -285,7 +285,9 @@ release(bool, HIPRTC_USE_RUNTIME_UNBUNDLER, false, \
|
||||
release(size_t, HIP_INITIAL_DM_SIZE, 8 * Mi, \
|
||||
"Set initial heap size for device malloc.") \
|
||||
release(bool, HIP_FORCE_DEV_KERNARG, 0, \
|
||||
"Force device mem for kernel args")
|
||||
"Force device mem for kernel args") \
|
||||
release(bool, HIP_USE_SDMA_QUERY, 0, \
|
||||
"Use SDMA query API to make copy decisions")
|
||||
|
||||
namespace amd {
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user