From 9e8859636e05d69bfb3e71a48647e7196e1121fb Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Mon, 10 Mar 2025 14:51:25 +0000 Subject: [PATCH] rocr: Workaround for SDMA POLL_REGMEM on gfx9.0 Poll the dependent signals twice on all gfx9.0 GPUs except gfx90a. This is needed as a workaround for a rare issue where SDMA_POLL_REGMEM may return before the memory is actually cleared. [ROCm/ROCR-Runtime commit: 6903a41b1d08783e137e74ff23a5beec3820d2c4] --- .../core/runtime/amd_blit_sdma.cpp | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index b3ba00454a..0e212b20b6 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -301,6 +301,15 @@ hsa_status_t BlitSdma: num_poll_command++; } } + + // Workaround for a rare issue on gfx9.0 GPUs (all except gfx90a) where + // SDMA_OP_POLL_REGMEM returns before the polled memory is cleared + static bool doublePoll = agent_->supported_isas()[0]->GetMajorVersion() == 9 && + agent_->supported_isas()[0]->GetMinorVersion() == 0 && + agent_->supported_isas()[0]->GetStepping() != 10; + if (doublePoll) + num_poll_command *= 2; + const uint32_t total_poll_command_size = (num_poll_command * poll_command_size_); @@ -386,12 +395,26 @@ hsa_status_t BlitSdma: command_addr += poll_command_size_; bytes_written_[wrapped_index] = prior_bytes; wrapped_index += poll_command_size_; + + if (doublePoll) { + BuildPollCommand(command_addr, &signal_addr[1], 0); + command_addr += poll_command_size_; + bytes_written_[wrapped_index] = prior_bytes; + wrapped_index += poll_command_size_; + } } // Then wait for the lower 32 bits to 0.
BuildPollCommand(command_addr, &signal_addr[0], 0); command_addr += poll_command_size_; bytes_written_[wrapped_index] = prior_bytes; wrapped_index += poll_command_size_; + + if (doublePoll) { + BuildPollCommand(command_addr, &signal_addr[0], 0); + command_addr += poll_command_size_; + bytes_written_[wrapped_index] = prior_bytes; + wrapped_index += poll_command_size_; + } } }