rocr: Workaround for SDMA POLL_REGMEM on gfx9.0

Poll the dependent signals twice on all gfx9.0 GPUs except gfx90a.
This is needed as a workaround for a rare issue where SDMA_POLL_REGMEM
may return before the polled memory is actually cleared.


[ROCm/ROCR-Runtime commit: 6903a41b1d]
Этот коммит содержится в:
David Yat Sin
2025-03-10 14:51:25 +00:00
коммит произвёл Yat Sin, David
родитель ecb119aec3
Коммит 9e8859636e
+23
Просмотреть файл
@@ -301,6 +301,15 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
num_poll_command++;
}
}
// Workaround for a rare issue on gfx9.0 GPUs (all except gfx90a) where
// SDMA_OP_POLL_REGMEM returns before the polled memory is cleared
static bool doublePoll = agent_->supported_isas()[0]->GetMajorVersion() == 9 &&
agent_->supported_isas()[0]->GetMinorVersion() == 0 &&
agent_->supported_isas()[0]->GetStepping() != 10;
if (doublePoll)
num_poll_command *= 2;
const uint32_t total_poll_command_size =
(num_poll_command * poll_command_size_);
@@ -386,12 +395,26 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
command_addr += poll_command_size_;
bytes_written_[wrapped_index] = prior_bytes;
wrapped_index += poll_command_size_;
if (doublePoll) {
BuildPollCommand(command_addr, &signal_addr[1], 0);
command_addr += poll_command_size_;
bytes_written_[wrapped_index] = prior_bytes;
wrapped_index += poll_command_size_;
}
}
// Then wait for the lower 32 bits to 0.
BuildPollCommand(command_addr, &signal_addr[0], 0);
command_addr += poll_command_size_;
bytes_written_[wrapped_index] = prior_bytes;
wrapped_index += poll_command_size_;
if (doublePoll) {
BuildPollCommand(command_addr, &signal_addr[0], 0);
command_addr += poll_command_size_;
bytes_written_[wrapped_index] = prior_bytes;
wrapped_index += poll_command_size_;
}
}
}