SWDEV-278896 - Increase thresholds to match MT behavior
MT doesn't use GPU waits; it uses CPU waits to synchronize between engines.
Increase the CPU-wait threshold values used for direct dispatch.
This brings direct-dispatch behavior closer to MT.
Change-Id: Ia41c3cb812614962aff2746b6cf858f1bf77dda2
[ROCm/clr commit: ca2ea70a6c]
此提交包含在:
@@ -394,8 +394,8 @@ hsa_signal_t* VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
|
||||
// Early signal status check
|
||||
if (hsa_signal_load_relaxed(prof_signal->signal_) > 0) {
|
||||
const Settings& settings = gpu_.dev().settings();
|
||||
-// Actively wait on CPU for 50 us to avoid extra overheads of signal tracking on GPU
|
||||
-if (!WaitForSignal<kTimeout50us>(prof_signal->signal_)) {
|
||||
+// Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU
|
||||
+if (!WaitForSignal<kTimeout750us>(prof_signal->signal_)) {
|
||||
if (settings.cpu_wait_for_signal_) {
|
||||
// Wait on CPU for completion if requested
|
||||
CpuWaitForSignal(prof_signal);
|
||||
|
||||
@@ -52,8 +52,8 @@ struct ProfilingSignal : public amd::HeapObject {
|
||||
constexpr static hsa_signal_value_t kInitSignalValueOne = 1;
|
||||
|
||||
// Timeouts for HSA signal wait
|
||||
-constexpr static uint64_t kTimeout30us = 30000;
|
||||
-constexpr static uint64_t kTimeout50us = 50000;
|
||||
+constexpr static uint64_t kTimeout100us = 100000;
|
||||
+constexpr static uint64_t kTimeout750us = 750000;
|
||||
constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
template <uint64_t wait_time = 0>
|
||||
@@ -64,7 +64,7 @@ inline bool WaitForSignal(hsa_signal_t signal) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
-uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout30us;
|
||||
+uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout100us;
|
||||
|
||||
// Active wait with a timeout
|
||||
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
|
||||
|
||||
新增問題並參考
封鎖使用者