Allow default kernel to spin freely at first.
Impacts GPU_ONLY signal type latency when waiting for small operations. Using this type improves total SDMA small copy performance by ~40% if the signal is allowed to spin freely.

Change-Id: I27aa128c63a1bacb3f51fb08f166e4e1d6fef651
This commit is contained in:
@@ -89,6 +89,10 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
|
||||
timer::fast_clock::time_point start_time, time;
|
||||
start_time = timer::fast_clock::now();
|
||||
|
||||
+// Set a polling timeout value
+// Should be a few times bigger than null kernel latency
+const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
|
||||
|
||||
uint64_t hsa_freq;
|
||||
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
|
||||
const timer::fast_clock::duration fast_timeout =
|
||||
@@ -127,7 +131,9 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
|
||||
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
|
||||
return hsa_signal_value_t(value);
|
||||
}
|
||||
-os::uSleep(20);
+if (time - start_time > kMaxElapsed) {
+os::uSleep(20);
+}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user