Allow default kernel to spin freely at first.

Impacts GPU_ONLY signal type latency when waiting for small operations.
Using this type improves total SDMA small copy performance by ~40% if
the signal is allowed to spin freely.

Change-Id: I27aa128c63a1bacb3f51fb08f166e4e1d6fef651
This commit is contained in:
Sean Keely
2019-08-12 18:31:43 -05:00
rodzic ea8c99f452
commit 5adb73fffd
@@ -89,6 +89,10 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
timer::fast_clock::time_point start_time, time;
start_time = timer::fast_clock::now();
// Set a polling timeout value
// Should be a few times bigger than null kernel latency
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
uint64_t hsa_freq;
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
const timer::fast_clock::duration fast_timeout =
@@ -127,7 +131,9 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
}
os::uSleep(20);
if (time - start_time > kMaxElapsed) {
os::uSleep(20);
}
}
}