SWDEV-272496 - Wait on CPU before switching to GPU wait

GPU waits have noticeable overhead: on compute they require an extra
AQL barrier packet, and on SDMA they interact with power-saving features.
This change adds an active wait on the CPU for 30 us before switching to
a GPU wait, so that tiny operations in the app can complete without it.

Change-Id: I761ba3af595f3f48544980058a9077dda15aa5f9


[ROCm/clr commit: ac387f9b03]
This commit is contained in:
German Andryeyev
2021-02-17 14:48:08 -05:00
کامیت شده توسط Saleel Kudchadker
والد 98c59bae8b
کامیت 33c1e3d14d
2 فایلهای تغییر یافته به همراه 29 افزوده شده و 15 حذف شده
@@ -388,11 +388,15 @@ hsa_signal_t* VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
// Early signal status check
if (hsa_signal_load_relaxed(prof_signal->signal_) > 0) {
const Settings& settings = gpu_.dev().settings();
// Wait on CPU if requested
if (settings.cpu_wait_for_signal_) {
CpuWaitForSignal(prof_signal);
} else {
return &prof_signal->signal_;
// Actively wait on CPU for 30 us to avoid extra overheads of signal tracking on GPU
if (!WaitForSignal<kTimeout30us>(prof_signal->signal_)) {
if (settings.cpu_wait_for_signal_) {
// Wait on CPU for completion if requested
CpuWaitForSignal(prof_signal);
} else {
// Return HSA signal for tracking on GPU
return &prof_signal->signal_;
}
}
}
}
@@ -50,21 +50,31 @@ struct ProfilingSignal : public amd::HeapObject {
};
// Initial HSA signal value
constexpr hsa_signal_value_t kInitSignalValueOne = 1;
constexpr static hsa_signal_value_t kInitSignalValueOne = 1;
// Timeouts for HSA signal wait
constexpr static uint64_t kTimeout30us = 30000;
constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
template <uint64_t wait_time = 0>
inline bool WaitForSignal(hsa_signal_t signal) {
constexpr uint64_t Timeout30us = 30000;
constexpr uint64_t UnlimitedWait = std::numeric_limits<uint64_t>::max();
uint64_t timeout = (ROC_ACTIVE_WAIT) ? UnlimitedWait : Timeout30us;
// Active wait with a timeout
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
timeout, HSA_WAIT_STATE_ACTIVE) != 0) {
// Wait until the completion with CPU suspend
if (wait_time != 0) {
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
UnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) {
wait_time, HSA_WAIT_STATE_ACTIVE) != 0) {
return false;
}
} else {
uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout30us;
// Active wait with a timeout
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
timeout, HSA_WAIT_STATE_ACTIVE) != 0) {
// Wait until the completion with CPU suspend
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) {
return false;
}
}
}
return true;
}