SWDEV-272496 - Wait on CPU before switching to GPU wait
GPU waits have noticeable overhead: on compute they need an extra
AQL barrier packet, and on SDMA they are affected by power-saving
features. This change first waits actively on the CPU for 30 us, so
that apps with tiny operations can avoid the GPU wait.
Change-Id: I761ba3af595f3f48544980058a9077dda15aa5f9
[ROCm/clr commit: ac387f9b03]
This commit is contained in:
کامیت شده توسط
Saleel Kudchadker
والد
98c59bae8b
کامیت
33c1e3d14d
@@ -388,11 +388,15 @@ hsa_signal_t* VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
|
||||
// Early signal status check
|
||||
if (hsa_signal_load_relaxed(prof_signal->signal_) > 0) {
|
||||
const Settings& settings = gpu_.dev().settings();
|
||||
// Wait on CPU if requested
|
||||
if (settings.cpu_wait_for_signal_) {
|
||||
CpuWaitForSignal(prof_signal);
|
||||
} else {
|
||||
return &prof_signal->signal_;
|
||||
// Actively wait on CPU for 30 us to avoid extra overheads of signal tracking on GPU
|
||||
if (!WaitForSignal<kTimeout30us>(prof_signal->signal_)) {
|
||||
if (settings.cpu_wait_for_signal_) {
|
||||
// Wait on CPU for completion if requested
|
||||
CpuWaitForSignal(prof_signal);
|
||||
} else {
|
||||
// Return HSA signal for tracking on GPU
|
||||
return &prof_signal->signal_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,21 +50,31 @@ struct ProfilingSignal : public amd::HeapObject {
|
||||
};
|
||||
|
||||
// Initial HSA signal value
|
||||
constexpr hsa_signal_value_t kInitSignalValueOne = 1;
|
||||
constexpr static hsa_signal_value_t kInitSignalValueOne = 1;
|
||||
|
||||
// Timeouts for HSA signal wait
|
||||
constexpr static uint64_t kTimeout30us = 30000;
|
||||
constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
template <uint64_t wait_time = 0>
|
||||
inline bool WaitForSignal(hsa_signal_t signal) {
|
||||
constexpr uint64_t Timeout30us = 30000;
|
||||
constexpr uint64_t UnlimitedWait = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t timeout = (ROC_ACTIVE_WAIT) ? UnlimitedWait : Timeout30us;
|
||||
|
||||
// Active wait with a timeout
|
||||
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
|
||||
timeout, HSA_WAIT_STATE_ACTIVE) != 0) {
|
||||
// Wait until the completion with CPU suspend
|
||||
if (wait_time != 0) {
|
||||
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
|
||||
UnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
wait_time, HSA_WAIT_STATE_ACTIVE) != 0) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout30us;
|
||||
|
||||
// Active wait with a timeout
|
||||
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
|
||||
timeout, HSA_WAIT_STATE_ACTIVE) != 0) {
|
||||
// Wait until the completion with CPU suspend
|
||||
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
|
||||
kUnlimitedWait, HSA_WAIT_STATE_BLOCKED) != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
مرجع در شماره جدید
Block a user