SWDEV-278896 - Increase thresholds to match MT behavior

MT doesn't use GPU waits; it uses CPU waits to synchronize between engines.
Change the threshold values for CPU waits in direct dispatch.
That will bring the behavior closer to MT.

Change-Id: Ia41c3cb812614962aff2746b6cf858f1bf77dda2


[ROCm/clr commit: ca2ea70a6c]
此提交包含在:
German Andryeyev
2021-04-16 17:47:57 -04:00
父節點 c8c1ed4c13
當前提交 bb636d4fca
共有 2 個檔案被更改,包括 5 行新增和 5 行刪除
+2 -2
查看文件
@@ -394,8 +394,8 @@ hsa_signal_t* VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
// Early signal status check
if (hsa_signal_load_relaxed(prof_signal->signal_) > 0) {
const Settings& settings = gpu_.dev().settings();
// Actively wait on CPU for 50 us to avoid extra overheads of signal tracking on GPU
if (!WaitForSignal<kTimeout50us>(prof_signal->signal_)) {
// Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU
if (!WaitForSignal<kTimeout750us>(prof_signal->signal_)) {
if (settings.cpu_wait_for_signal_) {
// Wait on CPU for completion if requested
CpuWaitForSignal(prof_signal);
+3 -3
查看文件
@@ -52,8 +52,8 @@ struct ProfilingSignal : public amd::HeapObject {
constexpr static hsa_signal_value_t kInitSignalValueOne = 1;
// Timeouts for HSA signal wait
constexpr static uint64_t kTimeout30us = 30000;
constexpr static uint64_t kTimeout50us = 50000;
constexpr static uint64_t kTimeout100us = 100000;
constexpr static uint64_t kTimeout750us = 750000;
constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
template <uint64_t wait_time = 0>
@@ -64,7 +64,7 @@ inline bool WaitForSignal(hsa_signal_t signal) {
return false;
}
} else {
uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout30us;
uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout100us;
// Active wait with a timeout
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,