SWDEV-278896 - Increase thresholds to match MT behavior

MT doesn't use GPU waits; it relies on CPU waits to sync between engines.
Increase the threshold values for CPU waits in direct dispatch,
so that its behavior is closer to MT.

Change-Id: Ia41c3cb812614962aff2746b6cf858f1bf77dda2


[ROCm/clr commit: ca2ea70a6c]
Этот коммит содержится в:
German Andryeyev
2021-04-16 17:47:57 -04:00
родитель c8c1ed4c13
Коммит bb636d4fca
2 изменённых файлов: 5 добавлений и 5 удалений
+2 -2
Просмотреть файл
@@ -394,8 +394,8 @@ hsa_signal_t* VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
// Early signal status check
if (hsa_signal_load_relaxed(prof_signal->signal_) > 0) {
const Settings& settings = gpu_.dev().settings();
-// Actively wait on CPU for 50 us to avoid extra overheads of signal tracking on GPU
-if (!WaitForSignal<kTimeout50us>(prof_signal->signal_)) {
+// Actively wait on CPU for 750 us to avoid extra overheads of signal tracking on GPU
+if (!WaitForSignal<kTimeout750us>(prof_signal->signal_)) {
if (settings.cpu_wait_for_signal_) {
// Wait on CPU for completion if requested
CpuWaitForSignal(prof_signal);
+3 -3
Просмотреть файл
@@ -52,8 +52,8 @@ struct ProfilingSignal : public amd::HeapObject {
constexpr static hsa_signal_value_t kInitSignalValueOne = 1;
// Timeouts for HSA signal wait
-constexpr static uint64_t kTimeout30us = 30000;
-constexpr static uint64_t kTimeout50us = 50000;
+constexpr static uint64_t kTimeout100us = 100000;
+constexpr static uint64_t kTimeout750us = 750000;
constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
template <uint64_t wait_time = 0>
@@ -64,7 +64,7 @@ inline bool WaitForSignal(hsa_signal_t signal) {
return false;
}
} else {
-uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout30us;
+uint64_t timeout = (ROC_ACTIVE_WAIT) ? kUnlimitedWait : kTimeout100us;
// Active wait with a timeout
if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,