From 5438b6362e6ffb0864327d4005cb47980f4fbddc Mon Sep 17 00:00:00 2001
From: German Andryeyev
Date: Thu, 5 Oct 2023 17:33:31 -0400
Subject: [PATCH] SWDEV-424603 - Force CPU wait if profiling

Some pytorch tests use a tracer plugin and rely on profiling information
to be reported right after hipDeviceSynchronize()

Change-Id: Ib021a1e7b1a30b3c24de72627c471810f7f7878d
---
 rocclr/platform/commandqueue.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp
index b30b1873f4..ca66cdb7a8 100644
--- a/rocclr/platform/commandqueue.cpp
+++ b/rocclr/platform/commandqueue.cpp
@@ -124,6 +124,9 @@ void HostQueue::finish(bool cpu_wait) {
   // If command doesn't contain HW event and runtime didn't request CPU wait,
   // then force marker submit
   bool force_marker = false;
+  // Force CPU wait if profiler is enabled. Pytorch tests may use tracer's plugin and rely on
+  // profiling information to be available right after finish.
+  cpu_wait = activity_prof::IsEnabled(OP_ID_DISPATCH);
   if (AMD_DIRECT_DISPATCH && (command != nullptr) && !cpu_wait) {
     void* hw_event = (command->NotifyEvent() != nullptr) ?
       command->NotifyEvent()->HwEvent() : command->HwEvent();