diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index b30b1873f4..ca66cdb7a8 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -124,6 +124,9 @@ void HostQueue::finish(bool cpu_wait) { // If command doesn't contain HW event and runtime didn't request CPU wait, // then force marker submit bool force_marker = false; + // Force CPU wait if profiler is enabled. PyTorch tests may use tracer's plugin and rely on + // profiling information to be available right after finish. Never drop a caller's wait. + cpu_wait = cpu_wait || activity_prof::IsEnabled(OP_ID_DISPATCH); if (AMD_DIRECT_DISPATCH && (command != nullptr) && !cpu_wait) { void* hw_event = (command->NotifyEvent() != nullptr) ? command->NotifyEvent()->HwEvent() : command->HwEvent();