diff --git a/rocclr/platform/command.cpp b/rocclr/platform/command.cpp
index 522f95cd03..fb98c982a8 100644
--- a/rocclr/platform/command.cpp
+++ b/rocclr/platform/command.cpp
@@ -85,6 +85,9 @@ uint64_t Event::recordProfilingInfo(int32_t status, uint64_t timeStamp) {
   return timeStamp;
 }
 
+// Global epoch time since the first processed command
+uint64_t epoch = 0;
+
 bool Event::setStatus(int32_t status, uint64_t timeStamp) {
   assert(status <= CL_QUEUED && "invalid status");
 
@@ -96,6 +99,9 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
 
   if (profilingInfo().enabled_) {
     timeStamp = recordProfilingInfo(status, timeStamp);
+    if (epoch == 0) {
+      epoch = profilingInfo().queued_;
+    }
   }
 
   if (!make_atomic(status_).compareAndSet(currentStatus, status)) {
@@ -112,8 +118,6 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
   }
 
   if (status <= CL_COMPLETE) {
-    ClPrint(LOG_DEBUG, LOG_CMD, "command %p complete", &command());
-
     // Before we notify the waiters that this event reached the CL_COMPLETE
     // status, we release all the resources associated with this instance.
     releaseResources();
@@ -123,6 +127,15 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
     if (referenceCount() > 1) {
       signal();
     }
+
+    // Cast explicitly so the variadic arguments match the %lld conversions
+    // regardless of how wide `long` is on the target.
+    ClPrint(LOG_DEBUG, LOG_CMD, "command %p complete (Wall: %lld, CPU: %lld, GPU: %lld us)",
+            &command(),
+            static_cast<long long>((profilingInfo().end_ - epoch) / 1000),
+            static_cast<long long>((profilingInfo().submitted_ - profilingInfo().queued_) / 1000),
+            static_cast<long long>((profilingInfo().end_ - profilingInfo().start_) / 1000));
+
     release();
   }
 
diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp
index 6a43cc7cd4..63252fb455 100644
--- a/rocclr/platform/commandqueue.cpp
+++ b/rocclr/platform/commandqueue.cpp
@@ -86,15 +86,26 @@ bool HostQueue::terminate() {
 }
 
 void HostQueue::finish() {
-  // Send a finish to make sure we finished all commands
-  Command* command = new Marker(*this, false);
-  if (command == NULL) {
-    return;
+  Command* command = nullptr;
+  if (IS_HIP) {
+    // NOTE(review): nothing here takes or drops a reference on the returned command;
+    // confirm it cannot be destroyed while awaitCompletion() is blocked on it.
+    command = getLastQueuedCommand(false);
+    if (nullptr != command) {
+      command->awaitCompletion();
+    }
+  }
+  if (nullptr == command) {
+    // Send a finish to make sure we finished all commands
+    command = new Marker(*this, false);
+    if (command == NULL) {
+      return;
+    }
+    ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued");
+    command->enqueue();
+    command->awaitCompletion();
+    command->release();
   }
-  ClPrint(LOG_DEBUG, LOG_CMD, "marker is queued");
-  command->enqueue();
-  command->awaitCompletion();
-  command->release();
   ClPrint(LOG_DEBUG, LOG_CMD, "All commands finished");
 }
 
diff --git a/rocclr/utils/debug.hpp b/rocclr/utils/debug.hpp
index 6672c2e6df..eacb0bbc68 100644
--- a/rocclr/utils/debug.hpp
+++ b/rocclr/utils/debug.hpp
@@ -47,6 +47,7 @@ enum LogMask {
   LOG_AQL2 = 0x00002000, //!< Show raw bytes of AQL packet
   LOG_CODE = 0x00004000, //!< Show code creation debug
   LOG_CMD2 = 0x00008000, //!< More detailed command info, including barrier commands
+  LOG_LOCATION = 0x00010000, //!< Log message location
   LOG_ALWAYS = 0xFFFFFFFF, //!< Log always even mask flag is zero
 };
 
@@ -178,7 +179,11 @@ inline void warning(const char* msg) { amd::report_warning(msg); }
   do { \
     if (LOG_LEVEL >= level) { \
       if (GPU_LOG_MASK & mask || mask == amd::LOG_ALWAYS) { \
-        amd::log_printf(level, __FILE__, __LINE__, format, ##__VA_ARGS__); \
+        if (GPU_LOG_MASK & amd::LOG_LOCATION) { \
+          amd::log_printf(level, __FILE__, __LINE__, format, ##__VA_ARGS__); \
+        } else { \
+          amd::log_printf(level, "", 0, format, ##__VA_ARGS__); \
+        } \
       } \
     } \
   } while (false)