From 7f47eb9d0229bf3bf3d95bf21497c4c79f0d916a Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Tue, 26 Jul 2022 16:04:20 -0700 Subject: [PATCH] Add TraceBuffer entry construction/destruction Change-Id: I354f36b0d7a0baea0efb75d5e81f169b5f969542 [ROCm/roctracer commit: 2513f9f51fef648e0ef445824d3d280420d35bba] --- .../roctracer/src/tracer_tool/trace_buffer.h | 70 +++++---- .../roctracer/src/tracer_tool/tracer_tool.cpp | 137 +++++++++--------- .../roctracer/test/directed/trace_buffer.cpp | 4 +- 3 files changed, 111 insertions(+), 100 deletions(-) diff --git a/projects/roctracer/src/tracer_tool/trace_buffer.h b/projects/roctracer/src/tracer_tool/trace_buffer.h index 6f5ffef712..04abae0771 100644 --- a/projects/roctracer/src/tracer_tool/trace_buffer.h +++ b/projects/roctracer/src/tracer_tool/trace_buffer.h @@ -145,6 +145,44 @@ class TraceBuffer : protected TraceBufferBase { } } + // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are + // monotonically increasing indices, with read_pointer % size always indexing inside the first + // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with + // the next invocation after changing its state to 'complete'. + void Flush() override { + std::lock_guard lock(write_mutex_); + auto write_index = write_index_.load(std::memory_order_relaxed); + + for (auto it = buffer_list_.begin(); it != buffer_list_.end();) { + auto end_of_buffer = read_index_ - read_index_ % size_ + size_; + + while (read_index_ < std::min(write_index.index, end_of_buffer)) { + Entry* entry = &(*it)[read_index_ % size_]; + + // The entry is not yet complete, stop flushing here. + if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; + + flush_callback_(entry); + entry->~Entry(); + + ++read_index_; + } + + // The buffer is still in use or the read pointer did not reach the end of the buffer. 
+ if (*it == write_index.buffer || read_index_ != end_of_buffer) return; + + // All entries in the current buffer are now processed. Destroy the buffer and move onto the + // next buffer in the list. + allocator_.deallocate(*it, size_); + it = buffer_list_.erase(it); + } + } + + template <typename... Args> Entry& Emplace(Args&&... args) { + return *new (GetEntry()) Entry(std::forward<Args>(args)...); + } + + private: Entry* GetEntry() { auto current = write_index_.load(std::memory_order_relaxed); @@ -194,38 +232,6 @@ class TraceBuffer : protected TraceBufferBase { } } - // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are - // monotonically increasing indices, with read_pointer % size always indexing inside the first - // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with - // the next invocation after changing its state to 'complete'. - void Flush() override { - std::lock_guard lock(write_mutex_); - auto write_index = write_index_.load(std::memory_order_relaxed); - - for (auto it = buffer_list_.begin(); it != buffer_list_.end();) { - auto end_of_buffer = read_index_ - read_index_ % size_ + size_; - - while (read_index_ < std::min(write_index.index, end_of_buffer)) { - Entry* entry = &(*it)[read_index_ % size_]; - - // The entry is not yet complete, stop flushing here. - if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; - - flush_callback_(entry); - ++read_index_; - } - - // The buffer is still in use or the read pointer did not reach the end of the buffer. 
- allocator_.deallocate(*it, size_); - it = buffer_list_.erase(it); - } - } - - private: void AllocateFreeBuffer() { assert(free_buffer_ == nullptr); diff --git a/projects/roctracer/src/tracer_tool/tracer_tool.cpp b/projects/roctracer/src/tracer_tool/tracer_tool.cpp index cb749d3eea..35c4156739 100644 --- a/projects/roctracer/src/tracer_tool/tracer_tool.cpp +++ b/projects/roctracer/src/tracer_tool/tracer_tool.cpp @@ -238,6 +238,19 @@ struct roctx_trace_entry_t { uint32_t tid; roctx_range_id_t rid; const char* message; + + roctx_trace_entry_t(uint32_t cid_, roctracer_timestamp_t time_, uint32_t pid_, uint32_t tid_, + roctx_range_id_t rid_, const char* message_) + : valid(roctracer::TRACE_ENTRY_INIT), + cid(cid_), + time(time_), + pid(pid_), + tid(tid_), + rid(rid_), + message(message_ != nullptr ? strdup(message_) : nullptr) {} + ~roctx_trace_entry_t() { + if (message != nullptr) free(const_cast(message)); + } }; // rocTX buffer flush function @@ -261,16 +274,9 @@ void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data void* /* user_arg */) { const roctx_api_data_t* data = reinterpret_cast(callback_data); - roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); - entry->cid = cid; - entry->time = util::timestamp_ns(); - entry->pid = GetPid(); - entry->tid = GetTid(); - entry->rid = data->args.id; - entry->message = (data->args.message != NULL) - ? strdup(data->args.message) /* FIXME: Who frees the message? 
*/ - : NULL; - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); + roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace( + cid, util::timestamp_ns(), GetPid(), GetTid(), data->args.id, data->args.message); + entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -284,6 +290,17 @@ struct hsa_api_trace_entry_t { uint32_t pid; uint32_t tid; hsa_api_data_t data; + + hsa_api_trace_entry_t(uint32_t cid_, roctracer_timestamp_t begin_, roctracer_timestamp_t end_, + uint32_t pid_, uint32_t tid_, const hsa_api_data_t& data_) + : valid(roctracer::TRACE_ENTRY_INIT), + cid(cid_), + begin(begin_), + end(end_), + pid(pid_), + tid(tid_), + data(data_) {} + ~hsa_api_trace_entry_t() {} }; void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) { @@ -307,14 +324,9 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, } else { const roctracer_timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? 
hsa_begin_timestamp : util::timestamp_ns(); - hsa_api_trace_entry_t* entry = hsa_api_trace_buffer.GetEntry(); - entry->cid = cid; - entry->begin = hsa_begin_timestamp; - entry->end = end_timestamp; - entry->pid = GetPid(); - entry->tid = GetTid(); - entry->data = *data; - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); + hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace( + cid, hsa_begin_timestamp, end_timestamp, GetPid(), GetTid(), *data); + entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } } @@ -323,7 +335,7 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, struct hip_api_trace_entry_t { std::atomic valid; - uint32_t domain; + activity_domain_t domain; uint32_t cid; roctracer_timestamp_t begin; roctracer_timestamp_t end; @@ -332,6 +344,24 @@ struct hip_api_trace_entry_t { hip_api_data_t data; const char* name; void* ptr; + + hip_api_trace_entry_t(activity_domain_t domain_, uint32_t cid_, roctracer_timestamp_t begin_, + roctracer_timestamp_t end_, uint32_t pid_, uint32_t tid_, + const hip_api_data_t& data_, const char* name_, void* ptr_) + : valid(roctracer::TRACE_ENTRY_INIT), + domain(domain_), + cid(cid_), + begin(begin_), + end(end_), + pid(pid_), + tid(tid_), + data(data_), + name(name_ != nullptr ? 
strdup(name_) : nullptr), + ptr(ptr_) {} + + ~hip_api_trace_entry_t() { + if (name != nullptr) free(const_cast(name)); + } }; static inline bool is_hip_kernel_launch_api(const uint32_t& cid) { @@ -395,7 +425,6 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, (void)arg; const hip_api_data_t* data = reinterpret_cast(callback_data); const roctracer_timestamp_t timestamp = util::timestamp_ns(); - hip_api_trace_entry_t* entry = NULL; if (data->phase == ACTIVITY_API_PHASE_ENTER) { hip_begin_timestamp = timestamp; @@ -403,68 +432,58 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, // Post init of HIP APU args hipApiArgsInit((hip_api_id_t)cid, const_cast(data)); - entry = hip_api_trace_buffer.GetEntry(); - entry->cid = cid; - entry->domain = domain; - entry->begin = hip_begin_timestamp; - entry->end = timestamp; - entry->pid = GetPid(); - entry->tid = GetTid(); - entry->data = *data; - entry->name = NULL; - entry->ptr = NULL; + std::string kernel_name; - if (cid == HIP_API_ID_hipMalloc) { - entry->ptr = *(data->args.hipMalloc.ptr); - } else if (is_hip_kernel_launch_api(cid)) { + if (is_hip_kernel_launch_api(cid)) { switch (cid) { case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: { const hipLaunchParams* listKernels = data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList; - std::string name_str = ""; for (int i = 0; i < data->args.hipLaunchCooperativeKernelMultiDevice.numDevices; ++i) { + std::stringstream ss; const hipLaunchParams& lp = listKernels[i]; - if (lp.func != NULL) { - const char* kernel_name = - roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream); - const int device_id = roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream); - name_str += std::string(kernel_name) + ":" + std::to_string(device_id) + ";"; + if (lp.func != nullptr) { + ss << roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, 
lp.stream) << ":" + << roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream) << ";"; } + kernel_name += ss.str(); } - entry->name = strdup(name_str.c_str()); break; } case HIP_API_ID_hipLaunchKernel: case HIP_API_ID_hipLaunchCooperativeKernel: { const void* f = data->args.hipLaunchKernel.function_address; hipStream_t stream = data->args.hipLaunchKernel.stream; - if (f != NULL) - entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); + if (f != nullptr) + kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream); break; } case HIP_API_ID_hipExtLaunchKernel: { const void* f = data->args.hipExtLaunchKernel.function_address; hipStream_t stream = data->args.hipExtLaunchKernel.stream; - if (f != NULL) - entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); + if (f != nullptr) + kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream); break; } default: { const hipFunction_t f = data->args.hipModuleLaunchKernel.f; - if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f)); + if (f != nullptr) kernel_name = roctracer::HipLoader::Instance().KernelNameRef(f); } } } - - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); + hip_api_trace_entry_t& entry = hip_api_trace_buffer.Emplace( + static_cast<activity_domain_t>(domain), cid, hip_begin_timestamp, timestamp, GetPid(), + GetTid(), *data, kernel_name.empty() ? nullptr : kernel_name.c_str(), + cid == HIP_API_ID_hipMalloc ? *(data->args.hipMalloc.ptr) : nullptr); + entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } DEBUG_TRACE( "hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") " "correlation_id(%lu) timestamp(%lu)\n", roctracer_op_string(domain, cid, 0), data->phase, cid, data, entry, - (entry) ? entry->name : NULL, data->correlation_id, timestamp); + (entry.name != nullptr) ? 
entry.name : "", data->correlation_id, timestamp); } void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { @@ -472,29 +491,15 @@ void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, const char* name = reinterpret_cast(callback_data); const roctracer_timestamp_t timestamp = util::timestamp_ns(); - hip_api_trace_entry_t* entry = hip_api_trace_buffer.GetEntry(); - entry->cid = 0; - entry->domain = domain; - entry->begin = timestamp; - entry->end = timestamp + 1; - entry->pid = GetPid(); - entry->tid = GetTid(); - entry->data = {}; - entry->name = strdup(name); - entry->ptr = NULL; - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); + hip_api_trace_entry_t& entry = hip_api_trace_buffer.Emplace( + static_cast(domain), cid, timestamp, timestamp + 1, GetPid(), GetTid(), + hip_api_data_t{}, name, nullptr); + entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } /////////////////////////////////////////////////////////////////////////////////////////////////////// // HSA API tracing -struct hip_act_trace_entry_t { - std::atomic valid; - uint32_t kind; - roctracer_timestamp_t dur; - uint64_t correlation_id; -}; - // Activity tracing callback // hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) void pool_activity_callback(const char* begin, const char* end, void* arg) { diff --git a/projects/roctracer/test/directed/trace_buffer.cpp b/projects/roctracer/test/directed/trace_buffer.cpp index 4adbdb72f5..3c48a46468 100644 --- a/projects/roctracer/test/directed/trace_buffer.cpp +++ b/projects/roctracer/test/directed/trace_buffer.cpp @@ -62,8 +62,8 @@ int main() { for (auto&& thread : threads) { thread = std::thread([&trace_buffer]() { for (std::size_t j = 0; j < num_iterations; ++j) { - auto* entry = trace_buffer.GetEntry(); - entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); + auto& 
entry = trace_buffer.Emplace(); + entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release); } }); }