From b664937ebd6e0b89e775bc2393c370ac4b3259df Mon Sep 17 00:00:00 2001
From: Laurent Morichetti
Date: Mon, 26 Sep 2022 09:35:03 -0700
Subject: [PATCH] Use the "safe" Stack for begin_timestamp

The tracer tool needs to remember the begin timestamps for API
callbacks, and uses a thread_local std::stack for that purpose. The
issue with thread_local objects is that they are destructed before
anything else when the main thread exits. To work around that issue, we
use a "safe" stack in the roctracer API. Use the same "safe" stack in
the tracer tool.

Change-Id: I0d69d4eb44f0205f4102d0d5ef9803a1ec1800a5
---
 src/roctracer/correlation_id.cpp |  2 +-
 src/tracer_tool/tracer_tool.cpp  | 54 ++++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/roctracer/correlation_id.cpp b/src/roctracer/correlation_id.cpp
index 50c9e10908..a76cce1f5e 100644
--- a/src/roctracer/correlation_id.cpp
+++ b/src/roctracer/correlation_id.cpp
@@ -39,7 +39,7 @@ template <typename T> class Stack : std::stack<T, std::vector<T>> {
 
   template <typename... Args> auto& emplace(Args&&... args) {
     return is_valid() ? parent_type::emplace(std::forward<Args>(args)...)
-                      : *new (&dummy_element_) T(std::forward<Args>(args)...);
+                      : dummy_element_ = T(std::forward<Args>(args)...);
   }
   void push(const T& v) {
     if (is_valid()) parent_type::push(v);
diff --git a/src/tracer_tool/tracer_tool.cpp b/src/tracer_tool/tracer_tool.cpp
index db21e2cbc9..f942fef946 100644
--- a/src/tracer_tool/tracer_tool.cpp
+++ b/src/tracer_tool/tracer_tool.cpp
@@ -67,8 +67,42 @@ TRACE_BUFFER_INSTANTIATE();
 
 namespace {
 
-thread_local std::stack<roctracer_timestamp_t, std::vector<roctracer_timestamp_t>>
-    hsa_begin_timestamp, hip_begin_timestamp;
+// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
+// which is a problem if operations invoked by global destructors use TLS variables. If the TLS
+// stack is destructed, it still has well defined behavior by always returning a dummy element.
+template <typename T> class Stack : std::stack<T, std::vector<T>> {
+  using parent_type = typename std::stack<T, std::vector<T>>;
+
+ public:
+  Stack() { valid_.store(true, std::memory_order_relaxed); }
+  ~Stack() { valid_.store(false, std::memory_order_relaxed); }
+
+  template <typename... Args> auto& emplace(Args&&... args) {
+    return is_valid() ? parent_type::emplace(std::forward<Args>(args)...)
+                      : dummy_element_ = T(std::forward<Args>(args)...);
+  }
+  void push(const T& v) {
+    if (is_valid()) parent_type::push(v);
+  }
+  void push(T&& v) {
+    if (is_valid()) parent_type::push(std::move(v));
+  }
+  void pop() {
+    if (is_valid()) parent_type::pop();
+  }
+  const auto& top() const { return is_valid() ? parent_type::top() : dummy_element_; }
+  auto& top() { return is_valid() ? parent_type::top() : (dummy_element_ = {}); }
+
+  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }
+  size_t size() const { return is_valid() ? parent_type::size() : 0; }
+  bool empty() const { return size() == 0; }
+
+ private:
+  std::atomic<bool> valid_{false};
+  T dummy_element_;  // Dummy element used when the stack is not valid.
+};
+
+thread_local Stack<roctracer_timestamp_t> begin_timestamp_stack;
 
 inline roctracer_timestamp_t timestamp_ns() {
   roctracer_timestamp_t timestamp;
@@ -270,14 +304,14 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
   (void)arg;
   const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
   if (data->phase == ACTIVITY_API_PHASE_ENTER) {
-    hsa_begin_timestamp.push(timestamp_ns());
+    begin_timestamp_stack.push(timestamp_ns());
   } else {
     const roctracer_timestamp_t end_timestamp =
-        (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp.top() : timestamp_ns();
+        (cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp_stack.top() : timestamp_ns();
     hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(
-        cid, hsa_begin_timestamp.top(), end_timestamp, GetPid(), GetTid(), *data);
+        cid, begin_timestamp_stack.top(), end_timestamp, GetPid(), GetTid(), *data);
     entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
-    hsa_begin_timestamp.pop();
+    begin_timestamp_stack.pop();
   }
 }
 
@@ -408,16 +442,16 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
   std::optional<std::string> kernel_name;
 
   if (data->phase == ACTIVITY_API_PHASE_ENTER) {
-    hip_begin_timestamp.push(timestamp);
+    begin_timestamp_stack.push(timestamp);
   } else {
     // Post init of HIP APU args
     hipApiArgsInit((hip_api_id_t)cid, const_cast<hip_api_data_t*>(data));
     kernel_name = getKernelName(cid, data);
     hip_api_trace_entry_t& entry =
-        hip_api_trace_buffer.Emplace(cid, hip_begin_timestamp.top(), timestamp, GetPid(), GetTid(),
-                                     *data, kernel_name ? kernel_name->c_str() : nullptr);
+        hip_api_trace_buffer.Emplace(cid, begin_timestamp_stack.top(), timestamp, GetPid(),
+                                     GetTid(), *data, kernel_name ? kernel_name->c_str() : nullptr);
     entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
-    hip_begin_timestamp.pop();
+    begin_timestamp_stack.pop();
   }
 }
 