Remove the thread local begin_timestamp stack
Using a thread_local object is problematic as the thread local
destructors are called first before any global destructor, making
the object invalid while tearing down the process.
rocblas uses a global destructor to clean up the loaded HIP modules
and ends up calling hip_executable_destroy after the timestamp stack
is destructed. As a result the begin timestamp for that API function
is 0.
The solution is to store the phase_enter timestamp in the phase_data.
Change-Id: If143f4d123dfb111c72fb20365431d07e73fc570
[ROCm/roctracer commit: 8a575d8d6e]
This commit is contained in:
@@ -401,6 +401,7 @@ class API_DescrParser:
|
||||
content += ' } ' + call + ';\n'
|
||||
else:
|
||||
content += ' } args;\n'
|
||||
content += ' uint64_t *phase_data;\n'
|
||||
content += '};\n'
|
||||
return content
|
||||
|
||||
|
||||
@@ -325,6 +325,7 @@ template <activity_domain_t domain> struct ApiTracer {
|
||||
if (auto user_callback = callback_table.Get(operation_id)) {
|
||||
assert(trace_data != nullptr);
|
||||
trace_data->api_data.phase = ACTIVITY_API_PHASE_ENTER;
|
||||
trace_data->api_data.phase_data = &trace_data->phase_data;
|
||||
user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
|
||||
trace_data->phase_exit = Exit_UserCallback;
|
||||
} else {
|
||||
|
||||
@@ -67,43 +67,6 @@ TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
namespace {
|
||||
|
||||
// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
|
||||
// which is a problem if operations invoked by global destructors use TLS variables. If the TLS
|
||||
// stack is destructed, it still has well defined behavior by always returning a dummy element.
|
||||
template <typename T> class Stack : std::stack<T, std::vector<T>> {
|
||||
using parent_type = typename std::stack<T, std::vector<T>>;
|
||||
|
||||
public:
|
||||
Stack() { valid_.store(true, std::memory_order_relaxed); }
|
||||
~Stack() { valid_.store(false, std::memory_order_relaxed); }
|
||||
|
||||
template <class... Args> auto& emplace(Args&&... args) {
|
||||
return is_valid() ? parent_type::emplace(std::forward<Args>(args)...)
|
||||
: dummy_element_ = T(std::forward<Args>(args)...);
|
||||
}
|
||||
void push(const T& v) {
|
||||
if (is_valid()) parent_type::push(v);
|
||||
}
|
||||
void push(T&& v) {
|
||||
if (is_valid()) parent_type::push(std::move(v));
|
||||
}
|
||||
void pop() {
|
||||
if (is_valid()) parent_type::pop();
|
||||
}
|
||||
const auto& top() const { return is_valid() ? parent_type::top() : dummy_element_; }
|
||||
auto& top() { return is_valid() ? parent_type::top() : (dummy_element_ = {}); }
|
||||
|
||||
bool is_valid() const { return valid_.load(std::memory_order_relaxed); }
|
||||
size_t size() const { return is_valid() ? parent_type::size() : 0; }
|
||||
bool empty() const { return size() == 0; }
|
||||
|
||||
private:
|
||||
std::atomic<bool> valid_{false};
|
||||
T dummy_element_; // Dummy element used when the stack is not valid.
|
||||
};
|
||||
|
||||
thread_local Stack<roctracer_timestamp_t> begin_timestamp_stack;
|
||||
|
||||
inline roctracer_timestamp_t timestamp_ns() {
|
||||
roctracer_timestamp_t timestamp;
|
||||
CHECK_ROCTRACER(roctracer_get_timestamp(×tamp));
|
||||
@@ -304,14 +267,14 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
(void)arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
begin_timestamp_stack.push(timestamp_ns());
|
||||
*data->phase_data = timestamp_ns();
|
||||
} else {
|
||||
const roctracer_timestamp_t begin_timestamp = *data->phase_data;
|
||||
const roctracer_timestamp_t end_timestamp =
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp_stack.top() : timestamp_ns();
|
||||
hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(
|
||||
cid, begin_timestamp_stack.top(), end_timestamp, GetPid(), GetTid(), *data);
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp : timestamp_ns();
|
||||
hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(cid, begin_timestamp, end_timestamp,
|
||||
GetPid(), GetTid(), *data);
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
begin_timestamp_stack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -442,16 +405,15 @@ void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
std::optional<std::string> kernel_name;
|
||||
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
begin_timestamp_stack.push(timestamp);
|
||||
*data->phase_data = timestamp;
|
||||
} else {
|
||||
// Post init of HIP APU args
|
||||
hipApiArgsInit((hip_api_id_t)cid, const_cast<hip_api_data_t*>(data));
|
||||
kernel_name = getKernelName(cid, data);
|
||||
hip_api_trace_entry_t& entry =
|
||||
hip_api_trace_buffer.Emplace(cid, begin_timestamp_stack.top(), timestamp, GetPid(),
|
||||
GetTid(), *data, kernel_name ? kernel_name->c_str() : nullptr);
|
||||
hip_api_trace_buffer.Emplace(cid, *data->phase_data, timestamp, GetPid(), GetTid(), *data,
|
||||
kernel_name ? kernel_name->c_str() : nullptr);
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
begin_timestamp_stack.pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user