Use a global correlation_id for all records
Change-Id: I87fe16fefb52a95242bc64b7007b71c9d8978d44
[ROCm/roctracer commit: ac3214d32a]
This commit is contained in:
@@ -341,9 +341,6 @@ class API_DescrParser:
|
||||
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
|
||||
self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'
|
||||
|
||||
self.cpp_content += 'std::atomic<uint64_t> hsa_counter_{1};\n'
|
||||
self.cpp_content += 'static thread_local uint64_t hsa_correlation_id_tls = 0;\n'
|
||||
|
||||
self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
|
||||
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
|
||||
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
|
||||
@@ -429,17 +426,21 @@ class API_DescrParser:
|
||||
if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
|
||||
content += ' api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n'
|
||||
content += ' auto [ api_callback_fun, api_callback_arg ] = cb_table.Get(' + call_id + ');\n'
|
||||
content += ' api_data.phase = 0;\n'
|
||||
content += ' api_data.correlation_id = hsa_support::hsa_counter_.fetch_add(1, std::memory_order_relaxed);\n'
|
||||
content += ' hsa_correlation_id_tls = api_data.correlation_id;\n'
|
||||
content += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
|
||||
content += ' if (api_callback_fun) {\n'
|
||||
content += ' api_data.phase = ACTIVITY_API_PHASE_ENTER;\n'
|
||||
content += ' api_data.correlation_id = CorrelationIdPush();\n'
|
||||
content += ' api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
|
||||
content += ' }\n'
|
||||
if ret_type != 'void':
|
||||
content += ' ' + ret_type + ' ret ='
|
||||
content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
|
||||
content += ' if (api_callback_fun) {\n'
|
||||
if ret_type != 'void':
|
||||
content += ' api_data.' + ret_type + '_retval = ret;\n'
|
||||
content += ' api_data.phase = 1;\n'
|
||||
content += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
|
||||
content += ' api_data.' + ret_type + '_retval = ret;\n'
|
||||
content += ' api_data.phase = ACTIVITY_API_PHASE_EXIT;\n'
|
||||
content += ' api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
|
||||
content += ' CorrelationIdPop();\n'
|
||||
content += ' }\n'
|
||||
if ret_type != 'void':
|
||||
content += ' return ret;\n'
|
||||
content += '}\n'
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
// which is a problem if operations invoked by global destructors use TLS variables. If the TLS
// stack is destructed, it still has well defined behavior by always returning a dummy element.
//
// Every operation checks the validity flag first. The flag is set by the constructor and cleared
// by the destructor; while it is cleared the underlying std::stack is never touched: pushes and
// pops are ignored, size() reports 0 and top()/emplace() hand out the dummy element instead.
template <typename T> class Stack : private std::stack<T, std::vector<T>> {
  using parent_type = std::stack<T, std::vector<T>>;

 public:
  // Mark the stack as usable.
  Stack() { valid_.store(true, std::memory_order_relaxed); }

  // Mark the stack as unusable so that later calls take the dummy-element path.
  ~Stack() { valid_.store(false, std::memory_order_relaxed); }

  // Construct a new top element from 'args' and return a reference to it. When the stack is not
  // valid, the element is constructed in the dummy slot instead (placement new, because this path
  // is only taken once the destructor has run) and nothing is stored.
  template <class... Args> auto& emplace(Args&&... args) {
    return is_valid() ? parent_type::emplace(std::forward<Args>(args)...)
                      : *new (&dummy_element_) T(std::forward<Args>(args)...);
  }

  // Push a copy of 'v'. Ignored when the stack is not valid.
  void push(const T& v) {
    if (is_valid()) parent_type::push(v);
  }

  // Push 'v' by moving it. Ignored when the stack is not valid.
  void push(T&& v) {
    if (is_valid()) parent_type::push(std::move(v));
  }

  // Remove the top element. Ignored when the stack is not valid. As with std::stack, the caller
  // must make sure a valid stack is not empty.
  void pop() {
    if (is_valid()) parent_type::pop();
  }

  // Return the top element, or the dummy element when the stack is not valid.
  const auto& top() const { return is_valid() ? parent_type::top() : dummy_element_; }

  // Return the top element, or a freshly reset dummy element when the stack is not valid.
  auto& top() { return is_valid() ? parent_type::top() : (dummy_element_ = {}); }

  // True between the end of construction and the start of destruction.
  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }

  // Number of stored elements; always 0 when the stack is not valid.
  size_t size() const { return is_valid() ? parent_type::size() : 0; }

  bool empty() const { return size() == 0; }

 private:
  std::atomic<bool> valid_{false};
  // Dummy element used when the stack is not valid. Value-initialized so that it never starts out
  // with an indeterminate value.
  T dummy_element_{};
};
|
||||
|
||||
// Per-thread stack of the internal correlation IDs pushed by CorrelationIdPush().
thread_local Stack<activity_correlation_id_t> correlation_id_stack{};

// Per-thread stack of the external correlation IDs pushed by ExternalCorrelationIdPush().
thread_local Stack<activity_correlation_id_t> external_id_stack{};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Take the next value of a counter shared by all callers (starting at 1), push it onto the calling
// thread's correlation ID stack and return it.
activity_correlation_id_t CorrelationIdPush() {
  static std::atomic<uint64_t> next_id{1};
  const uint64_t id = next_id.fetch_add(1, std::memory_order_relaxed);
  return correlation_id_stack.emplace(id);
}
|
||||
|
||||
// Drop the top entry of the calling thread's correlation ID stack.
void CorrelationIdPop() {
  correlation_id_stack.pop();
}
|
||||
|
||||
// Return the top entry of the calling thread's correlation ID stack, or 0 when the stack is empty.
activity_correlation_id_t CorrelationId() {
  if (correlation_id_stack.empty()) return 0;
  return correlation_id_stack.top();
}
|
||||
|
||||
// Push 'external_id' onto the calling thread's external correlation ID stack.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id) {
  external_id_stack.push(external_id);
}
|
||||
|
||||
// Remove the top entry of the calling thread's external correlation ID stack and return it.
// Returns nullopt, leaving the stack untouched, when there is nothing to pop.
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop() {
  std::optional<activity_correlation_id_t> popped;
  if (!external_id_stack.empty()) {
    popped = external_id_stack.top();
    external_id_stack.pop();
  }
  return popped;
}
|
||||
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationId() {
|
||||
return external_id_stack.empty() ? std::nullopt : std::make_optional(external_id_stack.top());
|
||||
}
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Start a new correlation ID region and push it onto the thread local stack. Correlation ID
|
||||
// regions are nested and per-thread.
|
||||
activity_correlation_id_t CorrelationIdPush();
|
||||
|
||||
// Stop the current correlation ID region and pop it from the thread local stack.
|
||||
void CorrelationIdPop();
|
||||
|
||||
// Return the ID of the currently active correlation ID region, or 0 if no region is active.
|
||||
activity_correlation_id_t CorrelationId();
|
||||
|
||||
// Start a new external correlation ID region for the given \p external_id. As for the internal
|
||||
// correlation ID regions, external correlation ID regions are nested and per-thread.
|
||||
void ExternalCorrelationIdPush(activity_correlation_id_t external_id);
|
||||
|
||||
// Stop the current external correlation ID region and return the external_id used to start the
|
||||
// region. Return a nullopt if no region was active.
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop();
|
||||
|
||||
// Return the current external correlation ID or nullopt if no region is active.
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationId();
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -38,7 +38,10 @@ class MemoryPool {
|
||||
// Pool definition: The memory pool is split in 2 buffers of equal size. When first initialized,
|
||||
// the write pointer points to the first element of the first buffer. When a buffer is full, or
|
||||
// when Flush() is called, the write pointer moves to the other buffer.
|
||||
const size_t allocation_size = 2 * properties_.buffer_size;
|
||||
// Each buffer should be large enough to hold at least 2 activity records, as record pairs may
|
||||
// be written when external correlation ids are used.
|
||||
const size_t allocation_size =
|
||||
2 * std::max(2 * sizeof(roctracer_record_t), properties_.buffer_size);
|
||||
pool_begin_ = nullptr;
|
||||
AllocateMemory(&pool_begin_, allocation_size);
|
||||
assert(pool_begin_ != nullptr && "pool allocator failed");
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "journal.h"
|
||||
#include "loader.h"
|
||||
#include "memory_pool.h"
|
||||
@@ -161,16 +162,6 @@ roctracer_status_t GetExcStatus(const std::exception& e) {
|
||||
return (roctracer_exc_ptr) ? roctracer_exc_ptr->status() : ROCTRACER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
static auto NextCorrelationId() {
|
||||
static std::atomic<uint64_t> counter{1};
|
||||
return counter.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Correlation id storage
|
||||
static thread_local activity_correlation_id_t correlation_id_tls = 0;
|
||||
|
||||
static thread_local std::stack<activity_correlation_id_t> external_id_stack;
|
||||
|
||||
std::mutex hip_activity_mutex;
|
||||
|
||||
enum { API_CB_MASK = 0x1, API_ACT_MASK = 0x2 };
|
||||
@@ -192,13 +183,9 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
|
||||
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
// Generate a new correlation ID.
|
||||
uint64_t correlation_id = NextCorrelationId();
|
||||
uint64_t correlation_id = CorrelationIdPush();
|
||||
data->correlation_id = correlation_id;
|
||||
|
||||
// Record the correlation ID in a TLS variable so that it can be passed
|
||||
// to an asynchronous activity started before the API function returns.
|
||||
correlation_id_tls = correlation_id;
|
||||
|
||||
if (pool != nullptr) {
|
||||
// Filing record info
|
||||
record->domain = ACTIVITY_DOMAIN_HIP_API;
|
||||
@@ -211,21 +198,22 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
|
||||
}
|
||||
} else {
|
||||
if (pool != nullptr) {
|
||||
if (!external_id_stack.empty()) {
|
||||
record->end_ns = util::timestamp_ns();
|
||||
|
||||
if (auto external_id = ExternalCorrelationId()) {
|
||||
roctracer_record_t ext_record{};
|
||||
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
|
||||
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
|
||||
ext_record.correlation_id = record->correlation_id;
|
||||
ext_record.external_id = external_id_stack.top();
|
||||
pool->Write(ext_record);
|
||||
ext_record.external_id = *external_id;
|
||||
// Write the external correlation id record directly followed by the activity record.
|
||||
pool->Write(std::array<roctracer_record_t, 2>{ext_record, *record});
|
||||
} else {
|
||||
// Write record to the buffer.
|
||||
pool->Write(*record);
|
||||
}
|
||||
|
||||
// Write record to the buffer
|
||||
record->end_ns = util::timestamp_ns();
|
||||
pool->Write(*record);
|
||||
}
|
||||
// Clear correlation ID
|
||||
correlation_id_tls = 0;
|
||||
CorrelationIdPop();
|
||||
}
|
||||
|
||||
DEBUG_TRACE(
|
||||
@@ -291,7 +279,7 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = async_copy_callback_memory_pool;
|
||||
entry->correlation_id = hsa_correlation_id_tls;
|
||||
entry->correlation_id = CorrelationId();
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
|
||||
@@ -315,7 +303,7 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = async_copy_callback_memory_pool;
|
||||
entry->correlation_id = hsa_correlation_id_tls;
|
||||
entry->correlation_id = CorrelationId();
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
|
||||
@@ -897,22 +885,24 @@ ROCTRACER_API roctracer_status_t roctracer_flush_activity() {
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
|
||||
API_METHOD_PREFIX
|
||||
external_id_stack.push(id);
|
||||
ExternalCorrelationIdPush(id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread.
|
||||
// 'lastId' returns the last external correlation
|
||||
// Pop an external correlation id for the calling thread, and return it in 'last_id' if not null.
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
|
||||
API_METHOD_PREFIX
|
||||
if (last_id != nullptr) *last_id = 0;
|
||||
if (external_id_stack.empty())
|
||||
|
||||
auto external_id = ExternalCorrelationIdPop();
|
||||
if (!external_id) {
|
||||
if (last_id != nullptr) *last_id = 0;
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID,
|
||||
"not matching external range pop");
|
||||
if (last_id != nullptr) *last_id = external_id_stack.top();
|
||||
external_id_stack.pop();
|
||||
"unbalanced external correlation id pop");
|
||||
}
|
||||
|
||||
if (last_id != nullptr) *last_id = *external_id;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user