Use a global correlation_id for all records

Change-Id: I87fe16fefb52a95242bc64b7007b71c9d8978d44


[ROCm/roctracer commit: ac3214d32a]
This commit is contained in:
Laurent Morichetti
2022-08-18 20:55:54 -07:00
rodzic f7c8382e33
commit 0197ed4253
5 zmienionych plików z 188 dodań i 45 usunięć
+11 -10
Wyświetl plik
@@ -341,9 +341,6 @@ class API_DescrParser:
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'
self.cpp_content += 'std::atomic<uint64_t> hsa_counter_{1};\n'
self.cpp_content += 'static thread_local uint64_t hsa_correlation_id_tls = 0;\n'
self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
@@ -429,17 +426,21 @@ class API_DescrParser:
if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
content += ' api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n'
content += ' auto [ api_callback_fun, api_callback_arg ] = cb_table.Get(' + call_id + ');\n'
content += ' api_data.phase = 0;\n'
content += ' api_data.correlation_id = hsa_support::hsa_counter_.fetch_add(1, std::memory_order_relaxed);\n'
content += ' hsa_correlation_id_tls = api_data.correlation_id;\n'
content += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
content += ' if (api_callback_fun) {\n'
content += ' api_data.phase = ACTIVITY_API_PHASE_ENTER;\n'
content += ' api_data.correlation_id = CorrelationIdPush();\n'
content += ' api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
content += ' }\n'
if ret_type != 'void':
content += ' ' + ret_type + ' ret ='
content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
content += ' if (api_callback_fun) {\n'
if ret_type != 'void':
content += ' api_data.' + ret_type + '_retval = ret;\n'
content += ' api_data.phase = 1;\n'
content += ' if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
content += ' api_data.' + ret_type + '_retval = ret;\n'
content += ' api_data.phase = ACTIVITY_API_PHASE_EXIT;\n'
content += ' api_callback_fun(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &api_data, api_callback_arg);\n'
content += ' CorrelationIdPop();\n'
content += ' }\n'
if ret_type != 'void':
content += ' return ret;\n'
content += '}\n'
@@ -0,0 +1,99 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "correlation_id.h"
#include "roctracer.h"
#include <atomic>
#include <stack>
#include <vector>
namespace {
// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
// which is a problem if operations invoked by global destructors use TLS variables. Once the TLS
// stack has been destructed, it is marked invalid: every mutating operation becomes a no-op and
// every accessor returns a dummy element (or a size of 0) instead of touching the destroyed
// container.
//
// While the stack is valid it behaves like std::stack<T, std::vector<T>>, except that pop() on an
// empty stack is a harmless no-op rather than undefined behavior, so an unbalanced pop cannot
// corrupt the underlying vector.
template <typename T> class Stack : std::stack<T, std::vector<T>> {
  using parent_type = typename std::stack<T, std::vector<T>>;

 public:
  Stack() { valid_.store(true, std::memory_order_relaxed); }
  // Mark the stack invalid before the underlying container is destroyed by the base destructor.
  ~Stack() { valid_.store(false, std::memory_order_relaxed); }

  // Construct a new element in place on top of the stack and return a reference to it. If the
  // stack is not valid, the value is stored in the dummy element and a reference to that is
  // returned, so the caller still observes the value it just emplaced.
  template <class... Args> auto& emplace(Args&&... args) {
    if (is_valid()) return parent_type::emplace(std::forward<Args>(args)...);
    // Assign rather than placement-new: the dummy element is already a live object.
    dummy_element_ = T(std::forward<Args>(args)...);
    return dummy_element_;
  }

  // Push a copy of \p v. No-op if the stack is not valid.
  void push(const T& v) {
    if (is_valid()) parent_type::push(v);
  }

  // Push \p v by move. No-op if the stack is not valid.
  void push(T&& v) {
    if (is_valid()) parent_type::push(std::move(v));
  }

  // Remove the top element. No-op if the stack is not valid or is empty.
  void pop() {
    if (is_valid() && !parent_type::empty()) parent_type::pop();
  }

  // Return the top element, or the dummy element if the stack is not valid. The stack must not be
  // empty when it is valid; callers are expected to check empty() first.
  const auto& top() const { return is_valid() ? parent_type::top() : dummy_element_; }
  // Same as above; the dummy element is reset to a value-initialized T before being returned.
  auto& top() { return is_valid() ? parent_type::top() : (dummy_element_ = {}); }

  // True between the end of construction and the start of destruction.
  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }
  // Number of elements, or 0 if the stack is not valid.
  size_t size() const { return is_valid() ? parent_type::size() : 0; }
  bool empty() const { return size() == 0; }

 private:
  std::atomic<bool> valid_{false};
  T dummy_element_{};  // Dummy element used when the stack is not valid.
};
// Per-thread stack of the correlation IDs pushed by CorrelationIdPush().
thread_local Stack<activity_correlation_id_t> correlation_id_stack{};
// Per-thread stack of the external correlation IDs pushed by ExternalCorrelationIdPush().
thread_local Stack<activity_correlation_id_t> external_id_stack{};
} // namespace
namespace roctracer {
// Allocate the next correlation ID, push it onto the calling thread's correlation ID stack and
// return it.
activity_correlation_id_t CorrelationIdPush() {
  // Counter shared by all threads. It starts at 1 because CorrelationId() reports 0 when no
  // region is active.
  static std::atomic<uint64_t> counter{1};
  const uint64_t next_id = counter.fetch_add(1, std::memory_order_relaxed);
  return correlation_id_stack.emplace(next_id);
}
// Pop the top entry from the calling thread's correlation ID stack.
void CorrelationIdPop() {
  correlation_id_stack.pop();
}
// Return the correlation ID on top of the calling thread's stack, or 0 if the stack is empty.
activity_correlation_id_t CorrelationId() {
  if (correlation_id_stack.empty()) return 0;
  return correlation_id_stack.top();
}
// Push \p external_id onto the calling thread's external correlation ID stack.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id)
{
  external_id_stack.push(external_id);
}
// Pop the top entry from the calling thread's external correlation ID stack and return it.
// Returns nullopt, leaving the stack untouched, if the stack is empty.
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop() {
  std::optional<activity_correlation_id_t> popped;
  if (!external_id_stack.empty()) {
    popped = external_id_stack.top();
    external_id_stack.pop();
  }
  return popped;
}
std::optional<activity_correlation_id_t> ExternalCorrelationId() {
return external_id_stack.empty() ? std::nullopt : std::make_optional(external_id_stack.top());
}
} // namespace roctracer
@@ -0,0 +1,50 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "roctracer.h"
#include <optional>
namespace roctracer {
// Start a new correlation ID region and push it onto the thread local stack. Correlation ID
// regions are nested and per-thread. Returns the ID of the newly started region.
activity_correlation_id_t CorrelationIdPush();
// Stop the current correlation ID region and pop it from the thread local stack.
void CorrelationIdPop();
// Return the ID of the currently active correlation ID region, or 0 if no region is active.
activity_correlation_id_t CorrelationId();
// Start a new external correlation ID region for the given \p external_id. As for the internal
// correlation ID regions, external correlation ID regions are nested and per-thread.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id);
// Stop the current external correlation ID region and return the external_id used to start the
// region. Return a nullopt if no region was active.
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop();
// Return the current external correlation ID, or nullopt if no region is active.
std::optional<activity_correlation_id_t> ExternalCorrelationId();
} // namespace roctracer
@@ -38,7 +38,10 @@ class MemoryPool {
// Pool definition: The memory pool is split in 2 buffers of equal size. When first initialized,
// the write pointer points to the first element of the first buffer. When a buffer is full, or
// when Flush() is called, the write pointer moves to the other buffer.
const size_t allocation_size = 2 * properties_.buffer_size;
// Each buffer should be large enough to hold at least 2 activity records, as record pairs may
// be written when external correlation ids are used.
const size_t allocation_size =
2 * std::max(2 * sizeof(roctracer_record_t), properties_.buffer_size);
pool_begin_ = nullptr;
AllocateMemory(&pool_begin_, allocation_size);
assert(pool_begin_ != nullptr && "pool allocator failed");
@@ -37,6 +37,7 @@
#include <unordered_map>
#include <vector>
#include "correlation_id.h"
#include "journal.h"
#include "loader.h"
#include "memory_pool.h"
@@ -161,16 +162,6 @@ roctracer_status_t GetExcStatus(const std::exception& e) {
return (roctracer_exc_ptr) ? roctracer_exc_ptr->status() : ROCTRACER_STATUS_ERROR;
}
static auto NextCorrelationId() {
static std::atomic<uint64_t> counter{1};
return counter.fetch_add(1, std::memory_order_relaxed);
}
// Correlation id storage
static thread_local activity_correlation_id_t correlation_id_tls = 0;
static thread_local std::stack<activity_correlation_id_t> external_id_stack;
std::mutex hip_activity_mutex;
enum { API_CB_MASK = 0x1, API_ACT_MASK = 0x2 };
@@ -192,13 +183,9 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
// Generate a new correlation ID.
uint64_t correlation_id = NextCorrelationId();
uint64_t correlation_id = CorrelationIdPush();
data->correlation_id = correlation_id;
// Record the correlation ID in a TLS variable so that it can be passed
// to an asynchronous activity started before the API function returns.
correlation_id_tls = correlation_id;
if (pool != nullptr) {
// Filing record info
record->domain = ACTIVITY_DOMAIN_HIP_API;
@@ -211,21 +198,22 @@ void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_
}
} else {
if (pool != nullptr) {
if (!external_id_stack.empty()) {
record->end_ns = util::timestamp_ns();
if (auto external_id = ExternalCorrelationId()) {
roctracer_record_t ext_record{};
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
ext_record.correlation_id = record->correlation_id;
ext_record.external_id = external_id_stack.top();
pool->Write(ext_record);
ext_record.external_id = *external_id;
// Write the external correlation id record directly followed by the activity record.
pool->Write(std::array<roctracer_record_t, 2>{ext_record, *record});
} else {
// Write record to the buffer.
pool->Write(*record);
}
// Write record to the buffer
record->end_ns = util::timestamp_ns();
pool->Write(*record);
}
// Clear correlation ID
correlation_id_tls = 0;
CorrelationIdPop();
}
DEBUG_TRACE(
@@ -291,7 +279,7 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
entry->correlation_id = CorrelationId();
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
@@ -315,7 +303,7 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = async_copy_callback_memory_pool;
entry->correlation_id = hsa_correlation_id_tls;
entry->correlation_id = CorrelationId();
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
@@ -897,22 +885,24 @@ ROCTRACER_API roctracer_status_t roctracer_flush_activity() {
ROCTRACER_API roctracer_status_t
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
API_METHOD_PREFIX
external_id_stack.push(id);
ExternalCorrelationIdPush(id);
API_METHOD_SUFFIX
}
// Notifies that the calling thread is leaving an external API region.
// Pop an external correlation id for the calling thread.
// 'lastId' returns the last external correlation
// Pop an external correlation id for the calling thread, and return it in 'last_id' if not null.
ROCTRACER_API roctracer_status_t
roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
API_METHOD_PREFIX
if (last_id != nullptr) *last_id = 0;
if (external_id_stack.empty())
auto external_id = ExternalCorrelationIdPop();
if (!external_id) {
if (last_id != nullptr) *last_id = 0;
EXC_RAISING(ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID,
"not matching external range pop");
if (last_id != nullptr) *last_id = external_id_stack.top();
external_id_stack.pop();
"unbalanced external correlation id pop");
}
if (last_id != nullptr) *last_id = *external_id;
API_METHOD_SUFFIX
}