Remove the roctx range message stack

The range message stack is mirrored in case ranges are pushed or popped
while tracing is stopped (by the tracer tool?). When a stop event is
reported, the tracer tool emits RangePop events by unwinding the stack,
then when the start event is reported, it emits RangePush events again
by unwinding the stack. The issue is that the RangePush events should
be emitted in reverse order.

For example:

RangePush(M1); RangePush(M2); \
  TracerStop; RangePop; RangePop; \
...; \
  TracerStart; RangePush(M2); RangePush(M1); \ <- In the wrong order
RangePop; RangePop;

It could be fixed by reversing the stack in RangeStackIterate but is it
worth it? The roctx range markers are supposed to be unintrusive so that
they can be left in the application even when it isn't being traced.

Simplifying the roctx API and reducing its added latency by removing
the range message stack mirroring seems like the better choise.

TODO: A future change should make roctx events immune to tracer start
and tracer stop requests. Or simply remove roctracer_start/stop.

Change-Id: Ie4d76afb5ce8d263848dcf1b599af394db56ddab


[ROCm/roctracer commit: 3d0198c395]
This commit is contained in:
Laurent Morichetti
2022-05-06 14:01:27 -07:00
parent 4a04400f85
commit 9b78c65ce1
4 changed files with 17 additions and 84 deletions
-14
View File
@@ -74,20 +74,6 @@ typedef struct roctx_api_data_s {
} args;
} roctx_api_data_t;
// Regiter ROCTX callback for given operation id
bool RegisterApiCallback(uint32_t op, void* callback, void* arg);
// Remove ROCTX callback for given operation id
bool RemoveApiCallback(uint32_t op);
// Iterate range stack to support tracing start/stop
typedef struct {
const char* message;
uint32_t tid;
} roctx_range_data_t;
typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg);
void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
+2 -5
View File
@@ -278,19 +278,16 @@ class RocTxApi {
public:
typedef BaseLoader<RocTxApi> Loader;
typedef decltype(RegisterApiCallback) RegisterApiCallback_t;
typedef decltype(RemoveApiCallback) RemoveApiCallback_t;
typedef decltype(RangeStackIterate) RangeStackIterate_t;
typedef bool(RegisterApiCallback_t)(uint32_t op, void* callback, void* arg);
typedef bool(RemoveApiCallback_t)(uint32_t op);
RegisterApiCallback_t* RegisterApiCallback;
RemoveApiCallback_t* RemoveApiCallback;
RangeStackIterate_t* RangeStackIterate;
protected:
void init(Loader* loader) {
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("RegisterApiCallback");
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("RemoveApiCallback");
RangeStackIterate = loader->GetFun<RangeStackIterate_t>("RangeStackIterate");
}
};
+6 -36
View File
@@ -22,11 +22,6 @@
#include "inc/roctracer_roctx.h"
#include <cassert>
#include <cstring>
#include <unordered_map>
#include <mutex>
#include <stack>
#include <string>
#include "inc/ext/prof_protocol.h"
#include "core/callback_table.h"
@@ -63,14 +58,7 @@ typedef enum {
namespace {
roctracer::CallbackTable<ROCTX_API_ID_NUMBER> callbacks;
std::unordered_map<uint32_t, std::stack<std::string>> message_stack_map;
std::mutex message_stack_mutex;
thread_local auto& message_stack = []() -> decltype(message_stack_map)::mapped_type& {
const auto tid = syscall(__NR_gettid);
std::lock_guard lock(message_stack_mutex);
return message_stack_map[tid];
}();
thread_local int range_level(0);
} // namespace
@@ -106,8 +94,7 @@ PUBLIC_API int roctxRangePushA(const char* message) {
api_callback_arg);
}
message_stack.emplace(message);
return message_stack.size() - 1;
return range_level++;
API_METHOD_CATCH(-1);
}
@@ -120,12 +107,8 @@ PUBLIC_API int roctxRangePop() {
api_callback_arg);
}
if (message_stack.empty()) {
EXC_RAISING(ROCTX_STATUS_ERROR, "Pop from empty stack!");
}
message_stack.pop();
return message_stack.size();
if (range_level == 0) EXC_RAISING(ROCTX_STATUS_ERROR, "Pop from empty stack!");
return --range_level;
API_METHOD_CATCH(-1)
}
@@ -157,26 +140,13 @@ PUBLIC_API void roctxRangeStop(roctx_range_id_t rangeId) {
API_METHOD_SUFFIX_NRET
}
PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) {
std::lock_guard lock(message_stack_mutex);
for (auto&& [tid, message_stack] : message_stack_map) {
// Since we can't iterate a std::stack, we must first make a copy and then unwind it.
for (auto stack_copy = message_stack; !stack_copy.empty(); stack_copy.pop()) {
roctx_range_data_t data{};
data.message = stack_copy.top().c_str();
data.tid = tid;
callback(&data, arg);
}
}
}
PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) {
extern "C" PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) {
if (op >= ROCTX_API_ID_NUMBER) return false;
callbacks.Set(op, reinterpret_cast<activity_rtapi_callback_t>(callback), arg);
return true;
}
PUBLIC_API bool RemoveApiCallback(uint32_t op) {
extern "C" PUBLIC_API bool RemoveApiCallback(uint32_t op) {
if (op >= ROCTX_API_ID_NUMBER) return false;
callbacks.Set(op, nullptr, nullptr);
return true;
+9 -29
View File
@@ -256,39 +256,22 @@ struct roctx_trace_entry_t {
roctracer::TraceBuffer<roctx_trace_entry_t>* roctx_trace_buffer = NULL;
// rocTX callback function
static inline void roctx_callback_fun(uint32_t domain, uint32_t cid, uint32_t tid,
roctx_range_id_t rid, const char* message) {
void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
void* /* user_arg */) {
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
roctx_trace_entry_t* entry = roctx_trace_buffer->GetEntry();
entry->cid = cid;
entry->time = util::timestamp_ns();
entry->pid = GetPid();
entry->tid = tid;
entry->rid = rid;
entry->message = (message != NULL) ? strdup(message) : NULL;
entry->tid = GetTid();
entry->rid = data->args.id;
entry->message = (data->args.message != NULL)
? strdup(data->args.message) /* FIXME: Who frees the message? */
: NULL;
entry->valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
(void)arg;
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
roctx_callback_fun(domain, cid, GetTid(), data->args.id, data->args.message);
}
// rocTX Start/Stop callbacks
void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) {
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, 0,
data->message);
}
void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) {
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, 0, NULL);
}
void start_callback() {
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL);
}
void stop_callback() {
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL);
}
// rocTX buffer flush function
void roctx_flush_cb(roctx_trace_entry_t* entry) {
std::ostringstream os;
@@ -859,9 +842,6 @@ void tool_load() {
roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt");
// initialize HSA tracing
roctracer_ext_properties_t properties{start_callback, stop_callback};
roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties);
fprintf(stdout, " rocTX-trace()\n");
fflush(stdout);
ROCTRACER_CALL(