Move trace_buffer.h to the tool directory

A trace buffer is used to efficiently store synchronous event records
so that they can be processed later, possibly in a different thread,
when the buffer is flushed. This helps reduce the latency added by
tracing API calls.

The API does not need to use trace buffers as synchronous events are
directly reported to the client with callbacks, and asynchronous events
(activities) are saved in memory pools.

The implementation of HSA asynchronous memory copy activities was using
a trace buffer shared with the tracer tool to write the records to a
file (async_copy_trace.txt), instead of using a memory pool and
reporting the activity to the client.

Removed the asynchronous memory copies trace buffer, and updated
hsa_async_copy_handler to use the pool specified when the activity
was enabled.

Updated the tracer tool to read HSA_OP_ID_COPY records out of the
default memory pool and write them to async_copy_trace.txt.

Move trace_buffer.h to test/tool as tracer_tool.cpp is now the only
file using it.

Change-Id: Ida95aba2eaf3c3f2a979ed6c2b060374017b7424
Этот коммит содержится в:
Laurent Morichetti
2022-04-22 13:36:41 -07:00
коммит произвёл Laurent Morichetti
родитель 48f4c82685
Коммит 61f35b0204
5 изменённых файлов: 77 добавлений и 77 удалений
+1 -3
Просмотреть файл
@@ -50,9 +50,7 @@ extern ImageExtTable ImageExtTable_saved;
struct ops_properties_t {
void* table;
activity_async_callback_t async_copy_callback_fun;
void* async_copy_callback_arg;
const char* output_prefix;
void* reserved1[3];
};
}; // namespace hsa_support
+20 -30
Просмотреть файл
@@ -40,7 +40,6 @@
#include "core/journal.h"
#include "core/loader.h"
#include "core/memory_pool.h"
#include "core/trace_buffer.h"
#include "core/tracker.h"
#include "ext/hsa_rt_utils.hpp"
#include "util/exception.h"
@@ -229,19 +228,12 @@ class hip_act_cb_tracker_t {
std::unordered_map<uint32_t, uint32_t> data_;
};
void hsa_async_copy_handler(Tracker::entry_t* entry);
constexpr TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
{COPY_ENTRY_TYPE, hsa_async_copy_handler}};
TraceBuffer<trace_entry_t>* trace_buffer = NULL;
namespace hsa_support {
// callbacks table
cb_table_t cb_table;
// async copy activity callback
bool async_copy_callback_enabled = false;
activity_async_callback_t async_copy_callback_fun = NULL;
void* async_copy_callback_arg = NULL;
const char* output_prefix = NULL;
MemoryPool* async_copy_callback_memory_pool = nullptr;
// Table of function pointers to HSA Core Runtime
CoreApiTable CoreApiTable_saved{};
// Table of function pointers to AMD extensions
@@ -546,15 +538,14 @@ void close_output_file(FILE* file_handle) {
if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle);
}
void hsa_async_copy_handler(Tracker::entry_t* entry) {
void hsa_async_copy_handler(const Tracker::entry_t* entry) {
activity_record_t record{};
record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id
record.begin_ns = entry->begin; // host begin timestamp
record.end_ns = entry->end; // host end timestamp
record.device_id = 0; // device id
hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record,
hsa_support::async_copy_callback_arg);
record.domain = ACTIVITY_DOMAIN_HSA_OPS;
record.op = HSA_OP_ID_COPY;
record.begin_ns = entry->begin;
record.end_ns = entry->end;
record.device_id = 0;
entry->pool->Write(record);
}
hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent,
@@ -564,8 +555,10 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag
hsa_signal_t completion_signal) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (hsa_support::async_copy_callback_enabled) {
trace_entry_t* entry = trace_buffer->GetEntry();
Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = hsa_support::async_copy_callback_memory_pool;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
dep_signals, entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
@@ -583,8 +576,10 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
hsa_signal_t completion_signal) {
hsa_status_t status = HSA_STATUS_SUCCESS;
if (hsa_support::async_copy_callback_enabled) {
trace_entry_t* entry = trace_buffer->GetEntry();
Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
Tracker::entry_t* entry = new Tracker::entry_t();
entry->handler = hsa_async_copy_handler;
entry->pool = hsa_support::async_copy_callback_memory_pool;
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent,
dir, num_dep_signals, dep_signals, entry->signal);
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
@@ -625,7 +620,6 @@ unsigned set_stopped(unsigned val) {
} // namespace roctracer
LOADER_INSTANTIATE();
TRACE_BUFFER_INSTANTIATE();
///////////////////////////////////////////////////////////////////////////////////////////////////
// Public library methods
@@ -968,6 +962,8 @@ static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domai
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
roctracer::hsa_support::async_copy_callback_enabled = true;
roctracer::hsa_support::async_copy_callback_memory_pool =
reinterpret_cast<roctracer::MemoryPool*>(pool);
} else {
const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);
if (roctracer::RocpLoader::GetRef() == NULL) break;
@@ -1059,7 +1055,8 @@ static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t doma
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
if (op == HSA_OP_ID_COPY) {
roctracer::hsa_support::async_copy_callback_enabled = true;
roctracer::hsa_support::async_copy_callback_enabled = false;
roctracer::hsa_support::async_copy_callback_memory_pool = nullptr;
} else {
if (roctracer::RocpLoader::GetRef() == NULL) break;
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
@@ -1145,7 +1142,6 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po
if (pool == NULL) pool = roctracer_default_pool();
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
if (memory_pool != NULL) memory_pool->Flush();
roctracer::TraceBufferBase::FlushAll();
API_METHOD_SUFFIX
}
@@ -1223,9 +1219,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain
roctracer::hsa_ops_properties_t* ops_properties =
reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun;
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
roctracer::hsa_support::output_prefix = ops_properties->output_prefix;
#if 0
// HSA dispatches intercepting
@@ -1314,15 +1307,12 @@ PUBLIC_API void roctracer_unload() {
PUBLIC_API void roctracer_flush_buf() {
ONLOAD_TRACE_BEG();
roctracer::trace_buffer->Flush();
ONLOAD_TRACE_END();
}
CONSTRUCTOR_API void constructor() {
ONLOAD_TRACE_BEG();
roctracer::util::Logger::Create();
roctracer::trace_buffer = new roctracer::TraceBuffer<roctracer::trace_entry_t>(
"HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2);
roctracer_load();
ONLOAD_TRACE_END();
}
+43 -9
Просмотреть файл
@@ -31,14 +31,45 @@
#include "util/hsa_rsrc_factory.h"
#include "util/exception.h"
#include "util/logger.h"
#include "core/trace_buffer.h"
namespace roctracer {
class Tracker {
public:
typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t;
typedef roctracer::trace_entry_t entry_t;
typedef roctracer::entry_type_t entry_type_t;
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
// Kind of operation a tracker entry describes. Complete() checks for
// COPY_ENTRY_TYPE to decide whether to read the timestamps with
// hsa_amd_profiling_get_async_copy_time().
enum entry_type_t {
DFLT_ENTRY_TYPE = 0,
API_ENTRY_TYPE = 1,
// Used by the hsa_amd_memory_async_copy*() interceptors.
COPY_ENTRY_TYPE = 2,
KERNEL_ENTRY_TYPE = 3,
// NOTE(review): presumably the number of entry types rather than a type itself - confirm.
NUM_ENTRY_TYPE = 4
};
// Tracking record for one intercepted asynchronous operation.
//
// The async copy interceptors allocate an entry with `new`, set `handler` and
// `pool`, and pass it to Enable(). When the proxy signal completes, the entry's
// timestamps are filled in and `handler` is called with the entry.
// NOTE(review): the entry appears to be deleted at the end of completion
// handling (`delete entry`), so handlers should not keep the pointer - confirm.
struct entry_t {
// Entry state: ENTRY_INV, ENTRY_INIT or ENTRY_COMPL. Enable() stores
// ENTRY_INIT, Disable() stores ENTRY_INV, and the completion handler spins
// until it observes ENTRY_INIT.
std::atomic<uint32_t> valid;
entry_type_t type;
// Timestamp (ns) taken in Enable() via HsaRsrcFactory::TimestampNs().
uint64_t dispatch;
uint64_t begin; // kernel begin timestamp, ns
uint64_t end; // kernel end timestamp, ns
uint64_t complete;
hsa_agent_t agent;
// Currently always set to 0 by Enable().
uint32_t dev_index;
// Completion signal supplied by the caller of the intercepted API.
hsa_signal_t orig;
// Proxy signal created in Enable() and passed to the runtime in place of
// `orig`; destroyed by Disable() or after completion.
hsa_signal_t signal;
// Called with the completed entry; must not be null (asserted before the call).
void (*handler)(const entry_t*);
// Memory pool the handler writes its activity record to
// (hsa_async_copy_handler calls entry->pool->Write(record)).
MemoryPool* pool;
// Per-type payload.
// NOTE(review): which member is active presumably follows `type` - confirm.
union {
struct {
} copy;
struct {
const char* name;
hsa_agent_t agent;
uint32_t tid;
} kernel;
};
};
// Add tracker entry
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
@@ -52,7 +83,7 @@ class Tracker {
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
entry->orig = signal;
entry->dispatch = hsa_rsrc->TimestampNs();
entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release);
entry->valid.store(ENTRY_INIT, std::memory_order_release);
// Creating a proxy signal
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
@@ -67,7 +98,7 @@ class Tracker {
// Delete tracker entry
inline static void Disable(entry_t* entry) {
hsa_signal_destroy(entry->signal);
entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release);
entry->valid.store(ENTRY_INV, std::memory_order_release);
}
private:
@@ -75,7 +106,7 @@ class Tracker {
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
// Query begin/end and complete timestamps
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
if (entry->type == roctracer::COPY_ENTRY_TYPE) {
if (entry->type == COPY_ENTRY_TYPE) {
hsa_amd_profiling_async_copy_time_t async_copy_time{};
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
if (status != HSA_STATUS_SUCCESS)
@@ -98,7 +129,10 @@ class Tracker {
hsa_signal_t signal = entry->signal;
// Releasing completed entry
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
entry->valid.store(ENTRY_COMPL, std::memory_order_release);
assert(entry->handler != nullptr);
entry->handler(entry);
// Original intercepted signal completion
if (orig.handle) {
@@ -112,14 +146,14 @@ class Tracker {
hsa_signal_store_screlease(orig, signal_value);
}
hsa_signal_destroy(signal);
delete entry;
}
// Handler for packet completion
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
// Acquire entry
entry_t* entry = reinterpret_cast<entry_t*>(arg);
while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT)
sched_yield();
while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
// Complete entry
Tracker::Complete(signal_value, entry);
-22
Просмотреть файл
@@ -60,28 +60,6 @@ enum entry_type_t {
NUM_ENTRY_TYPE = 4
};
struct trace_entry_t {
std::atomic<uint32_t> valid;
entry_type_t type;
uint64_t dispatch;
uint64_t begin; // kernel begin timestamp, ns
uint64_t end; // kernel end timestamp, ns
uint64_t complete;
hsa_agent_t agent;
uint32_t dev_index;
hsa_signal_t orig;
hsa_signal_t signal;
union {
struct {
} copy;
struct {
const char* name;
hsa_agent_t agent;
uint32_t tid;
} kernel;
};
};
template <class T> struct push_element_fun {
T* const elem_;
T** prev_;
+13 -13
Просмотреть файл
@@ -39,7 +39,7 @@
#include <ext/hsa_rt_utils.hpp>
#include "src/core/loader.h"
#include "src/core/trace_buffer.h"
#include "test/tool/trace_buffer.h"
#include "util/evt_stats.h"
#include "util/hsa_rsrc_factory.h"
#include "util/xml.h"
@@ -357,14 +357,6 @@ void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) {
fflush(hsa_api_file_handle);
}
void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) {
static uint64_t index = 0;
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns,
record->end_ns, index, my_pid);
fflush(hsa_async_copy_file_handle);
index++;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP API tracing
@@ -648,7 +640,13 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
}
break;
case ACTIVITY_DOMAIN_HSA_OPS:
if (record->op == HSA_OP_ID_RESERVED1) {
if (record->op == HSA_OP_ID_COPY) {
static uint64_t index = 0;
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns,
record->end_ns, index, my_pid);
fflush(hsa_async_copy_file_handle);
index++;
} else if (record->op == HSA_OP_ID_RESERVED1) {
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se,
record->pc_sample.cycle, record->pc_sample.pc, name);
fflush(pc_sample_file_handle);
@@ -1032,11 +1030,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
// initialize HSA tracing
roctracer::hsa_ops_properties_t ops_properties{
table, reinterpret_cast<activity_async_callback_t>(hsa_activity_callback), NULL,
output_prefix};
roctracer::hsa_ops_properties_t ops_properties{};
ops_properties.table = table;
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
// Allocating tracing pool
open_tracing_pool();
fprintf(stdout, " HSA-activity-trace()\n");
fflush(stdout);
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));