Move trace_buffer.h to the tool directory
A trace buffer is used to efficiently store synchronous event records so that they can be processed later, possibly in a different thread, when the buffer is flushed. This helps reduce the latency added by tracing API calls. The API does not need to use trace buffers as synchronous events are directly reported to the client with callbacks, and asynchronous events (activities) are saved in memory pools. The implementation of HSA asynchronous memory copy activities was using a trace buffer shared with the tracer tool to write the records to a file (async_copy_trace.txt), instead of using a memory pool and reporting the activity to the client. Removed the asynchronous memory copies trace buffer, and updated hsa_async_copy_handler to use the pool specified when the activity was enabled. Updated the tracer tool to read HSA_OP_ID_COPY records out of the default memory pool and write them to async_copy_trace.txt. Move trace_buffer.h to test/tool as tracer_tool.cpp is now the only file using it. Change-Id: Ida95aba2eaf3c3f2a979ed6c2b060374017b7424
Этот коммит содержится в:
коммит произвёл
Laurent Morichetti
родитель
48f4c82685
Коммит
61f35b0204
@@ -50,9 +50,7 @@ extern ImageExtTable ImageExtTable_saved;
|
||||
|
||||
struct ops_properties_t {
|
||||
void* table;
|
||||
activity_async_callback_t async_copy_callback_fun;
|
||||
void* async_copy_callback_arg;
|
||||
const char* output_prefix;
|
||||
void* reserved1[3];
|
||||
};
|
||||
|
||||
}; // namespace hsa_support
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include "core/journal.h"
|
||||
#include "core/loader.h"
|
||||
#include "core/memory_pool.h"
|
||||
#include "core/trace_buffer.h"
|
||||
#include "core/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
#include "util/exception.h"
|
||||
@@ -229,19 +228,12 @@ class hip_act_cb_tracker_t {
|
||||
std::unordered_map<uint32_t, uint32_t> data_;
|
||||
};
|
||||
|
||||
void hsa_async_copy_handler(Tracker::entry_t* entry);
|
||||
constexpr TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
|
||||
{COPY_ENTRY_TYPE, hsa_async_copy_handler}};
|
||||
TraceBuffer<trace_entry_t>* trace_buffer = NULL;
|
||||
|
||||
namespace hsa_support {
|
||||
// callbacks table
|
||||
cb_table_t cb_table;
|
||||
// async copy activity callback
|
||||
bool async_copy_callback_enabled = false;
|
||||
activity_async_callback_t async_copy_callback_fun = NULL;
|
||||
void* async_copy_callback_arg = NULL;
|
||||
const char* output_prefix = NULL;
|
||||
MemoryPool* async_copy_callback_memory_pool = nullptr;
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable CoreApiTable_saved{};
|
||||
// Table of function pointers to AMD extensions
|
||||
@@ -546,15 +538,14 @@ void close_output_file(FILE* file_handle) {
|
||||
if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle);
|
||||
}
|
||||
|
||||
void hsa_async_copy_handler(Tracker::entry_t* entry) {
|
||||
void hsa_async_copy_handler(const Tracker::entry_t* entry) {
|
||||
activity_record_t record{};
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id
|
||||
record.begin_ns = entry->begin; // host begin timestamp
|
||||
record.end_ns = entry->end; // host end timestamp
|
||||
record.device_id = 0; // device id
|
||||
|
||||
hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record,
|
||||
hsa_support::async_copy_callback_arg);
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS;
|
||||
record.op = HSA_OP_ID_COPY;
|
||||
record.begin_ns = entry->begin;
|
||||
record.end_ns = entry->end;
|
||||
record.device_id = 0;
|
||||
entry->pool->Write(record);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent,
|
||||
@@ -564,8 +555,10 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag
|
||||
hsa_signal_t completion_signal) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
trace_entry_t* entry = trace_buffer->GetEntry();
|
||||
Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = hsa_support::async_copy_callback_memory_pool;
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
|
||||
dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
|
||||
@@ -583,8 +576,10 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
hsa_signal_t completion_signal) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
trace_entry_t* entry = trace_buffer->GetEntry();
|
||||
Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = hsa_async_copy_handler;
|
||||
entry->pool = hsa_support::async_copy_callback_memory_pool;
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);
|
||||
@@ -625,7 +620,6 @@ unsigned set_stopped(unsigned val) {
|
||||
} // namespace roctracer
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Public library methods
|
||||
@@ -968,6 +962,8 @@ static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domai
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
roctracer::hsa_support::async_copy_callback_memory_pool =
|
||||
reinterpret_cast<roctracer::MemoryPool*>(pool);
|
||||
} else {
|
||||
const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);
|
||||
if (roctracer::RocpLoader::GetRef() == NULL) break;
|
||||
@@ -1059,7 +1055,8 @@ static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t doma
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
roctracer::hsa_support::async_copy_callback_enabled = false;
|
||||
roctracer::hsa_support::async_copy_callback_memory_pool = nullptr;
|
||||
} else {
|
||||
if (roctracer::RocpLoader::GetRef() == NULL) break;
|
||||
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
|
||||
@@ -1145,7 +1142,6 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
|
||||
if (memory_pool != NULL) memory_pool->Flush();
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -1223,9 +1219,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain
|
||||
roctracer::hsa_ops_properties_t* ops_properties =
|
||||
reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
|
||||
roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun;
|
||||
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
|
||||
roctracer::hsa_support::output_prefix = ops_properties->output_prefix;
|
||||
|
||||
#if 0
|
||||
// HSA dispatches intercepting
|
||||
@@ -1314,15 +1307,12 @@ PUBLIC_API void roctracer_unload() {
|
||||
|
||||
PUBLIC_API void roctracer_flush_buf() {
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer::trace_buffer->Flush();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer::util::Logger::Create();
|
||||
roctracer::trace_buffer = new roctracer::TraceBuffer<roctracer::trace_entry_t>(
|
||||
"HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2);
|
||||
roctracer_load();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
@@ -31,14 +31,45 @@
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
#include "core/trace_buffer.h"
|
||||
|
||||
namespace roctracer {
|
||||
class Tracker {
|
||||
public:
|
||||
typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t;
|
||||
typedef roctracer::trace_entry_t entry_t;
|
||||
typedef roctracer::entry_type_t entry_type_t;
|
||||
|
||||
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
|
||||
|
||||
enum entry_type_t {
|
||||
DFLT_ENTRY_TYPE = 0,
|
||||
API_ENTRY_TYPE = 1,
|
||||
COPY_ENTRY_TYPE = 2,
|
||||
KERNEL_ENTRY_TYPE = 3,
|
||||
NUM_ENTRY_TYPE = 4
|
||||
};
|
||||
|
||||
struct entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
entry_type_t type;
|
||||
uint64_t dispatch;
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
void (*handler)(const entry_t*);
|
||||
MemoryPool* pool;
|
||||
union {
|
||||
struct {
|
||||
} copy;
|
||||
struct {
|
||||
const char* name;
|
||||
hsa_agent_t agent;
|
||||
uint32_t tid;
|
||||
} kernel;
|
||||
};
|
||||
};
|
||||
|
||||
// Add tracker entry
|
||||
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
|
||||
@@ -52,7 +83,7 @@ class Tracker {
|
||||
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->orig = signal;
|
||||
entry->dispatch = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release);
|
||||
entry->valid.store(ENTRY_INIT, std::memory_order_release);
|
||||
|
||||
// Creating a proxy signal
|
||||
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
@@ -67,7 +98,7 @@ class Tracker {
|
||||
// Delete tracker entry
|
||||
inline static void Disable(entry_t* entry) {
|
||||
hsa_signal_destroy(entry->signal);
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release);
|
||||
entry->valid.store(ENTRY_INV, std::memory_order_release);
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -75,7 +106,7 @@ class Tracker {
|
||||
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
// Query begin/end and complete timestamps
|
||||
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
|
||||
if (entry->type == roctracer::COPY_ENTRY_TYPE) {
|
||||
if (entry->type == COPY_ENTRY_TYPE) {
|
||||
hsa_amd_profiling_async_copy_time_t async_copy_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
@@ -98,7 +129,10 @@ class Tracker {
|
||||
hsa_signal_t signal = entry->signal;
|
||||
|
||||
// Releasing completed entry
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
entry->valid.store(ENTRY_COMPL, std::memory_order_release);
|
||||
|
||||
assert(entry->handler != nullptr);
|
||||
entry->handler(entry);
|
||||
|
||||
// Original intercepted signal completion
|
||||
if (orig.handle) {
|
||||
@@ -112,14 +146,14 @@ class Tracker {
|
||||
hsa_signal_store_screlease(orig, signal_value);
|
||||
}
|
||||
hsa_signal_destroy(signal);
|
||||
delete entry;
|
||||
}
|
||||
|
||||
// Handler for packet completion
|
||||
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
|
||||
// Acquire entry
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT)
|
||||
sched_yield();
|
||||
while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
|
||||
|
||||
// Complete entry
|
||||
Tracker::Complete(signal_value, entry);
|
||||
|
||||
@@ -60,28 +60,6 @@ enum entry_type_t {
|
||||
NUM_ENTRY_TYPE = 4
|
||||
};
|
||||
|
||||
struct trace_entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
entry_type_t type;
|
||||
uint64_t dispatch;
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
union {
|
||||
struct {
|
||||
} copy;
|
||||
struct {
|
||||
const char* name;
|
||||
hsa_agent_t agent;
|
||||
uint32_t tid;
|
||||
} kernel;
|
||||
};
|
||||
};
|
||||
|
||||
template <class T> struct push_element_fun {
|
||||
T* const elem_;
|
||||
T** prev_;
|
||||
@@ -39,7 +39,7 @@
|
||||
#include <ext/hsa_rt_utils.hpp>
|
||||
|
||||
#include "src/core/loader.h"
|
||||
#include "src/core/trace_buffer.h"
|
||||
#include "test/tool/trace_buffer.h"
|
||||
#include "util/evt_stats.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/xml.h"
|
||||
@@ -357,14 +357,6 @@ void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) {
|
||||
fflush(hsa_api_file_handle);
|
||||
}
|
||||
|
||||
void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) {
|
||||
static uint64_t index = 0;
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, index, my_pid);
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
index++;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP API tracing
|
||||
|
||||
@@ -648,7 +640,13 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
}
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (record->op == HSA_OP_ID_RESERVED1) {
|
||||
if (record->op == HSA_OP_ID_COPY) {
|
||||
static uint64_t index = 0;
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, index, my_pid);
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
index++;
|
||||
} else if (record->op == HSA_OP_ID_RESERVED1) {
|
||||
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se,
|
||||
record->pc_sample.cycle, record->pc_sample.pc, name);
|
||||
fflush(pc_sample_file_handle);
|
||||
@@ -1032,11 +1030,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer::hsa_ops_properties_t ops_properties{
|
||||
table, reinterpret_cast<activity_async_callback_t>(hsa_activity_callback), NULL,
|
||||
output_prefix};
|
||||
roctracer::hsa_ops_properties_t ops_properties{};
|
||||
ops_properties.table = table;
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
// Allocating tracing pool
|
||||
open_tracing_pool();
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n");
|
||||
fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
|
||||
Ссылка в новой задаче
Block a user