From 61f35b0204582de790dfa281caf285a4ea62ac1f Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Fri, 22 Apr 2022 13:36:41 -0700 Subject: [PATCH] Move trace_buffer.h to the tool directory A trace buffer is used to efficiently store synchronous event records so that they can be processed later, possibly in a different thread, when the buffer is flushed. This helps reduce the latency added by tracing API calls. The API does not need to use trace buffers as synchronous events are directly reported to the client with callbacks, and asynchronous events (activities) are saved in memory pools. The implementation of HSA asynchronous memory copy activities was using a trace buffer shared with the tracer tool to write the records to a file (async_copy_trace.txt), instead of using a memory pool and reporting the activity to the client. Removed the asynchronous memory copies trace buffer, and updated hsa_async_copy_handler to use the pool specified when the activity was enabled. Updated the tracer tool to read HSA_OP_ID_COPY records out of the default memory pool and write them to async_copy_trace.txt. Move trace_buffer.h to test/tool as tracer_tool.cpp is now the only file using it. 
Change-Id: Ida95aba2eaf3c3f2a979ed6c2b060374017b7424 --- inc/roctracer_hsa.h | 4 +- src/core/roctracer.cpp | 50 ++++++++++--------------- src/core/tracker.h | 52 +++++++++++++++++++++----- {src/core => test/tool}/trace_buffer.h | 22 ----------- test/tool/tracer_tool.cpp | 26 ++++++------- 5 files changed, 77 insertions(+), 77 deletions(-) rename {src/core => test/tool}/trace_buffer.h (95%) diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index 1166e7a889..33e16de7da 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -50,9 +50,7 @@ extern ImageExtTable ImageExtTable_saved; struct ops_properties_t { void* table; - activity_async_callback_t async_copy_callback_fun; - void* async_copy_callback_arg; - const char* output_prefix; + void* reserved1[3]; }; }; // namespace hsa_support diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 7e9161244f..708c90ca70 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -40,7 +40,6 @@ #include "core/journal.h" #include "core/loader.h" #include "core/memory_pool.h" -#include "core/trace_buffer.h" #include "core/tracker.h" #include "ext/hsa_rt_utils.hpp" #include "util/exception.h" @@ -229,19 +228,12 @@ class hip_act_cb_tracker_t { std::unordered_map data_; }; -void hsa_async_copy_handler(Tracker::entry_t* entry); -constexpr TraceBuffer::flush_prm_t trace_buffer_prm[] = { - {COPY_ENTRY_TYPE, hsa_async_copy_handler}}; -TraceBuffer* trace_buffer = NULL; - namespace hsa_support { // callbacks table cb_table_t cb_table; // async copy activity callback bool async_copy_callback_enabled = false; -activity_async_callback_t async_copy_callback_fun = NULL; -void* async_copy_callback_arg = NULL; -const char* output_prefix = NULL; +MemoryPool* async_copy_callback_memory_pool = nullptr; // Table of function pointers to HSA Core Runtime CoreApiTable CoreApiTable_saved{}; // Table of function pointers to AMD extensions @@ -546,15 +538,14 @@ void close_output_file(FILE* file_handle) { if ((file_handle != 
NULL) && (file_handle != stdout)) fclose(file_handle); } -void hsa_async_copy_handler(Tracker::entry_t* entry) { +void hsa_async_copy_handler(const Tracker::entry_t* entry) { activity_record_t record{}; - record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id - record.begin_ns = entry->begin; // host begin timestamp - record.end_ns = entry->end; // host end timestamp - record.device_id = 0; // device id - - hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, - hsa_support::async_copy_callback_arg); + record.domain = ACTIVITY_DOMAIN_HSA_OPS; + record.op = HSA_OP_ID_COPY; + record.begin_ns = entry->begin; + record.end_ns = entry->end; + record.device_id = 0; + entry->pool->Write(record); } hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent, @@ -564,8 +555,10 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - trace_entry_t* entry = trace_buffer->GetEntry(); - Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = hsa_async_copy_handler; + entry->pool = hsa_support::async_copy_callback_memory_pool; + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -583,8 +576,10 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - trace_entry_t* entry = trace_buffer->GetEntry(); - Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = 
hsa_async_copy_handler; + entry->pool = hsa_support::async_copy_callback_memory_pool; + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -625,7 +620,6 @@ unsigned set_stopped(unsigned val) { } // namespace roctracer LOADER_INSTANTIATE(); -TRACE_BUFFER_INSTANTIATE(); /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods @@ -968,6 +962,8 @@ static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domai case ACTIVITY_DOMAIN_HSA_OPS: { if (op == HSA_OP_ID_COPY) { roctracer::hsa_support::async_copy_callback_enabled = true; + roctracer::hsa_support::async_copy_callback_memory_pool = + reinterpret_cast(pool); } else { const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); if (roctracer::RocpLoader::GetRef() == NULL) break; @@ -1059,7 +1055,8 @@ static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t doma switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { if (op == HSA_OP_ID_COPY) { - roctracer::hsa_support::async_copy_callback_enabled = true; + roctracer::hsa_support::async_copy_callback_enabled = false; + roctracer::hsa_support::async_copy_callback_memory_pool = nullptr; } else { if (roctracer::RocpLoader::GetRef() == NULL) break; const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); @@ -1145,7 +1142,6 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); if (memory_pool != NULL) memory_pool->Flush(); - roctracer::TraceBufferBase::FlushAll(); API_METHOD_SUFFIX } @@ -1223,9 +1219,6 @@ PUBLIC_API roctracer_status_t 
roctracer_set_properties(roctracer_domain_t domain roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); HsaApiTable* table = reinterpret_cast(ops_properties->table); - roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun; - roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; - roctracer::hsa_support::output_prefix = ops_properties->output_prefix; #if 0 // HSA dispatches intercepting @@ -1314,15 +1307,12 @@ PUBLIC_API void roctracer_unload() { PUBLIC_API void roctracer_flush_buf() { ONLOAD_TRACE_BEG(); - roctracer::trace_buffer->Flush(); ONLOAD_TRACE_END(); } CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); - roctracer::trace_buffer = new roctracer::TraceBuffer( - "HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2); roctracer_load(); ONLOAD_TRACE_END(); } diff --git a/src/core/tracker.h b/src/core/tracker.h index 76ca7d8f66..f86fb49b30 100644 --- a/src/core/tracker.h +++ b/src/core/tracker.h @@ -31,14 +31,45 @@ #include "util/hsa_rsrc_factory.h" #include "util/exception.h" #include "util/logger.h" -#include "core/trace_buffer.h" namespace roctracer { class Tracker { public: typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t; - typedef roctracer::trace_entry_t entry_t; - typedef roctracer::entry_type_t entry_type_t; + + enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 }; + + enum entry_type_t { + DFLT_ENTRY_TYPE = 0, + API_ENTRY_TYPE = 1, + COPY_ENTRY_TYPE = 2, + KERNEL_ENTRY_TYPE = 3, + NUM_ENTRY_TYPE = 4 + }; + + struct entry_t { + std::atomic valid; + entry_type_t type; + uint64_t dispatch; + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns + uint64_t complete; + hsa_agent_t agent; + uint32_t dev_index; + hsa_signal_t orig; + hsa_signal_t signal; + void (*handler)(const entry_t*); + MemoryPool* pool; + union { + struct { + } copy; + struct { + const char* name; + 
hsa_agent_t agent; + uint32_t tid; + } kernel; + }; + }; // Add tracker entry inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, @@ -52,7 +83,7 @@ class Tracker { entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->dispatch = hsa_rsrc->TimestampNs(); - entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release); + entry->valid.store(ENTRY_INIT, std::memory_order_release); // Creating a proxy signal status = hsa_signal_create(1, 0, NULL, &(entry->signal)); @@ -67,7 +98,7 @@ class Tracker { // Delete tracker entry inline static void Disable(entry_t* entry) { hsa_signal_destroy(entry->signal); - entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release); + entry->valid.store(ENTRY_INV, std::memory_order_release); } private: @@ -75,7 +106,7 @@ class Tracker { inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { // Query begin/end and complete timestamps ::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance()); - if (entry->type == roctracer::COPY_ENTRY_TYPE) { + if (entry->type == COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time); if (status != HSA_STATUS_SUCCESS) @@ -98,7 +129,10 @@ class Tracker { hsa_signal_t signal = entry->signal; // Releasing completed entry - entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); + entry->valid.store(ENTRY_COMPL, std::memory_order_release); + + assert(entry->handler != nullptr); + entry->handler(entry); // Original intercepted signal completion if (orig.handle) { @@ -112,14 +146,14 @@ class Tracker { hsa_signal_store_screlease(orig, signal_value); } hsa_signal_destroy(signal); + delete entry; } // Handler for packet completion static bool Handler(hsa_signal_value_t signal_value, void* arg) { // Acquire entry entry_t* entry = 
reinterpret_cast(arg); - while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) - sched_yield(); + while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield(); // Complete entry Tracker::Complete(signal_value, entry); diff --git a/src/core/trace_buffer.h b/test/tool/trace_buffer.h similarity index 95% rename from src/core/trace_buffer.h rename to test/tool/trace_buffer.h index d6061970ae..3000e188c3 100644 --- a/src/core/trace_buffer.h +++ b/test/tool/trace_buffer.h @@ -60,28 +60,6 @@ enum entry_type_t { NUM_ENTRY_TYPE = 4 }; -struct trace_entry_t { - std::atomic valid; - entry_type_t type; - uint64_t dispatch; - uint64_t begin; // kernel begin timestamp, ns - uint64_t end; // kernel end timestamp, ns - uint64_t complete; - hsa_agent_t agent; - uint32_t dev_index; - hsa_signal_t orig; - hsa_signal_t signal; - union { - struct { - } copy; - struct { - const char* name; - hsa_agent_t agent; - uint32_t tid; - } kernel; - }; -}; - template struct push_element_fun { T* const elem_; T** prev_; diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 5dc147de1e..9e23becbe9 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -39,7 +39,7 @@ #include #include "src/core/loader.h" -#include "src/core/trace_buffer.h" +#include "test/tool/trace_buffer.h" #include "util/evt_stats.h" #include "util/hsa_rsrc_factory.h" #include "util/xml.h" @@ -357,14 +357,6 @@ void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) { fflush(hsa_api_file_handle); } -void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) { - static uint64_t index = 0; - fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, - record->end_ns, index, my_pid); - fflush(hsa_async_copy_file_handle); - index++; -} - /////////////////////////////////////////////////////////////////////////////////////////////////////// // HIP API tracing @@ -648,7 +640,13 @@ void 
pool_activity_callback(const char* begin, const char* end, void* arg) { } break; case ACTIVITY_DOMAIN_HSA_OPS: - if (record->op == HSA_OP_ID_RESERVED1) { + if (record->op == HSA_OP_ID_COPY) { + static uint64_t index = 0; + fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, + record->end_ns, index, my_pid); + fflush(hsa_async_copy_file_handle); + index++; + } else if (record->op == HSA_OP_ID_RESERVED1) { fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name); fflush(pc_sample_file_handle); @@ -1032,11 +1030,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties{ - table, reinterpret_cast(hsa_activity_callback), NULL, - output_prefix}; + roctracer::hsa_ops_properties_t ops_properties{}; + ops_properties.table = table; roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); + // Allocating tracing pool + open_tracing_pool(); + fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));