diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index 1166e7a889..33e16de7da 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -50,9 +50,7 @@ extern ImageExtTable ImageExtTable_saved; struct ops_properties_t { void* table; - activity_async_callback_t async_copy_callback_fun; - void* async_copy_callback_arg; - const char* output_prefix; + void* reserved1[3]; }; }; // namespace hsa_support diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 7e9161244f..708c90ca70 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -40,7 +40,6 @@ #include "core/journal.h" #include "core/loader.h" #include "core/memory_pool.h" -#include "core/trace_buffer.h" #include "core/tracker.h" #include "ext/hsa_rt_utils.hpp" #include "util/exception.h" @@ -229,19 +228,12 @@ class hip_act_cb_tracker_t { std::unordered_map data_; }; -void hsa_async_copy_handler(Tracker::entry_t* entry); -constexpr TraceBuffer::flush_prm_t trace_buffer_prm[] = { - {COPY_ENTRY_TYPE, hsa_async_copy_handler}}; -TraceBuffer* trace_buffer = NULL; - namespace hsa_support { // callbacks table cb_table_t cb_table; // async copy activity callback bool async_copy_callback_enabled = false; -activity_async_callback_t async_copy_callback_fun = NULL; -void* async_copy_callback_arg = NULL; -const char* output_prefix = NULL; +MemoryPool* async_copy_callback_memory_pool = nullptr; // Table of function pointers to HSA Core Runtime CoreApiTable CoreApiTable_saved{}; // Table of function pointers to AMD extensions @@ -546,15 +538,14 @@ void close_output_file(FILE* file_handle) { if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle); } -void hsa_async_copy_handler(Tracker::entry_t* entry) { +void hsa_async_copy_handler(const Tracker::entry_t* entry) { activity_record_t record{}; - record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id - record.begin_ns = entry->begin; // host begin timestamp - record.end_ns = entry->end; // host end timestamp - record.device_id = 0; // device id - - hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, - hsa_support::async_copy_callback_arg); + record.domain = ACTIVITY_DOMAIN_HSA_OPS; + record.op = HSA_OP_ID_COPY; + record.begin_ns = entry->begin; + record.end_ns = entry->end; + record.device_id = 0; + entry->pool->Write(record); } hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent, @@ -564,8 +555,10 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_ag hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - trace_entry_t* entry = trace_buffer->GetEntry(); - Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = hsa_async_copy_handler; + entry->pool = hsa_support::async_copy_callback_memory_pool; + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -583,8 +576,10 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - trace_entry_t* entry = trace_buffer->GetEntry(); - Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); + Tracker::entry_t* entry = new Tracker::entry_t(); + entry->handler = hsa_async_copy_handler; + entry->pool = hsa_support::async_copy_callback_memory_pool; + Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) Tracker::Disable(entry); @@ -625,7 +620,6 @@ unsigned set_stopped(unsigned val) { } // namespace roctracer LOADER_INSTANTIATE(); -TRACE_BUFFER_INSTANTIATE(); /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods @@ -968,6 +962,8 @@ static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domai case ACTIVITY_DOMAIN_HSA_OPS: { if (op == HSA_OP_ID_COPY) { roctracer::hsa_support::async_copy_callback_enabled = true; + roctracer::hsa_support::async_copy_callback_memory_pool = + reinterpret_cast(pool); } else { const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); if (roctracer::RocpLoader::GetRef() == NULL) break; @@ -1059,7 +1055,8 @@ static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t doma switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { if (op == HSA_OP_ID_COPY) { - roctracer::hsa_support::async_copy_callback_enabled = true; + roctracer::hsa_support::async_copy_callback_enabled = false; + roctracer::hsa_support::async_copy_callback_memory_pool = nullptr; } else { if (roctracer::RocpLoader::GetRef() == NULL) break; const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); @@ -1145,7 +1142,6 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); if (memory_pool != NULL) memory_pool->Flush(); - roctracer::TraceBufferBase::FlushAll(); API_METHOD_SUFFIX } @@ -1223,9 +1219,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); HsaApiTable* table = reinterpret_cast(ops_properties->table); - roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun; - roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; - roctracer::hsa_support::output_prefix = ops_properties->output_prefix; #if 0 // HSA dispatches intercepting @@ -1314,15 +1307,12 @@ PUBLIC_API void roctracer_unload() { PUBLIC_API void roctracer_flush_buf() { ONLOAD_TRACE_BEG(); - roctracer::trace_buffer->Flush(); ONLOAD_TRACE_END(); } CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); - roctracer::trace_buffer = new roctracer::TraceBuffer( - "HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2); roctracer_load(); ONLOAD_TRACE_END(); } diff --git a/src/core/tracker.h b/src/core/tracker.h index 76ca7d8f66..f86fb49b30 100644 --- a/src/core/tracker.h +++ b/src/core/tracker.h @@ -31,14 +31,45 @@ #include "util/hsa_rsrc_factory.h" #include "util/exception.h" #include "util/logger.h" -#include "core/trace_buffer.h" namespace roctracer { class Tracker { public: typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t; - typedef roctracer::trace_entry_t entry_t; - typedef roctracer::entry_type_t entry_type_t; + + enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 }; + + enum entry_type_t { + DFLT_ENTRY_TYPE = 0, + API_ENTRY_TYPE = 1, + COPY_ENTRY_TYPE = 2, + KERNEL_ENTRY_TYPE = 3, + NUM_ENTRY_TYPE = 4 + }; + + struct entry_t { + std::atomic valid; + entry_type_t type; + uint64_t dispatch; + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns + uint64_t complete; + hsa_agent_t agent; + uint32_t dev_index; + hsa_signal_t orig; + hsa_signal_t signal; + void (*handler)(const entry_t*); + MemoryPool* pool; + union { + struct { + } copy; + struct { + const char* name; + hsa_agent_t agent; + uint32_t tid; + } kernel; + }; + }; // Add tracker entry inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, @@ -52,7 +83,7 @@ class Tracker { entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->dispatch = hsa_rsrc->TimestampNs(); - entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release); + entry->valid.store(ENTRY_INIT, std::memory_order_release); // Creating a proxy signal status = hsa_signal_create(1, 0, NULL, &(entry->signal)); @@ -67,7 +98,7 @@ class Tracker { // Delete tracker entry inline static void Disable(entry_t* entry) { hsa_signal_destroy(entry->signal); - entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release); + entry->valid.store(ENTRY_INV, std::memory_order_release); } private: @@ -75,7 +106,7 @@ class Tracker { inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { // Query begin/end and complete timestamps ::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance()); - if (entry->type == roctracer::COPY_ENTRY_TYPE) { + if (entry->type == COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time); if (status != HSA_STATUS_SUCCESS) @@ -98,7 +129,10 @@ class Tracker { hsa_signal_t signal = entry->signal; // Releasing completed entry - entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); + entry->valid.store(ENTRY_COMPL, std::memory_order_release); + + assert(entry->handler != nullptr); + entry->handler(entry); // Original intercepted signal completion if (orig.handle) { @@ -112,14 +146,14 @@ class Tracker { hsa_signal_store_screlease(orig, signal_value); } hsa_signal_destroy(signal); + delete entry; } // Handler for packet completion static bool Handler(hsa_signal_value_t signal_value, void* arg) { // Acquire entry entry_t* entry = reinterpret_cast(arg); - while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) - sched_yield(); + while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield(); // Complete entry Tracker::Complete(signal_value, entry); diff --git a/src/core/trace_buffer.h b/test/tool/trace_buffer.h similarity index 95% rename from src/core/trace_buffer.h rename to test/tool/trace_buffer.h index d6061970ae..3000e188c3 100644 --- a/src/core/trace_buffer.h +++ b/test/tool/trace_buffer.h @@ -60,28 +60,6 @@ enum entry_type_t { NUM_ENTRY_TYPE = 4 }; -struct trace_entry_t { - std::atomic valid; - entry_type_t type; - uint64_t dispatch; - uint64_t begin; // kernel begin timestamp, ns - uint64_t end; // kernel end timestamp, ns - uint64_t complete; - hsa_agent_t agent; - uint32_t dev_index; - hsa_signal_t orig; - hsa_signal_t signal; - union { - struct { - } copy; - struct { - const char* name; - hsa_agent_t agent; - uint32_t tid; - } kernel; - }; -}; - template struct push_element_fun { T* const elem_; T** prev_; diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 5dc147de1e..9e23becbe9 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -39,7 +39,7 @@ #include #include "src/core/loader.h" -#include "src/core/trace_buffer.h" +#include "test/tool/trace_buffer.h" #include "util/evt_stats.h" #include "util/hsa_rsrc_factory.h" #include "util/xml.h" @@ -357,14 +357,6 @@ void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) { fflush(hsa_api_file_handle); } -void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) { - static uint64_t index = 0; - fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, - record->end_ns, index, my_pid); - fflush(hsa_async_copy_file_handle); - index++; -} - /////////////////////////////////////////////////////////////////////////////////////////////////////// // HIP API tracing @@ -648,7 +640,13 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) { } break; case ACTIVITY_DOMAIN_HSA_OPS: - if (record->op == HSA_OP_ID_RESERVED1) { + if (record->op == HSA_OP_ID_COPY) { + static uint64_t index = 0; + fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, + record->end_ns, index, my_pid); + fflush(hsa_async_copy_file_handle); + index++; + } else if (record->op == HSA_OP_ID_RESERVED1) { fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name); fflush(pc_sample_file_handle); @@ -1032,11 +1030,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties{ - table, reinterpret_cast(hsa_activity_callback), NULL, - output_prefix}; + roctracer::hsa_ops_properties_t ops_properties{}; + ops_properties.table = table; roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); + // Allocating tracing pool + open_tracing_pool(); + fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));