diff --git a/projects/roctracer/inc/roctracer.h b/projects/roctracer/inc/roctracer.h index f39118abdf..05d3b84295 100644 --- a/projects/roctracer/inc/roctracer.h +++ b/projects/roctracer/inc/roctracer.h @@ -90,11 +90,6 @@ roctracer_status_t roctracer_op_code( uint32_t* op, // [out] op code uint32_t* kind = NULL); // [out] op kind code -// Set properties -roctracer_status_t roctracer_set_properties( - roctracer_domain_t domain, // tracing domain - void* propertes); // tracing properties - //////////////////////////////////////////////////////////////////////////////// // Callback API // @@ -214,6 +209,21 @@ roctracer_status_t roctracer_disable_activity(); roctracer_status_t roctracer_flush_activity( roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one +// Load/Un;oad methods +// Set properties +roctracer_status_t roctracer_set_properties( + roctracer_domain_t domain, // tracing domain + void* propertes); // tracing properties + +struct HsaApiTable; +bool roctracer_load( + HsaApiTable* table, + uint64_t runtime_version, + uint64_t failed_tool_count, + const char* const* failed_tool_names); + +void roctracer_unload(); + #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/projects/roctracer/inc/roctracer_hsa.h b/projects/roctracer/inc/roctracer_hsa.h index de4f1f9d1c..924d6a6238 100644 --- a/projects/roctracer/inc/roctracer_hsa.h +++ b/projects/roctracer/inc/roctracer_hsa.h @@ -74,8 +74,10 @@ extern AmdExtTable AmdExtTable_saved; extern ImageExtTable ImageExtTable_saved; struct ops_properties_t { + void* table; activity_async_callback_t async_copy_callback_fun; void* async_copy_callback_arg; + const char* output_prefix; }; }; // namespace hsa_support diff --git a/projects/roctracer/src/CMakeLists.txt b/projects/roctracer/src/CMakeLists.txt index 2ac70bb6a8..46766ad815 100644 --- a/projects/roctracer/src/CMakeLists.txt +++ b/projects/roctracer/src/CMakeLists.txt @@ -4,6 +4,9 @@ set ( TARGET_LIB "${TARGET_NAME}" ) set ( LIB_SRC ${LIB_DIR}/core/roctracer.cpp + ${LIB_DIR}/proxy/proxy_queue.cpp + ${LIB_DIR}/proxy/simple_proxy_queue.cpp + ${LIB_DIR}/proxy/intercept_queue.cpp ${LIB_DIR}/util/hsa_rsrc_factory.cpp ) add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} ) diff --git a/projects/roctracer/src/core/roctracer.cpp b/projects/roctracer/src/core/roctracer.cpp index 0286c10e05..f4d078760d 100644 --- a/projects/roctracer/src/core/roctracer.cpp +++ b/projects/roctracer/src/core/roctracer.cpp @@ -35,12 +35,18 @@ THE SOFTWARE. #include #include "core/loader.h" +#include "core/trace_buffer.h" #include "proxy/tracker.h" #include "ext/hsa_rt_utils.hpp" #include "util/exception.h" #include "util/hsa_rsrc_factory.h" #include "util/logger.h" +#include "proxy/hsa_queue.h" +#include "proxy/intercept_queue.h" +#include "proxy/proxy_queue.h" +#include "proxy/simple_proxy_queue.h" + #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) @@ -85,17 +91,86 @@ THE SOFTWARE. /////////////////////////////////////////////////////////////////////////////////////////////////// // Internal library methods // +namespace rocprofiler { +decltype(hsa_queue_create)* hsa_queue_create_fn; +decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; + +decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn; +decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn; + +decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn; +decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn; +decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn; + +decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn; +decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn; +decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn; + +decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn; +decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; + +decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn; +decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; + +::HsaApiTable* kHsaApiTable; + +void SaveHsaApi(::HsaApiTable* table) { + util::HsaRsrcFactory::InitHsaApiTable(table); + + kHsaApiTable = table; + hsa_queue_create_fn = table->core_->hsa_queue_create_fn; + hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn; + + hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn; + hsa_signal_store_screlease_fn = table->core_->hsa_signal_store_screlease_fn; + + hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn; + hsa_queue_load_read_index_relaxed_fn = table->core_->hsa_queue_load_read_index_relaxed_fn; + + hsa_queue_load_write_index_scacquire_fn = table->core_->hsa_queue_load_write_index_scacquire_fn; + hsa_queue_store_write_index_screlease_fn = table->core_->hsa_queue_store_write_index_screlease_fn; + hsa_queue_load_read_index_scacquire_fn = table->core_->hsa_queue_load_read_index_scacquire_fn; + + hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn; + hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn; +} + +void RestoreHsaApi() { + ::HsaApiTable* table = kHsaApiTable; + table->core_->hsa_queue_create_fn = hsa_queue_create_fn; + table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn; + + table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn; + table->core_->hsa_signal_store_screlease_fn = hsa_signal_store_screlease_fn; + + table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn; + table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn; + table->core_->hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed_fn; + + table->core_->hsa_queue_load_write_index_scacquire_fn = hsa_queue_load_write_index_scacquire_fn; + table->core_->hsa_queue_store_write_index_screlease_fn = hsa_queue_store_write_index_screlease_fn; + table->core_->hsa_queue_load_read_index_scacquire_fn = hsa_queue_load_read_index_scacquire_fn; + + table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn; + table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn; +} +} + namespace roctracer { decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn; decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; +TraceBuffer trace_buffer(0x200000); + namespace hsa_support { // callbacks table cb_table_t cb_table; // asyc copy activity callback +bool async_copy_callback_enabled = false; activity_async_callback_t async_copy_callback_fun = NULL; void* async_copy_callback_arg = NULL; -bool async_copy_callback_enabled = false; +const char* output_prefix = NULL; // Table of function pointers to HSA Core Runtime CoreApiTable CoreApiTable_saved{}; // Table of function pointers to AMD extensions @@ -288,15 +363,6 @@ class MemoryPool { pthread_cond_t read_cond_; }; -CONSTRUCTOR_API void constructor() { - util::Logger::Create(); -} - -DESTRUCTOR_API void destructor() { - ::util::HsaRsrcFactory::Destroy(); - util::Logger::Destroy(); -} - // Correlation id storage static thread_local activity_correlation_id_t correlation_id_tls = 0; typedef std::map correlation_id_map_t; @@ -367,18 +433,73 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { pool->Write(*record_ptr); } -bool hsa_async_copy_handler(hsa_signal_value_t value, void* arg) { - ::proxy::Tracker::entry_t* entry = reinterpret_cast<::proxy::Tracker::entry_t*>(arg); +// Open output file +FILE* open_output_file(const char* prefix, const char* name) { + FILE* file_handle = NULL; + if (prefix != NULL) { + std::ostringstream oss; + oss << prefix << "/" << name; + file_handle = fopen(oss.str().c_str(), "w"); + if (file_handle == NULL) { + std::ostringstream errmsg; + errmsg << "ROCTracer: fopen error, file '" << oss.str().c_str() << "'"; + perror(errmsg.str().c_str()); + abort(); + } + } else file_handle = stdout; + return file_handle; +} +FILE* kernel_file_handle = NULL; +void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) { + static uint64_t index = 0; + if (index == 0) { + kernel_file_handle = open_output_file(hsa_support::output_prefix, "results.txt"); + } + fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", + index, + ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + entry->kernel.tid, + entry->kernel.name, + entry->dispatch, + entry->begin, + entry->end, + entry->complete); +#if 0 + fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), tid(%lu), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")", + index, + HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + entry->data.queue_id, + entry->data.queue_index, + entry->data.thread_id, + entry->kernel_properties.grid_size, + entry->kernel_properties.workgroup_size, + entry->kernel_properties.lds_size, + entry->kernel_properties.scratch_size, + entry->kernel_properties.vgpr_count, + entry->kernel_properties.sgpr_count, + entry->kernel_properties.fbarrier_count, + entry->kernel_properties.signal.handle, + nik_name.c_str()); + if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", + record->dispatch, + record->begin, + record->end, + record->complete); + fprintf(file_handle, "\n"); + fflush(file_handle); +#endif + index++; +} + +void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry) { activity_record_t record{}; record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id - record.correlation_id = entry->index; // activity ID - record.begin_ns = entry->record->begin; // host begin timestamp - record.end_ns = entry->record->end; // host end timestamp + record.begin_ns = entry->begin; // host begin timestamp + record.end_ns = entry->end; // host end timestamp record.device_id = 0; // device id hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, hsa_support::async_copy_callback_arg); - return false; } hsa_status_t hsa_amd_memory_async_copy_interceptor( @@ -388,17 +509,15 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor( { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - ::proxy::Tracker* tracker = &::proxy::Tracker::Instance(); - ::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal); + trace_entry_t* entry = trace_buffer.GetEntry(); + ::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, - dep_signals, tracker_entry->signal); - if (status == HSA_STATUS_SUCCESS) { - tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast(tracker_entry)); - } else { - tracker->Delete(tracker_entry); - } - } else { + dep_signals, entry->signal); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_memory_async_copy interceptor"); + } + else + { status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); @@ -414,18 +533,16 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { - ::proxy::Tracker* tracker = &::proxy::Tracker::Instance(); - ::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal); + trace_entry_t* entry = trace_buffer.GetEntry(); + ::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, - tracker_entry->signal); - if (status == HSA_STATUS_SUCCESS) { - tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast(tracker_entry)); - } else { - tracker->Delete(tracker_entry); - } - } else { + entry->signal); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_memory_async_copy interceptor"); + } + else + { status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals, @@ -443,10 +560,6 @@ memory_pool_mutex_t memory_pool_mutex; LOADER_INSTANTIATE(); -std::atomic proxy::Tracker::instance_{}; -proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_; -proxy::Tracker::counter_t proxy::Tracker::counter_ = 0; - /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods // @@ -685,6 +798,7 @@ static void roctracer_enable_activity_impl( switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { roctracer::hsa_support::async_copy_callback_enabled = true; + rocprofiler::InterceptQueue::Enable(true); break; } case ACTIVITY_DOMAIN_HSA_API: break; @@ -747,6 +861,7 @@ static void roctracer_disable_activity_impl( switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { roctracer::hsa_support::async_copy_callback_enabled = false; + rocprofiler::InterceptQueue::Enable(false); break; } case ACTIVITY_DOMAIN_HSA_API: break; @@ -812,8 +927,25 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( case ACTIVITY_DOMAIN_HSA_OPS: { // HSA OPS properties roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); + HsaApiTable* table = reinterpret_cast(ops_properties->table); roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun; roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; + roctracer::hsa_support::output_prefix = ops_properties->output_prefix; + + // HSA dispatches intercepting + rocprofiler::SaveHsaApi(table); + rocprofiler::ProxyQueue::InitFactory(); + rocprofiler::ProxyQueue::HsaIntercept(table); + rocprofiler::InterceptQueue::HsaIntercept(table); + + // HSA async-copy tracing + hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable"); + roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn; + roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; + table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor; + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor; + break; } case ACTIVITY_DOMAIN_HSA_API: { @@ -834,17 +966,39 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( } // HSA-runtime tool on-load method -PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, +PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, const char* const* failed_tool_names) { - // enabled HSA async-copy tracing - hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); - if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable"); - roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn; - roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; - table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor; - table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor; + static bool is_loaded = false; + if (is_loaded) return true; + is_loaded = true; return true; } +PUBLIC_API void roctracer_unload() { + static bool is_unloaded = false; + if (is_unloaded) return; + is_unloaded = true; + + roctracer::trace_buffer.Flush(roctracer::COPY_ENTRY_TYPE, roctracer::hsa_async_copy_handler); + roctracer::trace_buffer.Flush(roctracer::KERNEL_ENTRY_TYPE, roctracer::hsa_kernel_handler); + if ((roctracer::hsa_support::output_prefix != NULL) && (roctracer::kernel_file_handle != NULL)) fclose(roctracer::kernel_file_handle); +} + +PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { + return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); +} +PUBLIC_API void OnUnload() { roctracer_unload(); } + +CONSTRUCTOR_API void constructor() { + roctracer::util::Logger::Create(); +} + +DESTRUCTOR_API void destructor() { + roctracer_unload(); + util::HsaRsrcFactory::Destroy(); + roctracer::util::Logger::Destroy(); +} + } // extern "C" diff --git a/projects/roctracer/src/core/trace_buffer.h b/projects/roctracer/src/core/trace_buffer.h new file mode 100644 index 0000000000..30668e34d7 --- /dev/null +++ b/projects/roctracer/src/core/trace_buffer.h @@ -0,0 +1,79 @@ +#ifndef SRC_CORE_TRACE_BUFFER_H_ +#define SRC_CORE_TRACE_BUFFER_H_ + +namespace roctracer { +enum { + TRACE_ENTRY_INV = 0, + TRACE_ENTRY_INIT = 1, + TRACE_ENTRY_COMPL = 2 +}; +enum { + API_ENTRY_TYPE, + COPY_ENTRY_TYPE, + KERNEL_ENTRY_TYPE +}; + +struct trace_entry_t { + std::atomic valid; + uint32_t type; + uint64_t dispatch; + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns + uint64_t complete; + hsa_agent_t agent; + hsa_signal_t orig; + hsa_signal_t signal; + union { + struct { + } copy; + struct { + const char* name; + hsa_agent_t agent; + uint32_t tid; + } kernel; + }; +}; + +template +class TraceBuffer { + public: + typedef void (*callabck_t)(Entry*); + + TraceBuffer(uint32_t size) { + size_ = size; + data_ = new Entry[size_];; + memset(data_, 0, size_ * sizeof(Entry)); + read_pointer_ = data_; + } + + Entry* GetEntry() { + Entry* ptr = read_pointer_.fetch_add(1); + if (ptr >= (data_ + size_)) { + fprintf(stderr, "GetEntry: trace buffer is out of range\n"); + abort(); + } + return ptr; + } + + void Flush(uint32_t type, callabck_t fun) { + Entry* ptr = data_; + for (; (ptr < read_pointer_) && (ptr < (data_ + size_)); ptr++) { + if (ptr->type == type) { + if (ptr->valid == TRACE_ENTRY_COMPL) { + fun(ptr); + } + } + } + if (ptr >= (data_ + size_)) { + fprintf(stderr, "Flush: trace buffer is out of range\n"); + } + } + + private: + Entry* data_; + uint32_t size_; + std::atomic read_pointer_; +}; +} // namespace roctracer + +#endif // SRC_CORE_TRACE_BUFFER_H_ diff --git a/projects/roctracer/src/proxy/hsa_proxy_queue.h b/projects/roctracer/src/proxy/hsa_proxy_queue.h new file mode 100644 index 0000000000..91daf96815 --- /dev/null +++ b/projects/roctracer/src/proxy/hsa_proxy_queue.h @@ -0,0 +1,67 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_HSA_PROXY_QUEUE_H +#define _SRC_CORE_HSA_PROXY_QUEUE_H + +#include +#include +#include +#include + +#include "proxy/proxy_queue.h" +#include "util/exception.h" + +namespace rocprofiler { +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; +extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn; +extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn; + +class HsaProxyQueue : public ProxyQueue { + public: + hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) { + return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data); + } + + void Submit(const packet_t* packet) { + EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported"); + } + + private: + hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) { + const auto status = hsa_amd_queue_intercept_create_fn( + agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_); + *queue = queue_; + return status; + } + + hsa_status_t Cleanup() const { return hsa_queue_destroy_fn(queue_); } + + hsa_queue_t* queue_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_HSA_PROXY_QUEUE_H diff --git a/projects/roctracer/src/proxy/hsa_queue.h b/projects/roctracer/src/proxy/hsa_queue.h new file mode 100644 index 0000000000..48e3039a60 --- /dev/null +++ b/projects/roctracer/src/proxy/hsa_queue.h @@ -0,0 +1,46 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_HSA_QUEUE_H +#define _SRC_CORE_HSA_QUEUE_H + +#include + +#include "proxy/queue.h" + +namespace rocprofiler { + +class HsaQueue : public Queue { + public: + HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {} + + void Submit(const packet_t* packet) { + util::HsaRsrcFactory::Instance().Submit(queue_, packet); + } + + private: + hsa_queue_t* queue_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_HSA_QUEUE_H diff --git a/projects/roctracer/src/proxy/intercept_queue.cpp b/projects/roctracer/src/proxy/intercept_queue.cpp new file mode 100644 index 0000000000..301f4eea68 --- /dev/null +++ b/projects/roctracer/src/proxy/intercept_queue.cpp @@ -0,0 +1,42 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "proxy/intercept_queue.h" + +namespace rocprofiler { +void InterceptQueue::HsaIntercept(HsaApiTable* table) { + table->core_->hsa_queue_create_fn = rocprofiler::InterceptQueue::QueueCreate; + table->core_->hsa_queue_destroy_fn = rocprofiler::InterceptQueue::QueueDestroy; +} + +InterceptQueue::mutex_t InterceptQueue::mutex_; +//rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL; +//InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL; +//InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL; +//void* InterceptQueue::callback_data_ = NULL; +InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; +const char* InterceptQueue::kernel_none_ = ""; +bool InterceptQueue::in_create_call_ = false; +InterceptQueue::queue_id_t InterceptQueue::current_queue_id = 0; +bool InterceptQueue::is_enabled = false; + +} // namespace rocprofiler diff --git a/projects/roctracer/src/proxy/intercept_queue.h b/projects/roctracer/src/proxy/intercept_queue.h new file mode 100644 index 0000000000..f92f1ce6bc --- /dev/null +++ b/projects/roctracer/src/proxy/intercept_queue.h @@ -0,0 +1,301 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_INTERCEPT_QUEUE_H +#define _SRC_CORE_INTERCEPT_QUEUE_H + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "core/trace_buffer.h" +#include "proxy/tracker.h" +#include "proxy/proxy_queue.h" +#include "util/hsa_rsrc_factory.h" +#include "util/exception.h" + +namespace roctracer { extern TraceBuffer trace_buffer; } + +namespace rocprofiler { +extern decltype(hsa_queue_create)* hsa_queue_create_fn; +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; + +class InterceptQueue { + public: + typedef std::recursive_mutex mutex_t; + typedef std::map obj_map_t; + typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data); + typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg); + typedef uint32_t queue_id_t; + + static void HsaIntercept(HsaApiTable* table); + + static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue, + const bool& tracker_on) { + std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_ERROR; + + if (in_create_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()"); + in_create_call_ = true; + + ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size, + group_segment_size, queue, &status); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()"); + + status = util::HsaRsrcFactory::HsaApi()->hsa_amd_profiling_set_profiler_enabled(*queue, true); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_set_profiler_enabled()"); + + if (!obj_map_) obj_map_ = new obj_map_t; + InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy); + obj->queue_event_callback_ = callback; + obj->queue_id = current_queue_id; + (*obj_map_)[(uint64_t)(*queue)] = obj; + + status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj) : proxy->SetInterceptCB(OnSubmitCB_dummy, obj); + +#if 0 + if (create_callback_ != NULL) { + status = create_callback_(*queue, callback_data_); + } +#endif + + ++current_queue_id; + in_create_call_ = false; + return status; + } + + static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, false); + } + + static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, true); + } + + static hsa_status_t QueueDestroy(hsa_queue_t* queue) { + std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_SUCCESS; +#if 0 + if (destroy_callback_ != NULL) { + status = destroy_callback_(queue, callback_data_); + } +#endif + if (status == HSA_STATUS_SUCCESS) { + status = DelObj(queue); + } + + return status; + } + + static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, + hsa_amd_queue_intercept_packet_writer writer) { + const packet_t* packets_arr = reinterpret_cast(in_packets); + + // Submitting the original packets if profiling was not enabled + if (writer != NULL) { + writer(packets_arr, count); + } else { + InterceptQueue* obj = reinterpret_cast(data); + Queue* proxy = obj->proxy_; + proxy->Submit(packets_arr, count); + } + } + + static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, + hsa_amd_queue_intercept_packet_writer writer) { + const packet_t* packets_arr = reinterpret_cast(in_packets); + InterceptQueue* obj = reinterpret_cast(data); + Queue* proxy = obj->proxy_; + + // Travers input packets + for (uint64_t j = 0; j < count; ++j) { + const packet_t* packet = &packets_arr[j]; + + // Checking for dispatch packet type + if (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) { + const hsa_kernel_dispatch_packet_t* dispatch_packet = + reinterpret_cast(packet); + + // Prepareing dispatch callback data + const hsa_signal_t completion_signal = dispatch_packet->completion_signal; + const amd_kernel_code_t* kernel_code = GetKernelCode(dispatch_packet); + const uint64_t kernel_symbol = kernel_code->runtime_loader_kernel_symbol; + const char* kernel_name = GetKernelName(kernel_symbol); + + // Adding kernel timing tracker + ::proxy::Tracker::entry_t* entry = roctracer::trace_buffer.GetEntry(); + entry->kernel.tid = syscall(__NR_gettid); + entry->kernel.name = kernel_name; + ::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id, completion_signal, entry); + const_cast(dispatch_packet)->completion_signal = entry->signal; + } + } + + // Submitting the original packets if profiling was not enabled + if (writer != NULL) { + writer(packets_arr, count); + } else { + proxy->Submit(packets_arr, count); + } + } +#if 0 + static void SetCallbacks(rocprofiler_callback_t dispatch_callback, + queue_callback_t create_callback, + queue_callback_t destroy_callback, + void* data) + { + std::lock_guard lck(mutex_); + callback_data_ = data; + dispatch_callback_ = dispatch_callback; + create_callback_ = create_callback; + destroy_callback_ = destroy_callback; + } +#endif + + static void Enable(bool val) { is_enabled = val; } + + private: + static void queue_event_callback(hsa_status_t status, hsa_queue_t *queue, void *arg) { + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "queue error handling is not supported"); + InterceptQueue* obj = GetObj(queue); + if (obj->queue_event_callback_) obj->queue_event_callback_(status, obj->queue_, arg); + } + + static hsa_packet_type_t GetHeaderType(const packet_t* packet) { + const packet_word_t* header = reinterpret_cast(packet); + return static_cast((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask); + } + + static const amd_kernel_code_t* GetKernelCode(const hsa_kernel_dispatch_packet_t* dispatch_packet) { + const amd_kernel_code_t* kernel_code = NULL; + hsa_status_t status = + util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address( + reinterpret_cast(dispatch_packet->kernel_object), + reinterpret_cast(&kernel_code)); + if (HSA_STATUS_SUCCESS != status) { + kernel_code = reinterpret_cast(dispatch_packet->kernel_object); + } + return kernel_code; + } + + static const char* GetKernelName(const uint64_t kernel_symbol) { + amd_runtime_loader_debug_info_t* dbg_info = + reinterpret_cast(kernel_symbol); + const char* kernel_name = (dbg_info != NULL) ? dbg_info->kernel_name : NULL; + return (kernel_name != NULL) ? strdup(kernel_name) : strdup(kernel_none_); +#if 0 + // Kernel name is mangled name + // apply __cxa_demangle() to demangle it + const char* funcname = NULL; + if (kernel_name != NULL) { + size_t funcnamesize = 0; + int status; + const char* ret = abi::__cxa_demangle(kernel_name, NULL, &funcnamesize, &status); + funcname = (ret != 0) ? ret : strdup(kernel_name); + } + if (funcname == NULL) funcname = strdup(kernel_none_); + + return funcname; +#endif + } + + // method to get an intercept queue object + static InterceptQueue* GetObj(const hsa_queue_t* queue) { + std::lock_guard lck(mutex_); + InterceptQueue* obj = NULL; + obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue); + if (it != obj_map_->end()) { + obj = it->second; + assert(queue == obj->queue_); + } + return obj; + } + + // method to delete an intercept queue object + static hsa_status_t DelObj(const hsa_queue_t* queue) { + std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_ERROR; + obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue); + if (it != obj_map_->end()) { + const InterceptQueue* obj = it->second; + assert(queue == obj->queue_); + delete obj; + obj_map_->erase(it); + status = HSA_STATUS_SUCCESS;; + } + return status; + } + + InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : + queue_(queue), + proxy_(proxy) + { + agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + queue_event_callback_ = NULL; + } + + ~InterceptQueue() { + ProxyQueue::Destroy(proxy_); + } + + static bool is_enabled; + + static mutex_t mutex_; + static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1; +#if 0 + static queue_callback_t create_callback_; + static queue_callback_t destroy_callback_; + static void* callback_data_; +#endif + static obj_map_t* obj_map_; + static const char* kernel_none_; + static bool in_create_call_; + static queue_id_t current_queue_id; + + hsa_queue_t* const queue_; + ProxyQueue* const proxy_; + const util::AgentInfo* agent_info_; + queue_event_callback_t queue_event_callback_; + queue_id_t queue_id; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_INTERCEPT_QUEUE_H diff --git a/projects/roctracer/src/proxy/proxy_queue.cpp b/projects/roctracer/src/proxy/proxy_queue.cpp new file mode 100644 index 0000000000..e5cca0da2c --- /dev/null +++ b/projects/roctracer/src/proxy/proxy_queue.cpp @@ -0,0 +1,63 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "proxy/proxy_queue.h" + +#include "proxy/hsa_proxy_queue.h" +#include "proxy/simple_proxy_queue.h" + +namespace rocprofiler { +void ProxyQueue::HsaIntercept(HsaApiTable* table) { + if (rocp_type_) SimpleProxyQueue::HsaIntercept(table); +} + +ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue, + hsa_status_t* status) { + hsa_status_t suc = HSA_STATUS_ERROR; + ProxyQueue* instance = + (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue(); + if (instance != NULL) { + suc = instance->Init(agent, size, type, callback, data, private_segment_size, + group_segment_size, queue); + if (suc != HSA_STATUS_SUCCESS) { + delete instance; + instance = NULL; + } + } + *status = suc; + assert(*status == HSA_STATUS_SUCCESS); + return instance; +} + +hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) { + assert(obj != NULL); + auto suc = obj->Cleanup(); + delete obj; + return suc; +} + +bool ProxyQueue::rocp_type_ = false; +} // namespace rocprofiler diff --git a/projects/roctracer/src/proxy/proxy_queue.h b/projects/roctracer/src/proxy/proxy_queue.h new file mode 100644 index 0000000000..f876ab6c72 --- /dev/null +++ b/projects/roctracer/src/proxy/proxy_queue.h @@ -0,0 +1,77 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_PROXY_QUEUE_H +#define _SRC_CORE_PROXY_QUEUE_H + +#include +#include +#include +#include +#include + +#include "proxy/queue.h" +#include "proxy/types.h" + +struct HsaApiTable; + +namespace rocprofiler { +typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count); +typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data, + hsa_amd_queue_intercept_packet_writer writer); + +class ProxyQueue : public Queue { + public: + static void InitFactory() { + const char* type = getenv("ROCP_PROXY_QUEUE"); + if (type != NULL) { + if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true; + } + } + + static void HsaIntercept(HsaApiTable* table); + + static ProxyQueue* Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue, hsa_status_t* status); + + static hsa_status_t Destroy(const ProxyQueue* obj); + + virtual hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) = 0; + virtual hsa_status_t Cleanup() const = 0; + virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0; + virtual void Submit(const packet_t* packet) = 0; + + protected: + virtual ~ProxyQueue(){}; + + private: + static bool rocp_type_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_PROXY_QUEUE_H diff --git a/projects/roctracer/src/proxy/queue.h b/projects/roctracer/src/proxy/queue.h new file mode 100644 index 0000000000..78214af504 --- /dev/null +++ b/projects/roctracer/src/proxy/queue.h @@ -0,0 +1,42 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_QUEUE_H +#define _SRC_CORE_QUEUE_H + +#include "proxy/types.h" + +namespace rocprofiler { + +class Queue { + public: + Queue() {} + virtual ~Queue() {} + virtual void Submit(const packet_t* packet) = 0; + virtual void Submit(const packet_t* packet, const size_t& count) { + for (const packet_t* p = packet; p < packet + count; ++p) Submit(p); + } +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_QUEUE_H diff --git a/projects/roctracer/src/proxy/simple_proxy_queue.cpp b/projects/roctracer/src/proxy/simple_proxy_queue.cpp new file mode 100644 index 0000000000..91b2d109eb --- /dev/null +++ b/projects/roctracer/src/proxy/simple_proxy_queue.cpp @@ -0,0 +1,40 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "proxy/simple_proxy_queue.h" + +namespace rocprofiler { +void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) { + table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore; + table->core_->hsa_signal_store_screlease_fn = rocprofiler::SimpleProxyQueue::SignalStore; + + table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex; + table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex; + table->core_->hsa_queue_load_read_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex; + + table->core_->hsa_queue_load_write_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex; + table->core_->hsa_queue_store_write_index_screlease_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex; + table->core_->hsa_queue_load_read_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex; +} + +SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL; +} // namespace rocprofiler diff --git a/projects/roctracer/src/proxy/simple_proxy_queue.h b/projects/roctracer/src/proxy/simple_proxy_queue.h new file mode 100644 index 0000000000..8215f44c1e --- /dev/null +++ b/projects/roctracer/src/proxy/simple_proxy_queue.h @@ -0,0 +1,261 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef _SRC_CORE_SIMPLE_PROXY_QUEUE_H +#define _SRC_CORE_SIMPLE_PROXY_QUEUE_H + +#include +#include +#include +#include + +#include "proxy/proxy_queue.h" +#include "util/hsa_rsrc_factory.h" + +#ifndef ROCP_PROXY_LOCK +# define ROCP_PROXY_LOCK 1 +#endif + +namespace rocprofiler { +extern decltype(hsa_queue_create)* hsa_queue_create_fn; +extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn; + +extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn; +extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn; + +extern decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn; +extern decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn; +extern decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn; + +extern decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn; +extern decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn; +extern decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn; + +typedef decltype(hsa_signal_t::handle) signal_handle_t; + + +class SimpleProxyQueue : public ProxyQueue { + public: + static void HsaIntercept(HsaApiTable* table); + + static void SignalStore(hsa_signal_t signal, hsa_signal_value_t que_idx) { + auto it = queue_map_->find(signal.handle); + if (it != queue_map_->end()) { + SimpleProxyQueue* instance = it->second; + instance->mutex_lock(); + const uint64_t begin = instance->submit_index_; + const uint64_t end = que_idx + 1; + instance->submit_index_ = end; + instance->mutex_unlock(); + for (uint64_t j = begin; j < end; ++j) { + // Submited packet + const uint32_t idx = j & instance->queue_mask_; + packet_t* packet = reinterpret_cast(instance->queue_->base_address) + idx; + if (instance->on_submit_cb_ != NULL) + instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL); + else + instance->Submit(packet); + } + } else { + hsa_signal_store_relaxed_fn(signal, que_idx); + } + } + + static uint64_t GetSubmitIndex(const hsa_queue_t* queue) { + uint64_t index = 0; + auto it = queue_map_->find(queue->doorbell_signal.handle); + if (it != queue_map_->end()) { + SimpleProxyQueue* instance = it->second; + index = instance->submit_index_; + } else { + index = hsa_queue_load_read_index_relaxed_fn(queue); + } + return index; + } + + static uint64_t GetQueueIndex(const hsa_queue_t* queue) { + uint64_t index = 0; + auto it = queue_map_->find(queue->doorbell_signal.handle); + if (it != queue_map_->end()) { + SimpleProxyQueue* instance = it->second; + instance->mutex_lock(); + index = instance->queue_index_; + } else { + index = hsa_queue_load_write_index_relaxed_fn(queue); + } + return index; + } + + static void SetQueueIndex(const hsa_queue_t* queue, uint64_t value) { + auto it = queue_map_->find(queue->doorbell_signal.handle); + if (it != queue_map_->end()) { + SimpleProxyQueue* instance = it->second; + instance->queue_index_ = value; + instance->mutex_unlock(); + } else { + hsa_queue_store_write_index_relaxed_fn(queue, value); + } + } + + hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) { + on_submit_cb_ = on_submit_cb; + on_submit_cb_data_ = data; + return HSA_STATUS_SUCCESS; + } + + void Submit(const packet_t* packet) { + // Compute the write index of queue + const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_); + + // Waiting untill there is a free space in the queue + while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_)); + + // Increment the write index + hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1); + + const uint32_t mask = queue_->size - 1; + const uint32_t idx = que_idx & mask; + + // Copy packet to the queue + const packet_word_t* src = reinterpret_cast(packet); + packet_word_t* dst = reinterpret_cast(base_address_ + idx); + for (unsigned i = 1; i < sizeof(packet_t) / sizeof(packet_word_t); ++i) { + dst[i] = src[i]; + } + + // To maintain global order to ensure the prior copy of the packet contents is made visible + // before the header is updated. + // With in-order CP it will wait until the first packet in the blob will be valid. + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&dst[0]); + header_atomic_ptr->store(src[0], std::memory_order_release); + + // Doorbell signaling to submit the packet + hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx); + } + + SimpleProxyQueue() + : agent_info_(NULL), + queue_(NULL), + base_address_(NULL), + doorbell_signal_({}), + queue_index_(0), + queue_mask_(0), + submit_index_(0), + on_submit_cb_(NULL), + on_submit_cb_data_(NULL) + { + printf("ROCProfiler: SimpleProxyQueue is enabled\n"); + fflush(stdout); + } + + ~SimpleProxyQueue() {} + + private: + typedef std::map queue_map_t; + + hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) { + size_ = size; + auto status = Init(agent, size); + *queue = queue_; + return status; + } + + hsa_status_t Init(hsa_agent_t agent, uint32_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + if (agent_info_ != NULL) { + if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) { + status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, + UINT32_MAX, &queue_); + if (status == HSA_STATUS_SUCCESS) { + base_address_ = reinterpret_cast(queue_->base_address); + doorbell_signal_ = queue_->doorbell_signal; + data_array_ = calloc(size + 1, sizeof(packet_t)); + uintptr_t addr = (uintptr_t)data_array_; + queue_->base_address = (void*)((addr + align_mask_) & ~align_mask_); + status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal)); + if (status != HSA_STATUS_SUCCESS) abort(); + queue_mask_ = size - 1; + + if (queue_map_ == NULL) queue_map_ = new queue_map_t; + (*queue_map_)[queue_->doorbell_signal.handle] = this; + } + else abort(); + } + } + if (status != HSA_STATUS_SUCCESS) abort(); + return status; + } + + hsa_status_t Cleanup() const { + hsa_status_t status = HSA_STATUS_ERROR; + hsa_signal_t queue_signal = queue_->doorbell_signal; + + // Destroy original HSA queue + queue_->base_address = base_address_; + queue_->doorbell_signal = doorbell_signal_; + status = hsa_queue_destroy_fn(queue_); + if (status != HSA_STATUS_SUCCESS) abort(); + + // Destroy overloaded virtual queue data and signal + free(data_array_); + status = hsa_signal_destroy(queue_signal); + if (status != HSA_STATUS_SUCCESS) abort(); + + return status; + } + + void mutex_lock() { +#if ROCP_PROXY_LOCK + mutex_.lock(); +#endif + } + + void mutex_unlock() { +#if ROCP_PROXY_LOCK + mutex_.unlock(); +#endif + } + + uint32_t size_; + static queue_map_t* queue_map_; + const util::AgentInfo* agent_info_; + hsa_queue_t* queue_; + static const uintptr_t align_mask_ = sizeof(packet_t) - 1; + packet_t* base_address_; + hsa_signal_t doorbell_signal_; + uint64_t queue_index_; + uint64_t queue_mask_; + uint64_t submit_index_; + std::mutex mutex_; + on_submit_cb_t on_submit_cb_; + void* on_submit_cb_data_; + void* data_array_; +}; + +} // namespace rocprofiler + +#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H diff --git a/projects/roctracer/src/proxy/tracker.h b/projects/roctracer/src/proxy/tracker.h index 190d6caf60..40b41438aa 100644 --- a/projects/roctracer/src/proxy/tracker.h +++ b/projects/roctracer/src/proxy/tracker.h @@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *******************************************************************************/ -#ifndef SRC_CORE_TRACKER_H_ -#define SRC_CORE_TRACKER_H_ +#ifndef SRC_PROXY_TRACKER_H_ +#define SRC_PROXY_TRACKER_H_ #include #include @@ -29,180 +29,58 @@ THE SOFTWARE. #include #include -#include -#include #include "util/hsa_rsrc_factory.h" #include "util/exception.h" #include "util/logger.h" +#include "core/trace_buffer.h" namespace proxy { -// Dispatch record -typedef struct { - uint64_t dispatch; // dispatch timestamp, ns - uint64_t begin; // kernel begin timestamp, ns - uint64_t end; // kernel end timestamp, ns - uint64_t complete; // completion signal timestamp, ns -} async_record_t; - class Tracker { public: - typedef std::mutex mutex_t; typedef util::HsaRsrcFactory::timestamp_t timestamp_t; - typedef async_record_t record_t; - struct entry_t; - typedef std::list sig_list_t; - typedef sig_list_t::iterator sig_list_it_t; - typedef uint64_t counter_t; - - struct entry_t { - counter_t index; - std::atomic valid; - Tracker* tracker; - sig_list_t::iterator it; - hsa_agent_t agent; - hsa_signal_t orig; - hsa_signal_t signal; - record_t* record; - std::atomic handler; - void* arg; - bool is_memcopy; - }; - - static Tracker* Create() { - std::lock_guard lck(glob_mutex_); - Tracker* obj = instance_.load(std::memory_order_relaxed); - if (obj == NULL) { - obj = new Tracker; - if (obj == NULL) EXC_ABORT(HSA_STATUS_ERROR, "Tracker creation failed"); - instance_.store(obj, std::memory_order_release); - } - return obj; - } - - static Tracker& Instance() { - Tracker* obj = instance_.load(std::memory_order_acquire); - if (obj == NULL) obj = Create(); - return *obj; - } - - static void Destroy() { - std::lock_guard lck(glob_mutex_); - if (instance_ != NULL) delete instance_; - instance_ = NULL; - } + typedef roctracer::trace_entry_t entry_t; // Add tracker entry - entry_t* Alloc(const hsa_agent_t& agent, const hsa_signal_t& orig) { + inline static void Enable(uint32_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; + util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance()); // Creating a new tracker entry - entry_t* entry = new entry_t{}; - assert(entry); - entry->tracker = this; + entry->type = type; entry->agent = agent; - entry->orig = orig; - - // Creating a record with the dispatch timestamps - record_t* record = new record_t{}; - assert(record); - record->dispatch = hsa_rsrc_->TimestampNs(); - entry->record = record; + entry->orig = signal; + entry->dispatch = hsa_rsrc->TimestampNs(); + entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release); // Creating a proxy signal status = hsa_signal_create(1, 0, NULL, &(entry->signal)); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create"); status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); - - // Adding antry to the list - mutex_.lock(); - entry->it = sig_list_.insert(sig_list_.end(), entry); - entry->index = counter_++; - mutex_.unlock(); - - return entry; - } - - // Delete tracker entry - void Delete(entry_t* entry) { - hsa_signal_destroy(entry->signal); - mutex_.lock(); - sig_list_.erase(entry->it); - mutex_.unlock(); - delete entry; - } - - // Enable tracker entry - void Enable(entry_t* entry, void* handler, void* arg) { - // Set entry handler and release the entry - entry->arg = arg; - entry->handler.store(handler, std::memory_order_release); - - // Debug trace - if (trace_on_) { - auto outstanding = outstanding_.fetch_add(1); - fprintf(stdout, "Tracker::Add: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding); - fflush(stdout); - } - } - - void EnableDispatch(entry_t* entry, hsa_amd_signal_handler handler, void* arg) { - entry->is_memcopy = false; - Enable(entry, reinterpret_cast(handler), arg); - } - void EnableMemcopy(entry_t* entry, hsa_amd_signal_handler handler, void* arg) { - entry->is_memcopy = true; - Enable(entry, reinterpret_cast(handler), arg); } private: - Tracker() : - outstanding_(0), - hsa_rsrc_(&(util::HsaRsrcFactory::Instance())) - {} - - ~Tracker() { - auto it = sig_list_.begin(); - auto end = sig_list_.end(); - while (it != end) { - auto cur = it++; - hsa_rsrc_->SignalWait((*cur)->signal); - Erase(cur); - } - } - - // Delete an entry by iterator - void Erase(const sig_list_it_t& it) { Delete(*it); } - // Entry completion - inline void Complete(hsa_signal_value_t signal_value, entry_t* entry) { - record_t* record = entry->record; - - // Debug trace - if (trace_on_) { - auto outstanding = outstanding_.fetch_sub(1); - fprintf(stdout, "Tracker::Handler: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding); - fflush(stdout); - } - + inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { // Query begin/end and complete timestamps - if (entry->is_memcopy) { + util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance()); + if (entry->type == roctracer::COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time"); - record->begin = hsa_rsrc_->SysclockToNs(async_copy_time.start); - record->end = hsa_rsrc_->SysclockToNs(async_copy_time.end); + entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start); + entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end); } else { hsa_amd_profiling_dispatch_time_t dispatch_time{}; hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time"); - record->begin = hsa_rsrc_->SysclockToNs(dispatch_time.start); - record->end = hsa_rsrc_->SysclockToNs(dispatch_time.end); + entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start); + entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end); } - record->complete = hsa_rsrc_->TimestampNs(); - entry->valid.store(true, std::memory_order_release); + entry->complete = hsa_rsrc->TimestampNs(); + entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); // Original intercepted signal completion hsa_signal_t orig = entry->orig; @@ -216,72 +94,22 @@ class Tracker { if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value"); hsa_signal_store_screlease(orig, signal_value); } - } - - inline static void HandleEntry(hsa_signal_value_t signal_value, entry_t* entry) { - // Call entry handler - void* handler = static_cast(entry->handler); - reinterpret_cast(handler)(signal_value, entry->arg); - // Delete tracker entry - entry->tracker->Delete(entry); + hsa_signal_destroy(entry->signal); } // Handler for packet completion static bool Handler(hsa_signal_value_t signal_value, void* arg) { // Acquire entry entry_t* entry = reinterpret_cast(arg); - volatile std::atomic* ptr = &entry->handler; - while (ptr->load(std::memory_order_acquire) == NULL) sched_yield(); + while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) sched_yield(); // Complete entry - Tracker* tracker = entry->tracker; - tracker->Complete(signal_value, entry); - - if (ordering_enabled_ == false) { - HandleEntry(signal_value, entry); - } else { - // Acquire last entry - entry_t* back = tracker->sig_list_.back(); - volatile std::atomic* ptr = &back->handler; - while (ptr->load(std::memory_order_acquire) == NULL) sched_yield(); - - tracker->handler_mutex_.lock(); - sig_list_it_t it = tracker->sig_list_.begin(); - sig_list_it_t end = back->it; - while (it != end) { - entry = *(it++); - if (entry->valid.load(std::memory_order_acquire)) { - HandleEntry(signal_value, entry); - } else { - break; - } - } - tracker->handler_mutex_.unlock(); - } + Tracker::Complete(signal_value, entry); return false; } - - // instance - static std::atomic instance_; - static mutex_t glob_mutex_; - static counter_t counter_; - - // Tracked signals list - sig_list_t sig_list_; - // Inter-thread synchronization - mutex_t mutex_; - mutex_t handler_mutex_; - // Outstanding dispatches - std::atomic outstanding_; - // HSA resources factory - util::HsaRsrcFactory* hsa_rsrc_; - // Handling ordering enabled - static const bool ordering_enabled_ = false; - // Enable tracing - static const bool trace_on_ = false; }; } // namespace rocprofiler -#endif // SRC_CORE_TRACKER_H_ +#endif // SRC_PROXY_TRACKER_H_ diff --git a/projects/roctracer/src/proxy/types.h b/projects/roctracer/src/proxy/types.h new file mode 100644 index 0000000000..c72bb34302 --- /dev/null +++ b/projects/roctracer/src/proxy/types.h @@ -0,0 +1,50 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef SRC_CORE_TYPES_H_ +#define SRC_CORE_TYPES_H_ + +#include + +#include + +namespace rocprofiler { +typedef hsa_ven_amd_aqlprofile_pfn_t pfn_t; +typedef hsa_ven_amd_aqlprofile_event_t event_t; +typedef hsa_ven_amd_aqlprofile_parameter_t parameter_t; +typedef hsa_ven_amd_aqlprofile_profile_t profile_t; +typedef hsa_ext_amd_aql_pm4_packet_t packet_t; +typedef uint32_t packet_word_t; +typedef uint64_t timestamp_t; + +inline std::ostream& operator<< (std::ostream& out, const event_t& event) { + out << "[block_name(" << event.block_name << "). block_index(" << event.block_index << "). counter_id(" << event.counter_id << ")]"; + return out; +} +inline std::ostream& operator<< (std::ostream& out, const parameter_t& parameter) { + out << "[parameter_name(" << parameter.parameter_name << "). value(" << parameter.value << ")]"; + return out; +} + +} // namespace rocprofiler + +#endif // SRC_CORE_TYPES_H_ diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.cpp b/projects/roctracer/src/util/hsa_rsrc_factory.cpp index 97e599b3d8..ccb1cd9de3 100644 --- a/projects/roctracer/src/util/hsa_rsrc_factory.cpp +++ b/projects/roctracer/src/util/hsa_rsrc_factory.cpp @@ -145,7 +145,7 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer - timer_ = new HsaTimer; + timer_ = new HsaTimer(&hsa_api_); CHECK_STATUS("HSA timer allocation failed", (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); @@ -172,7 +172,6 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_init = table->core_->hsa_init_fn; hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn; hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn; - hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn; hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; @@ -180,14 +179,13 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn; hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn; hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn; + hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn; hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn; - hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; - - hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn; hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; @@ -195,21 +193,23 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; + hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; - hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn; hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; } else { hsa_api_.hsa_init = hsa_init; hsa_api_.hsa_shut_down = hsa_shut_down; hsa_api_.hsa_agent_get_info = hsa_agent_get_info; - hsa_api_.hsa_iterate_agents = hsa_iterate_agents; hsa_api_.hsa_queue_create = hsa_queue_create; @@ -217,19 +217,13 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed; hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed; hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed; + hsa_api_.hsa_signal_create = hsa_signal_create; hsa_api_.hsa_signal_destroy = hsa_signal_destroy; + hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed; hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire; - - hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools; - hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; - hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; - hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; - - hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; - - hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; + hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file; hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; @@ -237,11 +231,19 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_freeze = hsa_executable_freeze; hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + hsa_api_.hsa_system_get_info = hsa_system_get_info; + hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; + + hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools; + hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; + hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; + hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; + hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; + hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled; hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time; hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time; - hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; - hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; } } } diff --git a/projects/roctracer/src/util/hsa_rsrc_factory.h b/projects/roctracer/src/util/hsa_rsrc_factory.h index 0a2ad84fe3..8cc8c1254b 100644 --- a/projects/roctracer/src/util/hsa_rsrc_factory.h +++ b/projects/roctracer/src/util/hsa_rsrc_factory.h @@ -71,41 +71,43 @@ static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; struct hsa_pfn_t { - decltype(::hsa_init)* hsa_init; - decltype(::hsa_shut_down)* hsa_shut_down; - decltype(::hsa_agent_get_info)* hsa_agent_get_info; + decltype(hsa_init)* hsa_init; + decltype(hsa_shut_down)* hsa_shut_down; + decltype(hsa_agent_get_info)* hsa_agent_get_info; + decltype(hsa_iterate_agents)* hsa_iterate_agents; - decltype(::hsa_iterate_agents)* hsa_iterate_agents; + decltype(hsa_queue_create)* hsa_queue_create; + decltype(hsa_queue_destroy)* hsa_queue_destroy; + decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; + decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed; + decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; - decltype(::hsa_queue_create)* hsa_queue_create; - decltype(::hsa_queue_destroy)* hsa_queue_destroy; - decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; - decltype(::hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed; - decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; - decltype(::hsa_signal_create)* hsa_signal_create; - decltype(::hsa_signal_destroy)* hsa_signal_destroy; - decltype(::hsa_signal_store_relaxed)* hsa_signal_store_relaxed; - decltype(::hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; + decltype(hsa_signal_create)* hsa_signal_create; + decltype(hsa_signal_destroy)* hsa_signal_destroy; + decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed; + decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed; + decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; + decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease; - decltype(::hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; - decltype(::hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; - decltype(::hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; - decltype(::hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; - decltype(::hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; + decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file; + decltype(hsa_executable_create_alt)* hsa_executable_create_alt; + decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; + decltype(hsa_executable_freeze)* hsa_executable_freeze; + decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; - decltype(::hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; + decltype(hsa_system_get_info)* hsa_system_get_info; + decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; - decltype(::hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file; - decltype(::hsa_executable_create_alt)* hsa_executable_create_alt; - decltype(::hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; - decltype(::hsa_executable_freeze)* hsa_executable_freeze; - decltype(::hsa_executable_get_symbol)* hsa_executable_get_symbol; + decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; + decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; + decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; + decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; + decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; - decltype(::hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; - decltype(::hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; - decltype(::hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; - decltype(::hsa_signal_load_relaxed)* hsa_signal_load_relaxed; - decltype(::hsa_signal_store_screlease)* hsa_signal_store_screlease; + decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; + decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled; + decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; + decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; }; // Encapsulates information about a Hsa Agent such as its @@ -167,9 +169,9 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; - HsaTimer() { + HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t sysclock_hz = 0; - hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; } @@ -185,7 +187,7 @@ class HsaTimer { // Return timestamp in 'ns' timestamp_t timestamp_ns() const { timestamp_t sysclock; - hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); + hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status); return sysclock_to_ns(sysclock); } @@ -193,6 +195,8 @@ class HsaTimer { private: // Timestamp frequency factor freq_t sysclock_factor_; + // HSA API table + const hsa_pfn_t* const hsa_api_; }; class HsaRsrcFactory { diff --git a/projects/roctracer/test/tool/tracer_tool.cpp b/projects/roctracer/test/tool/tracer_tool.cpp index 2a5a0edb8e..a3bb0619b6 100644 --- a/projects/roctracer/test/tool/tracer_tool.cpp +++ b/projects/roctracer/test/tool/tracer_tool.cpp @@ -32,6 +32,7 @@ THE SOFTWARE. #include #include #include +#include #include #define PUBLIC_API __attribute__((visibility("default"))) @@ -78,6 +79,19 @@ void fatal(const std::string msg) { abort(); } +struct api_trace_entry_t { + uint32_t valid; + uint32_t type; + uint32_t cid; + timestamp_t begin; + timestamp_t end; + uint32_t pid; + uint32_t tid; + hsa_api_data_t data; +}; + +roctracer::TraceBuffer api_trace_buffer(0x200000); + // HSA API callback function void hsa_api_callback( uint32_t domain, @@ -87,23 +101,35 @@ void hsa_api_callback( { (void)arg; const hsa_api_data_t* data = reinterpret_cast(callback_data); - if (data->phase == ACTIVITY_API_PHASE_ENTER) { hsa_begin_timestamp = timer->timestamp_fn_ns(); } else { const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns(); - std::ostringstream os; - os << hsa_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << hsa_api_data_pair_t(cid, *data); - fprintf(hsa_api_file_handle, "%s\n", os.str().c_str()); + api_trace_entry_t* entry = api_trace_buffer.GetEntry(); + entry->valid = roctracer::TRACE_ENTRY_COMPL; + entry->cid = cid; + entry->begin = hsa_begin_timestamp; + entry->end = end_timestamp; + entry->pid = GetPid(); + entry->tid = GetTid(); + entry->data = *data; } } +void hsa_api_flush_cb(api_trace_entry_t* entry) { + std::ostringstream os; + os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " " << hsa_api_data_pair_t(entry->cid, entry->data); + fprintf(hsa_api_file_handle, "%s\n", os.str().c_str()); +} + void hsa_activity_callback( uint32_t op, activity_record_t* record, void* arg) { - fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy%lu\n", record->begin_ns, record->end_ns, record->correlation_id); + static uint64_t index = 0; + fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy%lu\n", record->begin_ns, record->end_ns, index); + index++; } void hip_api_callback( @@ -164,14 +190,14 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* end_record = reinterpret_cast(end); while (record < end_record) { + const char * name = roctracer_op_string(record->domain, record->op, record->kind); if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - const char * name = roctracer_op_string(record->domain, record->op, record->kind); fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n", record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id); } else { #if 0 - fprintf(stderr, "Bad domain %d\n", record->domain); - abort(); + fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n", + record->begin_ns, record->end_ns, record->process_id, record->thread_id, name); #endif } ROCTRACER_CALL(roctracer_next_record(record, &record)); @@ -312,8 +338,10 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // initialize HSA tracing roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table); roctracer::hsa_ops_properties_t ops_properties{ + table, reinterpret_cast(hsa_activity_callback), - NULL}; + NULL, + output_prefix}; roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); fprintf(stdout, " HSA-trace("); fflush(stdout); @@ -340,23 +368,32 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, fprintf(stdout, " HIP-trace()\n"); fflush(stdout); // Allocating tracing pool roctracer_properties_t properties{}; - properties.buffer_size = 0x1000; + properties.buffer_size = 0x80000; properties.buffer_callback_fun = hcc_activity_callback; ROCTRACER_CALL(roctracer_open_pool(&properties)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + //ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); } - return true; + return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); } // HSA-runtime tool on-unload method extern "C" PUBLIC_API void OnUnload() { + static bool is_unloaded = false; + if (is_unloaded) { + return; + } + is_unloaded = true; + roctracer_unload(); + if (trace_hsa) { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + api_trace_buffer.Flush(0, hsa_api_flush_cb); + fclose(hsa_api_file_handle); fclose(hsa_async_copy_file_handle); } @@ -364,9 +401,13 @@ extern "C" PUBLIC_API void OnUnload() { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + ROCTRACER_CALL(roctracer_flush_activity()); ROCTRACER_CALL(roctracer_close_pool()); fclose(hip_api_file_handle); fclose(hcc_activity_file_handle); } } + +extern "C" CONSTRUCTOR_API void constructor() {} +extern "C" DESTRUCTOR_API void destructor() { OnUnload(); }