From cdc74d31db131bfb05abc49cb3298f2d523d471f Mon Sep 17 00:00:00 2001 From: Sriraksha Nagaraj Date: Thu, 11 May 2023 20:28:31 +0000 Subject: [PATCH] SWDEV-389161:Adding fix for tensorflow app hang with rocprofv2 Change-Id: I48ade2e853468940aa5e1fca58ba22f686d89f26 --- include/rocprofiler/v2/rocprofiler_plugin.h | 29 +- plugin/CMakeLists.txt | 2 +- plugin/att/att.cpp | 7 +- plugin/ctf/CMakeLists.txt | 2 +- plugin/ctf/ctf.cpp | 7 +- plugin/ctf/plugin.cpp | 89 +++++- plugin/ctf/plugin.h | 5 +- plugin/file/file.cpp | 120 +++----- plugin/perfetto/perfetto.cpp | 155 ++++------- src/tools/CMakeLists.txt | 1 + src/tools/tool.cpp | 177 +++++++++++- src/tools/trace_buffer.h | 288 ++++++++++++++++++++ 12 files changed, 660 insertions(+), 222 deletions(-) create mode 100644 src/tools/trace_buffer.h diff --git a/include/rocprofiler/v2/rocprofiler_plugin.h b/include/rocprofiler/v2/rocprofiler_plugin.h index e660853ab5..2beac6eade 100644 --- a/include/rocprofiler/v2/rocprofiler_plugin.h +++ b/include/rocprofiler/v2/rocprofiler_plugin.h @@ -114,15 +114,38 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id); + +typedef struct { + /** + * @brief tracer extra data + * + * This is applicable only if ::ACTIVITY_DOMAIN_HSA or ACTIVITY_DOMAIN_HIP is the domain of the tracer record + */ + const char* function_name; +/** + * @brief tracer extra data + * + * This is applicable only if ::ACTIVITY_DOMAIN_HIP is the domain of the tracer record + */ + union { + const char* kernel_name; + }; +} rocprofiler_plugin_tracer_extra_data_t; + + + + /** * Report Synchronous Record. * - * @param[in] record Pointer to the Synchronous Tracer record. - * @param[in] session_id Session ID + * @param[in] record Synchronous Tracer record. + * @param[in] data : api_data + * @param[in] tracer_data :Tracer record extra data such as function name and kernel name * @return Returns 0 on success and -1 on error. */ + ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record, - rocprofiler_session_id_t session_id); + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data); /** @} */ diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 1576b72a9b..4a8f0f0c81 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -22,5 +22,5 @@ add_subdirectory(file) add_subdirectory(perfetto) -add_subdirectory(ctf) +#add_subdirectory(ctf) add_subdirectory(att) diff --git a/plugin/att/att.cpp b/plugin/att/att.cpp index c16a1e853d..ed28ed1923 100644 --- a/plugin/att/att.cpp +++ b/plugin/att/att.cpp @@ -55,7 +55,7 @@ class att_plugin_t { bool is_valid_{true}; inline bool att_file_exists(const std::string& name) { - struct stat buffer; + struct stat buffer; return stat(name.c_str(), &buffer) == 0; } @@ -189,8 +189,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler return att_plugin->WriteBufferRecords(begin, end, session_id, buffer_id); } -ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record, - rocprofiler_session_id_t session_id) { +ROCPROFILER_EXPORT int rocprofiler_plugin_write_record( + rocprofiler_record_tracer_t record, + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) { if (!att_plugin || !att_plugin->IsValid()) return -1; if (record.header.id.handle == 0) return 0; return 0; diff --git a/plugin/ctf/CMakeLists.txt b/plugin/ctf/CMakeLists.txt index 9540d0a49c..b8c2ce27aa 100644 --- a/plugin/ctf/CMakeLists.txt +++ b/plugin/ctf/CMakeLists.txt @@ -33,7 +33,7 @@ set_target_properties(ctf_plugin PROPERTIES LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap" LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}") set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf") -target_compile_definitions(ctf_plugin PRIVATE +target_compile_definitions(ctf_plugin PUBLIC AMD_INTERNAL_BUILD PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1 CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata") diff --git a/plugin/ctf/ctf.cpp b/plugin/ctf/ctf.cpp index 829231c381..6f61909730 100644 --- a/plugin/ctf/ctf.cpp +++ b/plugin/ctf/ctf.cpp @@ -88,8 +88,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records( return 0; } -ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record, - const rocprofiler_session_id_t session_id) { +ROCPROFILER_EXPORT int rocprofiler_plugin_write_record( + const rocprofiler_record_tracer_t record, const void* data, + rocprofiler_plugin_trace_record_data_t tracer_data) { assert(the_plugin); if (record.header.id.handle == 0) { @@ -97,7 +98,7 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_ } try { - the_plugin->HandleTracerRecord(record, session_id); + the_plugin->HandleTracerRecord(record, rocprofiler_session_id_t{0}, tracer_data, data); } catch (const std::exception& exc) { std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl; return -1; diff --git a/plugin/ctf/plugin.cpp b/plugin/ctf/plugin.cpp index 7124f6f23c..c4f113c764 100644 --- a/plugin/ctf/plugin.cpp +++ b/plugin/ctf/plugin.cpp @@ -137,6 +137,11 @@ class RocTxEventRecord final : public TracerEventRecord { : TracerEventRecord{record, GetRecordBeginClockVal(record)}, id_{QueryId(record, session_id)}, msg_{QueryMsg(record, session_id)} {} + explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record, uint64_t roctx_id, + std::string roctx_msg) + : TracerEventRecord{record, GetRecordBeginClockVal(record)}, + id_{roctx_id}, + msg_{roctx_msg} {} void Write(barectf_roctx_ctx& barectf_ctx) const override { barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str()); @@ -209,7 +214,9 @@ class HsaApiEventRecord : public TracerEventRecord { const rocprofiler_session_id_t session_id, const std::uint64_t clock_val) : TracerEventRecord{record, clock_val}, api_data_{QueryApiData(record, session_id)} {} - + explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record, + const std::uint64_t clock_val, hsa_api_data_t& api_data) + : TracerEventRecord{record, clock_val}, api_data_(api_data) {} const hsa_api_data_t& GetApiData() const noexcept { return api_data_; } private: @@ -246,6 +253,10 @@ class HsaApiEventRecordBegin final : public HsaApiEventRecord { explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record, const rocprofiler_session_id_t session_id) : HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {} + explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record, + hsa_api_data_t& api_data) + : HsaApiEventRecord{record, GetRecordBeginClockVal(record), + api_data} {} void Write(barectf_hsa_api_ctx& barectf_ctx) const override { // Include generated switch statement. @@ -259,6 +270,9 @@ class HsaApiEventRecordEnd final : public HsaApiEventRecord { explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record, const rocprofiler_session_id_t session_id) : HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {} + explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record, + hsa_api_data_t& api_data) + : HsaApiEventRecord{record, GetRecordBeginClockVal(record), api_data} {} void Write(barectf_hsa_api_ctx& barectf_ctx) const override { // Include generated switch statement. @@ -270,11 +284,17 @@ class HsaApiEventRecordEnd final : public HsaApiEventRecord { class HipApiEventRecord : public TracerEventRecord { protected: explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record, - const rocprofiler_session_id_t session_id, const std::uint64_t clock_val) + const rocprofiler_session_id_t session_id, + const std::uint64_t clock_val) : TracerEventRecord{record, clock_val}, api_data_{QueryApiData(record, session_id)}, kernel_name_{QueryKernelName(record, session_id)} {} - + explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record, + const std::uint64_t clock_val, hip_api_data_t& api_data, + std::string kernel_name) + : TracerEventRecord{record, clock_val}, + api_data_{api_data}, + kernel_name_{kernel_name} {} const hip_api_data_t& GetApiData() const noexcept { return api_data_; } const std::string& GetKernelName() const noexcept { return kernel_name_; } @@ -339,6 +359,9 @@ class HipApiEventRecordBegin final : public HipApiEventRecord { explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record, const rocprofiler_session_id_t session_id) : HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {} + explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record, + hip_api_data_t& api_data, std::string kernel_name) + : HipApiEventRecord{record, GetRecordBeginClockVal(record), api_data, kernel_name} {} void Write(barectf_hip_api_ctx& barectf_ctx) const override { // Include generated switch statement. @@ -352,6 +375,9 @@ class HipApiEventRecordEnd final : public HipApiEventRecord { explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record, const rocprofiler_session_id_t session_id) : HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {} + explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record, hip_api_data_t& api_data, + std::string kernel_name) + : HipApiEventRecord{record, GetRecordBeginClockVal(record), api_data, kernel_name} {} void Write(barectf_hip_api_ctx& barectf_ctx) const override { // Include generated switch statement. @@ -686,27 +712,60 @@ Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir, } void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record, - const rocprofiler_session_id_t session_id) { + const rocprofiler_session_id_t session_id, + rocprofiler_plugin_trace_record_data_t tracer_data, + const void* data) { std::lock_guard lock{lock_}; // Depending on the domain, create and add an event record to the // corresponding tracer. switch (record.domain) { case ACTIVITY_DOMAIN_ROCTX: - roctx_tracer_.AddEventRecord(std::make_shared(record, session_id)); + /*If data is nullptr then the call is asynchromous*/ + if (data == nullptr) + roctx_tracer_.AddEventRecord(std::make_shared(record, session_id)); + else { + const char* roctx_message = reinterpret_cast(data); + std::string roctx_msg(roctx_message); + roctx_tracer_.AddEventRecord( + std::make_shared(record, tracer_data.roctx_id, roctx_msg)); + } break; case ACTIVITY_DOMAIN_HSA_API: { - hsa_api_tracer_.AddEventRecord( - std::make_shared(record, session_id)); - hsa_api_tracer_.AddEventRecord( - std::make_shared(record, session_id)); + /*If data is nullptr then the call is asynchromous*/ + if (data == nullptr) { + hsa_api_tracer_.AddEventRecord( + std::make_shared(record, session_id)); + hsa_api_tracer_.AddEventRecord( + std::make_shared(record, session_id)); + } else { + hsa_api_data_t hsa_api_data = *reinterpret_cast(data); + hsa_api_tracer_.AddEventRecord( + std::make_shared(record, hsa_api_data)); + hsa_api_tracer_.AddEventRecord( + std::make_shared(record, hsa_api_data)); + } break; } case ACTIVITY_DOMAIN_HIP_API: { - hip_api_tracer_.AddEventRecord( - std::make_shared(record, session_id)); - hip_api_tracer_.AddEventRecord( - std::make_shared(record, session_id)); + /*If data is nullptr then the call is asynchromous*/ + if (data == nullptr) { + hip_api_tracer_.AddEventRecord( + std::make_shared(record, session_id)); + hip_api_tracer_.AddEventRecord( + std::make_shared(record, session_id)); + } else { + std::string kernel_name; + hip_api_data_t hip_api_data = *reinterpret_cast(data); + if (tracer_data.kernel_name != nullptr) + kernel_name = rocmtools::cxx_demangle(std::string(tracer_data.kernel_name)); + else + kernel_name = ""; + hip_api_tracer_.AddEventRecord( + std::make_shared(record, hip_api_data, kernel_name)); + hip_api_tracer_.AddEventRecord( + std::make_shared(record, hip_api_data, kernel_name)); + } break; } case ACTIVITY_DOMAIN_HSA_OPS: @@ -738,7 +797,9 @@ void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_buffer_id_t buffer_id) { while (begin && begin < end) { if (begin->kind == ROCPROFILER_TRACER_RECORD) { - HandleTracerRecord(*reinterpret_cast(begin), session_id); + rocprofiler_plugin_trace_record_data_t tracer_data = {}; + HandleTracerRecord(*reinterpret_cast(begin), session_id, + tracer_data); } else { assert(begin->kind == ROCPROFILER_PROFILER_RECORD); HandleProfilerRecord(*reinterpret_cast(begin), diff --git a/plugin/ctf/plugin.h b/plugin/ctf/plugin.h index 3880162d0d..23afcd6fbd 100644 --- a/plugin/ctf/plugin.h +++ b/plugin/ctf/plugin.h @@ -54,7 +54,10 @@ class Plugin final { // Handles a tracer record. void HandleTracerRecord(const rocprofiler_record_tracer_t& record, - rocprofiler_session_id_t session_id); + rocprofiler_session_id_t session_id, + rocprofiler_plugin_trace_record_data_t tracer_data, + const void* data = nullptr); + // Handles a profiler record. void HandleProfilerRecord(const rocprofiler_record_profiler_t& record, diff --git a/plugin/file/file.cpp b/plugin/file/file.cpp index 5e51c5352d..e4cdffc4e7 100644 --- a/plugin/file/file.cpp +++ b/plugin/file/file.cpp @@ -198,7 +198,8 @@ class file_plugin_t { void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id, - rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) { + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data, + rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}){ std::lock_guard lock(writing_lock); std::string kernel_name; std::string function_name; @@ -211,81 +212,29 @@ class file_plugin_t { reinterpret_cast(tracer_record.api_data_handle.handle)); } if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) { - size_t function_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size( - session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_size)); - if (function_name_size > 1) { - char* function_name_c = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info( - session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_c)); - if (function_name_c) function_name = std::string(function_name_c); - } + if (tracer_extra_data.function_name) + function_name = std::string(tracer_extra_data.function_name); } if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) { - size_t function_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( - session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_size)); - if (function_name_size > 1) { - char* function_name_c = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( - session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_c)); - if (function_name_c) function_name = std::string(function_name_c); - } - size_t kernel_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( - session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &kernel_name_size)); - if (kernel_name_size > 1) { - char* kernel_name_str = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( - session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &kernel_name_str)); - if (kernel_name_str) { - kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str)); - free(kernel_name_str); - // TODO: Change how this API returns a string. - } - } + if (tracer_extra_data.kernel_name) + kernel_name = rocmtools::cxx_demangle(std::string(tracer_extra_data.kernel_name)); + if(tracer_extra_data.function_name) + function_name = std::string(tracer_extra_data.function_name); } if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) { - size_t roctx_message_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size( - session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_message_size)); - if (roctx_message_size > 1) { - [[maybe_unused]] char* roctx_message_str = - static_cast(malloc(roctx_message_size * sizeof(char))); - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info( - session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_message_str)); - if (roctx_message_str) - roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str))); - } - size_t roctx_id_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size( - session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_id_size)); - if (roctx_id_size > 1) { - [[maybe_unused]] char* roctx_id_str = - static_cast(malloc(roctx_id_size * sizeof(char))); - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info( - session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_id_str)); - if (roctx_id_str) { - roctx_id = std::stoll(std::string(strdup(roctx_id_str))); - free(roctx_id_str); - } - } + const char* roctx_msg = reinterpret_cast(tracer_record.api_data_handle.handle); + if(roctx_msg) + roctx_message = rocmtools::cxx_demangle(std::string(roctx_msg)); + roctx_id = tracer_record.operation_id.id; } + // return; output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain); *output_file << "Record(" << tracer_record.header.id.handle << "), Domain(" << GetDomainName(tracer_record.domain) << "),"; - if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_id >= 0) *output_file << " ROCTX_ID(" << roctx_id << "),"; - if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_message.size() > 1) *output_file << " ROCTX_Message(" << roctx_message << "),"; + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_id >= 0) + *output_file << " ROCTX_ID(" << roctx_id << "),"; + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX && roctx_message.size() > 1) + *output_file << " ROCTX_Message(" << roctx_message << "),"; if (function_name.size() > 1) *output_file << " Function(" << function_name << "),"; if (kernel_name.size() > 1) *output_file << " Kernel_Name(" << kernel_name.c_str() << "),"; if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS || tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) { @@ -304,15 +253,17 @@ class file_plugin_t { } } if (tracer_record.phase == ROCPROFILER_PHASE_NONE) { - *output_file << " Begin(" << tracer_record.timestamps.begin.value - << "), End(" << tracer_record.timestamps.end.value << "),"; + *output_file << " Begin(" << tracer_record.timestamps.begin.value << "), End(" + << tracer_record.timestamps.end.value << "),"; } else { - if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain != ACTIVITY_DOMAIN_ROCTX){ + if (tracer_record.phase == ROCPROFILER_PHASE_ENTER && + tracer_record.domain != ACTIVITY_DOMAIN_ROCTX) { rocprofiler_timestamp_t timestamp; rocprofiler_get_timestamp(×tamp); *output_file << " Begin(" << timestamp.value << "),"; } - if(tracer_record.phase == ROCPROFILER_PHASE_ENTER && tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) { + if (tracer_record.phase == ROCPROFILER_PHASE_ENTER && + tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) { rocprofiler_timestamp_t timestamp; rocprofiler_get_timestamp(×tamp); *output_file << " timestamp(" << timestamp.value << "),"; @@ -418,7 +369,6 @@ class file_plugin_t { << "pc-sample(" << std::hex << std::showbase << sample.pc << "), " << "se(" << sample.se << ')' << std::endl; } - int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { @@ -432,9 +382,11 @@ class file_plugin_t { break; } case ROCPROFILER_TRACER_RECORD: { + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data; + tracer_extra_data.function_name = nullptr; rocprofiler_record_tracer_t* tracer_record = const_cast( reinterpret_cast(begin)); - FlushTracerRecord(*tracer_record, session_id, buffer_id); + FlushTracerRecord(*tracer_record, session_id, tracer_extra_data, buffer_id); break; } case ROCPROFILER_ATT_TRACER_RECORD: { @@ -454,16 +406,16 @@ class file_plugin_t { return 0; } - bool is_valid() const { return valid_; } + bool is_valid() const { return valid_; } - private: - bool valid_{false}; + private: + bool valid_{false}; - output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"}, - hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"}, - hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"}, - output_file_{"results.txt"}; -}; + output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"}, + hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"}, + hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"}, + output_file_{"results.txt"}; + }; file_plugin_t* file_plugin = nullptr; @@ -500,9 +452,9 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records( } ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record, - rocprofiler_session_id_t session_id) { + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data){ if (!file_plugin || !file_plugin->is_valid()) return -1; if (record.header.id.handle == 0) return 0; - file_plugin->FlushTracerRecord(record, session_id); + file_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, tracer_extra_data, rocprofiler_buffer_id_t{0}); return 0; } diff --git a/plugin/perfetto/perfetto.cpp b/plugin/perfetto/perfetto.cpp index 65b735aa03..fe2f5d0d70 100644 --- a/plugin/perfetto/perfetto.cpp +++ b/plugin/perfetto/perfetto.cpp @@ -340,12 +340,13 @@ class perfetto_plugin_t { return 0; } - int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, - rocprofiler_session_id_t session_id) { +int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, + rocprofiler_session_id_t session_id, + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) { std::lock_guard lock(writing_lock); if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n"); std::string kernel_name; - char* function_name; + const char* function_name; char* activity_name; std::string roctx_message; uint64_t roctx_id = 0; @@ -424,35 +425,11 @@ class perfetto_plugin_t { } } auto& roctx_track = roctx_track_it->second; - - size_t roctx_message_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size( - session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_message_size)); - if (roctx_message_size > 1) { - char* roctx_message_str = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info( - session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_message_str)); - if (roctx_message_str) - roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str))); - } - size_t roctx_id_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size( - session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_id_size)); - if (roctx_id_size > 1) { - char* roctx_id_str = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info( - session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, - tracer_record.operation_id, &roctx_id_str)); - if (roctx_id_str) { - roctx_id = std::stoll(std::string(strdup(roctx_id_str))); - free(roctx_id_str); - } - } - - if(tracer_record.phase == ROCPROFILER_PHASE_NONE) { + roctx_id = tracer_record.operation_id.id; + if (reinterpret_cast(tracer_record.api_data_handle.handle)) + roctx_message = std::string(reinterpret_cast(tracer_record.api_data_handle.handle)); + + if (tracer_record.phase == ROCPROFILER_PHASE_NONE) { if (tracer_record.operation_id.id == 1) { perfetto::StaticString roctx_message_pft( (!roctx_message.empty() ? roctx_message.c_str() : "")); @@ -465,19 +442,18 @@ class perfetto_plugin_t { roctx_track_entries_--; } } else { - rocprofiler_timestamp_t timestamp; + rocprofiler_timestamp_t timestamp; rocprofiler_get_timestamp(×tamp); if (tracer_record.operation_id.id == 1) { - perfetto::StaticString roctx_message_pft( - (!roctx_message.empty() ? roctx_message.c_str() : "")); - TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track, - timestamp.value, "Timestamp(ns)", - timestamp.value, "RocTx ID", roctx_id); - roctx_track_entries_++; - } else { - TRACE_EVENT_END("ROCTX_API", roctx_track, timestamp.value); - roctx_track_entries_--; - } + perfetto::StaticString roctx_message_pft( + (!roctx_message.empty() ? roctx_message.c_str() : "")); + TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track, timestamp.value, + "Timestamp(ns)", timestamp.value, "RocTx ID", roctx_id); + roctx_track_entries_++; + } else { + TRACE_EVENT_END("ROCTX_API", roctx_track, timestamp.value); + roctx_track_entries_--; + } } break; } @@ -505,23 +481,15 @@ class perfetto_plugin_t { } } auto& hsa_track = hsa_track_it->second; - size_t function_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size( - session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_size)); - if (function_name_size > 1) { - function_name = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info( - session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name)); - } + function_name = tracer_extra_data.function_name; + rocprofiler_timestamp_t timestamp; rocprofiler_get_timestamp(×tamp); - if(tracer_record.phase == ROCPROFILER_PHASE_ENTER) + if (tracer_record.phase == ROCPROFILER_PHASE_ENTER) TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(function_name), hsa_track, - timestamp.value, - perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); - if(tracer_record.phase == ROCPROFILER_PHASE_EXIT) + timestamp.value, + perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); + if (tracer_record.phase == ROCPROFILER_PHASE_EXIT) TRACE_EVENT_END("HSA_API", hsa_track, timestamp.value); break; } @@ -550,34 +518,13 @@ class perfetto_plugin_t { } } auto& hip_track = hip_track_it->second; - size_t function_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( - session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name_size)); - if (function_name_size > 1) { - function_name = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( - session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &function_name)); - } - size_t kernel_name_size = 0; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( - session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &kernel_name_size)); - char* kernel_name_str; - if (kernel_name_size > 1) { - kernel_name_str = nullptr; - CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( - session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, - tracer_record.operation_id, &kernel_name_str)); - if (kernel_name_str) { - kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str)); - free(kernel_name_str); - } - } + function_name = tracer_extra_data.function_name; + if (tracer_extra_data.kernel_name != nullptr) + kernel_name = std::string(tracer_extra_data.kernel_name); + rocprofiler_timestamp_t timestamp; rocprofiler_get_timestamp(×tamp); - if(tracer_record.phase == ROCPROFILER_PHASE_ENTER) { + if (tracer_record.phase == ROCPROFILER_PHASE_ENTER) { if (kernel_name.size() > 0) { TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track, timestamp.value, "Kernel Name", kernel_name, @@ -588,10 +535,10 @@ class perfetto_plugin_t { perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); } } - if(tracer_record.phase == ROCPROFILER_PHASE_EXIT) { + if (tracer_record.phase == ROCPROFILER_PHASE_EXIT) { TRACE_EVENT_END("HIP_API", hip_track, timestamp.value); } - if(tracer_record.phase == ROCPROFILER_PHASE_NONE) { + if (tracer_record.phase == ROCPROFILER_PHASE_NONE) { if (kernel_name.size() > 0) { TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track, tracer_record.timestamps.begin.value, "Kernel Name", kernel_name, @@ -616,13 +563,14 @@ class perfetto_plugin_t { // size_t stream_id_str_size = 0; // char* stream_id_str; // CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( - // session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0}, - // rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value}, + // session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, + // 0}, rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value}, // &stream_id_str_size)); // if (stream_id_str_size > 1) { // stream_id_str = static_cast(malloc(stream_id_str_size * sizeof(char))); // CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( - // session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0}, + // session_id, ROCPROFILER_HIP_STREAM_ID, + // rocprofiler_tracer_api_data_handle_t{nullptr, 0}, // rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value}, // &stream_id_str)); // if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str); @@ -671,20 +619,20 @@ class perfetto_plugin_t { } else { activity_name = const_cast(std::string("N/A").c_str()); } - if(tracer_record.phase == ROCPROFILER_PHASE_NONE) + if (tracer_record.phase == ROCPROFILER_PHASE_NONE) TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track, - tracer_record.timestamps.begin.value, "Agent ID", - tracer_record.agent_id.handle, "Process ID", GetPid(), - perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); - else if(tracer_record.phase == ROCPROFILER_PHASE_ENTER) + tracer_record.timestamps.begin.value, "Agent ID", + tracer_record.agent_id.handle, "Process ID", GetPid(), + perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); + else if (tracer_record.phase == ROCPROFILER_PHASE_ENTER) TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track, - timestamp.value, "Agent ID", - tracer_record.agent_id.handle, "Process ID", GetPid(), - perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); + timestamp.value, "Agent ID", tracer_record.agent_id.handle, + "Process ID", GetPid(), + perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value)); } - if(tracer_record.phase == ROCPROFILER_PHASE_NONE) + if (tracer_record.phase == ROCPROFILER_PHASE_NONE) TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value); - else if(tracer_record.phase == ROCPROFILER_PHASE_EXIT) + else if (tracer_record.phase == ROCPROFILER_PHASE_EXIT) TRACE_EVENT_END("HIP_OPS", stream_track, timestamp.value); break; } @@ -754,9 +702,11 @@ class perfetto_plugin_t { break; } case ROCPROFILER_TRACER_RECORD: { + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data; + tracer_extra_data.function_name = nullptr; rocprofiler_record_tracer_t* tracer_record = const_cast( reinterpret_cast(begin)); - FlushTracerRecord(*tracer_record, session_id); + FlushTracerRecord(*tracer_record, session_id, tracer_extra_data); break; } default: @@ -837,10 +787,11 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id); } -ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record, - rocprofiler_session_id_t session_id) { +ROCPROFILER_EXPORT int rocprofiler_plugin_write_record( + rocprofiler_record_tracer_t record, + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data) { if (!perfetto_plugin || !perfetto_plugin->IsValid()) return -1; if (record.header.id.handle == 0) return 0; - perfetto_plugin->FlushTracerRecord(record, session_id); + perfetto_plugin->FlushTracerRecord(record, rocprofiler_session_id_t{0}, tracer_extra_data); return 0; } diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index 9a4100d3b4..efd99ee4d5 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -21,6 +21,7 @@ target_include_directories(rocprofiler_tool ${PROJECT_SOURCE_DIR}/src) target_compile_definitions(rocprofiler_tool + PUBLIC AMD_INTERNAL_BUILD PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1) if(ASAN) diff --git a/src/tools/tool.cpp b/src/tools/tool.cpp index 0b81517d5c..5dd2200196 100644 --- a/src/tools/tool.cpp +++ b/src/tools/tool.cpp @@ -22,6 +22,11 @@ #include #include #include +#include +#include "hsa_prof_str.h" +#include +#include + #include "rocprofiler.h" #include "rocprofiler_plugin.h" #include @@ -52,6 +57,7 @@ #include #include "utils/helper.h" +#include "trace_buffer.h" namespace fs = std::experimental::filesystem; @@ -60,7 +66,7 @@ namespace fs = std::experimental::filesystem; do { \ if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCProfiler API Call Error!"); \ } while (false) - +TRACE_BUFFER_INSTANTIATE(); namespace { struct shmd_t { @@ -93,15 +99,12 @@ class rocprofiler_plugin_t { reinterpret_cast( dlsym(plugin_handle_, "rocprofiler_plugin_write_buffer_records")); if (!rocprofiler_plugin_write_buffer_records_) return; - rocprofiler_plugin_write_record_ = reinterpret_cast( dlsym(plugin_handle_, "rocprofiler_plugin_write_record")); if (!rocprofiler_plugin_write_record_) return; - rocprofiler_plugin_finalize_ = reinterpret_cast( dlsym(plugin_handle_, "rocprofiler_plugin_finalize")); if (!rocprofiler_plugin_finalize_) return; - if (auto* initialize = reinterpret_cast( dlsym(plugin_handle_, "rocprofiler_plugin_initialize")); initialize != nullptr) @@ -123,8 +126,7 @@ class rocprofiler_plugin_t { assert(is_valid()); return rocprofiler_plugin_write_buffer_records_(std::forward(args)...); } - - private: + private: bool valid_{false}; void* plugin_handle_; @@ -135,6 +137,77 @@ class rocprofiler_plugin_t { std::optional plugin; +struct hsa_api_trace_entry_t { + std::atomic valid; + rocprofiler_record_tracer_t record; + const char* function_name; + hsa_api_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* function_name_str, + const hsa_api_data_t& data) + : valid(rocprofiler::TRACE_ENTRY_INIT) { + record = tracer_record; + function_name = function_name_str ? strdup(function_name_str) : nullptr; + record.api_data_handle.handle = &data; + } + ~hsa_api_trace_entry_t() { if (function_name != nullptr) free(const_cast(function_name)); } +}; + +struct roctx_trace_entry_t { + std::atomic valid; + rocprofiler_record_tracer_t record; + roctx_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* roctx_message_str) + : valid(rocprofiler::TRACE_ENTRY_INIT) { + record = tracer_record; + roctx_message_str? record.api_data_handle.handle=strdup(roctx_message_str):nullptr; + } + ~roctx_trace_entry_t() { } +}; + +struct hip_api_trace_entry_t { + + std::atomic valid; + rocprofiler_record_tracer_t record; + const char* function_name; + const char* kernel_name; + + hip_api_trace_entry_t(rocprofiler_record_tracer_t tracer_record, const char* kernel_name_str, + const char* function_name_str, const hip_api_data_t& data) + : valid(rocprofiler::TRACE_ENTRY_INIT) { + record = tracer_record; + kernel_name = kernel_name_str ? strdup(kernel_name_str) : nullptr; + function_name = function_name_str ? strdup(function_name_str) : nullptr; + record.api_data_handle.handle=&data; + } + ~hip_api_trace_entry_t() { + if (function_name != nullptr) free(const_cast(function_name)); + if (kernel_name != nullptr) free(const_cast(kernel_name)); + } +}; + +rocprofiler::TraceBuffer hip_api_buffer( + "HIP API", 0x200000, [](hip_api_trace_entry_t* entry) { + assert(plugin && "plugin is not initialized"); + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data; + tracer_extra_data.function_name = entry->function_name; + tracer_extra_data.kernel_name = entry->kernel_name; + plugin->write_callback_record(entry->record, tracer_extra_data); + }); +rocprofiler::TraceBuffer hsa_api_buffer( + "HSA API", 0x200000, [](hsa_api_trace_entry_t* entry) { + assert(plugin && "plugin is not initialized"); + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data; + tracer_extra_data.function_name = entry->function_name; + plugin->write_callback_record(entry->record, + tracer_extra_data); + }); +rocprofiler::TraceBuffer roctx_trace_buffer( + "rocTX API", 0x200000, [](roctx_trace_entry_t* entry) { + assert(plugin && "plugin is not initialized"); + rocprofiler_plugin_tracer_extra_data_t tracer_extra_data; + tracer_extra_data.function_name = nullptr; + plugin->write_callback_record( + entry->record, tracer_extra_data); + }); + } // namespace uint64_t getFlushIntervalFromEnv() { @@ -325,10 +398,94 @@ void plugins_load() { } } } +/* +* A callback function for synchronous trace records. +* This function queries the api infoemation and populates the +* api_trace buffer and adds it to the trace buffer. +*/ +void sync_api_trace_callback(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id) { + if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) { + size_t kernel_name_size = 0, function_name_size = 0; + char *function_name_c = nullptr, *kernel_name_c = nullptr; + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( + session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &function_name_size)); + if (function_name_size > 1) { + //function_name_c = (char*)malloc(function_name_size); + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( + session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &function_name_c)); + } + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( + session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &kernel_name_size)); + if (kernel_name_size > 1) { + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( + session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &kernel_name_c)); + } + char* data = nullptr; + size_t size = 0; + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( + session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &size)); + if(size > 0) + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( + session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &data)); + hip_api_data_t hip_api_data = *reinterpret_cast(data); + //std::cout << "in api calback" << strlen(function_name_c) << "\t" << function_name_c << std::endl; + hip_api_trace_entry_t& entry = hip_api_buffer.Emplace( + tracer_record, + (const char*)kernel_name_c ? strdup(kernel_name_c) : nullptr, + (const char*)(function_name_c) ? strdup(function_name_c):nullptr, + hip_api_data); + entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release); + } + if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) { + size_t function_name_size = 0; + char *function_name_c = nullptr; + CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size( + session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &function_name_size)); + if (function_name_size > 1) { -void plugin_write_record(rocprofiler_record_tracer_t record, rocprofiler_session_id_t session_id) { - if (plugin) plugin->write_callback_record(record, session_id); -} + CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info( + session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle, + tracer_record.operation_id, &function_name_c)); + } + char* data = nullptr; + size_t size=0; + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size( + session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &size)); + CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info( + session_id, ROCPROFILER_HIP_API_DATA, tracer_record.api_data_handle, tracer_record.operation_id, &data)); + hsa_api_data_t hsa_api_data = *reinterpret_cast(data); + hsa_api_trace_entry_t& entry = hsa_api_buffer.Emplace( + tracer_record, + (const char*)(function_name_c), + hsa_api_data); + entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release); + } + if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) { + size_t roctx_message_size = 0; + char *roctx_message_str = nullptr; + uint64_t roctx_id=0; + CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size( + session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, + tracer_record.operation_id, &roctx_message_size)); + if (roctx_message_size > 1) { + roctx_message_str =(char*)malloc(roctx_message_size * sizeof(char)); + CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info( + session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle, + tracer_record.operation_id, &roctx_message_str)); + if (roctx_message_str) + roctx_message_str ? rocmtools::cxx_demangle(std::string(strdup(roctx_message_str))).c_str() : nullptr; + } + roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace( + tracer_record, + roctx_message_str); + entry.valid.store(rocprofiler::TRACE_ENTRY_COMPLETE, std::memory_order_release); + } + } void wait_for_amdsys() { while (amd_sys_handler.load(std::memory_order_relaxed)) { @@ -542,7 +699,7 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version, apis_requested.size(), &filter_id, property)); CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id)); CHECK_ROCPROFILER( - rocprofiler_set_api_trace_sync_callback(session_id, filter_id, plugin_write_record)); + rocprofiler_set_api_trace_sync_callback(session_id, filter_id, sync_api_trace_callback)); filter_ids.emplace_back(filter_id); break; } diff --git a/src/tools/trace_buffer.h b/src/tools/trace_buffer.h new file mode 100644 index 0000000000..81c7345ae2 --- /dev/null +++ b/src/tools/trace_buffer.h @@ -0,0 +1,288 @@ +/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#ifndef TOOL_TRACE_BUFFER_H_ +#define TOOL_TRACE_BUFFER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofiler { + +class TraceBufferBase { + public: + static void FlushAll() { + std::lock_guard lock(mutex_); + + for (auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_) + trace_buffer->Flush(); + } + + static void Register(TraceBufferBase* elem) { + std::lock_guard lock(mutex_); + + auto** prev_ptr = &head_; + while (*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_) + prev_ptr = &(*prev_ptr)->next_; + + elem->next_ = *prev_ptr; + *prev_ptr = elem; + } + + static void Unregister(TraceBufferBase* elem) { + std::lock_guard lock(mutex_); + + auto** prev_ptr = &head_; + while (*prev_ptr != nullptr && *prev_ptr != elem) prev_ptr = &(*prev_ptr)->next_; + + assert(*prev_ptr != nullptr && "elem is not in the list"); + *prev_ptr = elem->next_; + } + + TraceBufferBase(std::string name, int priority) + : name_(std::move(name)), priority_(priority), next_(nullptr) {} + + TraceBufferBase(const TraceBufferBase&) = delete; + TraceBufferBase& operator=(const TraceBufferBase&) = delete; + + virtual ~TraceBufferBase() { Unregister(this); } + + virtual void Flush() = 0; + + std::string name() && { return std::move(name_); } + const std::string& name() const& { return name_; } + + private: + const std::string name_; + const int priority_; + TraceBufferBase* next_; + + static TraceBufferBase* head_; + static std::mutex mutex_; +}; + +enum TraceEntryState { TRACE_ENTRY_INVALID = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPLETE = 2 }; + +template > +class TraceBuffer : protected TraceBufferBase { + public: + using callback_t = std::function; + + TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0) + : TraceBufferBase(std::move(name), priority), + flush_callback_(std::move(flush_callback)), + size_(size) { + assert(size_ != 0 && "cannot create an empty trace buffer"); + + Entry* write_buffer = allocator_.allocate(size_); + assert(write_buffer != nullptr); + buffer_list_.push_back(write_buffer); + + read_index_ = 0; + write_index_ = {0, write_buffer}; + + AllocateFreeBuffer(); + + // Add this instance to the link list of all trace buffers in the process. + Register(this); + } + + ~TraceBuffer() override { + // Flush the remaining records. After flushing, there should not be any records left in the + // trace buffer. + Flush(); + assert(read_index_ == write_index_.load().index); + + // Acquire both the writer and worker lock as we are accessing shared variables they protect. + std::unique_lock writer_lock(write_mutex_, std::defer_lock); + std::unique_lock worker_lock(worker_mutex_, std::defer_lock); + std::lock(writer_lock, worker_lock); + + // Deallocate the buffers. + allocator_.deallocate(write_index_.load().buffer, size_); + allocator_.deallocate(free_buffer_, size_); + + // Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional + // after waking up, and exits if it does not have a value. + if (worker_thread_) { + std::thread worker_thread = std::move(worker_thread_.value()); + { + // Tell the worker thread loop to exit. + worker_thread_.reset(); + free_buffer_ = nullptr; + worker_cond_.notify_one(); + } + // Release the worker lock to allow the worker thread to exit. + worker_lock.unlock(); + worker_thread.join(); + } + } + + // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are + // monotonically increasing indices, with read_pointer % size always indexing inside the first + // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with + // the next invocation after changing its state to 'complete'. + void Flush() override { + std::lock_guard lock(write_mutex_); + auto write_index = write_index_.load(std::memory_order_relaxed); + + for (auto it = buffer_list_.begin(); it != buffer_list_.end();) { + auto end_of_buffer = read_index_ - read_index_ % size_ + size_; + + while (read_index_ < std::min(write_index.index, end_of_buffer)) { + Entry* entry = &(*it)[read_index_ % size_]; + + // The entry is not yet complete, stop flushing here. + if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; + + flush_callback_(entry); + entry->~Entry(); + + ++read_index_; + } + + // The buffer is still in use or the read pointer did not reach the end of the buffer. + if (*it == write_index.buffer || read_index_ != end_of_buffer) return; + + // All entries in the current buffer are now processed. Destroy the buffer and move onto the + // next buffer in the list. + allocator_.deallocate(*it, size_); + it = buffer_list_.erase(it); + } + } + + template Entry& Emplace(Args... args) { + return *new (GetEntry()) Entry(std::forward(args)...); + } + + private: + Entry* GetEntry() { + auto current = write_index_.load(std::memory_order_relaxed); + + while (true) { + // If the pointer is at the end of the current buffer, switch to the available free buffer and + // notify the worker thread to allocate a new buffer. + if (current.index != 0 && current.index % size_ == 0) { + std::lock_guard lock(write_mutex_); + + // If the worker thread wasn't already started, start it now. This avoids starting a new + // thread when the trace buffer is created. + if (!worker_thread_) { + std::promise ready; + auto future = ready.get_future(); + { + std::lock_guard worker_lock(worker_mutex_); + worker_thread_.emplace(&TraceBuffer::WorkerThreadLoop, this, std::move(ready)); + } + future.wait(); + } + + // Re-check the pointer overflow under the writer lock, another thread could have beaten us + // to it and already bumped the write_index_. + current = write_index_.load(std::memory_order_relaxed); + if (current.index % size_ == 0) { + std::unique_lock worker_lock(worker_mutex_); + + // Wait for the free buffer to become available. + worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; }); + + current.buffer = free_buffer_; + buffer_list_.push_back(current.buffer); + write_index_.store({current.index + 1, current.buffer}, std::memory_order_relaxed); + + // Tell the worker thread to allocate a new free buffer. + free_buffer_ = nullptr; + worker_cond_.notify_one(); + + // We successfully allocated a new buffer, return the first element. + return ¤t.buffer[0]; + } + } + + if (write_index_.compare_exchange_weak(current, {current.index + 1, current.buffer}, + std::memory_order_relaxed)) + return ¤t.buffer[current.index % size_]; + } + } + + void AllocateFreeBuffer() { + assert(free_buffer_ == nullptr); + + free_buffer_ = allocator_.allocate(size_); + assert(free_buffer_ != nullptr); + + for (size_t i = 0; i < size_; ++i) + free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed); + } + + void WorkerThreadLoop(std::promise ready) { + std::unique_lock lock(worker_mutex_); + + // This worker thread is now ready to accept work. + ready.set_value(); + + while (true) { + worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; }); + if (!worker_thread_) break; + AllocateFreeBuffer(); + worker_cond_.notify_one(); + } + } + + // The WriteIndex is used to store both the index and the buffer associated with that index (the + // buffer contains the trace buffer records at [index - index % size, index - index % size_t + + // size_ - 1]) in a single atomic variable. + struct WriteIndex { + uint64_t index; + Entry* buffer; + }; + + const callback_t flush_callback_; + const uint64_t size_; + + uint64_t read_index_; // The index of the next record to flush. + std::atomic write_index_; // The index of the next record that could be written. + Entry* free_buffer_{nullptr}; // The next available free buffer. + + std::optional worker_thread_; + std::mutex worker_mutex_; + std::condition_variable worker_cond_; + + std::mutex write_mutex_; + std::list buffer_list_; + Allocator allocator_; +}; +} // namespace rocprofiler + +#define TRACE_BUFFER_INSTANTIATE() \ + rocprofiler::TraceBufferBase* rocprofiler::TraceBufferBase::head_ = nullptr; \ + std::mutex rocprofiler::TraceBufferBase::mutex_; + +#endif // TOOL_TRACE_BUFFER_H_