From 8abb65b166467f205d9864bb93cb03484e76655a Mon Sep 17 00:00:00 2001 From: "Baraldi, Giovanni" Date: Sat, 11 Jan 2025 00:43:06 +0100 Subject: [PATCH] Adding source snapshot and partial serialization (#99) * Adding source snapshot * Adding option to serialize only on target kernel * Fix for tidy * Formatting * Testing the new flag --------- Co-authored-by: Giovanni Baraldi [ROCm/rocprofiler-sdk commit: a2fa188e140629b5ca8cb642c10b3b1d61d539bd] --- .../amd_detail/thread_trace_core.h | 5 +- .../source/lib/rocprofiler-sdk-att/code.cpp | 43 ++++++++++++++++ .../rocprofiler-sdk/thread_trace/att_core.cpp | 49 +++++++++++-------- .../rocprofiler-sdk/thread_trace/att_core.hpp | 8 +-- .../thread_trace/att_service.cpp | 4 ++ .../tests/thread-trace/agent.cpp | 1 + .../tests/thread-trace/multi_dispatch.cpp | 1 + 7 files changed, 85 insertions(+), 26 deletions(-) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/amd_detail/thread_trace_core.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/amd_detail/thread_trace_core.h index 7a96cd6abe..157ae998be 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/amd_detail/thread_trace_core.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/amd_detail/thread_trace_core.h @@ -42,8 +42,9 @@ typedef enum ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, ///< Bitmask of shader engines. ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs - ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, - ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, + ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, ///< Period [1,32] or disable (0) perfmon + ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, ///< Perfmon ID and SIMD mask + ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, ///< Serializes kernels not under thread trace ROCPROFILER_ATT_PARAMETER_LAST } rocprofiler_att_parameter_type_t; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/code.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/code.cpp index e4f2b2a089..fc90fa3740 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/code.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-att/code.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace rocprofiler { @@ -34,6 +35,19 @@ namespace att_wrapper { #define ATT_CSV_NAME "att_output.csv" +// Builds a json filetree by recursively inserting "path" into the json object. +void +navigate(nlohmann::json& json, std::vector& path, const std::string& filename) +{ + if(path.size() == 1) json[path.at(0)] = filename; + + if(path.size() <= 1) return; + + auto& j = json[path.at(0)]; + path.erase(path.begin()); + navigate(j, path, filename); +} + CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr _table) : dir(_dir) , filename(_dir / "code.json") @@ -90,6 +104,8 @@ CodeFile::~CodeFile() nlohmann::json jcode; + std::unordered_set snapshots{}; + for(auto& line : vec) { auto& isa = *line.second; @@ -110,6 +126,14 @@ CodeFile::~CodeFile() << ", " << isa.hitcount << ", " << isa.latency << "]"; jcode.push_back(nlohmann::json::parse(code.str())); + + size_t lineref = isa.code_line->comment.rfind(':'); + if(lineref == 0 || lineref == std::string::npos) continue; + + auto source_ref = isa.code_line->comment.substr(0, lineref); + + if(!source_ref.empty() && snapshots.find(source_ref) == snapshots.end()) + snapshots.insert(std::move(source_ref)); } nlohmann::json json; @@ -117,6 +141,25 @@ CodeFile::~CodeFile() json["version"] = TOOL_VERSION; OutputFile(filename) << json; + + nlohmann::json jsnapfiletree; + size_t num_snap = 0; + + for(auto& source_ref : snapshots) + { + if(rocprofiler::common::filesystem::exists(source_ref)) + { + Fspath filepath(source_ref); + std::stringstream newfile; + newfile << "source_" << (num_snap++) << '_' << filepath.filename().string(); + + std::vector path_elements(filepath.begin(), filepath.end()); + navigate(jsnapfiletree, path_elements, newfile.str()); + rocprofiler::common::filesystem::copy(filepath, dir / newfile.str()); + } + } + + if(num_snap != 0) OutputFile(dir / "snapshots.json") << jsnapfiletree; } } // namespace att_wrapper diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp index c84b169a1e..6e32802a0c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp @@ -355,7 +355,7 @@ DispatchThreadTracer::pre_kernel_call(const hsa::Queue& queue, if(control_flags == ROCPROFILER_ATT_CONTROL_NONE) { auto empty = std::make_unique(); - maybe_add_serialization(empty); + if(params.bSerialize) maybe_add_serialization(empty); return empty; } @@ -396,7 +396,8 @@ void DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& aql, const hsa::Queue::queue_info_session_t& session) { - SignalSerializerExit signal(session); + std::unique_ptr signal{nullptr}; + if(params.bSerialize) signal = std::make_unique(session); if(post_move_data.load() < 1) return; @@ -413,6 +414,8 @@ DispatchThreadTracer::post_kernel_call(DispatchThreadTracer::inst_pkt_t& a auto it = agents.find(pkt->GetAgent()); if(it != agents.end() && it->second != nullptr) it->second->iterate_data(pkt->GetHandle(), session.user_data); + + if(!signal) std::make_unique(session); } } @@ -420,28 +423,33 @@ void DispatchThreadTracer::start_context() { using corr_id_map_t = hsa::Queue::queue_info_session_t::external_corr_id_map_t; + CHECK_NOTNULL(hsa::get_queue_controller())->enable_serialization(); // Only one thread should be attempting to enable/disable this context client.wlock([&](auto& client_id) { if(client_id) return; - client_id = hsa::get_queue_controller()->add_callback( - std::nullopt, - [=](const hsa::Queue& q, - const hsa::rocprofiler_packet& /* kern_pkt */, - rocprofiler_kernel_id_t kernel_id, - rocprofiler_dispatch_id_t dispatch_id, - rocprofiler_user_data_t* user_data, - const corr_id_map_t& /* extern_corr_ids */, - const context::correlation_id* corr_id) { - return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id); - }, - [=](const hsa::Queue& /* q */, - hsa::rocprofiler_packet /* kern_pkt */, - std::shared_ptr& session, - inst_pkt_t& aql, - kernel_dispatch::profiling_time) { this->post_kernel_call(aql, *session); }); + client_id = + CHECK_NOTNULL(hsa::get_queue_controller()) + ->add_callback( + std::nullopt, + [=](const hsa::Queue& q, + const hsa::rocprofiler_packet& /* kern_pkt */, + rocprofiler_kernel_id_t kernel_id, + rocprofiler_dispatch_id_t dispatch_id, + rocprofiler_user_data_t* user_data, + const corr_id_map_t& /* extern_corr_ids */, + const context::correlation_id* corr_id) { + return this->pre_kernel_call(q, kernel_id, dispatch_id, user_data, corr_id); + }, + [=](const hsa::Queue& /* q */, + hsa::rocprofiler_packet /* kern_pkt */, + std::shared_ptr& session, + inst_pkt_t& aql, + kernel_dispatch::profiling_time) { + this->post_kernel_call(aql, *session); + }); }); } @@ -452,12 +460,11 @@ DispatchThreadTracer::stop_context() // NOLINT(readability-convert-member-funct if(!client_id) return; // Remove our callbacks from HSA's queue controller - hsa::get_queue_controller()->remove_callback(*client_id); + CHECK_NOTNULL(hsa::get_queue_controller())->remove_callback(*client_id); client_id = std::nullopt; }); - auto* controller = hsa::get_queue_controller(); - if(controller) controller->disable_serialization(); + CHECK_NOTNULL(hsa::get_queue_controller())->disable_serialization(); } void diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp index b8491aada9..174f1dafc1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp @@ -67,6 +67,8 @@ struct thread_trace_parameter_pack uint64_t shader_engine_mask = DEFAULT_SE_MASK; uint64_t buffer_size = DEFAULT_BUFFER_SIZE; + bool bSerialize = false; + // GFX9 Only std::vector> perfcounters; @@ -155,12 +157,12 @@ public: void post_kernel_call(inst_pkt_t& aql, const hsa::queue_info_session& session); - std::unordered_map> agents; + std::unordered_map> agents{}; - std::shared_mutex agents_map_mut; + std::shared_mutex agents_map_mut{}; std::atomic post_move_data{0}; - thread_trace_parameter_pack params; + thread_trace_parameter_pack params{}; }; class AgentThreadTracer diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp index cb864bd190..70d749d750 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp @@ -85,6 +85,7 @@ rocprofiler_configure_dispatch_thread_trace_service( case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; + case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: pack.bSerialize = param.value != 0; break; case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; } } @@ -144,6 +145,9 @@ rocprofiler_configure_agent_thread_trace_service( case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; + case ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL: + if(param.value != 0) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + break; case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; } } diff --git a/projects/rocprofiler-sdk/tests/thread-trace/agent.cpp b/projects/rocprofiler-sdk/tests/thread-trace/agent.cpp index d81c31ecbe..a12ddf5189 100644 --- a/projects/rocprofiler-sdk/tests/thread-trace/agent.cpp +++ b/projects/rocprofiler-sdk/tests/thread-trace/agent.cpp @@ -120,6 +120,7 @@ query_available_agents(rocprofiler_agent_version_t /* version */, parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, 0xF}); parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, 0x6000000}); parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, 0x11}); + parameters.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 0}); ROCPROFILER_CALL( rocprofiler_configure_agent_thread_trace_service(agent_ctx, diff --git a/projects/rocprofiler-sdk/tests/thread-trace/multi_dispatch.cpp b/projects/rocprofiler-sdk/tests/thread-trace/multi_dispatch.cpp index dc2522ce32..e1630edf9d 100644 --- a/projects/rocprofiler-sdk/tests/thread-trace/multi_dispatch.cpp +++ b/projects/rocprofiler-sdk/tests/thread-trace/multi_dispatch.cpp @@ -71,6 +71,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* tool_data) "code object tracing service configure"); std::vector params{}; + params.push_back({ROCPROFILER_ATT_PARAMETER_SERIALIZE_ALL, 1}); ROCPROFILER_CALL( rocprofiler_configure_dispatch_thread_trace_service(client_ctx,