From f47df6108b8fb4d07b400fcd1fd42191ed8760df Mon Sep 17 00:00:00 2001 From: "U, Srihari" Date: Thu, 24 Jul 2025 12:11:36 +0530 Subject: [PATCH] [rocprofv3] rocpd doesn't generate output files for counter collection (#480) * Fix kernel dispatch for counter collection * Updated change log * Fix format * rename output csv file * Fix warnings * Address review comment * Address final review comment [ROCm/rocprofiler-sdk commit: 3a36fd13fed56f9b00ba452d23a27365e892e89e] --- projects/rocprofiler-sdk/CHANGELOG.md | 1 + .../source/lib/output/generateRocpd.cpp | 242 ++++++++++-------- .../source/lib/python/rocpd/source/csv.cpp | 2 +- 3 files changed, 141 insertions(+), 104 deletions(-) diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index 5c992ae193..6550fc14d8 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -216,6 +216,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - Code object disassembly was missing function inlining information - Fixed queue preemption error and HSA_STATUS_ERROR_INVALID_PACKET_FORMAT error for stochastic PC-sampling for MI300X, leading to more stable runs. - Fixed the system hang issue for host-trap PC-sampling on MI300X. +- Fixed rocpd counter collection issue when counter collection alone is enabled, rocpd_kernel_dispatch table gets populated by counters data instead of kernel_dispatch data. ### Removed diff --git a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp index 6899919ade..49970e4792 100644 --- a/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp +++ b/projects/rocprofiler-sdk/source/lib/output/generateRocpd.cpp @@ -970,46 +970,45 @@ write_rocpd( } }; - auto insert_kernel_dispatch_data = [&conn, &tool_metadata, &string_entries, node_id, this_pid]( - const auto& _gen, auto& dispatch_evt_ids) { + auto insert_kernel_dispatch_data = [&, node_id, this_pid](auto& dispatch_evt_ids) { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_kernel_dispatch"); - for(auto pitr : _gen) - { - auto _deferred = sql::deferred_transaction{conn}; - for(auto itr : _gen.get(pitr)) + auto process_dispatch = [&](uint64_t dispatch_id, + uint64_t kernel_id, + const auto& corr_id, + const auto& info, + const auto& kind, + uint32_t thread_id, + uint64_t queue_id, + uint64_t stream_id, + uint64_t start_timestamp, + uint64_t end_timestamp, + const auto& grid, + const auto& workgroup, + bool enable_duplicate_check) { + // Skip if we've already processed this dispatch_id + if(dispatch_evt_ids.size() > dispatch_id && dispatch_evt_ids[dispatch_id] != 0) return; + + auto kern_name = (kernel_id > 0) + ? tool_metadata.get_kernel_symbol(kernel_id)->formatted_kernel_name + : "unknown_kernel"; + + auto evt_id = create_event(conn, + { + insert_value("category_id", string_entries.at(kind)), + insert_value("stack_id", corr_id.internal), + insert_value("parent_stack_id", corr_id.internal), + insert_value("correlation_id", corr_id.external.value), + }); + + // Ensure dispatch_evt_ids is large enough + if(dispatch_evt_ids.size() < dispatch_id + 1) + common::container::resize(dispatch_evt_ids, dispatch_id + 1, 0UL); + + // Check for duplicates if requested + if(enable_duplicate_check && dispatch_evt_ids.at(dispatch_id) != 0) { - // insert thread info if it doesn't already exist - get_thread_id(itr.thread_id); - - auto kind = tool_metadata.buffer_names.at(itr.kind); - auto info = itr.dispatch_info; - auto kernel_id = info.kernel_id; - auto dispatch_id = info.dispatch_id; - auto corr_id = itr.correlation_id; - auto grid = info.grid_size; - auto workgroup = info.workgroup_size; - auto kern_name = tool_metadata.get_kernel_symbol(kernel_id)->formatted_kernel_name; - auto stream_id = get_stream_id(itr.stream_id); - auto queue_id = get_queue_id(info.queue_id); - auto region_name = - (corr_id.external.value > 0) - ? tool_metadata.get_kernel_name(kernel_id, corr_id.external.value) - : std::string_view{}; - - auto evt_id = - create_event(conn, - { - insert_value("category_id", string_entries.at(kind)), - insert_value("stack_id", corr_id.internal), - insert_value("parent_stack_id", corr_id.internal), - insert_value("correlation_id", corr_id.external.value), - }); - - if(dispatch_evt_ids.size() < dispatch_id + 1) - common::container::resize(dispatch_evt_ids, dispatch_id + 1, 0UL); - - ROCP_CI_LOG_IF(WARNING, dispatch_evt_ids.at(dispatch_id) != 0) + ROCP_CI_LOG(WARNING) << fmt::format("duplicate kernel dispatch id {} :: event_id={}, kernel_id={}, " "corr_id={}, name='{}'", dispatch_id, @@ -1017,45 +1016,115 @@ write_rocpd( kernel_id, corr_id.internal, kern_name); + } - dispatch_evt_ids.at(dispatch_id) = evt_id; + dispatch_evt_ids.at(dispatch_id) = evt_id; - auto stmt = get_insert_statement( - "rocpd_kernel_dispatch{{uuid}}", - { - insert_value("id", dispatch_id), - insert_value("nid", node_id), - insert_value("pid", this_pid), - insert_value("tid", itr.thread_id), - insert_value("agent_id", tool_metadata.get_agent(info.agent_id)->node_id), - insert_value("kernel_id", kernel_id), - insert_value("dispatch_id", dispatch_id), - insert_value("queue_id", queue_id), - insert_value("stream_id", stream_id), - insert_value("start", itr.start_timestamp), - insert_value("end", itr.end_timestamp), - insert_value("private_segment_size", info.private_segment_size), - insert_value("group_segment_size", info.group_segment_size), - insert_value("workgroup_size_x", workgroup.x), - insert_value("workgroup_size_y", workgroup.y), - insert_value("workgroup_size_z", workgroup.z), - insert_value("grid_size_x", grid.x), - insert_value("grid_size_y", grid.y), - insert_value("grid_size_z", grid.z), - insert_value("region_name_id", string_entries.at(region_name)), - insert_value("event_id", evt_id), - }); + auto region_name = + (corr_id.external.value > 0 && + (enable_duplicate_check || kernel_id > 0)) + ? tool_metadata.get_kernel_name(kernel_id, corr_id.external.value) + : std::string_view{}; - execute_raw_sql_statements(conn, stmt); + auto agent_node_id = tool_metadata.get_agent(info.agent_id)->node_id; + + // Insert into kernel dispatch table + auto stmt = get_insert_statement( + "rocpd_kernel_dispatch{{uuid}}", + { + insert_value("id", dispatch_id), + insert_value("nid", node_id), + insert_value("pid", this_pid), + insert_value("tid", thread_id), + insert_value("agent_id", agent_node_id), + insert_value("kernel_id", kernel_id), + insert_value("dispatch_id", dispatch_id), + insert_value("queue_id", queue_id), + insert_value("stream_id", stream_id), + insert_value("start", start_timestamp), + insert_value("end", end_timestamp), + insert_value("private_segment_size", info.private_segment_size), + insert_value("group_segment_size", info.group_segment_size), + insert_value("workgroup_size_x", workgroup.x), + insert_value("workgroup_size_y", workgroup.y), + insert_value("workgroup_size_z", workgroup.z), + insert_value("grid_size_x", grid.x), + insert_value("grid_size_y", grid.y), + insert_value("grid_size_z", grid.z), + insert_value("region_name_id", string_entries.at(region_name)), + insert_value("event_id", evt_id), + }); + + execute_raw_sql_statements(conn, stmt); + }; + + if(kernel_dispatch_gen.empty()) + { + for(auto pctr : counter_collection_gen) + { + auto _deferred = sql::deferred_transaction{conn}; + for(const auto& record : counter_collection_gen.get(pctr)) + { + const auto& dispatch_data = record.dispatch_data; + const auto& info = dispatch_data.dispatch_info; + + // Register thread ID + get_thread_id(record.thread_id); + + // Use buffer category for kernel dispatches + auto kind = + tool_metadata.buffer_names.at(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH); + + // Process this dispatch + process_dispatch(info.dispatch_id, // dispatch_id + info.kernel_id, // kernel_id + dispatch_data.correlation_id, // corr_id + info, // info + kind, // kind + record.thread_id, // thread_id + get_queue_id(info.queue_id), // queue_id + get_stream_id(record.stream_id), // stream_id + dispatch_data.start_timestamp, // start_timestamp + dispatch_data.end_timestamp, // end_timestamp + info.grid_size, // grid + info.workgroup_size, // workgroup + false // enable_duplicate_check + ); + } + } + } + else + { + for(auto pitr : kernel_dispatch_gen) + { + auto _deferred = sql::deferred_transaction{conn}; + for(auto itr : kernel_dispatch_gen.get(pitr)) + { + // Register thread ID + get_thread_id(itr.thread_id); + + // Process this dispatch + process_dispatch(itr.dispatch_info.dispatch_id, // dispatch_id + itr.dispatch_info.kernel_id, // kernel_id + itr.correlation_id, // corr_id + itr.dispatch_info, // info + tool_metadata.buffer_names.at(itr.kind), // kind + itr.thread_id, // thread_id + get_queue_id(itr.dispatch_info.queue_id), // queue_id + get_stream_id(itr.stream_id), // stream_id + itr.start_timestamp, // start_timestamp + itr.end_timestamp, // end_timestamp + itr.dispatch_info.grid_size, // grid + itr.dispatch_info.workgroup_size, // workgroup + true // enable_duplicate_check + ); + } } } }; - auto insert_pmc_event_data = [&conn, - &tool_metadata, - &string_entries, - &counter_collection_gen, - &kernel_dispatch_gen](auto& dispatch_evt_ids) { + auto insert_pmc_event_data = [&conn, &tool_metadata, &counter_collection_gen]( + auto& dispatch_evt_ids) { auto _sqlgenperf_rocpd = get_simple_timer("rocpd_pmc_event"); size_t idx = tool_metadata.pmc_event_offset; for(auto ditr : counter_collection_gen) @@ -1066,39 +1135,6 @@ write_rocpd( const auto& info = record.dispatch_data.dispatch_info; auto dispatch_id = info.dispatch_id; - if(kernel_dispatch_gen.empty()) - { - auto kind = - tool_metadata.buffer_names.at(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH); - auto kernel_id = info.kernel_id; - auto corr_id = record.dispatch_data.correlation_id; - auto kern_name = - tool_metadata.get_kernel_symbol(kernel_id)->formatted_kernel_name; - - auto evt_id = - create_event(conn, - { - insert_value("category_id", string_entries.at(kind)), - insert_value("stack_id", corr_id.internal), - insert_value("parent_stack_id", corr_id.internal), - insert_value("correlation_id", corr_id.external.value), - }); - - if(dispatch_evt_ids.size() < dispatch_id + 1) - common::container::resize(dispatch_evt_ids, dispatch_id + 1, 0UL); - - ROCP_CI_LOG_IF(WARNING, dispatch_evt_ids.at(dispatch_id) != 0) << fmt::format( - "duplicate kernel dispatch id {} :: event_id={}, kernel_id={}, " - "corr_id={}, name='{}'", - dispatch_id, - evt_id, - kernel_id, - corr_id.internal, - kern_name); - - dispatch_evt_ids.at(dispatch_id) = evt_id; - } - auto evt_id = dispatch_evt_ids.at(dispatch_id); for(const auto& count : record.read()) { @@ -1377,7 +1413,7 @@ write_rocpd( insert_api_data(rocdecode_api_gen); } - insert_kernel_dispatch_data(kernel_dispatch_gen, dispatch_to_evt_id); + insert_kernel_dispatch_data(dispatch_to_evt_id); insert_pmc_event_data(dispatch_to_evt_id); insert_memory_copy_data(memory_copy_gen); diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp index 5be3f97977..0e011e0663 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/csv.cpp @@ -109,7 +109,7 @@ CsvManager::CsvManager(rocprofiler::tool::output_config output_cfg) {CsvType::ROCJPEG_API, {"rocjpeg_api_trace.csv", API_TRACE_HEADER}}, {CsvType::COUNTER, - {"counter_trace.csv", + {"counter_collection.csv", "\"Pid\",\"Correlation_Id\",\"Dispatch_Id\",\"Agent_Id\",\"Queue_Id\"," "\"Process_Id\"," "\"Thread_Id\","