From d89a46ca2ec097ef7117a080e548dd16387a58f0 Mon Sep 17 00:00:00 2001 From: Benjamin Welton Date: Fri, 18 Oct 2024 15:48:22 -0700 Subject: [PATCH] Agent Profiling Fixes for Broken/Improper API Usage (#1122) Prevent's multiple setups of agent profiling on the same agent. Fixes agent read context to only read agents that were setup. Prevent copy of agent profiling internal data struct and reset hsa_signal on move to prevent inadvertant delete. [ROCm/rocprofiler-sdk commit: 788e68716747b1f812748031cafada3ac4bad05a] --- .../include/rocprofiler-sdk/device_counting_service.h | 2 ++ .../source/lib/rocprofiler-sdk/context/context.hpp | 1 + .../source/lib/rocprofiler-sdk/counters/controller.cpp | 5 +++++ .../lib/rocprofiler-sdk/counters/device_counting.cpp | 4 +++- .../lib/rocprofiler-sdk/counters/device_counting.hpp | 8 +++++++- 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/device_counting_service.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/device_counting_service.h index a0331be8f6..9bc7253446 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/device_counting_service.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/device_counting_service.h @@ -84,6 +84,8 @@ typedef void (*rocprofiler_device_counting_service_callback_t)( * @return ::rocprofiler_status_t * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist. * @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Returned if the buffer is not found. + * @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT Returned if context already has agent + * profiling configured for agent_id. * @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured */ rocprofiler_status_t diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp index dd5cfb78fc..92a9f89eca 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/context/context.hpp @@ -91,6 +91,7 @@ struct dispatch_counter_collection_service struct device_counting_service { + std::unordered_set conf_agents; std::vector agent_data; enum class state diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/controller.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/controller.cpp index 60f59c7cab..843262b5e2 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/controller.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/controller.cpp @@ -92,6 +92,11 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t context_i std::make_unique(); } + if(ctx.device_counter_collection->conf_agents.emplace(agent_id.handle).second == false) + { + return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + } + ctx.device_counter_collection->agent_data.emplace_back(); ctx.device_counter_collection->agent_data.back().callback_data = rocprofiler_user_data_t{.ptr = user_data}; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.cpp index 06e5b8a2af..c06c6b532c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.cpp @@ -280,6 +280,7 @@ read_agent_ctx(const context::context* ctx, for(auto& callback_data : agent_ctx.agent_data) { + if(!callback_data.profile || !callback_data.set_profile) continue; const auto* agent = agent::get_agent_cache(callback_data.profile->agent); // If the agent no longer exists or we don't have a profile queue, reading is an error @@ -402,6 +403,8 @@ start_agent_ctx(const context::context* ctx) break; } + callback_data.set_profile = false; + // Ask the tool what profile we should use for this agent callback_data.cb( {.handle = ctx->context_idx}, @@ -455,7 +458,6 @@ start_agent_ctx(const context::context* ctx) continue; } - callback_data.set_profile = false; CHECK(callback_data.profile); // Generate necessary structures in the context (packet gen, etc) to process diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.hpp index 8436bbc291..6a1a738b8f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/device_counting.hpp @@ -73,7 +73,13 @@ struct agent_callback_data , agent_id(rhs.agent_id) , cb(rhs.cb) , buffer(rhs.buffer) - {} + { + rhs.completion.handle = 0; + rhs.start_signal.handle = 0; + } + + agent_callback_data& operator=(const agent_callback_data&) = delete; + agent_callback_data(const agent_callback_data&) = delete; ~agent_callback_data(); };