diff --git a/samples/counter_collection/CMakeLists.txt b/samples/counter_collection/CMakeLists.txt index bb2631b755..128440e3f4 100644 --- a/samples/counter_collection/CMakeLists.txt +++ b/samples/counter_collection/CMakeLists.txt @@ -106,3 +106,29 @@ set_tests_properties( PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT "${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}") + +add_library(counter-collection-agent-profiling-client SHARED) +target_sources(counter-collection-agent-profiling-client PRIVATE agent_profiling.cpp + client.hpp) +target_link_libraries( + counter-collection-agent-profiling-client + PUBLIC rocprofiler::samples-build-flags + PRIVATE rocprofiler-sdk::rocprofiler-sdk rocprofiler::samples-common-library) + +add_executable(counter-collection-agent-profiling) +target_sources(counter-collection-agent-profiling PRIVATE main.cpp) +target_link_libraries(counter-collection-agent-profiling + PRIVATE counter-collection-agent-profiling-client Threads::Threads) + +rocprofiler_samples_get_preload_env(PRELOAD_ENV counter-collection-agent-profiling-client) + +set(counter-collection-functional-counter-env "${PRELOAD_ENV}" "${LIBRARY_PATH_ENV}") + +add_test(NAME counter-collection-agent-profiling + COMMAND $) + +set_tests_properties( + counter-collection-agent-profiling + PROPERTIES TIMEOUT 120 LABELS "samples" ENVIRONMENT + "${counter-collection-functional-counter-env}" FAIL_REGULAR_EXPRESSION + "${ROCPROFILER_DEFAULT_FAIL_REGEX}") diff --git a/samples/counter_collection/agent_profiling.cpp b/samples/counter_collection/agent_profiling.cpp new file mode 100644 index 0000000000..b9a3435c72 --- /dev/null +++ b/samples/counter_collection/agent_profiling.cpp @@ -0,0 +1,347 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "client.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ + << " failed with error code " << CHECKSTATUS << ": " << status_msg \ + << std::endl; \ + std::stringstream errmsg{}; \ + errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ + << status_msg << ")"; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + +int +start() +{ + return 1; +} + +namespace +{ +rocprofiler_context_id_t& +get_client_ctx() +{ + static rocprofiler_context_id_t ctx; + return ctx; +} + +rocprofiler_buffer_id_t& +get_buffer() +{ + static rocprofiler_buffer_id_t buf = {}; + return buf; +} + +/** + * Buffer callback called when the buffer is full. rocprofiler_record_header_t + * can contain counter records as well as other records (such as tracing). These + * records need to be filtered based on the category type. + */ +void +buffered_callback(rocprofiler_context_id_t, + rocprofiler_buffer_id_t, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* user_data, + uint64_t) +{ + std::stringstream ss; + // Iterate through the returned records + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER) + {} + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) + { + // Print the returned counter data. + auto* record = static_cast(header->payload); + ss << " (Id: " << record->id << " Value [D]: " << record->counter_value << "," + << " user_data: " << record->user_data.value << "),"; + } + } + + auto* output_stream = static_cast(user_data); + if(!output_stream) throw std::runtime_error{"nullptr to output stream"}; + + *output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n"; +} + +std::unordered_map& +get_profile_cache() +{ + static std::unordered_map profile_cache; + return profile_cache; +} + +/** + * Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue. + * rocprofiler_profile_config_id_t* is a return to specify what counters to collect + * for this dispatch (dispatch_packet). This example function creates a profile + * to collect the counter SQ_WAVES for all kernel dispatch packets. + */ +void +set_profile(rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent, + rocprofiler_agent_set_profile_callback_t set_config, + void*) +{ + /** + * This simple example uses the same profile counter set for all agents. + * We store this in a cache to prevent constructing many identical profile counter + * sets. We first check the cache to see if we have already constructed a counter" + * set for the agent. If we have, return it. Otherwise, construct a new profile counter + * set. + */ + auto search_cache = [&]() { + if(auto pos = get_profile_cache().find(agent.handle); pos != get_profile_cache().end()) + { + set_config(context_id, pos->second); + return true; + } + return false; + }; + + if(!search_cache()) + { + std::cerr << "No profile for agent found in cache\n"; + exit(-1); + } +} + +rocprofiler_profile_config_id_t +build_profile_for_agent(rocprofiler_agent_id_t agent) +{ + std::set counters_to_collect = {"SQ_WAVES"}; + std::vector gpu_counters; + + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + agent, + [](rocprofiler_agent_id_t, + rocprofiler_counter_id_t* counters, + size_t num_counters, + void* user_data) { + std::vector* vec = + static_cast*>(user_data); + for(size_t i = 0; i < num_counters; i++) + { + vec->push_back(counters[i]); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + static_cast(&gpu_counters)), + "Could not fetch supported counters"); + + std::vector collect_counters; + for(auto& counter : gpu_counters) + { + rocprofiler_counter_info_v0_t version; + ROCPROFILER_CALL( + rocprofiler_query_counter_info( + counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast(&version)), + "Could not query info for counter"); + if(counters_to_collect.count(std::string(version.name)) > 0) + { + std::clog << "Counter: " << counter.handle << " " << version.name << "\n"; + collect_counters.push_back(counter); + } + } + + rocprofiler_profile_config_id_t profile; + ROCPROFILER_CALL(rocprofiler_create_profile_config( + agent, collect_counters.data(), collect_counters.size(), &profile), + "Could not construct profile cfg"); + + return profile; +} + +std::atomic& +exit_toggle() +{ + static std::atomic exit_toggle = false; + return exit_toggle; +} + +int +tool_init(rocprofiler_client_finalize_t, void* user_data) +{ + ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed"); + + ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), + 4096, + 2048, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + buffered_callback, + user_data, + &get_buffer()), + "buffer creation failed"); + + std::vector agents; + rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver, + const void** agents_arr, + size_t num_agents, + void* udata) { + if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0) + throw std::runtime_error{"unexpected rocprofiler agent version"}; + auto* agents_v = static_cast*>(udata); + for(size_t i = 0; i < num_agents; ++i) + agents_v->emplace_back(*static_cast(agents_arr[i])); + return ROCPROFILER_STATUS_SUCCESS; + }; + + ROCPROFILER_CALL( + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, + iterate_cb, + sizeof(rocprofiler_agent_t), + const_cast(static_cast(&agents))), + "query available agents"); + + auto client_thread = rocprofiler_callback_thread_t{}; + ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread), + "failure creating callback thread"); + ROCPROFILER_CALL(rocprofiler_assign_callback_thread(get_buffer(), client_thread), + "failed to assign thread for buffer"); + + // Construct the profiles in advance for each agent that is a GPU + rocprofiler_agent_id_t agent_id; + for(const auto& agent : agents) + { + if(agent.type == ROCPROFILER_AGENT_TYPE_GPU) + { + get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id)); + agent_id = agent.id; + break; + } + } + + if(agents.empty()) + { + std::cerr << "No agents found" << std::endl; + return 1; + } + + ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service( + get_client_ctx(), get_buffer(), agent_id, set_profile, nullptr), + "Could not setup buffered service"); + + std::thread([=]() { + size_t count = 1; + rocprofiler_start_context(get_client_ctx()); + while(exit_toggle().load() == false) + { + rocprofiler_sample_agent_profile_counting_service( + get_client_ctx(), {.value = count}, ROCPROFILER_COUNTER_FLAG_NONE); + count++; + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + exit_toggle().store(false); + }).detach(); + + // no errors + return 0; +} + +void +tool_fini(void* user_data) +{ + exit_toggle().store(true); + while(exit_toggle().load() == true) + {}; + + std::clog << "In tool fini\n"; + rocprofiler_stop_context(get_client_ctx()); + ROCPROFILER_CALL(rocprofiler_flush_buffer(get_buffer()), "buffer flush"); + + auto* output_stream = static_cast(user_data); + *output_stream << std::flush; + if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream; +} +} // namespace + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* id) +{ + // set the client name + id->name = "CounterClientSample"; + + // compute major/minor/patch version info + uint32_t major = version / 10000; + uint32_t minor = (version % 10000) / 100; + uint32_t patch = version % 100; + + // generate info string + auto info = std::stringstream{}; + info << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major << "." + << minor << "." << patch << " (" << runtime_version << ")"; + + std::clog << info.str() << std::endl; + + std::ostream* output_stream = nullptr; + std::string filename = "counter_collection.log"; + if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE"); outfile) filename = outfile; + if(filename == "stdout") + output_stream = &std::cout; + else if(filename == "stderr") + output_stream = &std::cerr; + else + output_stream = new std::ofstream{filename}; + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + &tool_init, + &tool_fini, + static_cast(output_stream)}; + + // return pointer to configure data + return &cfg; +} diff --git a/source/include/rocprofiler-sdk/agent_profile.h b/source/include/rocprofiler-sdk/agent_profile.h index 167eb43ce3..f339770e78 100644 --- a/source/include/rocprofiler-sdk/agent_profile.h +++ b/source/include/rocprofiler-sdk/agent_profile.h @@ -25,36 +25,98 @@ #include #include -ROCPROFILER_EXTERN_C_INIT - /** * @defgroup AGENT_PROFILE_COUNTING_SERVICE Agent Profile Counting Service * @brief needs brief description * * @{ */ +ROCPROFILER_EXTERN_C_INIT /** - * @brief Configure Profile Counting Service for agent. + * @brief Callback to set the profile config for the agent. * * @param [in] context_id context id - * @param [in] buffer_id id of the buffer to use for the counting service * @param [in] config_id Profile config detailing the counters to collect for this kernel * @return ::rocprofiler_status_t + * @retval ::ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND Returned if the config_id is not found + * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the ctx is not valid + * @retval ::ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED Returned if attempting to make this + * call outside of context startup. + * @retval ::ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH Agent of profile does not match agent of the + * context. + * @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_profile_config_id_t config_id); +typedef rocprofiler_status_t (*rocprofiler_agent_set_profile_callback_t)( + rocprofiler_context_id_t context_id, + rocprofiler_profile_config_id_t config_id); /** - * @brief Sample Profile Counting Service for agent. + * @brief Configure Profile Counting Service for agent. Called when the context is started. + * Selects the counters to be used for agent profiling. * - * @param [out] data // It is always a size of one - * @return ::rocprofiler_status_t + * @param [in] context_id context id + * @param [in] agent_id agent id + * @param [in] set_config Function to call to set the profile config (see + * rocprofiler_agent_set_profile_callback_t) + * @param [in] user_data Data supplied to rocprofiler_configure_agent_profile_counting_service */ -rocprofiler_status_t ROCPROFILER_API -rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id); +typedef void (*rocprofiler_agent_profile_callback_t)( + rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_set_profile_callback_t set_config, + void* user_data); + +/** + * @brief Configure Profile Counting Service for agent. There may only be one agent profile + * configured per context and can be only one active context that is profiling a single agent + * at a time. Multiple agent contexts can be started at the same time if they are profiling + * different agents. + * + * @param [in] context_id context id + * @param [in] buffer_id id of the buffer to use for the counting service. When + * rocprofiler_sample_agent_profile_counting_service is called, counter data will be written + * to this buffer. + * @param [in] agent_id agent to configure profiling on. + * @param [in] cb Callback called when the context is started for the tool to specify what + * counters to collect (rocprofiler_profile_config_id_t). + * @param [in] user_data User supplied data to be passed to the callback cb when triggered + * @param [in] config_id Profile config detailing the counters to collect for this kernel + * @return ::rocprofiler_status_t + * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist. + * @retval ::ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND Returned if the buffer is not found. + * @retval ::ROCPROFILER_STATUS_SUCCESS Returned if succesfully configured + */ +rocprofiler_status_t +rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data) + ROCPROFILER_NONNULL(4) ROCPROFILER_API; + +/** + * @brief Trigger a read of the counter data for the agent profile. The counter data will be + * written to the buffer specified in rocprofiler_configure_agent_profile_counting_service. + * The data in rocprofiler_user_data_t will be written to the buffer along with the counter data. + * flags can be used to specify if this call should be performed asynchronously (default is + * synchronous). + * + * @param [in] context_id context id + * @param [in] user_data User supplied data, included in records outputted to buffer. + * @param [in] flags Flags to specify how the counter data should be collected (defaults to sync). + * @return ::rocprofiler_status_t + * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID Returned if the context does not exist or + * the context is not configured for agent profiling. + * @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR Returned if another operation is in progress ( + * start/stop ctx or another read). + * @retval ::ROCPROFILER_STATUS_ERROR Returned if HSA has not been initialized yet. + * @retval ::ROCPROFILER_STATUS_SUCCESS Returned if read request was successful. + */ +rocprofiler_status_t +rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id, + rocprofiler_user_data_t user_data, + rocprofiler_counter_flag_t flags) ROCPROFILER_API; /** @} */ diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index 0216b47e8a..2e8a5b22d6 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -97,6 +97,11 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND, ///< Could not find the counter profile ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT, ///< Cannot enable both agent and dispatch ///< counting in the same context. + ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT, ///< No agent context found, may not be an error + ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED, ///< Sample rate exceeded + ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE, ///< Profile queue creation failed + ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS, ///< No hardware counters were specified + ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH, ///< Agent mismatch between profile and context. ROCPROFILER_STATUS_LAST, } rocprofiler_status_t; @@ -385,6 +390,16 @@ typedef enum /// ::rocprofiler_profile_counting_dispatch_record_t } rocprofiler_counter_record_kind_t; +/** + * @brief Enumeration of flags that can be used with some counter api calls + */ +typedef enum +{ + ROCPROFILER_COUNTER_FLAG_NONE = 0, + ROCPROFILER_COUNTER_FLAG_ASYNC, ///< Do not wait for completion before returning. + ROCPROFILER_COUNTER_FLAG_LAST, +} rocprofiler_counter_flag_t; + //--------------------------------------------------------------------------------------// // // ALIASES @@ -651,6 +666,7 @@ typedef struct rocprofiler_counter_instance_id_t id; ///< counter identifier double counter_value; ///< counter value rocprofiler_dispatch_id_t dispatch_id; + rocprofiler_user_data_t user_data; /// @var dispatch_id /// @brief A value greater than zero indicates that this counter record is associated with a diff --git a/source/lib/rocprofiler-sdk/agent.cpp b/source/lib/rocprofiler-sdk/agent.cpp index 95b668336f..df37b3eef3 100644 --- a/source/lib/rocprofiler-sdk/agent.cpp +++ b/source/lib/rocprofiler-sdk/agent.cpp @@ -656,8 +656,8 @@ get_agent_topology() auto& get_agent_caches() { - static auto _v = std::vector{}; - return _v; + static auto*& _v = common::static_object>::construct(); + return *_v; } struct agent_pair @@ -920,7 +920,7 @@ construct_agent_cache(::HsaApiTable* table) try { get_agent_caches().emplace_back( - rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_); + rocp_agent, hsa_agent, itr.first, _nearest_cpu, *table->amd_ext_, *table->core_); } catch(std::runtime_error& err) { if(rocp_agent->type == ROCPROFILER_AGENT_TYPE_GPU) diff --git a/source/lib/rocprofiler-sdk/agent_profile.cpp b/source/lib/rocprofiler-sdk/agent_profile.cpp index 7c484d6dc5..35470079a5 100644 --- a/source/lib/rocprofiler-sdk/agent_profile.cpp +++ b/source/lib/rocprofiler-sdk/agent_profile.cpp @@ -22,14 +22,29 @@ #include +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp" #include "lib/rocprofiler-sdk/counters/core.hpp" +#include "rocprofiler-sdk/fwd.h" extern "C" { rocprofiler_status_t ROCPROFILER_API -rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_profile_config_id_t config_id) +rocprofiler_configure_agent_profile_counting_service(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data) { - return rocprofiler::counters::configure_agent_collection(context_id, buffer_id, config_id); + return rocprofiler::counters::configure_agent_collection( + context_id, buffer_id, agent_id, cb, user_data); +} + +rocprofiler_status_t ROCPROFILER_API +rocprofiler_sample_agent_profile_counting_service(rocprofiler_context_id_t context_id, + rocprofiler_user_data_t user_data, + rocprofiler_counter_flag_t flags) +{ + return rocprofiler::counters::read_agent_ctx( + rocprofiler::context::get_registered_context(context_id), user_data, flags); } } \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/aql/helpers.cpp b/source/lib/rocprofiler-sdk/aql/helpers.cpp index 5eec9dd9e9..0628f4af94 100644 --- a/source/lib/rocprofiler-sdk/aql/helpers.cpp +++ b/source/lib/rocprofiler-sdk/aql/helpers.cpp @@ -26,6 +26,7 @@ #include +#include "lib/common/logging.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/counters/id_decode.hpp" @@ -111,5 +112,54 @@ get_dim_info(rocprofiler_agent_id_t agent, return ROCPROFILER_STATUS_SUCCESS; } + +rocprofiler_status_t +set_profiler_active_on_queue(const AmdExtTable& api, + hsa_amd_memory_pool_t pool, + hsa_agent_t hsa_agent, + const rocprofiler_profile_pkt_cb& packet_submit) +{ + // Inject packet to enable profiling of other process queues on this queue + hsa_ven_amd_aqlprofile_profile_t profile{}; + profile.agent = hsa_agent; + + // Query for cmd buffer size + hsa_ven_amd_aqlprofile_info_type_t info_type = + (hsa_ven_amd_aqlprofile_info_type_t)((int) HSA_VEN_AMD_AQLPROFILE_INFO_ENABLE_CMD); + if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, nullptr) != HSA_STATUS_SUCCESS) + { + return ROCPROFILER_STATUS_ERROR; + } + // Allocate cmd buffer + const size_t mask = 0x1000 - 1; + auto size = (profile.command_buffer.size + mask) & ~mask; + + if(api.hsa_amd_memory_pool_allocate_fn(pool, size, 0, &profile.command_buffer.ptr) != + HSA_STATUS_SUCCESS) + { + ROCP_WARNING << "Failed to allocate memory to enable profile command on agent, some " + "counters will be unavailable"; + return ROCPROFILER_STATUS_ERROR; + } + if(api.hsa_amd_agents_allow_access_fn(1, &hsa_agent, nullptr, profile.command_buffer.ptr) != + HSA_STATUS_SUCCESS) + { + ROCP_WARNING << "Agent cannot access memory, some counters will be unavailable"; + return ROCPROFILER_STATUS_ERROR; + } + + hsa::rocprofiler_packet packet{}; + if(hsa_ven_amd_aqlprofile_get_info(&profile, info_type, &packet.ext_amd_aql_pm4) != + HSA_STATUS_SUCCESS) + { + ROCP_WARNING << "Failed to generate command packet, some counters will be unavailable"; + return ROCPROFILER_STATUS_ERROR; + } + + packet_submit(packet); + api.hsa_amd_memory_pool_free_fn(profile.command_buffer.ptr); + return ROCPROFILER_STATUS_SUCCESS; +} + } // namespace aql } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/aql/helpers.hpp b/source/lib/rocprofiler-sdk/aql/helpers.hpp index bbaa104e48..802479871a 100644 --- a/source/lib/rocprofiler-sdk/aql/helpers.hpp +++ b/source/lib/rocprofiler-sdk/aql/helpers.hpp @@ -32,11 +32,13 @@ #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/counters/metrics.hpp" +#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp" namespace rocprofiler { namespace aql { +using rocprofiler_profile_pkt_cb = std::function; // Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile hsa_ven_amd_aqlprofile_id_query_t get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric); @@ -58,5 +60,11 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id, hsa_agent_t agent, hsa_ven_amd_aqlprofile_event_t event, uint32_t sample_id); + +rocprofiler_status_t +set_profiler_active_on_queue(const AmdExtTable& api, + hsa_amd_memory_pool_t pool, + hsa_agent_t hsa_agent, + const rocprofiler_profile_pkt_cb& packet_submit); } // namespace aql } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/context/context.cpp b/source/lib/rocprofiler-sdk/context/context.cpp index 6a3d3368ed..9ca3797f54 100644 --- a/source/lib/rocprofiler-sdk/context/context.cpp +++ b/source/lib/rocprofiler-sdk/context/context.cpp @@ -317,10 +317,13 @@ start_context(rocprofiler_context_id_t context_id) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED; } + auto status = ROCPROFILER_STATUS_SUCCESS; + if(cfg->counter_collection) rocprofiler::counters::start_context(cfg); if(cfg->thread_trace) cfg->thread_trace->start_context(); + if(cfg->agent_counter_collection) status = rocprofiler::counters::start_agent_ctx(cfg); - return ROCPROFILER_STATUS_SUCCESS; + return status; } rocprofiler_status_t @@ -344,9 +347,16 @@ stop_context(rocprofiler_context_id_t idx) if(nactive > 0) get_num_active_contexts().fetch_sub(1, std::memory_order_release); if(_expected->counter_collection) + { rocprofiler::counters::stop_context(const_cast(_expected)); - else if(_expected->thread_trace) - _expected->thread_trace->stop_context(); + } + + if(_expected->thread_trace) _expected->thread_trace->stop_context(); + + if(_expected->agent_counter_collection) + { + rocprofiler::counters::stop_agent_ctx(const_cast(_expected)); + } return ROCPROFILER_STATUS_SUCCESS; } } diff --git a/source/lib/rocprofiler-sdk/context/context.hpp b/source/lib/rocprofiler-sdk/context/context.hpp index 2ebe9218b5..cb5286a0d7 100644 --- a/source/lib/rocprofiler-sdk/context/context.hpp +++ b/source/lib/rocprofiler-sdk/context/context.hpp @@ -30,9 +30,11 @@ #include "lib/common/synchronized.hpp" #include "lib/rocprofiler-sdk/context/correlation_id.hpp" #include "lib/rocprofiler-sdk/context/domain.hpp" +#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp" #include "lib/rocprofiler-sdk/counters/core.hpp" #include "lib/rocprofiler-sdk/external_correlation.hpp" #include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "rocprofiler-sdk/agent.h" #include #include @@ -86,12 +88,28 @@ struct dispatch_counter_collection_service struct agent_counter_collection_service { + rocprofiler::counters::agent_callback_data callback_data; + // Signal to manage the startup of the context. Allows us to ensure that + // the AQL packet we inject with start_context() completes before returning + hsa_signal_t start_signal; std::shared_ptr profile; rocprofiler_buffer_id_t buffer; + rocprofiler_agent_id_t agent_id; + rocprofiler_agent_profile_callback_t cb; + void* user_data; // A flag to state wether or not the counter set is currently enabled. This is primarily - // to protect against multithreaded calls to enable a context (and enabling already enabled - // counters). - std::atomic enabled{false}; + // to protect against multithreaded calls to enable a context (and enabling already + // enabled counters). + + enum class state + { + DISABLED, + LOCKED, + ENABLED + }; + std::atomic status{state::DISABLED}; + + common::Synchronized enabled{false}; }; struct context diff --git a/source/lib/rocprofiler-sdk/counters/CMakeLists.txt b/source/lib/rocprofiler-sdk/counters/CMakeLists.txt index 30d9abb67b..6256c85c35 100644 --- a/source/lib/rocprofiler-sdk/counters/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/counters/CMakeLists.txt @@ -1,7 +1,9 @@ -set(ROCPROFILER_LIB_COUNTERS_SOURCES metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp - id_decode.cpp dispatch_handlers.cpp controller.cpp) -set(ROCPROFILER_LIB_COUNTERS_HEADERS metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp - id_decode.hpp dispatch_handlers.hpp controller.hpp) +set(ROCPROFILER_LIB_COUNTERS_SOURCES + metrics.cpp dimensions.cpp evaluate_ast.cpp core.cpp id_decode.cpp + dispatch_handlers.cpp controller.cpp agent_profiling.cpp) +set(ROCPROFILER_LIB_COUNTERS_HEADERS + metrics.hpp dimensions.hpp evaluate_ast.hpp core.hpp id_decode.hpp + dispatch_handlers.hpp controller.hpp agent_profiling.hpp) target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_COUNTERS_SOURCES} ${ROCPROFILER_LIB_COUNTERS_HEADERS}) diff --git a/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp b/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp new file mode 100644 index 0000000000..f6c33916dd --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/agent_profiling.cpp @@ -0,0 +1,492 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/rocprofiler-sdk/counters/agent_profiling.hpp" +#include + +#include "lib/common/logging.hpp" +#include "lib/rocprofiler-sdk/buffer.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/counters/controller.hpp" +#include "lib/rocprofiler-sdk/counters/core.hpp" +#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" +#include "lib/rocprofiler-sdk/hsa/rocprofiler_packet.hpp" +#include "rocprofiler-sdk/fwd.h" + +namespace rocprofiler +{ +namespace counters +{ +std::atomic& +hsa_inited() +{ + static std::atomic inited{false}; + return inited; +} + +uint64_t +submitPacket(const CoreApiTable& table, hsa_queue_t* queue, const void* packet) +{ + const uint32_t pkt_size = 0x40; + + // advance command queue + const uint64_t write_idx = table.hsa_queue_add_write_index_scacq_screl_fn(queue, 1); + while((write_idx - table.hsa_queue_load_read_index_relaxed_fn(queue)) >= queue->size) + { + sched_yield(); + } + + const uint32_t slot_idx = (uint32_t)(write_idx % queue->size); + // NOLINTBEGIN(performance-no-int-to-ptr) + uint32_t* queue_slot = + reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * pkt_size)); + // NOLINTEND(performance-no-int-to-ptr) + + const uint32_t* slot_data = reinterpret_cast(packet); + + // Copy buffered commands into the queue slot. + // Overwrite the AQL invalid header (first dword) last. + // This prevents the slot from being read until it's fully written. + memcpy(&queue_slot[1], &slot_data[1], pkt_size - sizeof(uint32_t)); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); + header_atomic_ptr->store(slot_data[0], std::memory_order_release); + + // ringdoor bell + table.hsa_signal_store_relaxed_fn(queue->doorbell_signal, write_idx); + + return write_idx; +} + +namespace +{ +uint16_t +header_pkt(hsa_packet_type_t type) +{ + uint16_t header = type << HSA_PACKET_HEADER_TYPE; + header |= 1 << HSA_PACKET_HEADER_BARRIER; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE; + return header; +} + +std::unique_ptr +construct_aql_pkt(const hsa::AgentCache& agent, std::shared_ptr& profile) +{ + if(counter_callback_info::setup_profile_config(agent, profile) != ROCPROFILER_STATUS_SUCCESS) + { + return nullptr; + } + + auto pkts = profile->pkt_generator->construct_packet( + CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table()); + + pkts->start.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + pkts->start.completion_signal.handle = 0; + pkts->stop.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + pkts->read.header = header_pkt(HSA_PACKET_TYPE_VENDOR_SPECIFIC); + return pkts; +} + +bool +agent_async_handler(hsa_signal_value_t /*signal_v*/, void* data) +{ + const auto* ctx = context::get_registered_context({.handle = (uint64_t) data}); + if(!ctx) return false; + + const auto& agent_ctx = *ctx->agent_counter_collection; + const auto& prof_config = agent_ctx.profile; + + // Decode the AQL packet data + auto decoded_pkt = + EvaluateAST::read_pkt(prof_config->pkt_generator.get(), *agent_ctx.callback_data.packet); + EvaluateAST::read_special_counters( + *prof_config->agent, prof_config->required_special_counters, decoded_pkt); + + auto* buf = buffer::get_buffer(agent_ctx.buffer.handle); + if(!buf) + { + ROCP_FATAL << fmt::format("Buffer {} destroyed before record was written", + agent_ctx.buffer.handle); + return false; + } + + // Write out the AQL data to the buffer + for(auto& ast : prof_config->asts) + { + std::vector>> cache; + auto* ret = CHECK_NOTNULL(ast.evaluate(decoded_pkt, cache)); + ast.set_out_id(*ret); + for(auto& val : *ret) + { + val.user_data = agent_ctx.callback_data.user_data; + buf->emplace( + ROCPROFILER_BUFFER_CATEGORY_COUNTERS, ROCPROFILER_COUNTER_RECORD_VALUE, val); + } + } + + // reset the signal to allow another sample to start + agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion, + 1); + return true; +} + +void +init_callback_data(const rocprofiler::context::context& ctx, const hsa::AgentCache& agent) +{ + // Note: Calls to this function should be protected by agent_ctx.status being set + // to LOCKED by the caller. This is to prevent multiple threads from trying to + // setup the same agent at the same time. + auto& agent_ctx = *ctx.agent_counter_collection; + if(agent_ctx.callback_data.packet) return; + + agent_ctx.callback_data.packet = construct_aql_pkt(agent, agent_ctx.profile); + + if(agent_ctx.callback_data.completion.handle != 0) return; + + // If we do not have a completion handle, this is our first time profiling this agent. + // Setup our shared data structures. + agent_ctx.callback_data.queue = agent.profile_queue(); + + agent_ctx.callback_data.table = CHECK_NOTNULL(hsa::get_queue_controller())->get_core_table(); + + // Tri-state signal + // 1: allow next sample to start + // 0: sample in progress + // -1: sample complete + CHECK_EQ(agent_ctx.callback_data.table.hsa_signal_create_fn( + 1, 0, nullptr, &agent_ctx.callback_data.completion), + HSA_STATUS_SUCCESS); + + // Signal to manage the startup of the context. Allows us to ensure that + // the AQL packet we inject with start_context() completes before returning + CHECK_EQ( + agent_ctx.callback_data.table.hsa_signal_create_fn(1, 0, nullptr, &agent_ctx.start_signal), + HSA_STATUS_SUCCESS); + + // Setup callback + // NOLINTBEGIN(performance-no-int-to-ptr) + CHECK_EQ(CHECK_NOTNULL(hsa::get_queue_controller()) + ->get_ext_table() + .hsa_amd_signal_async_handler_fn(agent_ctx.callback_data.completion, + HSA_SIGNAL_CONDITION_LT, + 0, + agent_async_handler, + (void*) ctx.context_idx), + HSA_STATUS_SUCCESS); + // NOLINTEND(performance-no-int-to-ptr) + + // Set state of the queue to allow profiling (may not be needed since AQL + // may do this in the future). + aql::set_profiler_active_on_queue( + CHECK_NOTNULL(hsa::get_queue_controller())->get_ext_table(), + agent.cpu_pool(), + agent.get_hsa_agent(), + [&](hsa::rocprofiler_packet pkt) { + pkt.ext_amd_aql_pm4.completion_signal = agent_ctx.callback_data.completion; + submitPacket( + agent_ctx.callback_data.table, agent_ctx.callback_data.queue, (void*) &pkt); + if(agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn( + agent_ctx.callback_data.completion, + HSA_SIGNAL_CONDITION_EQ, + 0, + 20000000, + HSA_WAIT_STATE_ACTIVE) != 0) + { + ROCP_FATAL << "Could not set agent to be profiled"; + } + agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn( + agent_ctx.callback_data.completion, 1); + }); +} +} // namespace + +rocprofiler_status_t +read_agent_ctx(const context::context* ctx, + rocprofiler_user_data_t user_data, + rocprofiler_counter_flag_t flags) +{ + if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile) + { + if(!ctx->agent_counter_collection) + { + ROCP_ERROR << fmt::format("Context {} has no agent counter collection", + ctx->context_idx); + } + else + { + ROCP_ERROR << fmt::format("Context {} has no profile", ctx->context_idx); + } + return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; + } + + auto& agent_ctx = *ctx->agent_counter_collection; + + if(hsa_inited().load() == false) + { + return ROCPROFILER_STATUS_ERROR; + } + + const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent); + + // If the agent no longer exists or we don't have a profile queue, reading is an error + if(!agent || !agent->profile_queue()) return ROCPROFILER_STATUS_ERROR; + + // Set the state to LOCKED to prevent other calls to start/stop/read. + auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED; + if(!agent_ctx.status.compare_exchange_strong( + expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED)) + { + return ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR; + } + + CHECK(agent_ctx.callback_data.packet); + + ROCP_TRACE << fmt::format("Agent Infor for Running Counter: Name = {}, XCC = {}, " + "SE = {}, CU = {}, SIMD = {}", + agent->get_rocp_agent()->name, + agent->get_rocp_agent()->num_xcc, + agent->get_rocp_agent()->num_shader_banks, + agent->get_rocp_agent()->cu_count, + agent->get_rocp_agent()->simd_arrays_per_engine); + + // Remove when AQL is updated to not require stop to be called first + submitPacket(agent_ctx.callback_data.table, + agent->profile_queue(), + (void*) &agent_ctx.callback_data.packet->stop); + + // Submit the read packet to the queue + submitPacket(agent_ctx.callback_data.table, + agent->profile_queue(), + (void*) &agent_ctx.callback_data.packet->read); + + // Submit a barrier packet. This is needed to flush hardware caches. Without this + // the read packet may not have the correct data. + rocprofiler::hsa::rocprofiler_packet barrier{}; + barrier.barrier_and.header = header_pkt(HSA_PACKET_TYPE_BARRIER_AND); + barrier.barrier_and.completion_signal = agent_ctx.callback_data.completion; + agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.callback_data.completion, + 0); + agent_ctx.callback_data.user_data = user_data; + submitPacket( + agent_ctx.callback_data.table, agent->profile_queue(), (void*) &barrier.barrier_and); + + // Wait for the barrier/read packet to complete + if(flags != ROCPROFILER_COUNTER_FLAG_ASYNC) + { + // Wait for any inprogress samples to complete before returning + agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion, + HSA_SIGNAL_CONDITION_EQ, + 1, + UINT64_MAX, + HSA_WAIT_STATE_ACTIVE); + } + + agent_ctx.status.exchange( + rocprofiler::context::agent_counter_collection_service::state::ENABLED); + return ROCPROFILER_STATUS_SUCCESS; +} + +rocprofiler_status_t +start_agent_ctx(const context::context* ctx) +{ + auto status = ROCPROFILER_STATUS_SUCCESS; + if(!ctx->agent_counter_collection) + { + return status; + } + + auto& agent_ctx = *ctx->agent_counter_collection; + + if(hsa_inited().load() == false) + { + return ROCPROFILER_STATUS_SUCCESS; + } + + const auto* agent = agent::get_agent_cache(agent::get_agent(agent_ctx.agent_id)); + // Note: we may not have an AgentCache yet if HSA is not started. + // This is not an error and the startup will happen on hsa registration. + if(!agent) return ROCPROFILER_STATUS_ERROR; + + // But if we have an agent cache, we need a profile queue. + if(!agent->profile_queue()) + { + return ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE; + } + + // Set the state to LOCKED to prevent other calls to start/stop/read. + auto expected = rocprofiler::context::agent_counter_collection_service::state::DISABLED; + if(!agent_ctx.status.compare_exchange_strong( + expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED)) + { + return ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED; + } + + // Ask the tool what profile we should use for this agent + agent_ctx.cb( + {.handle = ctx->context_idx}, + agent_ctx.agent_id, + [](rocprofiler_context_id_t context_id, + rocprofiler_profile_config_id_t config_id) -> rocprofiler_status_t { + auto* cb_ctx = rocprofiler::context::get_mutable_registered_context(context_id); + if(!cb_ctx) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; + + auto config = rocprofiler::counters::get_profile_config(config_id); + if(!config) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND; + + if(!cb_ctx->agent_counter_collection) + { + return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; + } + + // Only allow profiles to be set in the locked state + if(cb_ctx->agent_counter_collection->status.load() != + rocprofiler::context::agent_counter_collection_service::state::LOCKED) + { + return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; + } + + // Only update the profile if it has changed. Avoids packet regeneration. + if(!cb_ctx->agent_counter_collection->profile || + cb_ctx->agent_counter_collection->profile->id.handle != config_id.handle) + { + if(cb_ctx->agent_counter_collection->agent_id.handle != config->agent->id.handle) + { + return ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH; + } + + cb_ctx->agent_counter_collection->profile = config; + cb_ctx->agent_counter_collection->callback_data.packet.reset(); + } + return ROCPROFILER_STATUS_SUCCESS; + }, + agent_ctx.user_data); + + // User didn't set a profile + if(!agent_ctx.profile) + { + agent_ctx.status.exchange( + rocprofiler::context::agent_counter_collection_service::state::DISABLED); + return status; + } + + // Generate necessary structures in the context (packet gen, etc) to process + // this packet. + init_callback_data(*ctx, *agent); + + // No hardware counters were actually asked for (i.e. all constants) + if(agent_ctx.profile->reqired_hw_counters.empty()) + { + agent_ctx.status.exchange( + rocprofiler::context::agent_counter_collection_service::state::DISABLED); + return ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS; + } + + // We could not generate AQL packets for some reason + if(!agent_ctx.callback_data.packet) + { + agent_ctx.status.exchange( + rocprofiler::context::agent_counter_collection_service::state::DISABLED); + return ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED; + } + + agent_ctx.callback_data.packet->start.completion_signal = agent_ctx.start_signal; + agent_ctx.callback_data.table.hsa_signal_store_relaxed_fn(agent_ctx.start_signal, 1); + submitPacket(agent_ctx.callback_data.table, + agent->profile_queue(), + (void*) &agent_ctx.callback_data.packet->start); + + // Wait for startup to finish before continuing + agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn( + agent_ctx.start_signal, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + + agent_ctx.status.exchange( + rocprofiler::context::agent_counter_collection_service::state::ENABLED); + return ROCPROFILER_STATUS_SUCCESS; +} + +rocprofiler_status_t +stop_agent_ctx(const context::context* ctx) +{ + auto status = ROCPROFILER_STATUS_SUCCESS; + if(!ctx->agent_counter_collection || !ctx->agent_counter_collection->profile) + { + return status; + } + + auto& agent_ctx = *ctx->agent_counter_collection; + + if(hsa_inited().load() == false) + { + return ROCPROFILER_STATUS_SUCCESS; + } + + const auto* agent = agent::get_agent_cache(agent_ctx.profile->agent); + if(!agent || !agent->profile_queue()) return status; + + auto expected = rocprofiler::context::agent_counter_collection_service::state::ENABLED; + if(!agent_ctx.status.compare_exchange_strong( + expected, rocprofiler::context::agent_counter_collection_service::state::LOCKED)) + { + // Status is already stopped or being enabled elsewhere. + return ROCPROFILER_STATUS_SUCCESS; + } + + CHECK(agent_ctx.callback_data.packet); + + submitPacket(agent_ctx.callback_data.table, + agent->profile_queue(), + (void*) &agent_ctx.callback_data.packet->stop); + + // Wait for any inprogress samples to complete before returning + agent_ctx.callback_data.table.hsa_signal_wait_relaxed_fn(agent_ctx.callback_data.completion, + HSA_SIGNAL_CONDITION_EQ, + 1, + UINT64_MAX, + HSA_WAIT_STATE_ACTIVE); + + return status; +} + +// If we have ctx's that were started before HSA was initialized, we need to +// actually start those contexts now. +rocprofiler_status_t +agent_profile_hsa_registration() +{ + hsa_inited().store(true); + + for(auto& ctx : context::get_active_contexts()) + { + if(!ctx->agent_counter_collection) continue; + start_agent_ctx(ctx); + } + + return ROCPROFILER_STATUS_SUCCESS; +} + +agent_callback_data::~agent_callback_data() +{ + if(completion.handle != 0) table.hsa_signal_destroy_fn(completion); +} +} // namespace counters +} // namespace rocprofiler \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/agent_profiling.hpp b/source/lib/rocprofiler-sdk/counters/agent_profiling.hpp new file mode 100644 index 0000000000..603828a3a1 --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/agent_profiling.hpp @@ -0,0 +1,86 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +#pragma once + +#include +#include +#include + +#include "lib/rocprofiler-sdk/hsa/aql_packet.hpp" + +namespace rocprofiler +{ +namespace context +{ +struct context; +} + +namespace counters +{ +struct agent_callback_data +{ + CoreApiTable table; + hsa_queue_t* queue{nullptr}; + std::unique_ptr packet; + + // Tri-state signal used to know what the current state of processing + // a sample is. The states are: + // 1: allow next sample to start (i.e. no in progress work) + // 0: sample in progress + // -1: sample complete (i.e. signal for caller that sample is ready) + hsa_signal_t completion{.handle = 0}; + rocprofiler_user_data_t user_data{.value = 0}; + ~agent_callback_data(); +}; + +// If we have contexts that are started before HSA init. This +// function will start those contexts. Should only be called +// as part of the HSA init process in rocprofiler. +rocprofiler_status_t +agent_profile_hsa_registration(); + +// Send the AQL start packet to a queue on the agent to start +// collecting counter data. This function is synchronous and will +// return when the agent has started collecting data (or if there +// is an error). +rocprofiler_status_t +start_agent_ctx(const context::context* ctx); + +// Send the AQL end packet to a queue on the agent to stop +// collecting counter data. This function is synchronous and will +// return when the agent has stopped collecting data (or if there +// is an error). +rocprofiler_status_t +stop_agent_ctx(const context::context* ctx); + +// Read the counter data from the agent. This function is synchronous +// if flags is not set to ASYNC. If ASYNC is set, the function will +// return before data has been written to the buffer. Overlapping +// read calls are not allowed in ASYNC mode and will result in +// this call waiting for the previous sample to complete. +rocprofiler_status_t +read_agent_ctx(const context::context* ctx, + rocprofiler_user_data_t user_data, + rocprofiler_counter_flag_t flags); + +} // namespace counters +} // namespace rocprofiler \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/controller.cpp b/source/lib/rocprofiler-sdk/counters/controller.cpp index 64447e3bff..3511483def 100644 --- a/source/lib/rocprofiler-sdk/counters/controller.cpp +++ b/source/lib/rocprofiler-sdk/counters/controller.cpp @@ -64,9 +64,11 @@ CounterController::destroy_profile(uint64_t id) } rocprofiler_status_t -CounterController::configure_agent_collection(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer, - rocprofiler_profile_config_id_t config_id) +CounterController::configure_agent_collection(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data) { auto* ctx_p = rocprofiler::context::get_mutable_registered_context(context_id); if(!ctx_p) return ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID; @@ -74,13 +76,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co auto& ctx = *ctx_p; if(ctx.counter_collection) return ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT; - if(!rocprofiler::buffer::get_buffer(buffer.handle)) + if(!rocprofiler::buffer::get_buffer(buffer_id.handle)) { return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND; } - auto cfg = get_profile_cfg(config_id); - - if(!cfg) return ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND; if(!ctx.agent_counter_collection) { @@ -88,8 +87,10 @@ CounterController::configure_agent_collection(rocprofiler_context_id_t co std::make_unique(); } - ctx.agent_counter_collection->profile = cfg; - ctx.agent_counter_collection->buffer = buffer; + ctx.agent_counter_collection->agent_id = agent_id; + ctx.agent_counter_collection->cb = cb; + ctx.agent_counter_collection->user_data = user_data; + ctx.agent_counter_collection->buffer = buffer_id; return ROCPROFILER_STATUS_SUCCESS; } diff --git a/source/lib/rocprofiler-sdk/counters/controller.hpp b/source/lib/rocprofiler-sdk/counters/controller.hpp index 2a29f1b1b0..5083810a71 100644 --- a/source/lib/rocprofiler-sdk/counters/controller.hpp +++ b/source/lib/rocprofiler-sdk/counters/controller.hpp @@ -87,9 +87,11 @@ public: void* record_callback_args); std::shared_ptr get_profile_cfg(rocprofiler_profile_config_id_t id); - rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer, - rocprofiler_profile_config_id_t config_id); + static rocprofiler_status_t configure_agent_collection(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data); private: rocprofiler::common::Synchronized>> diff --git a/source/lib/rocprofiler-sdk/counters/core.cpp b/source/lib/rocprofiler-sdk/counters/core.cpp index 9fca5a5bde..0150af79dd 100644 --- a/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/core.cpp @@ -207,11 +207,14 @@ stop_context(const context::context* ctx) } rocprofiler_status_t -configure_agent_collection(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_profile_config_id_t config_id) +configure_agent_collection(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data) { - return get_controller().configure_agent_collection(context_id, buffer_id, config_id); + return get_controller().configure_agent_collection( + context_id, buffer_id, agent_id, cb, user_data); } rocprofiler_status_t diff --git a/source/lib/rocprofiler-sdk/counters/core.hpp b/source/lib/rocprofiler-sdk/counters/core.hpp index 9e23badb76..6110c94c90 100644 --- a/source/lib/rocprofiler-sdk/counters/core.hpp +++ b/source/lib/rocprofiler-sdk/counters/core.hpp @@ -94,9 +94,11 @@ configure_callback_dispatch(rocprofiler_context_id_t con void* record_callback_args); rocprofiler_status_t -configure_agent_collection(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_profile_config_id_t config_id); +configure_agent_collection(rocprofiler_context_id_t context_id, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_agent_id_t agent_id, + rocprofiler_agent_profile_callback_t cb, + void* user_data); void start_context(const context::context*); diff --git a/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp b/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp index 8fdcf787ae..66b6082a19 100644 --- a/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp +++ b/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp @@ -57,7 +57,8 @@ get_reduce_op_type_from_string(const std::string& op) std::vector* perform_reduction(ReduceOperation reduce_op, std::vector* input_array) { - rocprofiler_record_counter_t result{.id = 0, .counter_value = 0, .dispatch_id = 0}; + rocprofiler_record_counter_t result{ + .id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}}; if(input_array->empty()) return input_array; switch(reduce_op) { @@ -83,12 +84,14 @@ perform_reduction(ReduceOperation reduce_op, std::vectorbegin(), input_array->end(), - rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0}, + rocprofiler_record_counter_t{ + .id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}}, [](auto& a, auto& b) { return rocprofiler_record_counter_t{ .id = a.id, .counter_value = a.counter_value + b.counter_value, - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); break; } @@ -97,12 +100,14 @@ perform_reduction(ReduceOperation reduce_op, std::vectorbegin(), input_array->end(), - rocprofiler_record_counter_t{.id = 0, .counter_value = 0, .dispatch_id = 0}, + rocprofiler_record_counter_t{ + .id = 0, .counter_value = 0, .dispatch_id = 0, .user_data = {.value = 0}}, [](auto& a, auto& b) { return rocprofiler_record_counter_t{ .id = a.id, .counter_value = a.counter_value + b.counter_value, - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); result.counter_value /= input_array->size(); break; @@ -219,7 +224,8 @@ EvaluateAST::EvaluateAST(rocprofiler_counter_id_t out_id, _raw_value = std::get(ast.value); _static_value.push_back({.id = 0, .counter_value = static_cast(std::get(ast.value)), - .dispatch_id = 0}); + .dispatch_id = 0, + .user_data = {.value = 0}}); } for(const auto& nextAst : ast.counter_set) @@ -596,28 +602,32 @@ EvaluateAST::evaluate( return rocprofiler_record_counter_t{ .id = a.id, .counter_value = a.counter_value + b.counter_value, - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); case SUBTRACTION_NODE: return perform_op([](auto& a, auto& b) { return rocprofiler_record_counter_t{ .id = a.id, .counter_value = a.counter_value - b.counter_value, - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); case MULTIPLY_NODE: return perform_op([](auto& a, auto& b) { return rocprofiler_record_counter_t{ .id = a.id, .counter_value = a.counter_value * b.counter_value, - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); case DIVIDE_NODE: return perform_op([](auto& a, auto& b) { return rocprofiler_record_counter_t{ .id = a.id, .counter_value = (b.counter_value == 0 ? 0 : a.counter_value / b.counter_value), - .dispatch_id = a.dispatch_id}; + .dispatch_id = a.dispatch_id, + .user_data = {.value = 0}}; }); case REFERENCE_NODE: { diff --git a/source/lib/rocprofiler-sdk/counters/tests/CMakeLists.txt b/source/lib/rocprofiler-sdk/counters/tests/CMakeLists.txt index 3ce3ef6440..c9ffcba857 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/counters/tests/CMakeLists.txt @@ -2,12 +2,54 @@ rocprofiler_deactivate_clang_tidy() include(GoogleTest) -set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES metrics_test.cpp evaluate_ast_test.cpp - dimension.cpp init_order.cpp core.cpp) +find_program( + amdclangpp_EXECUTABLE REQUIRED + NAMES amdclang++ + HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm + PATH_SUFFIXES bin llvm/bin NO_CACHE) + +function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE) + separate_arguments( + CLANG_ARG_LIST + UNIX_COMMAND + "-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1 + -target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}") + add_custom_command( + OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + COMMAND ${amdclangpp_EXECUTABLE} ${CLANG_ARG_LIST} + COMMAND + ${CMAKE_COMMAND} -E copy + ${PROJECT_BINARY_DIR}/source/lib/rocprofiler-sdk/counters/tests/${OUTPUT_FILE} + ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE} + OUTPUT ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE} + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE} + ${CMAKE_BINARY_DIR}/bin/${OUTPUT_FILE} + COMMENT "Building ${OUTPUT_FILE}...") + set(HSACO_TARGET_LIST + ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} + PARENT_SCOPE) +endfunction(generate_hsaco) + +foreach(target_id ${GPU_TARGETS}) + # generate kernel bitcodes + generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/agent_kernels.cl + ${target_id}_agent_kernels.hsaco) +endforeach() + +add_custom_target(agent_hasco_targets DEPENDS ${HSACO_TARGET_LIST}) + +set(ROCPROFILER_LIB_COUNTER_TEST_SOURCES + metrics_test.cpp evaluate_ast_test.cpp dimension.cpp init_order.cpp core.cpp + code_object_loader.cpp agent_profiling.cpp) +set(ROCPROFILER_LIB_COUNTER_TEST_HEADERS code_object_loader.hpp agent_profiling.hpp) add_executable(counter-test) -target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES}) +target_sources(counter-test PRIVATE ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES} + ${ROCPROFILER_LIB_COUNTER_TEST_HEADERS}) + +add_dependencies(counter-test agent_hasco_targets) target_link_libraries( counter-test diff --git a/source/lib/rocprofiler-sdk/counters/tests/agent_kernels.cl b/source/lib/rocprofiler-sdk/counters/tests/agent_kernels.cl new file mode 100644 index 0000000000..7389288447 --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/tests/agent_kernels.cl @@ -0,0 +1,31 @@ +/* Copyright (c) 2022 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +__kernel void +null_kernel() +{ + int gid = (int)get_global_id(0); + int i = 0; + while (gid >= 0) { + gid = (int)get_global_id(0); + i++; + if (i > 1000) break; + } +} diff --git a/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp new file mode 100644 index 0000000000..d20690c3d7 --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.cpp @@ -0,0 +1,483 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp" +#include "lib/common/logging.hpp" +#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp" + +#include "lib/common/filesystem.hpp" +#include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/counters/core.hpp" +#include "lib/rocprofiler-sdk/counters/dispatch_handlers.hpp" +#include "lib/rocprofiler-sdk/counters/metrics.hpp" +#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include "lib/rocprofiler-sdk/hsa/queue.hpp" +#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "rocprofiler-sdk/buffer.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace rocprofiler::counters::testing; +using namespace rocprofiler::counters; +using namespace rocprofiler; + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ + << " failed with error code " << CHECKSTATUS << ": " << status_msg \ + << std::endl; \ + std::stringstream errmsg{}; \ + errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ + << status_msg << ")"; \ + ASSERT_EQ(CHECKSTATUS, ROCPROFILER_STATUS_SUCCESS) << errmsg.str(); \ + } \ + } + +namespace +{ +AmdExtTable& +get_ext_table() +{ + static auto _v = []() { + auto val = AmdExtTable{}; + val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info; + val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools; + val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate; + val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free; + val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info; + val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access; + val.hsa_amd_queue_set_priority_fn = hsa_amd_queue_set_priority; + val.hsa_amd_signal_async_handler_fn = hsa_amd_signal_async_handler; + return val; + }(); + return _v; +} + +CoreApiTable& +get_api_table() +{ + static auto _v = []() { + auto val = CoreApiTable{}; + val.hsa_iterate_agents_fn = hsa_iterate_agents; + val.hsa_agent_get_info_fn = hsa_agent_get_info; + val.hsa_queue_create_fn = hsa_queue_create; + val.hsa_queue_destroy_fn = hsa_queue_destroy; + val.hsa_signal_create_fn = hsa_signal_create; + val.hsa_signal_destroy_fn = hsa_signal_destroy; + val.hsa_signal_store_screlease_fn = hsa_signal_store_screlease; + val.hsa_signal_load_scacquire_fn = hsa_signal_load_scacquire; + val.hsa_signal_add_relaxed_fn = hsa_signal_add_relaxed; + val.hsa_signal_subtract_relaxed_fn = hsa_signal_subtract_relaxed; + val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed; + val.hsa_queue_create_fn = hsa_queue_create; + val.hsa_queue_add_write_index_scacq_screl_fn = hsa_queue_add_write_index_scacq_screl; + val.hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed; + val.hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed; + val.hsa_signal_load_relaxed_fn = hsa_signal_load_relaxed; + + return val; + }(); + return _v; +} + +auto +findDeviceMetrics(const hsa::AgentCache& agent, const std::unordered_set& metrics) +{ + std::vector ret; + auto all_counters = counters::getMetricMap(); + + ROCP_ERROR << "Looking up counters for " << std::string(agent.name()); + auto gfx_metrics = common::get_val(*all_counters, std::string(agent.name())); + if(!gfx_metrics) + { + ROCP_ERROR << "No counters found for " << std::string(agent.name()); + return ret; + } + + for(auto& counter : *gfx_metrics) + { + if(metrics.count(counter.name()) > 0 || metrics.empty()) + { + ret.push_back(counter); + } + } + ROCP_ERROR << "No counters found for " << std::string(agent.name()); + return ret; +} + +void +test_init() +{ + HsaApiTable table; + table.amd_ext_ = &get_ext_table(); + table.core_ = &get_api_table(); + agent::construct_agent_cache(&table); + ASSERT_TRUE(hsa::get_queue_controller() != nullptr); + hsa::get_queue_controller()->init(get_api_table(), get_ext_table()); +} + +std::vector& +global_recs() +{ + static std::vector recs; + return recs; +} + +void +check_output_created(rocprofiler_context_id_t, + rocprofiler_buffer_id_t, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* user_data, + uint64_t) +{ + // verifies that we got a record containing some data for a counter + // does NOT validate the counters values. + if(user_data == nullptr) return; + + uint64_t found_value = 0; + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_PROFILE_COUNTING_DISPATCH_HEADER) + {} + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_COUNTERS && + header->kind == ROCPROFILER_COUNTER_RECORD_VALUE) + { + // Print the returned counter data. + auto* record = static_cast(header->payload); + if(found_value != 0 && found_value != record->user_data.value) + { + ROCP_FATAL << "Have records with different user data values we didn't expect"; + break; + } + found_value = record->user_data.value; + // ROCP_ERROR << fmt::format("Found counter value: {}", record->counter_value); + global_recs().push_back(*record); + } + } + + auto* signal = reinterpret_cast(user_data); + hsa_signal_store_relaxed(*signal, static_cast(found_value)); +} + +struct test_kernels +{ + CodeObject obj; + + test_kernels(const rocprofiler::hsa::AgentCache& agent) + { + CHECK(agent.get_rocp_agent()); + // Getting hasco Path + std::string hasco_file_path = + std::string(agent.get_rocp_agent()->name) + std::string("_agent_kernels.hsaco"); + search_hasco(common::filesystem::current_path(), hasco_file_path); + CHECK_EQ(load_code_object(hasco_file_path, agent.get_hsa_agent(), obj), HSA_STATUS_SUCCESS); + } + + uint64_t load_kernel(const rocprofiler::hsa::AgentCache& agent, + const std::string& kernel_name) const + { + Kernel kern; + CHECK_EQ(get_kernel(obj, kernel_name, agent.get_hsa_agent(), kern), HSA_STATUS_SUCCESS); + return kern.handle; + } +}; + +uint16_t +packet_header(hsa_packet_type_t type) +{ + uint16_t header = type << HSA_PACKET_HEADER_TYPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE; + header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE; + return header; +} + +rocprofiler::hsa::rocprofiler_packet +gen_kernel_pkt(uint64_t obj) +{ + rocprofiler::hsa::rocprofiler_packet packet{}; + memset(((uint8_t*) &packet.kernel_dispatch) + 4, 0, sizeof(hsa_kernel_dispatch_packet_t) - 4); + packet.kernel_dispatch.setup = 1; + packet.kernel_dispatch.header = packet_header(HSA_PACKET_TYPE_KERNEL_DISPATCH); + packet.kernel_dispatch.workgroup_size_x = 1; + packet.kernel_dispatch.workgroup_size_y = 1; + packet.kernel_dispatch.workgroup_size_z = 1; + packet.kernel_dispatch.grid_size_x = 1; + packet.kernel_dispatch.grid_size_y = 1; + packet.kernel_dispatch.grid_size_z = 1; + packet.kernel_dispatch.kernel_object = obj; + packet.kernel_dispatch.kernarg_address = nullptr; + packet.kernel_dispatch.completion_signal.handle = 0; + ROCP_ERROR << fmt::format("{:x}", packet.kernel_dispatch.kernel_object); + return packet; +} + +uint64_t +submitPacket(hsa_queue_t* queue, const void* packet) +{ + const uint32_t slot_size_b = 0x40; + + // advance command queue + const uint64_t write_idx = hsa_queue_add_write_index_scacq_screl(queue, 1); + while((write_idx - hsa_queue_load_read_index_relaxed(queue)) >= queue->size) + { + sched_yield(); + } + + const uint32_t slot_idx = (uint32_t)(write_idx % queue->size); + // NOLINTBEGIN(performance-no-int-to-ptr) + uint32_t* queue_slot = + reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + const uint32_t* slot_data = reinterpret_cast(packet); + + // Copy buffered commands into the queue slot. + // Overwrite the AQL invalid header (first dword) last. + // This prevents the slot from being read until it's fully written. + memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); + // NOLINTEND(performance-no-int-to-ptr) + header_atomic_ptr->store(slot_data[0], std::memory_order_release); + + // ringdoor bell + hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); + + return write_idx; +} + +} // namespace + +class agent_profile_test : public ::testing::Test +{ +protected: + agent_profile_test() {} + + static void test_run(rocprofiler_counter_flag_t flags = ROCPROFILER_COUNTER_FLAG_NONE, + const std::unordered_set& test_metrics = {}, + size_t delay = 1) + { + hsa_init(); + registration::init_logging(); + registration::set_init_status(-1); + context::push_client(1); + test_init(); + counters::agent_profile_hsa_registration(); + + std::string kernel_name = "null_kernel"; + + ASSERT_TRUE(hsa::get_queue_controller() != nullptr); + ASSERT_GT(hsa::get_queue_controller()->get_supported_agents().size(), 0); + for(const auto& [_, agent] : hsa::get_queue_controller()->get_supported_agents()) + { + auto metrics = findDeviceMetrics(agent, test_metrics); + ASSERT_FALSE(metrics.empty()); + ASSERT_TRUE(agent.get_rocp_agent()); + test_kernels kernel_loader(agent); + auto kernel_handle = kernel_loader.load_kernel(agent, kernel_name); + auto kernel_pkt = gen_kernel_pkt(kernel_handle); + + hsa_queue_t* queue; + CHECK_EQ(hsa_queue_create(agent.get_hsa_agent(), + 64, + HSA_QUEUE_TYPE_SINGLE, + nullptr, + nullptr, + UINT32_MAX, + UINT32_MAX, + &queue), + HSA_STATUS_SUCCESS); + + rocprofiler::hsa::rocprofiler_packet barrier{}; + hsa_signal_t completion_signal; + hsa_signal_create(1, 0, nullptr, &completion_signal); + barrier.barrier_and.header = packet_header(HSA_PACKET_TYPE_BARRIER_AND); + barrier.barrier_and.completion_signal = completion_signal; + + hsa_signal_t found_data; + hsa_signal_create(0, 0, nullptr, &found_data); + size_t track_metric = 0; + for(auto& metric : metrics) + { + // global_recs().clear(); + track_metric++; + ROCP_ERROR << "Testing metric " << metric.name(); + rocprofiler_context_id_t ctx = {.handle = 0}; + ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed"); + rocprofiler_buffer_id_t opt_buff_id = {.handle = 0}; + ROCPROFILER_CALL(rocprofiler_create_buffer(ctx, + 500 * sizeof(size_t), + 500 * sizeof(size_t), + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + check_output_created, + &found_data, + &opt_buff_id), + "Could not create buffer"); + /** + * Check profile construction + */ + rocprofiler_profile_config_id_t cfg_id = {}; + rocprofiler_counter_id_t id = {.handle = metric.id()}; + ROCPROFILER_CALL( + rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &cfg_id), + "Unable to create profile"); + + ROCPROFILER_CALL( + rocprofiler_configure_agent_profile_counting_service( + ctx, + opt_buff_id, + agent.get_rocp_agent()->id, + [](rocprofiler_context_id_t context_id, + rocprofiler_agent_id_t, + rocprofiler_agent_set_profile_callback_t set_config, + void* user_data) { + CHECK(user_data); + if(auto status = set_config( + context_id, + *static_cast(user_data)); + status != ROCPROFILER_STATUS_SUCCESS) + { + ROCP_FATAL << rocprofiler_get_status_string(status); + } + }, + static_cast(&cfg_id)), + "Could not create agent collection"); + + // This queue will only be present if a context exists when AgentCache is + // construction This is a workaround for the test environment since we create + // contexts after AgentCache constructed. + agent::get_agent_cache(agent.get_rocp_agent()) + ->init_agent_profile_queue(get_api_table(), get_ext_table()); + + hsa_signal_store_screlease(completion_signal, 1); + hsa_signal_store_screlease(found_data, 0); + auto status = rocprofiler_start_context(ctx); + if(status == ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS) + { + ROCP_ERROR << fmt::format("No hardware counters for {}, skipping", + metric.name()); + continue; + } + else if(status != ROCPROFILER_STATUS_SUCCESS) + { + ROCP_FATAL << "Failed to start context - " + << rocprofiler_get_status_string(status); + } + + ROCPROFILER_CALL(status, "Could not start context"); + + // Execute kernel + submitPacket(queue, &kernel_pkt); + submitPacket(queue, &kernel_pkt); + submitPacket(queue, &kernel_pkt); + submitPacket(queue, &kernel_pkt); + submitPacket(queue, &kernel_pkt); + submitPacket(queue, &barrier); + usleep(delay); + // Wait for completion + hsa_signal_wait_relaxed(completion_signal, + HSA_SIGNAL_CONDITION_EQ, + 0, + UINT64_MAX, + HSA_WAIT_STATE_BLOCKED); + + // Sample the counting service. + ROCPROFILER_CALL(rocprofiler_sample_agent_profile_counting_service( + ctx, {.value = track_metric}, flags), + "Could not sample"); + ROCPROFILER_CALL(rocprofiler_stop_context(ctx), "Could not stop context"); + rocprofiler_flush_buffer(opt_buff_id); + + if(hsa_signal_wait_relaxed(found_data, + HSA_SIGNAL_CONDITION_EQ, + track_metric, + 20000000, + HSA_WAIT_STATE_BLOCKED) != + static_cast(track_metric)) + { + ROCP_FATAL << "Failed to get data for " << metric.name(); + } + } + hsa_signal_destroy(completion_signal); + hsa_signal_destroy(found_data); + hsa_queue_destroy(queue); + } + registration::set_init_status(1); + context::pop_client(1); + } +}; + +TEST_F(agent_profile_test, sync_counters) { test_run(); } +TEST_F(agent_profile_test, async_counters) { test_run(ROCPROFILER_COUNTER_FLAG_ASYNC); } +TEST_F(agent_profile_test, sync_grbm_verify) +{ + test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GRBM_COUNT"}, 50000); + ROCP_ERROR << global_recs().size(); + + for(const auto& val : global_recs()) + { + rocprofiler_counter_id_t id; + rocprofiler_query_record_counter_id(val.id, &id); + rocprofiler_counter_info_v0_t info; + rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); + ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + EXPECT_GT(val.counter_value, 0.0); + } +} + +TEST_F(agent_profile_test, sync_gpu_util_verify) +{ + test_run(ROCPROFILER_COUNTER_FLAG_NONE, {"GPU_UTIL"}, 50000); + ROCP_ERROR << global_recs().size(); + + for(const auto& val : global_recs()) + { + rocprofiler_counter_id_t id; + rocprofiler_query_record_counter_id(val.id, &id); + rocprofiler_counter_info_v0_t info; + rocprofiler_query_counter_info(id, ROCPROFILER_COUNTER_INFO_VERSION_0, &info); + ROCP_ERROR << fmt::format("Name: {} Counter value: {}", info.name, val.counter_value); + EXPECT_GT(val.counter_value, 0.0); + } +} \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp new file mode 100644 index 0000000000..7c3d524cbc --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/tests/agent_profiling.hpp @@ -0,0 +1,23 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.cpp b/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.cpp new file mode 100644 index 0000000000..27162a82d5 --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.cpp @@ -0,0 +1,106 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp" +#include + +#include "lib/common/logging.hpp" + +namespace rocprofiler +{ +namespace counters +{ +namespace testing +{ +hsa_status_t +load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object) +{ + hsa_status_t err; + code_object.file = open(filename.c_str(), O_RDONLY); + if(code_object.file == -1) + { + ROCP_FATAL << "Could not load code object " << filename; + } + + err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr); + if(err != HSA_STATUS_SUCCESS) return err; + + err = hsa_executable_create_alt(HSA_PROFILE_FULL, + HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + nullptr, + &code_object.executable); + if(err != HSA_STATUS_SUCCESS) return err; + err = hsa_executable_load_agent_code_object( + code_object.executable, agent, code_object.code_obj_rdr, nullptr, nullptr); + if(err != HSA_STATUS_SUCCESS) return err; + + err = hsa_executable_freeze(code_object.executable, nullptr); + + return err; +} + +hsa_status_t +get_kernel(const CodeObject& code_object, + const std::string& kernel, + hsa_agent_t agent, + Kernel& kern) +{ + hsa_executable_symbol_t symbol; + hsa_status_t err = + hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol); + if(err != HSA_STATUS_SUCCESS) + { + err = hsa_executable_get_symbol_by_name( + code_object.executable, (kernel + ".kd").c_str(), &agent, &symbol); + if(err != HSA_STATUS_SUCCESS) + { + return err; + } + } + ROCP_INFO << "kernel-name: " << kernel.c_str() << "\n"; + err = hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kern.handle); + + return err; +} + +void +search_hasco(const common::filesystem::path& directory, std::string& filename) +{ + for(const auto& entry : common::filesystem::directory_iterator(directory)) + { + if(common::filesystem::is_regular_file(entry)) + { + if(entry.path().filename() == filename) + { + filename = entry.path(); + } + } + else if(common::filesystem::is_directory(entry)) + { + search_hasco(entry, filename); // Recursive call for subdirectories + } + } +} +} // namespace testing +} // namespace counters +} // namespace rocprofiler \ No newline at end of file diff --git a/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp b/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp new file mode 100644 index 0000000000..7e49adb2dd --- /dev/null +++ b/source/lib/rocprofiler-sdk/counters/tests/code_object_loader.hpp @@ -0,0 +1,65 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +#include "lib/common/filesystem.hpp" + +namespace rocprofiler +{ +namespace counters +{ +namespace testing +{ +struct CodeObject +{ + hsa_file_t file = 0; + hsa_code_object_reader_t code_obj_rdr = {}; + hsa_executable_t executable = {}; +}; + +hsa_status_t +load_code_object(const std::string& filename, hsa_agent_t agent, CodeObject& code_object); +struct Kernel +{ + uint64_t handle = 0; + uint32_t scratch = 0; + uint32_t group = 0; + uint32_t kernarg_size = 0; + uint32_t kernarg_align = 0; +}; + +hsa_status_t +get_kernel(const CodeObject& code_object, + const std::string& kernel, + hsa_agent_t agent, + Kernel& kern); + +void +search_hasco(const common::filesystem::path& directory, std::string& filename); +} // namespace testing +} // namespace counters +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/counters/tests/core.cpp b/source/lib/rocprofiler-sdk/counters/tests/core.cpp index 29402438be..339efa669f 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/core.cpp @@ -747,54 +747,3 @@ TEST(core, public_api_iterate_agents) EXPECT_TRUE(from_api.empty()); } } - -TEST(core, init_agent_collection) -{ - ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); - registration::init_logging(); - registration::set_init_status(-1); - context::push_client(1); - ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "context creation failed"); - auto agents = hsa::get_queue_controller()->get_supported_agents(); - - rocprofiler_buffer_id_t opt_buff_id = {.handle = 0}; - ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(), - 500 * sizeof(size_t), - 500 * sizeof(size_t), - ROCPROFILER_BUFFER_POLICY_LOSSLESS, - null_buffered_callback, - nullptr, - &opt_buff_id), - "Could not create buffer"); - for(const auto& [_, agent] : agents) - { - auto metrics = findDeviceMetrics(agent, {}); - ASSERT_FALSE(metrics.empty()); - ASSERT_TRUE(agent.get_rocp_agent()); - for(auto& metric : metrics) - { - expected_dispatch expected = {}; - rocprofiler_counter_id_t id = {.handle = metric.id()}; - ROCPROFILER_CALL( - rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &expected.id), - "Unable to create profile"); - - ROCPROFILER_CALL(rocprofiler_configure_agent_profile_counting_service( - get_client_ctx(), opt_buff_id, expected.id), - "Could not create agent collection"); - { - auto cfg = counters::get_profile_config(expected.id); - auto* ctx = rocprofiler::context::get_mutable_registered_context(get_client_ctx()); - ASSERT_TRUE(ctx); - ASSERT_TRUE(ctx->agent_counter_collection); - EXPECT_EQ(ctx->agent_counter_collection->profile, cfg); - EXPECT_EQ(ctx->agent_counter_collection->buffer.handle, opt_buff_id.handle); - } - ROCPROFILER_CALL(rocprofiler_destroy_profile_config(expected.id), - "Could not delete profile id"); - } - } - rocprofiler_destroy_buffer(opt_buff_id); - registration::set_init_status(1); - context::pop_client(1); -} diff --git a/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp b/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp index 012e6d0554..f17640082b 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/evaluate_ast_test.cpp @@ -646,17 +646,22 @@ TEST(evaluate_ast, counter_reduction_sum) sum_vec(base_counter_data["KRUEGER"])), 2}, {"KRAMER", - plus_vec(times_vec( - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}}, - sum_vec(base_counter_data["VOORHEES"])), - sum_vec(base_counter_data["KRUEGER"])), + plus_vec( + times_vec(std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}}, + sum_vec(base_counter_data["VOORHEES"])), + sum_vec(base_counter_data["KRUEGER"])), 2}, {"GHOSTFACE", plus_vec(sum_vec(base_counter_data["VOORHEES"]), - divide_vec(sum_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}})), + divide_vec( + sum_vec(base_counter_data["KRUEGER"]), + std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}})), 2}, }; @@ -727,17 +732,22 @@ TEST(evaluate_ast, counter_reduction_min) min_vec(base_counter_data["KRUEGER"])), 2}, {"KRAMER", - plus_vec(times_vec( - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}}, - min_vec(base_counter_data["VOORHEES"])), - min_vec(base_counter_data["KRUEGER"])), + plus_vec( + times_vec(std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}}, + min_vec(base_counter_data["VOORHEES"])), + min_vec(base_counter_data["KRUEGER"])), 2}, {"GHOSTFACE", plus_vec(min_vec(base_counter_data["VOORHEES"]), - divide_vec(min_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}})), + divide_vec( + min_vec(base_counter_data["KRUEGER"]), + std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}})), 2}, }; @@ -808,17 +818,22 @@ TEST(evaluate_ast, counter_reduction_max) max_vec(base_counter_data["KRUEGER"])), 2}, {"KRAMER", - plus_vec(times_vec( - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}}, - max_vec(base_counter_data["VOORHEES"])), - max_vec(base_counter_data["KRUEGER"])), + plus_vec( + times_vec(std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}}, + max_vec(base_counter_data["VOORHEES"])), + max_vec(base_counter_data["KRUEGER"])), 2}, {"GHOSTFACE", plus_vec(max_vec(base_counter_data["VOORHEES"]), - divide_vec(max_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}})), + divide_vec( + max_vec(base_counter_data["KRUEGER"]), + std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}})), 2}, }; @@ -891,17 +906,22 @@ TEST(evaluate_ast, counter_reduction_avg) avg_vec(base_counter_data["KRUEGER"])), 2}, {"KRAMER", - plus_vec(times_vec( - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}}, - avg_vec(base_counter_data["VOORHEES"])), - avg_vec(base_counter_data["KRUEGER"])), + plus_vec( + times_vec(std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}}, + avg_vec(base_counter_data["VOORHEES"])), + avg_vec(base_counter_data["KRUEGER"])), 2}, {"GHOSTFACE", plus_vec(avg_vec(base_counter_data["VOORHEES"]), - divide_vec(avg_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 5.0, .dispatch_id = 0}})), + divide_vec( + avg_vec(base_counter_data["KRUEGER"]), + std::vector{{.id = 0, + .counter_value = 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}})), 2}, }; @@ -962,18 +982,23 @@ TEST(evaluate_ast, evaluate_mixed_counters) {"BATES", times_vec( std::vector{ - {.id = 0, .counter_value = 32, .dispatch_id = 0}}, + {.id = 0, .counter_value = 32, .dispatch_id = 0, .user_data = {.value = 0}}}, sum_vec(base_counter_data["VOORHEES"])), 2}, {"KRAMER", times_vec(sum_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 8.0 / 5.0, .dispatch_id = 0}}), + std::vector{{.id = 0, + .counter_value = 8.0 / 5.0, + .dispatch_id = 0, + .user_data = {.value = 0}}}), 3}, {"TORRANCE", - times_vec(sum_vec(base_counter_data["KRUEGER"]), - std::vector{ - {.id = 0, .counter_value = 104.0 / (156.0 * 8.0), .dispatch_id = 0}}), + times_vec( + sum_vec(base_counter_data["KRUEGER"]), + std::vector{{.id = 0, + .counter_value = 104.0 / (156.0 * 8.0), + .dispatch_id = 0, + .user_data = {.value = 0}}}), 4}, }; diff --git a/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h b/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h index 2083cd6d86..65b917942a 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h +++ b/source/lib/rocprofiler-sdk/counters/tests/metrics_test.h @@ -166,7 +166,12 @@ static const std::unordered_map>> derived_gfx908 = {{"gfx908", - {{"SQ_WAVES_sum", + {{"GPU_UTIL", + "", + "", + "100*GRBM_GUI_ACTIVE/GRBM_COUNT", + "Percentage of the time that GUI is active"}, + {"SQ_WAVES_sum", "", "", "reduce(SQ_WAVES,sum)", diff --git a/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml b/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml index 3c905b1a61..4aed14fb71 100755 --- a/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml +++ b/source/lib/rocprofiler-sdk/counters/xml/derived_counters.xml @@ -48,7 +48,7 @@ - + @@ -425,6 +425,7 @@ + @@ -469,6 +470,9 @@ + + + diff --git a/source/lib/rocprofiler-sdk/hsa/agent_cache.cpp b/source/lib/rocprofiler-sdk/hsa/agent_cache.cpp index 09e90c3994..dd4ba1b9a5 100644 --- a/source/lib/rocprofiler-sdk/hsa/agent_cache.cpp +++ b/source/lib/rocprofiler-sdk/hsa/agent_cache.cpp @@ -26,6 +26,8 @@ #include #include +#include "lib/rocprofiler-sdk/context/context.hpp" + namespace { // This function checks to see if the provided @@ -123,11 +125,48 @@ namespace rocprofiler { namespace hsa { +void +AgentCache::init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const +{ + static std::mutex m_mutex; + std::lock_guard lock(m_mutex); + + using context = rocprofiler::context::context; + const auto* agent_ctx = []() -> const context* { + for(auto& ctx : rocprofiler::context::get_registered_contexts()) + { + if(ctx->agent_counter_collection) return ctx; + } + return nullptr; + }(); + + if(!agent_ctx || m_profile_queue) return; + ROCP_ERROR << "Creating Profile Queue"; + // create the queue and set it to high_priority + CHECK(api.hsa_queue_create_fn) << "no hsa_queue_create_fn in api table"; + auto status = api.hsa_queue_create_fn(get_hsa_agent(), + 64, + HSA_QUEUE_TYPE_SINGLE, + nullptr, + nullptr, + UINT32_MAX, + UINT32_MAX, + &m_profile_queue); + if(status != HSA_STATUS_SUCCESS && status != HSA_STATUS_INFO_BREAK) + { + throw std::runtime_error("Error: Queue is not initialized"); + } + + CHECK(ext.hsa_amd_queue_set_priority_fn) << "no hsa_amd_queue_set_priority_fn in api table"; + ext.hsa_amd_queue_set_priority_fn(m_profile_queue, HSA_AMD_QUEUE_PRIORITY_HIGH); +} + AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent, hsa_agent_t hsa_agent, size_t index, hsa_agent_t nearest_cpu, - const AmdExtTable& ext_table) + const AmdExtTable& ext_table, + const CoreApiTable& api) : m_rocp_agent{rocp_agent} , m_index{index} , m_hsa_agent{hsa_agent} @@ -139,6 +178,7 @@ AgentCache::AgentCache(const rocprofiler_agent_t* rocp_agent, { init_cpu_pool(ext_table, *this); init_gpu_pool(ext_table, *this); + init_agent_profile_queue(api, ext_table); } catch(std::runtime_error& e) { ROCP_WARNING << fmt::format( diff --git a/source/lib/rocprofiler-sdk/hsa/agent_cache.hpp b/source/lib/rocprofiler-sdk/hsa/agent_cache.hpp index 2ae4936fe1..cced1c8abc 100644 --- a/source/lib/rocprofiler-sdk/hsa/agent_cache.hpp +++ b/source/lib/rocprofiler-sdk/hsa/agent_cache.hpp @@ -52,7 +52,8 @@ public: hsa_agent_t hsa_agent, size_t index, hsa_agent_t nearest_cpu, - const AmdExtTable& ext_table); + const AmdExtTable& ext_table, + const CoreApiTable& api); ~AgentCache() = default; AgentCache(const AgentCache&) = default; AgentCache(AgentCache&&) noexcept = default; @@ -67,10 +68,12 @@ public: CONST_NONCONST_ACCESSOR(hsa_agent_t, get_hsa_agent, m_hsa_agent); CONST_NONCONST_ACCESSOR(hsa_agent_t, near_cpu, m_nearest_cpu); + hsa_queue_t* profile_queue() const { return m_profile_queue; } const rocprofiler_agent_t* get_rocp_agent() const { return m_rocp_agent; } std::string_view name() const { return m_name; } size_t index() const { return m_index; } + void init_agent_profile_queue(const CoreApiTable& api, const AmdExtTable& ext) const; bool operator==(const rocprofiler_agent_t*) const; bool operator==(hsa_agent_t) const; @@ -88,7 +91,8 @@ private: hsa_amd_memory_pool_t m_kernarg_pool{.handle = 0}; hsa_amd_memory_pool_t m_gpu_pool{.handle = 0}; - std::string_view m_name = {}; + std::string_view m_name = {}; + mutable hsa_queue_t* m_profile_queue = {}; }; inline bool diff --git a/source/lib/rocprofiler-sdk/registration.cpp b/source/lib/rocprofiler-sdk/registration.cpp index 280261a0cb..d49be4ec6e 100644 --- a/source/lib/rocprofiler-sdk/registration.cpp +++ b/source/lib/rocprofiler-sdk/registration.cpp @@ -748,6 +748,9 @@ rocprofiler_set_api_table(const char* name, // need to construct agent mappings before initializing the queue controller rocprofiler::agent::construct_agent_cache(hsa_api_table); rocprofiler::hsa::queue_controller_init(hsa_api_table); + // Process agent ctx's that were started prior to HSA init + rocprofiler::counters::agent_profile_hsa_registration(); + rocprofiler::hsa::async_copy_init(hsa_api_table, lib_instance); rocprofiler::code_object::initialize(hsa_api_table); diff --git a/source/lib/rocprofiler-sdk/rocprofiler.cpp b/source/lib/rocprofiler-sdk/rocprofiler.cpp index 4981d7c7aa..13a2da9ec8 100644 --- a/source/lib/rocprofiler-sdk/rocprofiler.cpp +++ b/source/lib/rocprofiler-sdk/rocprofiler.cpp @@ -96,7 +96,17 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_PROFILE_NOT_FOUND, ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_DISPATCH_CONFLICT, "Cannot have both an agent counter collection and a dispatch counter " "in the same context") - +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_INTERNAL_NO_AGENT_CONTEXT, + "No context has agent profiling enabled, " + "error generally not returned to tools") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_SAMPLE_RATE_EXCEEDED, + "A sample is in progress and a new sample cannot be started") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_PROFILE_QUEUE, + "No profile queue is available for this agent") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NO_HARDWARE_COUNTERS, + "Counter set does not include any hardware counters") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_MISMATCH, + "Counter profile agent does not match the agent in the context") template const char* get_status_name(rocprofiler_status_t status, std::index_sequence)