diff --git a/projects/rocprofiler-sdk/samples/counter_collection/client.cpp b/projects/rocprofiler-sdk/samples/counter_collection/client.cpp index c8b54cfd8f..3b12162b1b 100644 --- a/projects/rocprofiler-sdk/samples/counter_collection/client.cpp +++ b/projects/rocprofiler-sdk/samples/counter_collection/client.cpp @@ -121,6 +121,12 @@ buffered_callback(rocprofiler_context_id_t, *output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n"; } +std::unordered_map& +get_profile_cache() +{ + static std::unordered_map profile_cache; + return profile_cache; +} /** * Callback from rocprofiler when an kernel dispatch is enqueued into the HSA queue. * rocprofiler_profile_config_id_t* is a return to specify what counters to collect @@ -140,12 +146,9 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, * set for the agent. If we have, return it. Otherwise, construct a new profile counter * set. */ - static std::shared_mutex m_mutex = {}; - static std::unordered_map profile_cache = {}; - auto search_cache = [&]() { - if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle); - pos != profile_cache.end()) + if(auto pos = get_profile_cache().find(dispatch_data.dispatch_info.agent_id.handle); + pos != get_profile_cache().end()) { *config = pos->second; return true; @@ -153,22 +156,21 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, return false; }; + if(!search_cache()) { - auto rlock = std::shared_lock{m_mutex}; - if(search_cache()) return; + std::cerr << "No profile for agent found in cache\n"; + exit(-1); } +} - auto wlock = std::unique_lock{m_mutex}; - if(search_cache()) return; - - // Counters we want to collect (here its SQ_WAVES) - std::set counters_to_collect = {"SQ_WAVES"}; - // GPU Counter IDs +rocprofiler_profile_config_id_t +build_profile_for_agent(rocprofiler_agent_id_t agent) +{ + std::set counters_to_collect = {"SQ_WAVES"}; std::vector gpu_counters; - // Iterate through the agents and get the counters available on that agent ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - dispatch_data.dispatch_info.agent_id, + agent, [](rocprofiler_agent_id_t, rocprofiler_counter_id_t* counters, size_t num_counters, @@ -185,7 +187,6 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, "Could not fetch supported counters"); std::vector collect_counters; - // Look for the counters contained in counters_to_collect in gpu_counters for(auto& counter : gpu_counters) { rocprofiler_counter_info_v0_t version; @@ -200,17 +201,12 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, } } - // Create a colleciton profile for the counters rocprofiler_profile_config_id_t profile; - ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id, - collect_counters.data(), - collect_counters.size(), - &profile), + ROCPROFILER_CALL(rocprofiler_create_profile_config( + agent, collect_counters.data(), collect_counters.size(), &profile), "Could not construct profile cfg"); - profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile); - // Return the profile to collect those counters for this dispatch - *config = profile; + return profile; } int @@ -227,6 +223,41 @@ tool_init(rocprofiler_client_finalize_t, void* user_data) &get_buffer()), "buffer creation failed"); + std::vector agents; + rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver, + const void** agents_arr, + size_t num_agents, + void* udata) { + if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0) + throw std::runtime_error{"unexpected rocprofiler agent version"}; + auto* agents_v = static_cast*>(udata); + for(size_t i = 0; i < num_agents; ++i) + agents_v->emplace_back(*static_cast(agents_arr[i])); + return ROCPROFILER_STATUS_SUCCESS; + }; + + ROCPROFILER_CALL( + rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0, + iterate_cb, + sizeof(rocprofiler_agent_t), + const_cast(static_cast(&agents))), + "query available agents"); + + // Construct the profiles in advance for each agent that is a GPU + for(const auto& agent : agents) + { + if(agent.type == ROCPROFILER_AGENT_TYPE_GPU) + { + get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id)); + } + } + + if(agents.empty()) + { + std::cerr << "No agents found" << std::endl; + return 1; + } + auto client_thread = rocprofiler_callback_thread_t{}; ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread), "failure creating callback thread"); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp index f00b8a51ce..d5afd134c9 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.cpp @@ -697,6 +697,47 @@ get_agent(rocprofiler_agent_id_t id) return nullptr; } +const std::vector& +get_aql_handles() +{ + static std::vector _v = []() { + std::vector agent_handles; + for(auto& agent : get_agents()) + { + aqlprofile_agent_info_t agent_info = { + .agent_gfxip = agent->name, + .xcc_num = agent->num_xcc, + .se_num = agent->num_shader_banks, + .cu_num = agent->cu_count, + .shader_arrays_per_se = agent->simd_arrays_per_engine}; + aqlprofile_agent_handle_t handle = {.handle = 0}; + if(aqlprofile_register_agent(&handle, &agent_info) != HSA_STATUS_SUCCESS) + { + ROCP_WARNING << "Failed to register agent " << agent->name; + } + agent_handles.push_back(handle); + } + return agent_handles; + }(); + + return _v; +} + +const aqlprofile_agent_handle_t* +get_aql_agent(rocprofiler_agent_id_t id) +{ + size_t pos = 0; + for(const auto& itr : get_agents()) + { + if(itr && itr->id.handle == id.handle) + { + return &get_aql_handles().at(pos); + } + pos++; + } + return nullptr; +} + void construct_agent_cache(::HsaApiTable* table) { @@ -916,15 +957,15 @@ get_rocprofiler_agent(hsa_agent_t agent) return nullptr; } -std::optional +const hsa::AgentCache* get_agent_cache(const rocprofiler_agent_t* agent) { for(const auto& itr : get_agent_caches()) { - if(itr == agent) return itr; + if(itr == agent) return &itr; } - return std::nullopt; + return nullptr; } std::optional diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.hpp index b01132ebfe..e77c061e62 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/agent.hpp @@ -24,6 +24,7 @@ #include +#include "lib/rocprofiler-sdk/aql/aql_profile_v2.h" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" #include @@ -51,7 +52,7 @@ get_hsa_agent(const rocprofiler_agent_t* agent); const rocprofiler_agent_t* get_rocprofiler_agent(hsa_agent_t agent); -std::optional +const hsa::AgentCache* get_agent_cache(const rocprofiler_agent_t* agent); std::optional @@ -66,6 +67,9 @@ get_agent_cache(hsa_agent_t agent); std::unordered_set& get_agent_available_properties(); +const aqlprofile_agent_handle_t* +get_aql_agent(rocprofiler_agent_id_t id); + void construct_agent_cache(::HsaApiTable* table); } // namespace agent diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/aql_profile_v2.h b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/aql_profile_v2.h index 2d6a6da1e6..22613115d0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/aql_profile_v2.h +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/aql_profile_v2.h @@ -1,34 +1,11 @@ -// MIT License -// -// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - #pragma once #include #include -#ifdef __cplusplus -extern "C" { -#endif +#define PUBLIC_API +extern "C" { typedef struct { uint64_t handle; @@ -149,7 +126,7 @@ typedef struct * @retval HSA_STATUS_SUCCESS registration ok * @retval HSA_STATUS_ERROR registration failed */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id, const aqlprofile_agent_info_t* agent_info); @@ -179,7 +156,7 @@ typedef enum // counters disable command buffer } aqlprofile_pmc_info_type_t; -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile, aqlprofile_pmc_info_type_t attribute, void* value); @@ -244,7 +221,7 @@ typedef hsa_status_t (*aqlprofile_memory_copy_t)(void* dst, * @retval HSA_STATUS_SUCCESS if the event was validated. * @retval HSA_STATUS_ERROR if the event was not validated. */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent, const aqlprofile_pmc_event_t* event, bool* result); @@ -258,7 +235,7 @@ aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent, * @retval HSA_STATUS_ERROR if some callback returns an error * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle, aqlprofile_pmc_data_callback_t callback, void* userdata); @@ -282,7 +259,7 @@ typedef struct * @param[in] dealloc_cb Function to free memory allocated by alloc_cb * @param[in] userdata Data passed back to user via memory alloc callback */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets, aqlprofile_pmc_profile_t profile, @@ -295,7 +272,7 @@ aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle, * @brief Function to delete AQL packets after creation by aqlprofile_pmc_create_packets * @param[in] handle Returned by aqlprofile_pmc_create_packets() */ -void +PUBLIC_API void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle); /** @@ -307,7 +284,7 @@ aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle); * @retval HSA_STATUS_ERROR if some callback returns an error * @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_att_iterate_data(aqlprofile_handle_t handle, aqlprofile_att_data_callback_t callback, void* userdata); @@ -330,7 +307,7 @@ typedef struct * @retval HSA_STATUS_SUCCESS if all packets created succesfully * @retval HSA_STATUS_ERROR otherwise */ -hsa_status_t +PUBLIC_API hsa_status_t aqlprofile_att_create_packets(aqlprofile_handle_t* handle, aqlprofile_att_control_aql_packets_t* packets, aqlprofile_att_profile_t profile, @@ -339,7 +316,7 @@ aqlprofile_att_create_packets(aqlprofile_handle_t* handle, aqlprofile_memory_copy_t memcpy_cb, void* userdata); -void +PUBLIC_API void aqlprofile_att_delete_packets(aqlprofile_handle_t handle); /** @@ -476,21 +453,20 @@ enum WaveTrapStatus TRAP_STANDBY = 2 }; -typedef struct +struct __attribute__((packed)) pcinfo_t { size_t addr; - size_t marker_id; -} pcinfo_t; + int marker_id; +}; typedef struct __attribute__((packed)) { - uint64_t category : 8; - uint64_t hitcount : 56; - uint64_t latency; pcinfo_t pc; + int hitcount; + size_t latency; } att_trace_event_t; -typedef struct +struct wave_data_t { uint8_t simd; uint8_t wave_id; @@ -498,42 +474,65 @@ typedef struct uint8_t reserved; // VMEM Pipeline: instrs and stalls - int num_vmem_instrs; - int num_vmem_stalls; + int num_vmem_instrs = 0; + int num_vmem_stalls = 0; // FLAT instrs and stalls - int num_flat_instrs; - int num_flat_stalls; + int num_flat_instrs = 0; + int num_flat_stalls = 0; // LDS instr and stalls - int num_lds_instrs; - int num_lds_stalls; + int num_lds_instrs = 0; + int num_lds_stalls = 0; // SCA instrs stalls - int num_salu_instrs; - int num_smem_instrs; - int num_salu_stalls; - int num_smem_stalls; + int num_salu_instrs = 0; + int num_smem_instrs = 0; + int num_salu_stalls = 0; + int num_smem_stalls = 0; // Branch - int num_branch_instrs; - int num_branch_taken_instrs; - int num_branch_stalls; + int num_branch_instrs = 0; + int num_branch_taken_instrs = 0; + int num_branch_stalls = 0; // total VMEM/FLAT/LDS/SMEM instructions issued - int num_mem_instrs; // total issued memory instructions - int num_valu_stalls; - size_t num_valu_instrs; - size_t num_issued_instrs; // total issued instructions (compute + memory) + int num_mem_instrs = 0; // total issued memory instructions + int num_valu_stalls = 0; + size_t num_valu_instrs = 0; + size_t num_issued_instrs = 0; // total issued instructions (compute + memory) - int64_t begin_time; // Begin and end cycle - int64_t end_time; - int64_t traceID; + int64_t begin_time = 0; // Begin and end cycle + int64_t end_time = 0; + int64_t traceID = -1; - size_t timeline_size; - size_t instructions_size; + size_t timeline_size = 0; + size_t instructions_size = 0; wave_state_t* timeline_array; wave_instruction_t* instructions_array; -} wave_data_t; +}; + +/** + * @brief Callback for iteration of all possible event coordinate IDs and coordinate names. + * @param [in] id Integer identifying type ID. + * @param [in] name Name of the trace type. + * @param [in] userdata User data supplied to back caller + * @retval HSA_STATUS_SUCCESS Continues iteration + * @retval OTHERS Any other HSA return values stops iteration, passing back this value through + * @ref aqlprofile_iterate_trace_type_ids + */ +typedef hsa_status_t (*aqlprofile_att_tracename_callback_t)(int id, const char* name, void* data); + +/** + * @brief Iterate over all possible event coordinate IDs and their names. + * @param [in] callback Callback to use for iteration of trace types + * @param [in] userdata Data to supply to callback @ref aqlprofile_tracename_callback_t + * @retval HSA_STATUS_SUCCESS if successful + * @retval HSA_STATUS_ERROR if error on interation + * @retval OTHERS If @ref aqlprofile_eventname_callback_t returns non-HSA_STATUS_SUCCESS, + * that value is returned. + */ +PUBLIC_API hsa_status_t +aqlprofile_att_iterate_trace_type_ids(aqlprofile_att_tracename_callback_t callback, void* userdata); /** * @brief Callback for rocprofiler to return ISA to aqlprofile ATT parser. @@ -561,7 +560,7 @@ typedef hsa_status_t (*aqlprofile_att_isa_callback_t)(char* isa_instruction, uint64_t* isa_memory_size, uint64_t* isa_size, uint64_t* source_size, - uint64_t marker_id, + uint32_t marker_id, uint64_t offset, void* userdata); @@ -604,11 +603,11 @@ typedef uint64_t (*aqlprofile_att_se_data_callback_t)(int* shader_engine_id /** * @brief Callback returning from aqlprofile_att_parser_iterate_event_list * @param[in] trace_event_id ID of the event. - * @param[in] trace_event_metadata Null-terminated string, entries separated by ';' + * @param[in] trace_event_name Event name. * @param[in] userdata userdata. */ typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event_id, - const char* trace_event_metadata, + const char* trace_event_name, void* userdata); /** @@ -616,7 +615,7 @@ typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event * @param[in] callback Callback where events are returned to. * @param[in] userdata userdata. */ -void +hsa_status_t aqlprofile_att_parser_iterate_event_list(aqlprofile_att_parser_iterate_event_cb_t callback, void* userdata); @@ -635,17 +634,17 @@ aqlprofile_att_parse_data(aqlprofile_att_se_data_callback_t se_data_callback, void* userdata); /** - * @brief Contains flags for how code objects are interpreted + * @brief Contains information of code objects. IDs can be reused for different load addresses. */ typedef union { + uint32_t raw; struct { uint32_t isUnload : 1; // 0 if code object is being loaded, 1 for unload uint32_t bFromStart : 1; // Has this code object been loaded before thread trace started? - uint32_t legacy_id : 30; // Legacy code object ID, if it fits in 30 bits. + uint32_t id : 30; // To be passed back to isa_string_callback in marker_id }; - uint32_t raw; } aqlprofile_att_header_marker_t; /** @@ -653,7 +652,6 @@ typedef union * @param[out] packets Returned packet * @param[in] handle The handle created from aqlprofile_att_create_packets() * @param[in] header Header containing code object information created from profiler - * @param[in] id To be passed back to isa_string_callback in marker_id * @param[in] addr Code object loaded address. * @param[in] size Code object loaded size. */ @@ -661,10 +659,6 @@ hsa_status_t aqlprofile_att_codeobj_load_marker(hsa_ext_amd_aql_pm4_packet_t* packets, aqlprofile_handle_t handle, aqlprofile_att_header_marker_t header, - uint64_t id, uint64_t addr, uint64_t size); - -#ifdef __cplusplus } -#endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.cpp index b5745d3260..26fdcdd8c7 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.cpp @@ -36,12 +36,12 @@ namespace rocprofiler namespace aql { hsa_ven_amd_aqlprofile_id_query_t -get_query_info(hsa_agent_t agent, const counters::Metric& metric) +get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric) { - hsa_ven_amd_aqlprofile_profile_t profile{.agent = agent}; + auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent)); + aqlprofile_pmc_profile_t profile{.agent = aql_agent}; hsa_ven_amd_aqlprofile_id_query_t query = {metric.block().c_str(), 0, 0}; - if(hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query) != - HSA_STATUS_SUCCESS) + if(aqlprofile_get_pmc_info(&profile, AQLPROFILE_INFO_BLOCK_ID, &query) != HSA_STATUS_SUCCESS) { ROCP_DFATAL << fmt::format("AQL failed to query info for counter {}", metric); throw std::runtime_error(fmt::format("AQL failed to query info for counter {}", metric)); @@ -50,16 +50,13 @@ get_query_info(hsa_agent_t agent, const counters::Metric& metric) } uint32_t -get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event) +get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event) { - hsa_ven_amd_aqlprofile_profile_t query = {.agent = agent, - .type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, - .events = &event, - .event_count = 1}; - uint32_t max_block_counters = 0; - if(hsa_ven_amd_aqlprofile_get_info(&query, - HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, - &max_block_counters) != HSA_STATUS_SUCCESS) + auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent)); + aqlprofile_pmc_profile_t query = {.agent = aql_agent, .events = &event, .event_count = 1}; + uint32_t max_block_counters = 0; + if(aqlprofile_get_pmc_info(&query, AQLPROFILE_INFO_BLOCK_COUNTERS, &max_block_counters) != + HSA_STATUS_SUCCESS) { throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}", static_cast(event.block_name))); @@ -94,10 +91,10 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id, } rocprofiler_status_t -get_dim_info(hsa_agent_t agent, - hsa_ven_amd_aqlprofile_event_t event, - uint32_t sample_id, - std::map& dims) +get_dim_info(rocprofiler_agent_id_t agent, + aqlprofile_pmc_event_t event, + uint32_t sample_id, + std::map& dims) { auto callback = [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t { auto& map = *static_cast*>(userdata); @@ -105,8 +102,10 @@ get_dim_info(hsa_agent_t agent, return HSA_STATUS_SUCCESS; }; - if(hsa_ven_amd_aqlprofile_iterate_event_coord( - agent, event, sample_id, callback, static_cast(&dims)) != HSA_STATUS_SUCCESS) + auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent)); + + if(aqlprofile_iterate_event_coord( + aql_agent, event, sample_id, callback, static_cast(&dims)) != HSA_STATUS_SUCCESS) { return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD; } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.hpp index 44df378086..bbaa104e48 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/helpers.hpp @@ -30,6 +30,7 @@ #include +#include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/counters/metrics.hpp" namespace rocprofiler @@ -38,18 +39,18 @@ namespace aql { // Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile hsa_ven_amd_aqlprofile_id_query_t -get_query_info(hsa_agent_t agent, const counters::Metric& metric); +get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric); // Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS from aqlprofiler uint32_t -get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event); +get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event); // Query dimimension ids for counter event. Returns AQLProfiler ID -> extent rocprofiler_status_t -get_dim_info(hsa_agent_t agent, - hsa_ven_amd_aqlprofile_event_t event, - uint32_t sample_id, - std::map& dims); +get_dim_info(rocprofiler_agent_id_t agent, + aqlprofile_pmc_event_t event, + uint32_t sample_id, + std::map& dims); // Set dimension ids into id for sample rocprofiler_status_t diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index 32a380cd4a..c76db36d93 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "lib/rocprofiler-sdk/aql/packet_construct.hpp" +#include "lib/rocprofiler-sdk/hsa/details/fmt.hpp" #include #include @@ -40,7 +41,7 @@ namespace rocprofiler { namespace aql { -CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache& agent, +CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t agent, const std::vector& metrics) : _agent(agent) { @@ -48,21 +49,39 @@ CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache& // for the counter. for(const auto& x : metrics) { - auto query_info = get_query_info(_agent.get_hsa_agent(), x); + auto query_info = get_query_info(_agent, x); _metrics.emplace_back().metric = x; uint32_t event_id = std::atoi(x.event().c_str()); + + ROCP_TRACE << fmt::format( + "Fetching events for counter {} (id={}, instance_count={}) on agent {} (name:{})", + x.name(), + event_id, + query_info.instance_count, + agent.handle, + rocprofiler::agent::get_agent(agent)->name); + for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index) { _metrics.back().instances.push_back( {static_cast(query_info.id), block_index, event_id}); + + _metrics.back().events.push_back( + {.block_index = block_index, + .event_id = event_id, + .flags = aqlprofile_pmc_event_flags_t{0}, + .block_name = static_cast(query_info.id)}); + bool validate_event_result; + + auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent)); + LOG_IF(FATAL, - hsa_ven_amd_aqlprofile_validate_event(_agent.get_hsa_agent(), - &_metrics.back().instances.back(), - &validate_event_result) != - HSA_STATUS_SUCCESS); + aqlprofile_validate_pmc_event(aql_agent, + &_metrics.back().events.back(), + &validate_event_result) != HSA_STATUS_SUCCESS); LOG_IF(FATAL, !validate_event_result) << "Invalid Metric: " << block_index << " " << event_id; _event_to_metric[std::make_tuple( @@ -84,12 +103,20 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) auto& pkt = *pkt_ptr; if(_events.empty()) { + ROCP_TRACE << "No events for pkt"; return pkt_ptr; } pkt.empty = false; + const auto* agent_cache = + rocprofiler::agent::get_agent_cache(CHECK_NOTNULL(rocprofiler::agent::get_agent(_agent))); + if(!agent_cache) + { + ROCP_FATAL << "No agent cache for agent id: " << _agent.handle; + } + pkt.profile = hsa_ven_amd_aqlprofile_profile_t{ - _agent.get_hsa_agent(), + agent_cache->get_hsa_agent(), HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM? _events.data(), static_cast(_events.size()), @@ -100,8 +127,8 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) auto& profile = pkt.profile; hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED; - ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_hsa_agent(), - _agent.kernarg_pool(), + ext.hsa_amd_agent_memory_pool_get_info_fn(agent_cache->get_hsa_agent(), + agent_cache->kernarg_pool(), HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, static_cast(&_access)); // Memory is accessable by both the GPU and CPU, unlock the command buffer for @@ -110,7 +137,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) { throw std::runtime_error( fmt::format("Agent {} does not allow memory pool access for counter collection", - _agent.get_hsa_agent().handle)); + agent_cache->get_hsa_agent().handle)); } CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, nullptr), "could not generate packet sizes"); @@ -136,7 +163,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) else { CHECK(*mem_loc); - hsa_agent_t agent = _agent.get_hsa_agent(); + hsa_agent_t agent = agent_cache->get_hsa_agent(); // Memory is accessable by both the GPU and CPU, unlock the command buffer for // sharing. LOG_IF(FATAL, @@ -149,9 +176,9 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) // Build command and output buffers pkt.command_buf_mallocd = alloc_and_check( - _agent.cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size); + agent_cache->cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size); pkt.output_buffer_malloced = alloc_and_check( - _agent.kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size); + agent_cache->kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size); memset(profile.output_buffer.ptr, 0x0, profile.output_buffer.size); CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, &pkt.start), "failed to create start packet"); @@ -160,6 +187,13 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext) pkt.start.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; pkt.stop.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; pkt.read.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE; + ROCP_TRACE << fmt::format("Following Packets Generated (output_buffer={}, output_size={}). " + "Start Pkt: {}, Read Pkt: {}, Stop Pkt: {}", + profile.output_buffer.ptr, + profile.output_buffer.size, + pkt.start, + pkt.read, + pkt.stop); return pkt_ptr; } @@ -243,14 +277,14 @@ CounterPacketConstruct::event_to_metric(const hsa_ven_amd_aqlprofile_event_t& ev return nullptr; } -const std::vector& +const std::vector& CounterPacketConstruct::get_counter_events(const counters::Metric& metric) const { for(const auto& prof_metric : _metrics) { if(prof_metric.metric.id() == metric.id()) { - return prof_metric.instances; + return prof_metric.events; } } throw std::runtime_error(fmt::format("Cannot Find Events for {}", metric)); @@ -264,15 +298,14 @@ CounterPacketConstruct::can_collect() std::map, int64_t> max_allowed; for(auto& metric : _metrics) { - for(auto& instance : metric.instances) + for(auto& instance : metric.events) { auto block_pair = std::make_pair(instance.block_name, instance.block_index); auto [iter, inserted] = counter_count.emplace(block_pair, 0); iter->second++; if(inserted) { - max_allowed.emplace(block_pair, - get_block_counters(_agent.get_hsa_agent(), instance)); + max_allowed.emplace(block_pair, get_block_counters(_agent, instance)); } } } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.hpp index f2d944c819..ad080b41f1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/packet_construct.hpp @@ -52,19 +52,17 @@ namespace aql class CounterPacketConstruct { public: - CounterPacketConstruct(const hsa::AgentCache& agent, + CounterPacketConstruct(rocprofiler_agent_id_t agent, const std::vector& metrics); std::unique_ptr construct_packet(const AmdExtTable&); const counters::Metric* event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const; std::vector get_all_events() const; - hsa_agent_t hsa_agent() const { return _agent.get_hsa_agent(); } + const std::vector& get_counter_events(const counters::Metric&) const; - const std::vector& get_counter_events( - const counters::Metric&) const; + rocprofiler_agent_id_t agent() const { return _agent; } private: - const hsa::AgentCache& _agent; static constexpr size_t MEM_PAGE_ALIGN = 0x1000; static constexpr size_t MEM_PAGE_MASK = MEM_PAGE_ALIGN - 1; static size_t getPageAligned(size_t p) { return (p + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; } @@ -74,10 +72,12 @@ protected: { counters::Metric metric; std::vector instances; + std::vector events; }; void can_collect(); + rocprofiler_agent_id_t _agent; std::vector _metrics; std::vector _events; std::map, counters::Metric> diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/CMakeLists.txt index 317a9cb316..3600354a5a 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/CMakeLists.txt @@ -2,7 +2,7 @@ rocprofiler_deactivate_clang_tidy() include(GoogleTest) -set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp") +set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp" "helpers.cpp") add_executable(aql-test) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp index 5f5179cb40..3b3d92e39d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp @@ -122,7 +122,7 @@ TEST(aql_profile, construct_packets) LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle); auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"}); ASSERT_EQ(metrics.size(), 1); - CounterPacketConstruct(agent, metrics); + CounterPacketConstruct(agent.get_rocp_agent()->id, metrics); } hsa_shut_down(); } @@ -142,7 +142,7 @@ TEST(aql_profile, too_many_counters) { try { - CounterPacketConstruct(agent, metrics); + CounterPacketConstruct(agent.get_rocp_agent()->id, metrics); } catch(const std::exception& e) { EXPECT_NE(e.what(), nullptr) << e.what(); @@ -164,7 +164,7 @@ TEST(aql_profile, packet_generation_single) for(const auto& [_, agent] : agents) { auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"}); - CounterPacketConstruct pkt(agent, metrics); + CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics); auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table()); EXPECT_TRUE(test_pkt); } @@ -183,7 +183,7 @@ TEST(aql_profile, packet_generation_multi) { auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES", "TA_FLAT_READ_WAVEFRONTS"}); - CounterPacketConstruct pkt(agent, metrics); + CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics); auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table()); EXPECT_TRUE(test_pkt); } diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/helpers.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/helpers.cpp new file mode 100644 index 0000000000..47ec458eb1 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/aql/tests/helpers.cpp @@ -0,0 +1,357 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "lib/rocprofiler-sdk/agent.hpp" +#include "lib/rocprofiler-sdk/aql/helpers.hpp" +#include "lib/rocprofiler-sdk/aql/packet_construct.hpp" +#include "lib/rocprofiler-sdk/counters/id_decode.hpp" +#include "lib/rocprofiler-sdk/counters/metrics.hpp" +#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" +#include "lib/rocprofiler-sdk/hsa/queue.hpp" +#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" + +using namespace rocprofiler; + +namespace +{ +AmdExtTable& +get_ext_table() +{ + static auto _v = []() { + auto val = AmdExtTable{}; + val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info; + val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools; + val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate; + val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free; + val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info; + val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access; + return val; + }(); + return _v; +} + +CoreApiTable& +get_api_table() +{ + static auto _v = []() { + auto val = CoreApiTable{}; + val.hsa_iterate_agents_fn = hsa_iterate_agents; + val.hsa_agent_get_info_fn = hsa_agent_get_info; + val.hsa_queue_create_fn = hsa_queue_create; + val.hsa_queue_destroy_fn = hsa_queue_destroy; + val.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed; + return val; + }(); + return _v; +} + +auto +findDeviceMetrics(const rocprofiler_agent_t& agent, const std::unordered_set& metrics) +{ + std::vector ret; + auto all_counters = counters::getBaseHardwareMetrics(); + + ROCP_ERROR << "Looking up counters for " << std::string(agent.name); + + auto gfx_metrics = common::get_val(all_counters, std::string(agent.name)); + if(!gfx_metrics) + { + ROCP_ERROR << "No counters found for " << std::string(agent.name); + return ret; + } + + for(auto& counter : *gfx_metrics) + { + if((metrics.count(counter.name()) > 0 || metrics.empty()) && !counter.block().empty()) + { + ret.push_back(counter); + } + } + return ret; +} + +hsa_ven_amd_aqlprofile_id_query_t +v1_get_query_info(hsa_agent_t agent, const counters::Metric& metric) +{ + hsa_ven_amd_aqlprofile_profile_t profile{.agent = agent}; + hsa_ven_amd_aqlprofile_id_query_t query = {metric.block().c_str(), 0, 0}; + if(hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query) != + HSA_STATUS_SUCCESS) + { + DLOG(FATAL) << fmt::format("AQL failed to query info for counter {}", metric); + throw std::runtime_error(fmt::format("AQL failed to query info for counter {}", metric)); + } + return query; +} + +uint32_t +v1_get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event) +{ + hsa_ven_amd_aqlprofile_profile_t query = {.agent = agent, + .type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, + .events = &event, + .event_count = 1}; + uint32_t max_block_counters = 0; + if(hsa_ven_amd_aqlprofile_get_info(&query, + HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, + &max_block_counters) != HSA_STATUS_SUCCESS) + { + throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}", + static_cast(event.block_name))); + } + return max_block_counters; +} + +rocprofiler_status_t +v1_get_dim_info(hsa_agent_t agent, + hsa_ven_amd_aqlprofile_event_t event, + uint32_t sample_id, + std::map& dims) +{ + auto callback = [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t { + auto& map = *static_cast*>(userdata); + map.emplace(id, extent); + return HSA_STATUS_SUCCESS; + }; + + if(hsa_ven_amd_aqlprofile_iterate_event_coord( + agent, event, sample_id, callback, static_cast(&dims)) != HSA_STATUS_SUCCESS) + { + return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD; + } + + return ROCPROFILER_STATUS_SUCCESS; +} + +void +test_init() +{ + HsaApiTable table; + table.amd_ext_ = &get_ext_table(); + table.core_ = &get_api_table(); + agent::construct_agent_cache(&table); + ASSERT_TRUE(hsa::get_queue_controller() != nullptr); + hsa::get_queue_controller()->init(get_api_table(), get_ext_table()); +} +} // namespace + +TEST(aql_helpers, get_query_info) +{ + auto agents = agent::get_agents(); + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + // auto aql_agent = *CHECK_NOTNULL(agent::get_aql_agent(agent->id)); + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + auto query = aql::get_query_info(agent->id, metric); + ROCP_INFO << fmt::format("{},{},{}", query.id, query.name, query.instance_count); + EXPECT_TRUE(query.name != nullptr); + EXPECT_TRUE(query.instance_count != 0); + EXPECT_TRUE(query.id < std::numeric_limits().max()); + } + } +} + +TEST(aql_helpers, get_query_info_compare_v1) +{ + ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); + test_init(); + auto agents = agent::get_agents(); + + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + auto query = aql::get_query_info(agent->id, metric); + auto query_v1 = + v1_get_query_info(agent::get_agent_cache(agent)->get_hsa_agent(), metric); + // v1 query with hsa_agent + + EXPECT_EQ(query.id, query_v1.id); + EXPECT_EQ(std::string(query.name), std::string(query_v1.name)); + EXPECT_EQ(query.instance_count, query_v1.instance_count); + } + } + hsa_shut_down(); +} + +TEST(aql_helpers, get_block_counters) +{ + auto agents = agent::get_agents(); + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + auto query = aql::get_query_info(agent->id, metric); + for(unsigned block_index = 0; block_index < query.instance_count; ++block_index) + { + aqlprofile_pmc_event_t event = { + .block_index = block_index, + .event_id = static_cast(std::atoi(metric.event().c_str())), + .flags = aqlprofile_pmc_event_flags_t{0}, + .block_name = static_cast(query.id)}; + auto max_block_counters = aql::get_block_counters(agent->id, event); + EXPECT_GT(max_block_counters, 0); + } + } + } +} + +TEST(aql_helpers, get_block_counters_compare_v1) +{ + ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); + test_init(); + auto agents = agent::get_agents(); + + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + auto query = aql::get_query_info(agent->id, metric); + for(unsigned block_index = 0; block_index < query.instance_count; ++block_index) + { + aqlprofile_pmc_event_t event = { + .block_index = block_index, + .event_id = static_cast(std::atoi(metric.event().c_str())), + .flags = aqlprofile_pmc_event_flags_t{0}, + .block_name = static_cast(query.id)}; + + hsa_ven_amd_aqlprofile_event_t event_v1 = { + .block_name = static_cast(query.id), + .block_index = block_index, + .counter_id = static_cast(std::atoi(metric.event().c_str()))}; + EXPECT_EQ(aql::get_block_counters(agent->id, event), + v1_get_block_counters(agent::get_agent_cache(agent)->get_hsa_agent(), + event_v1)); + } + } + } + hsa_shut_down(); +} + +TEST(aql_helpers, get_dim_info) +{ + auto agents = agent::get_agents(); + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + auto query = aql::get_query_info(agent->id, metric); + for(unsigned block_index = 0; block_index < query.instance_count; ++block_index) + { + aqlprofile_pmc_event_t event = { + .block_index = block_index, + .event_id = static_cast(std::atoi(metric.event().c_str())), + .flags = aqlprofile_pmc_event_flags_t{0}, + .block_name = static_cast(query.id)}; + std::map dims; + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, aql::get_dim_info(agent->id, event, 0, dims)); + EXPECT_GT(dims.size(), 0); + } + } + } +} + +TEST(aql_helpers, get_dim_info_compare_v1) +{ + ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); + test_init(); + auto agents = agent::get_agents(); + + ASSERT_FALSE(agents.empty()); + + for(auto agent : agents) + { + if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue; + auto metrics = findDeviceMetrics(*agent, {}); + ASSERT_FALSE(metrics.empty()); + + for(auto& metric : metrics) + { + std::map dims; + std::map dims_v1; + auto query = aql::get_query_info(agent->id, metric); + for(unsigned block_index = 0; block_index < query.instance_count; ++block_index) + { + aqlprofile_pmc_event_t event = { + .block_index = block_index, + .event_id = static_cast(std::atoi(metric.event().c_str())), + .flags = aqlprofile_pmc_event_flags_t{0}, + .block_name = static_cast(query.id)}; + + hsa_ven_amd_aqlprofile_event_t event_v1 = { + .block_name = static_cast(query.id), + .block_index = block_index, + .counter_id = static_cast(std::atoi(metric.event().c_str()))}; + EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, aql::get_dim_info(agent->id, event, 0, dims)); + EXPECT_EQ( + ROCPROFILER_STATUS_SUCCESS, + v1_get_dim_info( + agent::get_agent_cache(agent)->get_hsa_agent(), event_v1, 0, dims_v1)); + EXPECT_EQ(dims.size(), dims_v1.size()); + EXPECT_EQ(dims, dims_v1); + } + } + } + hsa_shut_down(); +} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp index 4d28c490b0..ff9e857204 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters.cpp @@ -79,11 +79,6 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id, /** * @brief This call returns the number of instances specific counter contains. - * WARNING: There is a restriction on this call in the alpha/beta release - * of rocprof. This call will not return correct instance information in - * tool_init and must be called as part of the dispatch callback for accurate - * instance counting information. The reason for this restriction is that HSA - * is not yet loaded on tool_init. * * @param [in] agent rocprofiler agent * @param [in] counter_id counter id (obtained from iterate_agent_supported_counters) @@ -97,11 +92,6 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t, { *instance_count = 0; - if(rocprofiler::counters::get_dimension_cache().empty()) - { - return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED; - } - const auto* dims = rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(), counter_id.handle); if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; @@ -174,11 +164,6 @@ rocprofiler_iterate_counter_dimensions(rocprofiler_counter_id_t id, rocprofiler_available_dimensions_cb_t info_cb, void* user_data) { - if(rocprofiler::counters::get_dimension_cache().empty()) - { - return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED; - } - const auto* dims = rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(), id.handle); if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp index 439db1bd50..2e6f9414e0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/core.cpp @@ -219,7 +219,7 @@ counter_callback_info::setup_profile_config(const hsa::AgentCache& age } profile->pkt_generator = std::make_unique( - agent, + agent.get_rocp_agent()->id, std::vector{profile->reqired_hw_counters.begin(), profile->reqired_hw_counters.end()}); return ROCPROFILER_STATUS_SUCCESS; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/dimensions.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/dimensions.cpp index 1a0df8e8c8..d367219e28 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/dimensions.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/dimensions.cpp @@ -56,18 +56,17 @@ getBlockDimensions(std::string_view agent, const Metric& metric) std::vector ret; - for(const auto& [_, maybe_agent] : - CHECK_NOTNULL(hsa::get_queue_controller())->get_supported_agents()) + for(const auto* maybe_agent : rocprofiler::agent::get_agents()) { - if(maybe_agent.name() == agent) + if(std::string(maybe_agent->name) == agent) { - aql::CounterPacketConstruct pkt_gen(maybe_agent, {metric}); + aql::CounterPacketConstruct pkt_gen(maybe_agent->id, {metric}); const auto& events = pkt_gen.get_counter_events(metric); for(const auto& event : events) { std::map dims; - auto status = aql::get_dim_info(maybe_agent.get_hsa_agent(), event, 0, dims); + auto status = aql::get_dim_info(maybe_agent->id, event, 0, dims); CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS) << rocprofiler_get_status_string(status); @@ -103,16 +102,6 @@ get_dimension_cache() common::static_object>>:: construct([]() -> std::unordered_map> { std::unordered_map> dims; - /** - * Fails if HSA is not loaded by retruning nothing. This should not remain after - * AQL is transistioned away from HSA. - */ - if(CHECK_NOTNULL(rocprofiler::hsa::get_queue_controller()) - ->get_supported_agents() - .empty()) - { - return {}; - } const auto& asts = counters::get_ast_map(); for(const auto& [gfx, metrics] : asts) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp index f0a84ef4bb..3f188b573e 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/evaluate_ast.cpp @@ -456,11 +456,15 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket { std::unordered_map>* data; const aql::CounterPacketConstruct* pkt_gen; + hsa_agent_t agent; }; + auto agent = CHECK_NOTNULL(rocprofiler::agent::get_agent_cache( + CHECK_NOTNULL(rocprofiler::agent::get_agent(pkt_gen->agent())))) + ->get_hsa_agent(); std::unordered_map> ret; - if(pkt.isEmpty()) return ret; - it_data aql_data{.data = &ret, .pkt_gen = pkt_gen}; + if(pkt.empty) return ret; + it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = agent}; ; hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data( &pkt.profile, @@ -477,10 +481,8 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket auto& next_rec = vec.emplace_back(); set_counter_in_rec(next_rec.id, {.handle = metric->id()}); // Actual dimension info needs to be used here in the future - auto aql_status = aql::set_dim_id_from_sample(next_rec.id, - it.pkt_gen->hsa_agent(), - info_data->pmc_data.event, - info_data->sample_id); + auto aql_status = aql::set_dim_id_from_sample( + next_rec.id, it.agent, info_data->pmc_data.event, info_data->sample_id); CHECK_EQ(aql_status, ROCPROFILER_STATUS_SUCCESS) << rocprofiler_get_status_string(aql_status); diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/dimension.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/dimension.cpp index 7d6dc6c89a..dfd6bf7d0d 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/dimension.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/counters/tests/dimension.cpp @@ -251,12 +251,12 @@ TEST(dimension, block_dim_test) } else { - aql::CounterPacketConstruct pkt_gen(agent, {metric}); + aql::CounterPacketConstruct pkt_gen(agent.get_rocp_agent()->id, {metric}); const auto& events = pkt_gen.get_counter_events(metric); for(const auto& event : events) { std::map dims; - auto status = aql::get_dim_info(agent.get_hsa_agent(), event, 0, dims); + auto status = aql::get_dim_info(agent.get_rocp_agent()->id, event, 0, dims); CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS) << rocprofiler_get_status_string(status); for(const auto& [id, extent] : dims) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp index 286a275a40..cb50d44d85 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp @@ -62,16 +62,20 @@ public: AQLPacket(const AQLPacket&) = delete; AQLPacket& operator=(const AQLPacket&) = delete; + aqlprofile_handle_t pkt_handle = {.handle = 0}; + aqlprofile_pmc_aql_packets_t pkts = {.start_packet = null_amd_aql_pm4_packet, + .stop_packet = null_amd_aql_pm4_packet, + .read_packet = null_amd_aql_pm4_packet}; + + bool empty = {true}; hsa_ven_amd_aqlprofile_profile_t profile = {}; hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet; hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet; hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet; - common::container::small_vector before_krn_pkt = {}; common::container::small_vector after_krn_pkt = {}; bool isEmpty() const { return empty; } - bool empty = true; }; class CounterAQLPacket : public AQLPacket diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp index 03c4ec0ecb..3f300fa0bf 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue_controller.cpp @@ -191,7 +191,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table) // Generate supported agents for(const auto* itr : agents) { - auto cached_agent = agent::get_agent_cache(itr); + const auto* cached_agent = agent::get_agent_cache(itr); if(cached_agent && cached_agent->get_rocp_agent()->type == ROCPROFILER_AGENT_TYPE_GPU) { get_supported_agents().emplace(cached_agent->index(), *cached_agent);