Removal of HSA from counter collection (#697)

* Minor fix

Removal of HSA from counter collection

Tests for AQL

Updated counter collection client to build profiles in tool init

* Rebased

* Debug printing

* Formatting

* More format

* fix shadowing

---------

Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>

[ROCm/rocprofiler-sdk commit: c2f659ab5c]
This commit is contained in:
Benjamin Welton
2024-04-12 18:46:10 -07:00
committed by GitHub
parent 2aef3c3d15
commit 6f0c1958da
18 changed files with 639 additions and 199 deletions
@@ -121,6 +121,12 @@ buffered_callback(rocprofiler_context_id_t,
*output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n";
}
std::unordered_map<uint64_t, rocprofiler_profile_config_id_t>&
get_profile_cache()
{
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache;
return profile_cache;
}
/**
* Callback from rocprofiler when a kernel dispatch is enqueued into the HSA queue.
* rocprofiler_profile_config_id_t* is a return to specify what counters to collect
@@ -140,12 +146,9 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
* set.
*/
static std::shared_mutex m_mutex = {};
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
auto search_cache = [&]() {
if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle);
pos != profile_cache.end())
if(auto pos = get_profile_cache().find(dispatch_data.dispatch_info.agent_id.handle);
pos != get_profile_cache().end())
{
*config = pos->second;
return true;
@@ -153,22 +156,21 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
return false;
};
if(!search_cache())
{
auto rlock = std::shared_lock{m_mutex};
if(search_cache()) return;
std::cerr << "No profile for agent found in cache\n";
exit(-1);
}
}
auto wlock = std::unique_lock{m_mutex};
if(search_cache()) return;
// Counters we want to collect (here its SQ_WAVES)
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
// GPU Counter IDs
rocprofiler_profile_config_id_t
build_profile_for_agent(rocprofiler_agent_id_t agent)
{
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
std::vector<rocprofiler_counter_id_t> gpu_counters;
// Iterate through the agents and get the counters available on that agent
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
dispatch_data.dispatch_info.agent_id,
agent,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
@@ -185,7 +187,6 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
"Could not fetch supported counters");
std::vector<rocprofiler_counter_id_t> collect_counters;
// Look for the counters contained in counters_to_collect in gpu_counters
for(auto& counter : gpu_counters)
{
rocprofiler_counter_info_v0_t version;
@@ -200,17 +201,12 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
}
}
// Create a collection profile for the counters
rocprofiler_profile_config_id_t profile;
ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id,
collect_counters.data(),
collect_counters.size(),
&profile),
ROCPROFILER_CALL(rocprofiler_create_profile_config(
agent, collect_counters.data(), collect_counters.size(), &profile),
"Could not construct profile cfg");
profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile);
// Return the profile to collect those counters for this dispatch
*config = profile;
return profile;
}
int
@@ -227,6 +223,41 @@ tool_init(rocprofiler_client_finalize_t, void* user_data)
&get_buffer()),
"buffer creation failed");
std::vector<rocprofiler_agent_v0_t> agents;
rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver,
const void** agents_arr,
size_t num_agents,
void* udata) {
if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0)
throw std::runtime_error{"unexpected rocprofiler agent version"};
auto* agents_v = static_cast<std::vector<rocprofiler_agent_v0_t>*>(udata);
for(size_t i = 0; i < num_agents; ++i)
agents_v->emplace_back(*static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]));
return ROCPROFILER_STATUS_SUCCESS;
};
ROCPROFILER_CALL(
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
iterate_cb,
sizeof(rocprofiler_agent_t),
const_cast<void*>(static_cast<const void*>(&agents))),
"query available agents");
// Construct the profiles in advance for each agent that is a GPU
for(const auto& agent : agents)
{
if(agent.type == ROCPROFILER_AGENT_TYPE_GPU)
{
get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id));
}
}
if(agents.empty())
{
std::cerr << "No agents found" << std::endl;
return 1;
}
auto client_thread = rocprofiler_callback_thread_t{};
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
"failure creating callback thread");
@@ -697,6 +697,47 @@ get_agent(rocprofiler_agent_id_t id)
return nullptr;
}
/**
 * Returns the aqlprofile handles for the agents reported by get_agents().
 *
 * The list is built once, on the first call, and kept in a function-local static.
 * Exactly one entry is appended per element of get_agents(), in iteration order, so
 * the position of a handle matches the position of its agent (get_aql_agent looks
 * handles up by that position). If an agent entry is null or registration with
 * aqlprofile fails, a warning is logged and the zero-initialized handle is stored
 * so that the positions stay aligned.
 */
const std::vector<aqlprofile_agent_handle_t>&
get_aql_handles()
{
    static const std::vector<aqlprofile_agent_handle_t> _v = []() {
        std::vector<aqlprofile_agent_handle_t> agent_handles;
        for(const auto& agent : get_agents())
        {
            aqlprofile_agent_handle_t handle = {.handle = 0};
            if(!agent)
            {
                // get_aql_agent tolerates null entries; do the same here instead of
                // dereferencing a null agent
                ROCP_WARNING << "Skipping aqlprofile registration of null agent entry";
            }
            else
            {
                aqlprofile_agent_info_t agent_info = {
                    .agent_gfxip          = agent->name,
                    .xcc_num              = agent->num_xcc,
                    .se_num               = agent->num_shader_banks,
                    .cu_num               = agent->cu_count,
                    .shader_arrays_per_se = agent->simd_arrays_per_engine};
                if(aqlprofile_register_agent(&handle, &agent_info) != HSA_STATUS_SUCCESS)
                {
                    ROCP_WARNING << "Failed to register agent " << agent->name;
                }
            }
            // always append: keeps handle index == agent index
            agent_handles.push_back(handle);
        }
        return agent_handles;
    }();
    return _v;
}
/**
 * Looks up the aqlprofile handle belonging to the agent with the given id.
 *
 * Walks get_agents() and, for the first non-null agent whose id handle equals
 * @p id, returns a pointer to the entry of get_aql_handles() at the same position.
 *
 * @param id rocprofiler agent id to search for
 * @return pointer to the matching handle, or nullptr when no agent matches
 */
const aqlprofile_agent_handle_t*
get_aql_agent(rocprofiler_agent_id_t id)
{
    size_t index = 0;
    for(const auto& agent : get_agents())
    {
        const bool is_match = agent && agent->id.handle == id.handle;
        if(is_match) return &get_aql_handles().at(index);
        ++index;
    }
    return nullptr;
}
void
construct_agent_cache(::HsaApiTable* table)
{
@@ -916,15 +957,15 @@ get_rocprofiler_agent(hsa_agent_t agent)
return nullptr;
}
std::optional<hsa::AgentCache>
const hsa::AgentCache*
get_agent_cache(const rocprofiler_agent_t* agent)
{
for(const auto& itr : get_agent_caches())
{
if(itr == agent) return itr;
if(itr == agent) return &itr;
}
return std::nullopt;
return nullptr;
}
std::optional<hsa::AgentCache>
@@ -24,6 +24,7 @@
#include <rocprofiler-sdk/agent.h>
#include "lib/rocprofiler-sdk/aql/aql_profile_v2.h"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include <hsa/hsa_api_trace.h>
@@ -51,7 +52,7 @@ get_hsa_agent(const rocprofiler_agent_t* agent);
const rocprofiler_agent_t*
get_rocprofiler_agent(hsa_agent_t agent);
std::optional<hsa::AgentCache>
const hsa::AgentCache*
get_agent_cache(const rocprofiler_agent_t* agent);
std::optional<hsa::AgentCache>
@@ -66,6 +67,9 @@ get_agent_cache(hsa_agent_t agent);
std::unordered_set<std::string>&
get_agent_available_properties();
const aqlprofile_agent_handle_t*
get_aql_agent(rocprofiler_agent_id_t id);
void
construct_agent_cache(::HsaApiTable* table);
} // namespace agent
@@ -1,34 +1,11 @@
// MIT License
//
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <hsa/hsa.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>
#ifdef __cplusplus
extern "C" {
#endif
#define PUBLIC_API
extern "C" {
typedef struct
{
uint64_t handle;
@@ -149,7 +126,7 @@ typedef struct
* @retval HSA_STATUS_SUCCESS registration ok
* @retval HSA_STATUS_ERROR registration failed
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
const aqlprofile_agent_info_t* agent_info);
@@ -179,7 +156,7 @@ typedef enum
// counters disable command buffer
} aqlprofile_pmc_info_type_t;
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
aqlprofile_pmc_info_type_t attribute,
void* value);
@@ -244,7 +221,7 @@ typedef hsa_status_t (*aqlprofile_memory_copy_t)(void* dst,
* @retval HSA_STATUS_SUCCESS if the event was validated.
* @retval HSA_STATUS_ERROR if the event was not validated.
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
const aqlprofile_pmc_event_t* event,
bool* result);
@@ -258,7 +235,7 @@ aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
* @retval HSA_STATUS_ERROR if some callback returns an error
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
aqlprofile_pmc_data_callback_t callback,
void* userdata);
@@ -282,7 +259,7 @@ typedef struct
* @param[in] dealloc_cb Function to free memory allocated by alloc_cb
* @param[in] userdata Data passed back to user via memory alloc callback
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
aqlprofile_pmc_aql_packets_t* packets,
aqlprofile_pmc_profile_t profile,
@@ -295,7 +272,7 @@ aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
* @brief Function to delete AQL packets after creation by aqlprofile_pmc_create_packets
* @param[in] handle Returned by aqlprofile_pmc_create_packets()
*/
void
PUBLIC_API void
aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
/**
@@ -307,7 +284,7 @@ aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
* @retval HSA_STATUS_ERROR if some callback returns an error
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
aqlprofile_att_data_callback_t callback,
void* userdata);
@@ -330,7 +307,7 @@ typedef struct
* @retval HSA_STATUS_SUCCESS if all packets created successfully
* @retval HSA_STATUS_ERROR otherwise
*/
hsa_status_t
PUBLIC_API hsa_status_t
aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
aqlprofile_att_control_aql_packets_t* packets,
aqlprofile_att_profile_t profile,
@@ -339,7 +316,7 @@ aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
aqlprofile_memory_copy_t memcpy_cb,
void* userdata);
void
PUBLIC_API void
aqlprofile_att_delete_packets(aqlprofile_handle_t handle);
/**
@@ -476,21 +453,20 @@ enum WaveTrapStatus
TRAP_STANDBY = 2
};
typedef struct
struct __attribute__((packed)) pcinfo_t
{
size_t addr;
size_t marker_id;
} pcinfo_t;
int marker_id;
};
typedef struct __attribute__((packed))
{
uint64_t category : 8;
uint64_t hitcount : 56;
uint64_t latency;
pcinfo_t pc;
int hitcount;
size_t latency;
} att_trace_event_t;
typedef struct
struct wave_data_t
{
uint8_t simd;
uint8_t wave_id;
@@ -498,42 +474,65 @@ typedef struct
uint8_t reserved;
// VMEM Pipeline: instrs and stalls
int num_vmem_instrs;
int num_vmem_stalls;
int num_vmem_instrs = 0;
int num_vmem_stalls = 0;
// FLAT instrs and stalls
int num_flat_instrs;
int num_flat_stalls;
int num_flat_instrs = 0;
int num_flat_stalls = 0;
// LDS instr and stalls
int num_lds_instrs;
int num_lds_stalls;
int num_lds_instrs = 0;
int num_lds_stalls = 0;
// SCA instrs stalls
int num_salu_instrs;
int num_smem_instrs;
int num_salu_stalls;
int num_smem_stalls;
int num_salu_instrs = 0;
int num_smem_instrs = 0;
int num_salu_stalls = 0;
int num_smem_stalls = 0;
// Branch
int num_branch_instrs;
int num_branch_taken_instrs;
int num_branch_stalls;
int num_branch_instrs = 0;
int num_branch_taken_instrs = 0;
int num_branch_stalls = 0;
// total VMEM/FLAT/LDS/SMEM instructions issued
int num_mem_instrs; // total issued memory instructions
int num_valu_stalls;
size_t num_valu_instrs;
size_t num_issued_instrs; // total issued instructions (compute + memory)
int num_mem_instrs = 0; // total issued memory instructions
int num_valu_stalls = 0;
size_t num_valu_instrs = 0;
size_t num_issued_instrs = 0; // total issued instructions (compute + memory)
int64_t begin_time; // Begin and end cycle
int64_t end_time;
int64_t traceID;
int64_t begin_time = 0; // Begin and end cycle
int64_t end_time = 0;
int64_t traceID = -1;
size_t timeline_size;
size_t instructions_size;
size_t timeline_size = 0;
size_t instructions_size = 0;
wave_state_t* timeline_array;
wave_instruction_t* instructions_array;
} wave_data_t;
};
/**
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
* @param [in] id Integer identifying type ID.
* @param [in] name Name of the trace type.
* @param [in] data User data supplied back to the caller
* @retval HSA_STATUS_SUCCESS Continues iteration
* @retval OTHERS Any other HSA return value stops iteration, passing back this value through
* @ref aqlprofile_att_iterate_trace_type_ids
*/
typedef hsa_status_t (*aqlprofile_att_tracename_callback_t)(int id, const char* name, void* data);
/**
* @brief Iterate over all possible event coordinate IDs and their names.
* @param [in] callback Callback to use for iteration of trace types
* @param [in] userdata Data to supply to callback @ref aqlprofile_att_tracename_callback_t
* @retval HSA_STATUS_SUCCESS if successful
* @retval HSA_STATUS_ERROR if error on iteration
* @retval OTHERS If @ref aqlprofile_att_tracename_callback_t returns non-HSA_STATUS_SUCCESS,
* that value is returned.
*/
PUBLIC_API hsa_status_t
aqlprofile_att_iterate_trace_type_ids(aqlprofile_att_tracename_callback_t callback, void* userdata);
/**
* @brief Callback for rocprofiler to return ISA to aqlprofile ATT parser.
@@ -561,7 +560,7 @@ typedef hsa_status_t (*aqlprofile_att_isa_callback_t)(char* isa_instruction,
uint64_t* isa_memory_size,
uint64_t* isa_size,
uint64_t* source_size,
uint64_t marker_id,
uint32_t marker_id,
uint64_t offset,
void* userdata);
@@ -604,11 +603,11 @@ typedef uint64_t (*aqlprofile_att_se_data_callback_t)(int* shader_engine_id
/**
* @brief Callback returning from aqlprofile_att_parser_iterate_event_list
* @param[in] trace_event_id ID of the event.
* @param[in] trace_event_metadata Null-terminated string, entries separated by ';'
* @param[in] trace_event_name Event name.
* @param[in] userdata userdata.
*/
typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event_id,
const char* trace_event_metadata,
const char* trace_event_name,
void* userdata);
/**
@@ -616,7 +615,7 @@ typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event
* @param[in] callback Callback where events are returned to.
* @param[in] userdata userdata.
*/
void
hsa_status_t
aqlprofile_att_parser_iterate_event_list(aqlprofile_att_parser_iterate_event_cb_t callback,
void* userdata);
@@ -635,17 +634,17 @@ aqlprofile_att_parse_data(aqlprofile_att_se_data_callback_t se_data_callback,
void* userdata);
/**
* @brief Contains flags for how code objects are interpreted
* @brief Contains information of code objects. IDs can be reused for different load addresses.
*/
typedef union
{
uint32_t raw;
struct
{
uint32_t isUnload : 1; // 0 if code object is being loaded, 1 for unload
uint32_t bFromStart : 1; // Has this code object been loaded before thread trace started?
uint32_t legacy_id : 30; // Legacy code object ID, if it fits in 30 bits.
uint32_t id : 30; // To be passed back to isa_string_callback in marker_id
};
uint32_t raw;
} aqlprofile_att_header_marker_t;
/**
@@ -653,7 +652,6 @@ typedef union
* @param[out] packets Returned packet
* @param[in] handle The handle created from aqlprofile_att_create_packets()
* @param[in] header Header containing code object information created from profiler
* @param[in] id To be passed back to isa_string_callback in marker_id
* @param[in] addr Code object loaded address.
* @param[in] size Code object loaded size.
*/
@@ -661,10 +659,6 @@ hsa_status_t
aqlprofile_att_codeobj_load_marker(hsa_ext_amd_aql_pm4_packet_t* packets,
aqlprofile_handle_t handle,
aqlprofile_att_header_marker_t header,
uint64_t id,
uint64_t addr,
uint64_t size);
#ifdef __cplusplus
}
#endif
@@ -36,12 +36,12 @@ namespace rocprofiler
namespace aql
{
hsa_ven_amd_aqlprofile_id_query_t
get_query_info(hsa_agent_t agent, const counters::Metric& metric)
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric)
{
hsa_ven_amd_aqlprofile_profile_t profile{.agent = agent};
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
aqlprofile_pmc_profile_t profile{.agent = aql_agent};
hsa_ven_amd_aqlprofile_id_query_t query = {metric.block().c_str(), 0, 0};
if(hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query) !=
HSA_STATUS_SUCCESS)
if(aqlprofile_get_pmc_info(&profile, AQLPROFILE_INFO_BLOCK_ID, &query) != HSA_STATUS_SUCCESS)
{
ROCP_DFATAL << fmt::format("AQL failed to query info for counter {}", metric);
throw std::runtime_error(fmt::format("AQL failed to query info for counter {}", metric));
@@ -50,16 +50,13 @@ get_query_info(hsa_agent_t agent, const counters::Metric& metric)
}
uint32_t
get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event)
get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event)
{
hsa_ven_amd_aqlprofile_profile_t query = {.agent = agent,
.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
.events = &event,
.event_count = 1};
uint32_t max_block_counters = 0;
if(hsa_ven_amd_aqlprofile_get_info(&query,
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS,
&max_block_counters) != HSA_STATUS_SUCCESS)
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
aqlprofile_pmc_profile_t query = {.agent = aql_agent, .events = &event, .event_count = 1};
uint32_t max_block_counters = 0;
if(aqlprofile_get_pmc_info(&query, AQLPROFILE_INFO_BLOCK_COUNTERS, &max_block_counters) !=
HSA_STATUS_SUCCESS)
{
throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}",
static_cast<int64_t>(event.block_name)));
@@ -94,10 +91,10 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
}
rocprofiler_status_t
get_dim_info(hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id,
std::map<int, uint64_t>& dims)
get_dim_info(rocprofiler_agent_id_t agent,
aqlprofile_pmc_event_t event,
uint32_t sample_id,
std::map<int, uint64_t>& dims)
{
auto callback = [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t {
auto& map = *static_cast<std::map<int, uint64_t>*>(userdata);
@@ -105,8 +102,10 @@ get_dim_info(hsa_agent_t agent,
return HSA_STATUS_SUCCESS;
};
if(hsa_ven_amd_aqlprofile_iterate_event_coord(
agent, event, sample_id, callback, static_cast<void*>(&dims)) != HSA_STATUS_SUCCESS)
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
if(aqlprofile_iterate_event_coord(
aql_agent, event, sample_id, callback, static_cast<void*>(&dims)) != HSA_STATUS_SUCCESS)
{
return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD;
}
@@ -30,6 +30,7 @@
#include <rocprofiler-sdk/fwd.h>
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
namespace rocprofiler
@@ -38,18 +39,18 @@ namespace aql
{
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile
hsa_ven_amd_aqlprofile_id_query_t
get_query_info(hsa_agent_t agent, const counters::Metric& metric);
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric);
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS from aqlprofile
uint32_t
get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event);
get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event);
// Query dimension ids for counter event. Returns AQLProfiler ID -> extent
rocprofiler_status_t
get_dim_info(hsa_agent_t agent,
hsa_ven_amd_aqlprofile_event_t event,
uint32_t sample_id,
std::map<int, uint64_t>& dims);
get_dim_info(rocprofiler_agent_id_t agent,
aqlprofile_pmc_event_t event,
uint32_t sample_id,
std::map<int, uint64_t>& dims);
// Set dimension ids into id for sample
rocprofiler_status_t
@@ -21,6 +21,7 @@
// SOFTWARE.
#include "lib/rocprofiler-sdk/aql/packet_construct.hpp"
#include "lib/rocprofiler-sdk/hsa/details/fmt.hpp"
#include <fmt/core.h>
#include <hsa/hsa_ext_amd.h>
@@ -40,7 +41,7 @@ namespace rocprofiler
{
namespace aql
{
CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache& agent,
CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t agent,
const std::vector<counters::Metric>& metrics)
: _agent(agent)
{
@@ -48,21 +49,39 @@ CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache&
// for the counter.
for(const auto& x : metrics)
{
auto query_info = get_query_info(_agent.get_hsa_agent(), x);
auto query_info = get_query_info(_agent, x);
_metrics.emplace_back().metric = x;
uint32_t event_id = std::atoi(x.event().c_str());
ROCP_TRACE << fmt::format(
"Fetching events for counter {} (id={}, instance_count={}) on agent {} (name:{})",
x.name(),
event_id,
query_info.instance_count,
agent.handle,
rocprofiler::agent::get_agent(agent)->name);
for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index)
{
_metrics.back().instances.push_back(
{static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id),
block_index,
event_id});
_metrics.back().events.push_back(
{.block_index = block_index,
.event_id = event_id,
.flags = aqlprofile_pmc_event_flags_t{0},
.block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id)});
bool validate_event_result;
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
LOG_IF(FATAL,
hsa_ven_amd_aqlprofile_validate_event(_agent.get_hsa_agent(),
&_metrics.back().instances.back(),
&validate_event_result) !=
HSA_STATUS_SUCCESS);
aqlprofile_validate_pmc_event(aql_agent,
&_metrics.back().events.back(),
&validate_event_result) != HSA_STATUS_SUCCESS);
LOG_IF(FATAL, !validate_event_result)
<< "Invalid Metric: " << block_index << " " << event_id;
_event_to_metric[std::make_tuple(
@@ -84,12 +103,20 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
auto& pkt = *pkt_ptr;
if(_events.empty())
{
ROCP_TRACE << "No events for pkt";
return pkt_ptr;
}
pkt.empty = false;
const auto* agent_cache =
rocprofiler::agent::get_agent_cache(CHECK_NOTNULL(rocprofiler::agent::get_agent(_agent)));
if(!agent_cache)
{
ROCP_FATAL << "No agent cache for agent id: " << _agent.handle;
}
pkt.profile = hsa_ven_amd_aqlprofile_profile_t{
_agent.get_hsa_agent(),
agent_cache->get_hsa_agent(),
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM?
_events.data(),
static_cast<uint32_t>(_events.size()),
@@ -100,8 +127,8 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
auto& profile = pkt.profile;
hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_hsa_agent(),
_agent.kernarg_pool(),
ext.hsa_amd_agent_memory_pool_get_info_fn(agent_cache->get_hsa_agent(),
agent_cache->kernarg_pool(),
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
static_cast<void*>(&_access));
// Memory is accessible by both the GPU and CPU, unlock the command buffer for
@@ -110,7 +137,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
{
throw std::runtime_error(
fmt::format("Agent {} does not allow memory pool access for counter collection",
_agent.get_hsa_agent().handle));
agent_cache->get_hsa_agent().handle));
}
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, nullptr), "could not generate packet sizes");
@@ -136,7 +163,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
else
{
CHECK(*mem_loc);
hsa_agent_t agent = _agent.get_hsa_agent();
hsa_agent_t agent = agent_cache->get_hsa_agent();
// Memory is accessible by both the GPU and CPU, unlock the command buffer for
// sharing.
LOG_IF(FATAL,
@@ -149,9 +176,9 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
// Build command and output buffers
pkt.command_buf_mallocd = alloc_and_check(
_agent.cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size);
agent_cache->cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size);
pkt.output_buffer_malloced = alloc_and_check(
_agent.kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size);
agent_cache->kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size);
memset(profile.output_buffer.ptr, 0x0, profile.output_buffer.size);
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, &pkt.start), "failed to create start packet");
@@ -160,6 +187,13 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
pkt.start.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
pkt.stop.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
pkt.read.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
ROCP_TRACE << fmt::format("Following Packets Generated (output_buffer={}, output_size={}). "
"Start Pkt: {}, Read Pkt: {}, Stop Pkt: {}",
profile.output_buffer.ptr,
profile.output_buffer.size,
pkt.start,
pkt.read,
pkt.stop);
return pkt_ptr;
}
@@ -243,14 +277,14 @@ CounterPacketConstruct::event_to_metric(const hsa_ven_amd_aqlprofile_event_t& ev
return nullptr;
}
const std::vector<hsa_ven_amd_aqlprofile_event_t>&
const std::vector<aqlprofile_pmc_event_t>&
CounterPacketConstruct::get_counter_events(const counters::Metric& metric) const
{
for(const auto& prof_metric : _metrics)
{
if(prof_metric.metric.id() == metric.id())
{
return prof_metric.instances;
return prof_metric.events;
}
}
throw std::runtime_error(fmt::format("Cannot Find Events for {}", metric));
@@ -264,15 +298,14 @@ CounterPacketConstruct::can_collect()
std::map<std::pair<hsa_ven_amd_aqlprofile_block_name_t, uint32_t>, int64_t> max_allowed;
for(auto& metric : _metrics)
{
for(auto& instance : metric.instances)
for(auto& instance : metric.events)
{
auto block_pair = std::make_pair(instance.block_name, instance.block_index);
auto [iter, inserted] = counter_count.emplace(block_pair, 0);
iter->second++;
if(inserted)
{
max_allowed.emplace(block_pair,
get_block_counters(_agent.get_hsa_agent(), instance));
max_allowed.emplace(block_pair, get_block_counters(_agent, instance));
}
}
}
@@ -52,19 +52,17 @@ namespace aql
class CounterPacketConstruct
{
public:
CounterPacketConstruct(const hsa::AgentCache& agent,
CounterPacketConstruct(rocprofiler_agent_id_t agent,
const std::vector<counters::Metric>& metrics);
std::unique_ptr<hsa::CounterAQLPacket> construct_packet(const AmdExtTable&);
const counters::Metric* event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const;
std::vector<hsa_ven_amd_aqlprofile_event_t> get_all_events() const;
hsa_agent_t hsa_agent() const { return _agent.get_hsa_agent(); }
const std::vector<aqlprofile_pmc_event_t>& get_counter_events(const counters::Metric&) const;
const std::vector<hsa_ven_amd_aqlprofile_event_t>& get_counter_events(
const counters::Metric&) const;
rocprofiler_agent_id_t agent() const { return _agent; }
private:
const hsa::AgentCache& _agent;
static constexpr size_t MEM_PAGE_ALIGN = 0x1000;
static constexpr size_t MEM_PAGE_MASK = MEM_PAGE_ALIGN - 1;
static size_t getPageAligned(size_t p) { return (p + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; }
@@ -74,10 +72,12 @@ protected:
{
counters::Metric metric;
std::vector<hsa_ven_amd_aqlprofile_event_t> instances;
std::vector<aqlprofile_pmc_event_t> events;
};
void can_collect();
rocprofiler_agent_id_t _agent;
std::vector<AQLProfileMetric> _metrics;
std::vector<hsa_ven_amd_aqlprofile_event_t> _events;
std::map<std::tuple<hsa_ven_amd_aqlprofile_block_name_t, uint32_t, uint32_t>, counters::Metric>
@@ -2,7 +2,7 @@ rocprofiler_deactivate_clang_tidy()
include(GoogleTest)
set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp")
set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp" "helpers.cpp")
add_executable(aql-test)
@@ -122,7 +122,7 @@ TEST(aql_profile, construct_packets)
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle);
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
ASSERT_EQ(metrics.size(), 1);
CounterPacketConstruct(agent, metrics);
CounterPacketConstruct(agent.get_rocp_agent()->id, metrics);
}
hsa_shut_down();
}
@@ -142,7 +142,7 @@ TEST(aql_profile, too_many_counters)
{
try
{
CounterPacketConstruct(agent, metrics);
CounterPacketConstruct(agent.get_rocp_agent()->id, metrics);
} catch(const std::exception& e)
{
EXPECT_NE(e.what(), nullptr) << e.what();
@@ -164,7 +164,7 @@ TEST(aql_profile, packet_generation_single)
for(const auto& [_, agent] : agents)
{
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
CounterPacketConstruct pkt(agent, metrics);
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table());
EXPECT_TRUE(test_pkt);
}
@@ -183,7 +183,7 @@ TEST(aql_profile, packet_generation_multi)
{
auto metrics =
rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES", "TA_FLAT_READ_WAVEFRONTS"});
CounterPacketConstruct pkt(agent, metrics);
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table());
EXPECT_TRUE(test_pkt);
}
@@ -0,0 +1,357 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <functional>
#include <map>
#include <unordered_set>
#include <hsa/hsa.h>
#include <hsa/hsa_api_trace.h>
#include <hsa/hsa_ven_amd_aqlprofile.h>
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
#include "lib/rocprofiler-sdk/aql/packet_construct.hpp"
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include "lib/rocprofiler-sdk/hsa/queue.hpp"
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
using namespace rocprofiler;
namespace
{
/**
 * Returns the AMD extension table used by these tests.
 * The table is created once (function-local static) with the memory-pool and
 * agent-access entries pointed directly at the corresponding HSA functions;
 * all other entries keep their value-initialized state.
 */
AmdExtTable&
get_ext_table()
{
    static AmdExtTable table = [] {
        AmdExtTable ext{};
        ext.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
        ext.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info;
        ext.hsa_amd_agents_allow_access_fn        = hsa_amd_agents_allow_access;
        ext.hsa_amd_memory_pool_allocate_fn       = hsa_amd_memory_pool_allocate;
        ext.hsa_amd_memory_pool_free_fn           = hsa_amd_memory_pool_free;
        ext.hsa_amd_memory_pool_get_info_fn       = hsa_amd_memory_pool_get_info;
        return ext;
    }();
    return table;
}
/**
 * @brief Returns the HSA core API table shared by the tests in this file.
 *
 * The table is built once, on first use, from a value-initialized CoreApiTable in which
 * only the agent-iteration, agent-info, queue create/destroy and signal-wait entry
 * points assigned below are filled in with the corresponding HSA functions.
 *
 * @return Reference to the function-local static table.
 */
CoreApiTable&
get_api_table()
{
    static CoreApiTable core_table = [] {
        CoreApiTable tbl{};
        tbl.hsa_iterate_agents_fn      = hsa_iterate_agents;
        tbl.hsa_agent_get_info_fn      = hsa_agent_get_info;
        tbl.hsa_queue_create_fn        = hsa_queue_create;
        tbl.hsa_queue_destroy_fn       = hsa_queue_destroy;
        tbl.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed;
        return tbl;
    }();
    return core_table;
}
auto
findDeviceMetrics(const rocprofiler_agent_t& agent, const std::unordered_set<std::string>& metrics)
{
std::vector<counters::Metric> ret;
auto all_counters = counters::getBaseHardwareMetrics();
ROCP_ERROR << "Looking up counters for " << std::string(agent.name);
auto gfx_metrics = common::get_val(all_counters, std::string(agent.name));
if(!gfx_metrics)
{
ROCP_ERROR << "No counters found for " << std::string(agent.name);
return ret;
}
for(auto& counter : *gfx_metrics)
{
if((metrics.count(counter.name()) > 0 || metrics.empty()) && !counter.block().empty())
{
ret.push_back(counter);
}
}
return ret;
}
/**
 * @brief Reference ("v1") block-id query issued directly through the HSA aqlprofile API.
 *
 * Asks hsa_ven_amd_aqlprofile_get_info for HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID using
 * the metric's block name as the query name.
 *
 * @param [in] agent  HSA agent to query
 * @param [in] metric metric whose block is looked up
 * @return the query structure as filled in by aqlprofile
 * @throws std::runtime_error if the aqlprofile call does not return HSA_STATUS_SUCCESS
 */
hsa_ven_amd_aqlprofile_id_query_t
v1_get_query_info(hsa_agent_t agent, const counters::Metric& metric)
{
    hsa_ven_amd_aqlprofile_profile_t  profile{.agent = agent};
    hsa_ven_amd_aqlprofile_id_query_t query = {metric.block().c_str(), 0, 0};

    const auto status =
        hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query);
    if(status == HSA_STATUS_SUCCESS) return query;

    // Failure path: debug-only fatal log first, then report the failure to the caller.
    DLOG(FATAL) << fmt::format("AQL failed to query info for counter {}", metric);
    throw std::runtime_error(fmt::format("AQL failed to query info for counter {}", metric));
}
/**
 * @brief Reference ("v1") block-counter query issued directly through the HSA aqlprofile API.
 *
 * Builds a single-event PMC profile for @p event and asks
 * hsa_ven_amd_aqlprofile_get_info for HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS.
 *
 * @param [in] agent HSA agent to query
 * @param [in] event event identifying the block being queried
 * @return the counter count written by aqlprofile
 * @throws std::runtime_error if the aqlprofile call does not return HSA_STATUS_SUCCESS
 */
uint32_t
v1_get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event)
{
    hsa_ven_amd_aqlprofile_profile_t profile = {.agent       = agent,
                                                .type        = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
                                                .events      = &event,
                                                .event_count = 1};

    uint32_t   block_counters = 0;
    const auto status         = hsa_ven_amd_aqlprofile_get_info(
        &profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS, &block_counters);
    if(status == HSA_STATUS_SUCCESS) return block_counters;

    throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}",
                                         static_cast<int64_t>(event.block_name)));
}
/**
 * @brief Reference ("v1") dimension query issued directly through the HSA aqlprofile API.
 *
 * Iterates the event coordinates reported by hsa_ven_amd_aqlprofile_iterate_event_coord
 * and records each (id, extent) pair in @p dims. Ids already present in @p dims are left
 * untouched because entries are added with emplace.
 *
 * @param [in]  agent     HSA agent to query
 * @param [in]  event     event whose coordinates are iterated
 * @param [in]  sample_id sample id forwarded to aqlprofile
 * @param [out] dims      map receiving id -> extent
 * @return ROCPROFILER_STATUS_SUCCESS if the iteration succeeded,
 *         ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD otherwise
 */
rocprofiler_status_t
v1_get_dim_info(hsa_agent_t                    agent,
                hsa_ven_amd_aqlprofile_event_t event,
                uint32_t                       sample_id,
                std::map<int, uint64_t>&       dims)
{
    // Captureless so it can be handed to the C API; the destination map travels in userdata.
    auto record_coord =
        [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t {
        static_cast<std::map<int, uint64_t>*>(userdata)->emplace(id, extent);
        return HSA_STATUS_SUCCESS;
    };

    const auto status = hsa_ven_amd_aqlprofile_iterate_event_coord(
        agent, event, sample_id, record_coord, static_cast<void*>(&dims));

    return (status == HSA_STATUS_SUCCESS) ? ROCPROFILER_STATUS_SUCCESS
                                          : ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD;
}
/**
 * @brief Prepares the agent cache and queue controller for the HSA-backed ("v1") tests.
 *
 * Builds an HsaApiTable that points at the test-local core and AMD extension tables,
 * constructs the agent cache from it, and initializes the queue controller with the
 * same two tables.
 *
 * @note This helper contains a fatal gtest assertion. A fatal failure raised here only
 *       returns from this function, so callers that must stop on failure should wrap the
 *       call in ASSERT_NO_FATAL_FAILURE.
 */
void
test_init()
{
    // Value-initialize: only two members are assigned below, and the table is handed to
    // library code, so the remaining members must not be left indeterminate.
    HsaApiTable table{};
    table.amd_ext_ = &get_ext_table();
    table.core_    = &get_api_table();
    agent::construct_agent_cache(&table);
    ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
    hsa::get_queue_controller()->init(get_api_table(), get_ext_table());
}
} // namespace
/**
 * Checks aql::get_query_info for every hardware metric of every non-CPU agent: the
 * returned query must carry a name, a non-zero instance count, and an id below the
 * maximum uint32_t value.
 */
TEST(aql_helpers, get_query_info)
{
    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());
    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());
        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            EXPECT_TRUE(query.name != nullptr);
            // Never hand a null C string to the formatter; the expectation above already
            // reports the failure, the log line just needs to stay well-defined.
            ROCP_INFO << fmt::format("{},{},{}",
                                     query.id,
                                     (query.name != nullptr) ? query.name : "(null)",
                                     query.instance_count);
            EXPECT_TRUE(query.instance_count != 0);
            EXPECT_TRUE(query.id < std::numeric_limits<uint32_t>::max());
        }
    }
}
/**
 * Cross-checks aql::get_query_info (keyed by rocprofiler agent id) against the reference
 * query made directly through the HSA aqlprofile API (keyed by hsa_agent_t): id, name
 * and instance count must agree for every hardware metric of every non-CPU agent.
 */
TEST(aql_helpers, get_query_info_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    // Pair the successful hsa_init() with hsa_shut_down() on every exit path, including
    // the early returns taken by fatal assertions below.
    struct hsa_shutdown_guard
    {
        ~hsa_shutdown_guard() { hsa_shut_down(); }
    } shutdown_guard;

    // test_init() uses a fatal assertion internally; propagate it instead of carrying on.
    ASSERT_NO_FATAL_FAILURE(test_init());

    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());
    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            // v1 query with hsa_agent
            auto query_v1 = v1_get_query_info(cache->get_hsa_agent(), metric);
            EXPECT_EQ(query.id, query_v1.id);
            // C-string comparison that stays well-defined if either name is null.
            EXPECT_STREQ(query.name, query_v1.name);
            EXPECT_EQ(query.instance_count, query_v1.instance_count);
        }
    }
}
/**
 * For every hardware metric of every non-CPU agent, and for each block instance reported
 * by aql::get_query_info, expects aql::get_block_counters to return a value greater
 * than zero.
 */
TEST(aql_helpers, get_block_counters)
{
    auto agent_list = agent::get_agents();
    ASSERT_FALSE(agent_list.empty());
    for(auto itr : agent_list)
    {
        if(itr->type == ROCPROFILER_AGENT_TYPE_CPU) continue;

        auto hw_metrics = findDeviceMetrics(*itr, {});
        ASSERT_FALSE(hw_metrics.empty());

        for(auto& hw_metric : hw_metrics)
        {
            auto info = aql::get_query_info(itr->id, hw_metric);
            // The event id and block name are the same for every instance of the block.
            const auto event_id   = static_cast<uint32_t>(std::atoi(hw_metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(info.id);

            for(unsigned instance = 0; instance < info.instance_count; ++instance)
            {
                aqlprofile_pmc_event_t pmc_event = {.block_index = instance,
                                                    .event_id    = event_id,
                                                    .flags = aqlprofile_pmc_event_flags_t{0},
                                                    .block_name = block_name};
                EXPECT_GT(aql::get_block_counters(itr->id, pmc_event), 0);
            }
        }
    }
}
/**
 * Cross-checks aql::get_block_counters (keyed by rocprofiler agent id) against the
 * reference query made directly through the HSA aqlprofile API (keyed by hsa_agent_t)
 * for every block instance of every hardware metric on every non-CPU agent.
 */
TEST(aql_helpers, get_block_counters_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    // Pair the successful hsa_init() with hsa_shut_down() on every exit path, including
    // the early returns taken by fatal assertions below.
    struct hsa_shutdown_guard
    {
        ~hsa_shutdown_guard() { hsa_shut_down(); }
    } shutdown_guard;

    // test_init() uses a fatal assertion internally; propagate it instead of carrying on.
    ASSERT_NO_FATAL_FAILURE(test_init());

    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());
    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            // Loop-invariant pieces of both event descriptions.
            const auto event_id   = static_cast<uint32_t>(std::atoi(metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id);

            for(unsigned block_index = 0; block_index < query.instance_count; ++block_index)
            {
                aqlprofile_pmc_event_t event = {.block_index = block_index,
                                                .event_id    = event_id,
                                                .flags       = aqlprofile_pmc_event_flags_t{0},
                                                .block_name  = block_name};
                hsa_ven_amd_aqlprofile_event_t event_v1 = {.block_name  = block_name,
                                                           .block_index = block_index,
                                                           .counter_id  = event_id};
                EXPECT_EQ(aql::get_block_counters(agent->id, event),
                          v1_get_block_counters(cache->get_hsa_agent(), event_v1));
            }
        }
    }
}
/**
 * For every hardware metric of every non-CPU agent, and for each block instance reported
 * by aql::get_query_info, expects aql::get_dim_info (sample id 0) to succeed and to
 * report at least one dimension.
 */
TEST(aql_helpers, get_dim_info)
{
    auto agent_list = agent::get_agents();
    ASSERT_FALSE(agent_list.empty());
    for(auto itr : agent_list)
    {
        if(itr->type == ROCPROFILER_AGENT_TYPE_CPU) continue;

        auto hw_metrics = findDeviceMetrics(*itr, {});
        ASSERT_FALSE(hw_metrics.empty());

        for(auto& hw_metric : hw_metrics)
        {
            auto info = aql::get_query_info(itr->id, hw_metric);
            // The event id and block name are the same for every instance of the block.
            const auto event_id   = static_cast<uint32_t>(std::atoi(hw_metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(info.id);

            for(unsigned instance = 0; instance < info.instance_count; ++instance)
            {
                aqlprofile_pmc_event_t pmc_event = {.block_index = instance,
                                                    .event_id    = event_id,
                                                    .flags = aqlprofile_pmc_event_flags_t{0},
                                                    .block_name = block_name};
                // Fresh map per instance so each check only sees this instance's result.
                std::map<int, uint64_t> dims;
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS,
                          aql::get_dim_info(itr->id, pmc_event, 0, dims));
                EXPECT_GT(dims.size(), 0);
            }
        }
    }
}
/**
 * Cross-checks aql::get_dim_info (keyed by rocprofiler agent id) against the reference
 * query made directly through the HSA aqlprofile API (keyed by hsa_agent_t): for every
 * block instance of every hardware metric on every non-CPU agent, both calls must
 * succeed and produce identical id -> extent maps.
 */
TEST(aql_helpers, get_dim_info_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    // Pair the successful hsa_init() with hsa_shut_down() on every exit path, including
    // the early returns taken by fatal assertions below.
    struct hsa_shutdown_guard
    {
        ~hsa_shutdown_guard() { hsa_shut_down(); }
    } shutdown_guard;

    // test_init() uses a fatal assertion internally; propagate it instead of carrying on.
    ASSERT_NO_FATAL_FAILURE(test_init());

    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());
    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            // Loop-invariant pieces of both event descriptions.
            const auto event_id   = static_cast<uint32_t>(std::atoi(metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id);

            for(unsigned block_index = 0; block_index < query.instance_count; ++block_index)
            {
                // Fresh maps per block instance: the v1 helper in this file fills its map
                // with emplace, which never overwrites, so reusing maps across instances
                // would leave the first instance's entries in place and weaken the
                // comparison for every later instance.
                std::map<int, uint64_t> dims;
                std::map<int, uint64_t> dims_v1;

                aqlprofile_pmc_event_t event = {.block_index = block_index,
                                                .event_id    = event_id,
                                                .flags       = aqlprofile_pmc_event_flags_t{0},
                                                .block_name  = block_name};
                hsa_ven_amd_aqlprofile_event_t event_v1 = {.block_name  = block_name,
                                                           .block_index = block_index,
                                                           .counter_id  = event_id};
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, aql::get_dim_info(agent->id, event, 0, dims));
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS,
                          v1_get_dim_info(cache->get_hsa_agent(), event_v1, 0, dims_v1));
                EXPECT_EQ(dims.size(), dims_v1.size());
                EXPECT_EQ(dims, dims_v1);
            }
        }
    }
}
@@ -79,11 +79,6 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
/**
* @brief This call returns the number of instances a specific counter contains.
* WARNING: There is a restriction on this call in the alpha/beta release
* of rocprof. This call will not return correct instance information in
* tool_init and must be called as part of the dispatch callback for accurate
* instance counting information. The reason for this restriction is that HSA
* is not yet loaded on tool_init.
*
* @param [in] agent rocprofiler agent
* @param [in] counter_id counter id (obtained from iterate_agent_supported_counters)
@@ -97,11 +92,6 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t,
{
*instance_count = 0;
if(rocprofiler::counters::get_dimension_cache().empty())
{
return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED;
}
const auto* dims = rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(),
counter_id.handle);
if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
@@ -174,11 +164,6 @@ rocprofiler_iterate_counter_dimensions(rocprofiler_counter_id_t id,
rocprofiler_available_dimensions_cb_t info_cb,
void* user_data)
{
if(rocprofiler::counters::get_dimension_cache().empty())
{
return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED;
}
const auto* dims =
rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(), id.handle);
if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
@@ -219,7 +219,7 @@ counter_callback_info::setup_profile_config(const hsa::AgentCache& age
}
profile->pkt_generator = std::make_unique<rocprofiler::aql::CounterPacketConstruct>(
agent,
agent.get_rocp_agent()->id,
std::vector<counters::Metric>{profile->reqired_hw_counters.begin(),
profile->reqired_hw_counters.end()});
return ROCPROFILER_STATUS_SUCCESS;
@@ -56,18 +56,17 @@ getBlockDimensions(std::string_view agent, const Metric& metric)
std::vector<MetricDimension> ret;
for(const auto& [_, maybe_agent] :
CHECK_NOTNULL(hsa::get_queue_controller())->get_supported_agents())
for(const auto* maybe_agent : rocprofiler::agent::get_agents())
{
if(maybe_agent.name() == agent)
if(std::string(maybe_agent->name) == agent)
{
aql::CounterPacketConstruct pkt_gen(maybe_agent, {metric});
aql::CounterPacketConstruct pkt_gen(maybe_agent->id, {metric});
const auto& events = pkt_gen.get_counter_events(metric);
for(const auto& event : events)
{
std::map<int, uint64_t> dims;
auto status = aql::get_dim_info(maybe_agent.get_hsa_agent(), event, 0, dims);
auto status = aql::get_dim_info(maybe_agent->id, event, 0, dims);
CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS)
<< rocprofiler_get_status_string(status);
@@ -103,16 +102,6 @@ get_dimension_cache()
common::static_object<std::unordered_map<uint64_t, std::vector<MetricDimension>>>::
construct([]() -> std::unordered_map<uint64_t, std::vector<MetricDimension>> {
std::unordered_map<uint64_t, std::vector<MetricDimension>> dims;
/**
* Fails if HSA is not loaded by retruning nothing. This should not remain after
* AQL is transistioned away from HSA.
*/
if(CHECK_NOTNULL(rocprofiler::hsa::get_queue_controller())
->get_supported_agents()
.empty())
{
return {};
}
const auto& asts = counters::get_ast_map();
for(const auto& [gfx, metrics] : asts)
@@ -456,11 +456,15 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket
{
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>* data;
const aql::CounterPacketConstruct* pkt_gen;
hsa_agent_t agent;
};
auto agent = CHECK_NOTNULL(rocprofiler::agent::get_agent_cache(
CHECK_NOTNULL(rocprofiler::agent::get_agent(pkt_gen->agent()))))
->get_hsa_agent();
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> ret;
if(pkt.isEmpty()) return ret;
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen};
if(pkt.empty) return ret;
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = agent};
;
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(
&pkt.profile,
@@ -477,10 +481,8 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket
auto& next_rec = vec.emplace_back();
set_counter_in_rec(next_rec.id, {.handle = metric->id()});
// Actual dimension info needs to be used here in the future
auto aql_status = aql::set_dim_id_from_sample(next_rec.id,
it.pkt_gen->hsa_agent(),
info_data->pmc_data.event,
info_data->sample_id);
auto aql_status = aql::set_dim_id_from_sample(
next_rec.id, it.agent, info_data->pmc_data.event, info_data->sample_id);
CHECK_EQ(aql_status, ROCPROFILER_STATUS_SUCCESS)
<< rocprofiler_get_status_string(aql_status);
@@ -251,12 +251,12 @@ TEST(dimension, block_dim_test)
}
else
{
aql::CounterPacketConstruct pkt_gen(agent, {metric});
aql::CounterPacketConstruct pkt_gen(agent.get_rocp_agent()->id, {metric});
const auto& events = pkt_gen.get_counter_events(metric);
for(const auto& event : events)
{
std::map<int, uint64_t> dims;
auto status = aql::get_dim_info(agent.get_hsa_agent(), event, 0, dims);
auto status = aql::get_dim_info(agent.get_rocp_agent()->id, event, 0, dims);
CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS)
<< rocprofiler_get_status_string(status);
for(const auto& [id, extent] : dims)
@@ -62,16 +62,20 @@ public:
AQLPacket(const AQLPacket&) = delete;
AQLPacket& operator=(const AQLPacket&) = delete;
aqlprofile_handle_t pkt_handle = {.handle = 0};
aqlprofile_pmc_aql_packets_t pkts = {.start_packet = null_amd_aql_pm4_packet,
.stop_packet = null_amd_aql_pm4_packet,
.read_packet = null_amd_aql_pm4_packet};
bool empty = {true};
hsa_ven_amd_aqlprofile_profile_t profile = {};
hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet;
hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet;
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 3> before_krn_pkt = {};
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 2> after_krn_pkt = {};
bool isEmpty() const { return empty; }
bool empty = true;
};
class CounterAQLPacket : public AQLPacket
@@ -191,7 +191,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
// Generate supported agents
for(const auto* itr : agents)
{
auto cached_agent = agent::get_agent_cache(itr);
const auto* cached_agent = agent::get_agent_cache(itr);
if(cached_agent && cached_agent->get_rocp_agent()->type == ROCPROFILER_AGENT_TYPE_GPU)
{
get_supported_agents().emplace(cached_agent->index(), *cached_agent);