Removal of HSA from counter collection (#697)
* Minor fix
Removal of HSA from counter collection
Tests for AQL
Updated counter collection client to build profiles in tool init
* Rebased
* Debug printing
* Formatting
* More format
* fix shadowing
---------
Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
[ROCm/rocprofiler-sdk commit: c2f659ab5c]
This commit is contained in:
@@ -121,6 +121,12 @@ buffered_callback(rocprofiler_context_id_t,
|
||||
*output_stream << "[" << __FUNCTION__ << "] " << ss.str() << "\n";
|
||||
}
|
||||
|
||||
// Accessor for the cache of counter-collection profiles, keyed by the agent id handle
// (callers in this file insert and look up with `agent.id.handle` / `agent_id.handle`).
// The map is a function-local static, so every caller gets a reference to the same
// instance. This function itself performs no locking.
std::unordered_map<uint64_t, rocprofiler_profile_config_id_t>&
|
||||
get_profile_cache()
|
||||
{
|
||||
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache;
|
||||
return profile_cache;
|
||||
}
|
||||
/**
|
||||
* Callback from rocprofiler when a kernel dispatch is enqueued into the HSA queue.
|
||||
* rocprofiler_profile_config_id_t* is a return to specify what counters to collect
|
||||
@@ -140,12 +146,9 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
|
||||
* set for the agent. If we have, return it. Otherwise, construct a new profile counter
|
||||
* set.
|
||||
*/
|
||||
static std::shared_mutex m_mutex = {};
|
||||
static std::unordered_map<uint64_t, rocprofiler_profile_config_id_t> profile_cache = {};
|
||||
|
||||
auto search_cache = [&]() {
|
||||
if(auto pos = profile_cache.find(dispatch_data.dispatch_info.agent_id.handle);
|
||||
pos != profile_cache.end())
|
||||
if(auto pos = get_profile_cache().find(dispatch_data.dispatch_info.agent_id.handle);
|
||||
pos != get_profile_cache().end())
|
||||
{
|
||||
*config = pos->second;
|
||||
return true;
|
||||
@@ -153,22 +156,21 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
|
||||
return false;
|
||||
};
|
||||
|
||||
if(!search_cache())
|
||||
{
|
||||
auto rlock = std::shared_lock{m_mutex};
|
||||
if(search_cache()) return;
|
||||
std::cerr << "No profile for agent found in cache\n";
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
auto wlock = std::unique_lock{m_mutex};
|
||||
if(search_cache()) return;
|
||||
|
||||
// Counters we want to collect (here its SQ_WAVES)
|
||||
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
|
||||
// GPU Counter IDs
|
||||
rocprofiler_profile_config_id_t
|
||||
build_profile_for_agent(rocprofiler_agent_id_t agent)
|
||||
{
|
||||
std::set<std::string> counters_to_collect = {"SQ_WAVES"};
|
||||
std::vector<rocprofiler_counter_id_t> gpu_counters;
|
||||
|
||||
// Iterate through the agents and get the counters available on that agent
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
dispatch_data.dispatch_info.agent_id,
|
||||
agent,
|
||||
[](rocprofiler_agent_id_t,
|
||||
rocprofiler_counter_id_t* counters,
|
||||
size_t num_counters,
|
||||
@@ -185,7 +187,6 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
|
||||
"Could not fetch supported counters");
|
||||
|
||||
std::vector<rocprofiler_counter_id_t> collect_counters;
|
||||
// Look for the counters contained in counters_to_collect in gpu_counters
|
||||
for(auto& counter : gpu_counters)
|
||||
{
|
||||
rocprofiler_counter_info_v0_t version;
|
||||
@@ -200,17 +201,12 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data,
|
||||
}
|
||||
}
|
||||
|
||||
// Create a collection profile for the counters
|
||||
rocprofiler_profile_config_id_t profile;
|
||||
ROCPROFILER_CALL(rocprofiler_create_profile_config(dispatch_data.dispatch_info.agent_id,
|
||||
collect_counters.data(),
|
||||
collect_counters.size(),
|
||||
&profile),
|
||||
ROCPROFILER_CALL(rocprofiler_create_profile_config(
|
||||
agent, collect_counters.data(), collect_counters.size(), &profile),
|
||||
"Could not construct profile cfg");
|
||||
|
||||
profile_cache.emplace(dispatch_data.dispatch_info.agent_id.handle, profile);
|
||||
// Return the profile to collect those counters for this dispatch
|
||||
*config = profile;
|
||||
return profile;
|
||||
}
|
||||
|
||||
int
|
||||
@@ -227,6 +223,41 @@ tool_init(rocprofiler_client_finalize_t, void* user_data)
|
||||
&get_buffer()),
|
||||
"buffer creation failed");
|
||||
|
||||
std::vector<rocprofiler_agent_v0_t> agents;
|
||||
rocprofiler_query_available_agents_cb_t iterate_cb = [](rocprofiler_agent_version_t agents_ver,
|
||||
const void** agents_arr,
|
||||
size_t num_agents,
|
||||
void* udata) {
|
||||
if(agents_ver != ROCPROFILER_AGENT_INFO_VERSION_0)
|
||||
throw std::runtime_error{"unexpected rocprofiler agent version"};
|
||||
auto* agents_v = static_cast<std::vector<rocprofiler_agent_v0_t>*>(udata);
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
agents_v->emplace_back(*static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]));
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
|
||||
iterate_cb,
|
||||
sizeof(rocprofiler_agent_t),
|
||||
const_cast<void*>(static_cast<const void*>(&agents))),
|
||||
"query available agents");
|
||||
|
||||
// Construct the profiles in advance for each agent that is a GPU
|
||||
for(const auto& agent : agents)
|
||||
{
|
||||
if(agent.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
get_profile_cache().emplace(agent.id.handle, build_profile_for_agent(agent.id));
|
||||
}
|
||||
}
|
||||
|
||||
if(agents.empty())
|
||||
{
|
||||
std::cerr << "No agents found" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto client_thread = rocprofiler_callback_thread_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
||||
"failure creating callback thread");
|
||||
|
||||
@@ -697,6 +697,47 @@ get_agent(rocprofiler_agent_id_t id)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Returns the list of aqlprofile agent handles, one entry per element of get_agents(),
// in the same iteration order. The list is built once by the initializer of the
// function-local static below and the same vector is returned on every call.
const std::vector<aqlprofile_agent_handle_t>&
|
||||
get_aql_handles()
|
||||
{
|
||||
static std::vector<aqlprofile_agent_handle_t> _v = []() {
|
||||
std::vector<aqlprofile_agent_handle_t> agent_handles;
|
||||
// NOTE(review): agent is dereferenced without a null check here, whereas
// get_aql_agent() tests each get_agents() entry before use — confirm entries
// can never be null.
for(auto& agent : get_agents())
|
||||
{
|
||||
// Describe the agent to aqlprofile using fields copied from the rocprofiler agent.
aqlprofile_agent_info_t agent_info = {
|
||||
.agent_gfxip = agent->name,
|
||||
.xcc_num = agent->num_xcc,
|
||||
.se_num = agent->num_shader_banks,
|
||||
.cu_num = agent->cu_count,
|
||||
.shader_arrays_per_se = agent->simd_arrays_per_engine};
|
||||
aqlprofile_agent_handle_t handle = {.handle = 0};
|
||||
if(aqlprofile_register_agent(&handle, &agent_info) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_WARNING << "Failed to register agent " << agent->name;
|
||||
}
|
||||
// The handle is appended even when registration failed, so that index i of this
// vector always corresponds to the i-th entry of get_agents(); get_aql_agent()
// looks handles up by that position.
agent_handles.push_back(handle);
|
||||
}
|
||||
return agent_handles;
|
||||
}();
|
||||
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Looks up the aqlprofile handle for the rocprofiler agent whose id handle equals
// id.handle. Returns a pointer into the vector returned by get_aql_handles(), or
// nullptr when no non-null entry of get_agents() has a matching id.
const aqlprofile_agent_handle_t*
|
||||
get_aql_agent(rocprofiler_agent_id_t id)
|
||||
{
|
||||
// Position of the current entry within get_agents(); used as the index into
// get_aql_handles(), which is built by iterating get_agents() in the same order.
size_t pos = 0;
|
||||
for(const auto& itr : get_agents())
|
||||
{
|
||||
if(itr && itr->id.handle == id.handle)
|
||||
{
|
||||
// vector::at() is bounds-checked and throws std::out_of_range if pos is not a
// valid index.
return &get_aql_handles().at(pos);
|
||||
}
|
||||
// Null or non-matching entries still advance the position.
pos++;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table)
|
||||
{
|
||||
@@ -916,15 +957,15 @@ get_rocprofiler_agent(hsa_agent_t agent)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
const hsa::AgentCache*
|
||||
get_agent_cache(const rocprofiler_agent_t* agent)
|
||||
{
|
||||
for(const auto& itr : get_agent_caches())
|
||||
{
|
||||
if(itr == agent) return itr;
|
||||
if(itr == agent) return &itr;
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
|
||||
#include "lib/rocprofiler-sdk/aql/aql_profile_v2.h"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
@@ -51,7 +52,7 @@ get_hsa_agent(const rocprofiler_agent_t* agent);
|
||||
const rocprofiler_agent_t*
|
||||
get_rocprofiler_agent(hsa_agent_t agent);
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
const hsa::AgentCache*
|
||||
get_agent_cache(const rocprofiler_agent_t* agent);
|
||||
|
||||
std::optional<hsa::AgentCache>
|
||||
@@ -66,6 +67,9 @@ get_agent_cache(hsa_agent_t agent);
|
||||
std::unordered_set<std::string>&
|
||||
get_agent_available_properties();
|
||||
|
||||
const aqlprofile_agent_handle_t*
|
||||
get_aql_agent(rocprofiler_agent_id_t id);
|
||||
|
||||
void
|
||||
construct_agent_cache(::HsaApiTable* table);
|
||||
} // namespace agent
|
||||
|
||||
@@ -1,34 +1,11 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#define PUBLIC_API
|
||||
|
||||
extern "C" {
|
||||
typedef struct
|
||||
{
|
||||
uint64_t handle;
|
||||
@@ -149,7 +126,7 @@ typedef struct
|
||||
* @retval HSA_STATUS_SUCCESS registration ok
|
||||
* @retval HSA_STATUS_ERROR registration failed
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
|
||||
const aqlprofile_agent_info_t* agent_info);
|
||||
|
||||
@@ -179,7 +156,7 @@ typedef enum
|
||||
// counters disable command buffer
|
||||
} aqlprofile_pmc_info_type_t;
|
||||
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
|
||||
aqlprofile_pmc_info_type_t attribute,
|
||||
void* value);
|
||||
@@ -244,7 +221,7 @@ typedef hsa_status_t (*aqlprofile_memory_copy_t)(void* dst,
|
||||
* @retval HSA_STATUS_SUCCESS if the event was validated.
|
||||
* @retval HSA_STATUS_ERROR if the event was not validated.
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
|
||||
const aqlprofile_pmc_event_t* event,
|
||||
bool* result);
|
||||
@@ -258,7 +235,7 @@ aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_pmc_data_callback_t callback,
|
||||
void* userdata);
|
||||
@@ -282,7 +259,7 @@ typedef struct
|
||||
* @param[in] dealloc_cb Function to free memory allocated by alloc_cb
|
||||
* @param[in] userdata Data passed back to user via memory alloc callback
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_pmc_aql_packets_t* packets,
|
||||
aqlprofile_pmc_profile_t profile,
|
||||
@@ -295,7 +272,7 @@ aqlprofile_pmc_create_packets(aqlprofile_handle_t* handle,
|
||||
* @brief Function to delete AQL packets after creation by aqlprofile_pmc_create_packets
|
||||
* @param[in] handle Returned by aqlprofile_pmc_create_packets()
|
||||
*/
|
||||
void
|
||||
PUBLIC_API void
|
||||
aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
@@ -307,7 +284,7 @@ aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle);
|
||||
* @retval HSA_STATUS_ERROR if some callback returns an error
|
||||
* @retval HSA_STATUS_ERROR_INVALID_ARGUMENT if invalid handle is given
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_att_iterate_data(aqlprofile_handle_t handle,
|
||||
aqlprofile_att_data_callback_t callback,
|
||||
void* userdata);
|
||||
@@ -330,7 +307,7 @@ typedef struct
|
||||
* @retval HSA_STATUS_SUCCESS if all packets created successfully
|
||||
* @retval HSA_STATUS_ERROR otherwise
|
||||
*/
|
||||
hsa_status_t
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_att_control_aql_packets_t* packets,
|
||||
aqlprofile_att_profile_t profile,
|
||||
@@ -339,7 +316,7 @@ aqlprofile_att_create_packets(aqlprofile_handle_t* handle,
|
||||
aqlprofile_memory_copy_t memcpy_cb,
|
||||
void* userdata);
|
||||
|
||||
void
|
||||
PUBLIC_API void
|
||||
aqlprofile_att_delete_packets(aqlprofile_handle_t handle);
|
||||
|
||||
/**
|
||||
@@ -476,21 +453,20 @@ enum WaveTrapStatus
|
||||
TRAP_STANDBY = 2
|
||||
};
|
||||
|
||||
typedef struct
|
||||
struct __attribute__((packed)) pcinfo_t
|
||||
{
|
||||
size_t addr;
|
||||
size_t marker_id;
|
||||
} pcinfo_t;
|
||||
int marker_id;
|
||||
};
|
||||
|
||||
typedef struct __attribute__((packed))
|
||||
{
|
||||
uint64_t category : 8;
|
||||
uint64_t hitcount : 56;
|
||||
uint64_t latency;
|
||||
pcinfo_t pc;
|
||||
int hitcount;
|
||||
size_t latency;
|
||||
} att_trace_event_t;
|
||||
|
||||
typedef struct
|
||||
struct wave_data_t
|
||||
{
|
||||
uint8_t simd;
|
||||
uint8_t wave_id;
|
||||
@@ -498,42 +474,65 @@ typedef struct
|
||||
uint8_t reserved;
|
||||
|
||||
// VMEM Pipeline: instrs and stalls
|
||||
int num_vmem_instrs;
|
||||
int num_vmem_stalls;
|
||||
int num_vmem_instrs = 0;
|
||||
int num_vmem_stalls = 0;
|
||||
// FLAT instrs and stalls
|
||||
int num_flat_instrs;
|
||||
int num_flat_stalls;
|
||||
int num_flat_instrs = 0;
|
||||
int num_flat_stalls = 0;
|
||||
|
||||
// LDS instr and stalls
|
||||
int num_lds_instrs;
|
||||
int num_lds_stalls;
|
||||
int num_lds_instrs = 0;
|
||||
int num_lds_stalls = 0;
|
||||
|
||||
// SCA instrs stalls
|
||||
int num_salu_instrs;
|
||||
int num_smem_instrs;
|
||||
int num_salu_stalls;
|
||||
int num_smem_stalls;
|
||||
int num_salu_instrs = 0;
|
||||
int num_smem_instrs = 0;
|
||||
int num_salu_stalls = 0;
|
||||
int num_smem_stalls = 0;
|
||||
|
||||
// Branch
|
||||
int num_branch_instrs;
|
||||
int num_branch_taken_instrs;
|
||||
int num_branch_stalls;
|
||||
int num_branch_instrs = 0;
|
||||
int num_branch_taken_instrs = 0;
|
||||
int num_branch_stalls = 0;
|
||||
|
||||
// total VMEM/FLAT/LDS/SMEM instructions issued
|
||||
int num_mem_instrs; // total issued memory instructions
|
||||
int num_valu_stalls;
|
||||
size_t num_valu_instrs;
|
||||
size_t num_issued_instrs; // total issued instructions (compute + memory)
|
||||
int num_mem_instrs = 0; // total issued memory instructions
|
||||
int num_valu_stalls = 0;
|
||||
size_t num_valu_instrs = 0;
|
||||
size_t num_issued_instrs = 0; // total issued instructions (compute + memory)
|
||||
|
||||
int64_t begin_time; // Begin and end cycle
|
||||
int64_t end_time;
|
||||
int64_t traceID;
|
||||
int64_t begin_time = 0; // Begin and end cycle
|
||||
int64_t end_time = 0;
|
||||
int64_t traceID = -1;
|
||||
|
||||
size_t timeline_size;
|
||||
size_t instructions_size;
|
||||
size_t timeline_size = 0;
|
||||
size_t instructions_size = 0;
|
||||
wave_state_t* timeline_array;
|
||||
wave_instruction_t* instructions_array;
|
||||
} wave_data_t;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Callback for iteration of all possible event coordinate IDs and coordinate names.
|
||||
* @param [in] id Integer identifying type ID.
|
||||
* @param [in] name Name of the trace type.
|
||||
* @param [in] userdata User data passed back to the caller
|
||||
* @retval HSA_STATUS_SUCCESS Continues iteration
|
||||
* @retval OTHERS Any other HSA return values stops iteration, passing back this value through
|
||||
* @ref aqlprofile_att_iterate_trace_type_ids
|
||||
*/
|
||||
typedef hsa_status_t (*aqlprofile_att_tracename_callback_t)(int id, const char* name, void* data);
|
||||
|
||||
/**
|
||||
* @brief Iterate over all possible event coordinate IDs and their names.
|
||||
* @param [in] callback Callback to use for iteration of trace types
|
||||
* @param [in] userdata Data to supply to callback @ref aqlprofile_att_tracename_callback_t
|
||||
* @retval HSA_STATUS_SUCCESS if successful
|
||||
* @retval HSA_STATUS_ERROR if error on iteration
|
||||
* @retval OTHERS If @ref aqlprofile_att_tracename_callback_t returns non-HSA_STATUS_SUCCESS,
|
||||
* that value is returned.
|
||||
*/
|
||||
PUBLIC_API hsa_status_t
|
||||
aqlprofile_att_iterate_trace_type_ids(aqlprofile_att_tracename_callback_t callback, void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Callback for rocprofiler to return ISA to aqlprofile ATT parser.
|
||||
@@ -561,7 +560,7 @@ typedef hsa_status_t (*aqlprofile_att_isa_callback_t)(char* isa_instruction,
|
||||
uint64_t* isa_memory_size,
|
||||
uint64_t* isa_size,
|
||||
uint64_t* source_size,
|
||||
uint64_t marker_id,
|
||||
uint32_t marker_id,
|
||||
uint64_t offset,
|
||||
void* userdata);
|
||||
|
||||
@@ -604,11 +603,11 @@ typedef uint64_t (*aqlprofile_att_se_data_callback_t)(int* shader_engine_id
|
||||
/**
|
||||
* @brief Callback returning from aqlprofile_att_parser_iterate_event_list
|
||||
* @param[in] trace_event_id ID of the event.
|
||||
* @param[in] trace_event_metadata Null-terminated string, entries separated by ';'
|
||||
* @param[in] trace_event_name Event name.
|
||||
* @param[in] userdata userdata.
|
||||
*/
|
||||
typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event_id,
|
||||
const char* trace_event_metadata,
|
||||
const char* trace_event_name,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
@@ -616,7 +615,7 @@ typedef void (*aqlprofile_att_parser_iterate_event_cb_t)(int trace_event
|
||||
* @param[in] callback Callback where events are returned to.
|
||||
* @param[in] userdata userdata.
|
||||
*/
|
||||
void
|
||||
hsa_status_t
|
||||
aqlprofile_att_parser_iterate_event_list(aqlprofile_att_parser_iterate_event_cb_t callback,
|
||||
void* userdata);
|
||||
|
||||
@@ -635,17 +634,17 @@ aqlprofile_att_parse_data(aqlprofile_att_se_data_callback_t se_data_callback,
|
||||
void* userdata);
|
||||
|
||||
/**
|
||||
* @brief Contains flags for how code objects are interpreted
|
||||
* @brief Contains information of code objects. IDs can be reused for different load addresses.
|
||||
*/
|
||||
typedef union
|
||||
{
|
||||
uint32_t raw;
|
||||
struct
|
||||
{
|
||||
uint32_t isUnload : 1; // 0 if code object is being loaded, 1 for unload
|
||||
uint32_t bFromStart : 1; // Has this code object been loaded before thread trace started?
|
||||
uint32_t legacy_id : 30; // Legacy code object ID, if it fits in 30 bits.
|
||||
uint32_t id : 30; // To be passed back to isa_string_callback in marker_id
|
||||
};
|
||||
uint32_t raw;
|
||||
} aqlprofile_att_header_marker_t;
|
||||
|
||||
/**
|
||||
@@ -653,7 +652,6 @@ typedef union
|
||||
* @param[out] packets Returned packet
|
||||
* @param[in] handle The handle created from aqlprofile_att_create_packets()
|
||||
* @param[in] header Header containing code object information created from profiler
|
||||
* @param[in] id To be passed back to isa_string_callback in marker_id
|
||||
* @param[in] addr Code object loaded address.
|
||||
* @param[in] size Code object loaded size.
|
||||
*/
|
||||
@@ -661,10 +659,6 @@ hsa_status_t
|
||||
aqlprofile_att_codeobj_load_marker(hsa_ext_amd_aql_pm4_packet_t* packets,
|
||||
aqlprofile_handle_t handle,
|
||||
aqlprofile_att_header_marker_t header,
|
||||
uint64_t id,
|
||||
uint64_t addr,
|
||||
uint64_t size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -36,12 +36,12 @@ namespace rocprofiler
|
||||
namespace aql
|
||||
{
|
||||
hsa_ven_amd_aqlprofile_id_query_t
|
||||
get_query_info(hsa_agent_t agent, const counters::Metric& metric)
|
||||
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric)
|
||||
{
|
||||
hsa_ven_amd_aqlprofile_profile_t profile{.agent = agent};
|
||||
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
|
||||
aqlprofile_pmc_profile_t profile{.agent = aql_agent};
|
||||
hsa_ven_amd_aqlprofile_id_query_t query = {metric.block().c_str(), 0, 0};
|
||||
if(hsa_ven_amd_aqlprofile_get_info(&profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &query) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
if(aqlprofile_get_pmc_info(&profile, AQLPROFILE_INFO_BLOCK_ID, &query) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
ROCP_DFATAL << fmt::format("AQL failed to query info for counter {}", metric);
|
||||
throw std::runtime_error(fmt::format("AQL failed to query info for counter {}", metric));
|
||||
@@ -50,16 +50,13 @@ get_query_info(hsa_agent_t agent, const counters::Metric& metric)
|
||||
}
|
||||
|
||||
uint32_t
|
||||
get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event)
|
||||
get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event)
|
||||
{
|
||||
hsa_ven_amd_aqlprofile_profile_t query = {.agent = agent,
|
||||
.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
|
||||
.events = &event,
|
||||
.event_count = 1};
|
||||
uint32_t max_block_counters = 0;
|
||||
if(hsa_ven_amd_aqlprofile_get_info(&query,
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS,
|
||||
&max_block_counters) != HSA_STATUS_SUCCESS)
|
||||
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
|
||||
aqlprofile_pmc_profile_t query = {.agent = aql_agent, .events = &event, .event_count = 1};
|
||||
uint32_t max_block_counters = 0;
|
||||
if(aqlprofile_get_pmc_info(&query, AQLPROFILE_INFO_BLOCK_COUNTERS, &max_block_counters) !=
|
||||
HSA_STATUS_SUCCESS)
|
||||
{
|
||||
throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}",
|
||||
static_cast<int64_t>(event.block_name)));
|
||||
@@ -94,10 +91,10 @@ set_dim_id_from_sample(rocprofiler_counter_instance_id_t& id,
|
||||
}
|
||||
|
||||
rocprofiler_status_t
|
||||
get_dim_info(hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
std::map<int, uint64_t>& dims)
|
||||
get_dim_info(rocprofiler_agent_id_t agent,
|
||||
aqlprofile_pmc_event_t event,
|
||||
uint32_t sample_id,
|
||||
std::map<int, uint64_t>& dims)
|
||||
{
|
||||
auto callback = [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t {
|
||||
auto& map = *static_cast<std::map<int, uint64_t>*>(userdata);
|
||||
@@ -105,8 +102,10 @@ get_dim_info(hsa_agent_t agent,
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
if(hsa_ven_amd_aqlprofile_iterate_event_coord(
|
||||
agent, event, sample_id, callback, static_cast<void*>(&dims)) != HSA_STATUS_SUCCESS)
|
||||
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
|
||||
|
||||
if(aqlprofile_iterate_event_coord(
|
||||
aql_agent, event, sample_id, callback, static_cast<void*>(&dims)) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
@@ -38,18 +39,18 @@ namespace aql
|
||||
{
|
||||
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID from aqlprofile
|
||||
hsa_ven_amd_aqlprofile_id_query_t
|
||||
get_query_info(hsa_agent_t agent, const counters::Metric& metric);
|
||||
get_query_info(rocprofiler_agent_id_t agent, const counters::Metric& metric);
|
||||
|
||||
// Query HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS from aqlprofiler
|
||||
uint32_t
|
||||
get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event);
|
||||
get_block_counters(rocprofiler_agent_id_t agent, const aqlprofile_pmc_event_t& event);
|
||||
|
||||
// Query dimension ids for counter event. Returns AQLProfiler ID -> extent
|
||||
rocprofiler_status_t
|
||||
get_dim_info(hsa_agent_t agent,
|
||||
hsa_ven_amd_aqlprofile_event_t event,
|
||||
uint32_t sample_id,
|
||||
std::map<int, uint64_t>& dims);
|
||||
get_dim_info(rocprofiler_agent_id_t agent,
|
||||
aqlprofile_pmc_event_t event,
|
||||
uint32_t sample_id,
|
||||
std::map<int, uint64_t>& dims);
|
||||
|
||||
// Set dimension ids into id for sample
|
||||
rocprofiler_status_t
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/aql/packet_construct.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/details/fmt.hpp"
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
@@ -40,7 +41,7 @@ namespace rocprofiler
|
||||
{
|
||||
namespace aql
|
||||
{
|
||||
CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache& agent,
|
||||
CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t agent,
|
||||
const std::vector<counters::Metric>& metrics)
|
||||
: _agent(agent)
|
||||
{
|
||||
@@ -48,21 +49,39 @@ CounterPacketConstruct::CounterPacketConstruct(const hsa::AgentCache&
|
||||
// for the counter.
|
||||
for(const auto& x : metrics)
|
||||
{
|
||||
auto query_info = get_query_info(_agent.get_hsa_agent(), x);
|
||||
auto query_info = get_query_info(_agent, x);
|
||||
_metrics.emplace_back().metric = x;
|
||||
uint32_t event_id = std::atoi(x.event().c_str());
|
||||
|
||||
ROCP_TRACE << fmt::format(
|
||||
"Fetching events for counter {} (id={}, instance_count={}) on agent {} (name:{})",
|
||||
x.name(),
|
||||
event_id,
|
||||
query_info.instance_count,
|
||||
agent.handle,
|
||||
rocprofiler::agent::get_agent(agent)->name);
|
||||
|
||||
for(unsigned block_index = 0; block_index < query_info.instance_count; ++block_index)
|
||||
{
|
||||
_metrics.back().instances.push_back(
|
||||
{static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id),
|
||||
block_index,
|
||||
event_id});
|
||||
|
||||
_metrics.back().events.push_back(
|
||||
{.block_index = block_index,
|
||||
.event_id = event_id,
|
||||
.flags = aqlprofile_pmc_event_flags_t{0},
|
||||
.block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query_info.id)});
|
||||
|
||||
bool validate_event_result;
|
||||
|
||||
auto aql_agent = *CHECK_NOTNULL(rocprofiler::agent::get_aql_agent(agent));
|
||||
|
||||
LOG_IF(FATAL,
|
||||
hsa_ven_amd_aqlprofile_validate_event(_agent.get_hsa_agent(),
|
||||
&_metrics.back().instances.back(),
|
||||
&validate_event_result) !=
|
||||
HSA_STATUS_SUCCESS);
|
||||
aqlprofile_validate_pmc_event(aql_agent,
|
||||
&_metrics.back().events.back(),
|
||||
&validate_event_result) != HSA_STATUS_SUCCESS);
|
||||
LOG_IF(FATAL, !validate_event_result)
|
||||
<< "Invalid Metric: " << block_index << " " << event_id;
|
||||
_event_to_metric[std::make_tuple(
|
||||
@@ -84,12 +103,20 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
auto& pkt = *pkt_ptr;
|
||||
if(_events.empty())
|
||||
{
|
||||
ROCP_TRACE << "No events for pkt";
|
||||
return pkt_ptr;
|
||||
}
|
||||
pkt.empty = false;
|
||||
|
||||
const auto* agent_cache =
|
||||
rocprofiler::agent::get_agent_cache(CHECK_NOTNULL(rocprofiler::agent::get_agent(_agent)));
|
||||
if(!agent_cache)
|
||||
{
|
||||
ROCP_FATAL << "No agent cache for agent id: " << _agent.handle;
|
||||
}
|
||||
|
||||
pkt.profile = hsa_ven_amd_aqlprofile_profile_t{
|
||||
_agent.get_hsa_agent(),
|
||||
agent_cache->get_hsa_agent(),
|
||||
HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC, // SPM?
|
||||
_events.data(),
|
||||
static_cast<uint32_t>(_events.size()),
|
||||
@@ -100,8 +127,8 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
auto& profile = pkt.profile;
|
||||
|
||||
hsa_amd_memory_pool_access_t _access = HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
||||
ext.hsa_amd_agent_memory_pool_get_info_fn(_agent.get_hsa_agent(),
|
||||
_agent.kernarg_pool(),
|
||||
ext.hsa_amd_agent_memory_pool_get_info_fn(agent_cache->get_hsa_agent(),
|
||||
agent_cache->kernarg_pool(),
|
||||
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS,
|
||||
static_cast<void*>(&_access));
|
||||
// Memory is accessible by both the GPU and CPU, unlock the command buffer for
|
||||
@@ -110,7 +137,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
fmt::format("Agent {} does not allow memory pool access for counter collection",
|
||||
_agent.get_hsa_agent().handle));
|
||||
agent_cache->get_hsa_agent().handle));
|
||||
}
|
||||
|
||||
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, nullptr), "could not generate packet sizes");
|
||||
@@ -136,7 +163,7 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
else
|
||||
{
|
||||
CHECK(*mem_loc);
|
||||
hsa_agent_t agent = _agent.get_hsa_agent();
|
||||
hsa_agent_t agent = agent_cache->get_hsa_agent();
|
||||
// Memory is accessible by both the GPU and CPU, unlock the command buffer for
|
||||
// sharing.
|
||||
LOG_IF(FATAL,
|
||||
@@ -149,9 +176,9 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
|
||||
// Build command and output buffers
|
||||
pkt.command_buf_mallocd = alloc_and_check(
|
||||
_agent.cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size);
|
||||
agent_cache->cpu_pool(), &profile.command_buffer.ptr, profile.command_buffer.size);
|
||||
pkt.output_buffer_malloced = alloc_and_check(
|
||||
_agent.kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size);
|
||||
agent_cache->kernarg_pool(), &profile.output_buffer.ptr, profile.output_buffer.size);
|
||||
memset(profile.output_buffer.ptr, 0x0, profile.output_buffer.size);
|
||||
|
||||
CHECK_HSA(hsa_ven_amd_aqlprofile_start(&profile, &pkt.start), "failed to create start packet");
|
||||
@@ -160,6 +187,13 @@ CounterPacketConstruct::construct_packet(const AmdExtTable& ext)
|
||||
pkt.start.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
pkt.stop.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
pkt.read.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
ROCP_TRACE << fmt::format("Following Packets Generated (output_buffer={}, output_size={}). "
|
||||
"Start Pkt: {}, Read Pkt: {}, Stop Pkt: {}",
|
||||
profile.output_buffer.ptr,
|
||||
profile.output_buffer.size,
|
||||
pkt.start,
|
||||
pkt.read,
|
||||
pkt.stop);
|
||||
return pkt_ptr;
|
||||
}
|
||||
|
||||
@@ -243,14 +277,14 @@ CounterPacketConstruct::event_to_metric(const hsa_ven_amd_aqlprofile_event_t& ev
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const std::vector<hsa_ven_amd_aqlprofile_event_t>&
|
||||
const std::vector<aqlprofile_pmc_event_t>&
|
||||
CounterPacketConstruct::get_counter_events(const counters::Metric& metric) const
|
||||
{
|
||||
for(const auto& prof_metric : _metrics)
|
||||
{
|
||||
if(prof_metric.metric.id() == metric.id())
|
||||
{
|
||||
return prof_metric.instances;
|
||||
return prof_metric.events;
|
||||
}
|
||||
}
|
||||
throw std::runtime_error(fmt::format("Cannot Find Events for {}", metric));
|
||||
@@ -264,15 +298,14 @@ CounterPacketConstruct::can_collect()
|
||||
std::map<std::pair<hsa_ven_amd_aqlprofile_block_name_t, uint32_t>, int64_t> max_allowed;
|
||||
for(auto& metric : _metrics)
|
||||
{
|
||||
for(auto& instance : metric.instances)
|
||||
for(auto& instance : metric.events)
|
||||
{
|
||||
auto block_pair = std::make_pair(instance.block_name, instance.block_index);
|
||||
auto [iter, inserted] = counter_count.emplace(block_pair, 0);
|
||||
iter->second++;
|
||||
if(inserted)
|
||||
{
|
||||
max_allowed.emplace(block_pair,
|
||||
get_block_counters(_agent.get_hsa_agent(), instance));
|
||||
max_allowed.emplace(block_pair, get_block_counters(_agent, instance));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,19 +52,17 @@ namespace aql
|
||||
class CounterPacketConstruct
|
||||
{
|
||||
public:
|
||||
CounterPacketConstruct(const hsa::AgentCache& agent,
|
||||
CounterPacketConstruct(rocprofiler_agent_id_t agent,
|
||||
const std::vector<counters::Metric>& metrics);
|
||||
std::unique_ptr<hsa::CounterAQLPacket> construct_packet(const AmdExtTable&);
|
||||
|
||||
const counters::Metric* event_to_metric(const hsa_ven_amd_aqlprofile_event_t& event) const;
|
||||
std::vector<hsa_ven_amd_aqlprofile_event_t> get_all_events() const;
|
||||
hsa_agent_t hsa_agent() const { return _agent.get_hsa_agent(); }
|
||||
const std::vector<aqlprofile_pmc_event_t>& get_counter_events(const counters::Metric&) const;
|
||||
|
||||
const std::vector<hsa_ven_amd_aqlprofile_event_t>& get_counter_events(
|
||||
const counters::Metric&) const;
|
||||
rocprofiler_agent_id_t agent() const { return _agent; }
|
||||
|
||||
private:
|
||||
const hsa::AgentCache& _agent;
|
||||
static constexpr size_t MEM_PAGE_ALIGN = 0x1000;
|
||||
static constexpr size_t MEM_PAGE_MASK = MEM_PAGE_ALIGN - 1;
|
||||
static size_t getPageAligned(size_t p) { return (p + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; }
|
||||
@@ -74,10 +72,12 @@ protected:
|
||||
{
|
||||
counters::Metric metric;
|
||||
std::vector<hsa_ven_amd_aqlprofile_event_t> instances;
|
||||
std::vector<aqlprofile_pmc_event_t> events;
|
||||
};
|
||||
|
||||
void can_collect();
|
||||
|
||||
rocprofiler_agent_id_t _agent;
|
||||
std::vector<AQLProfileMetric> _metrics;
|
||||
std::vector<hsa_ven_amd_aqlprofile_event_t> _events;
|
||||
std::map<std::tuple<hsa_ven_amd_aqlprofile_block_name_t, uint32_t, uint32_t>, counters::Metric>
|
||||
|
||||
@@ -2,7 +2,7 @@ rocprofiler_deactivate_clang_tidy()
|
||||
|
||||
include(GoogleTest)
|
||||
|
||||
set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp")
|
||||
set(ROCPROFILER_LIB_AQL_TEST_SOURCES "aql_test.cpp" "helpers.cpp")
|
||||
|
||||
add_executable(aql-test)
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ TEST(aql_profile, construct_packets)
|
||||
LOG(WARNING) << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle);
|
||||
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
|
||||
ASSERT_EQ(metrics.size(), 1);
|
||||
CounterPacketConstruct(agent, metrics);
|
||||
CounterPacketConstruct(agent.get_rocp_agent()->id, metrics);
|
||||
}
|
||||
hsa_shut_down();
|
||||
}
|
||||
@@ -142,7 +142,7 @@ TEST(aql_profile, too_many_counters)
|
||||
{
|
||||
try
|
||||
{
|
||||
CounterPacketConstruct(agent, metrics);
|
||||
CounterPacketConstruct(agent.get_rocp_agent()->id, metrics);
|
||||
} catch(const std::exception& e)
|
||||
{
|
||||
EXPECT_NE(e.what(), nullptr) << e.what();
|
||||
@@ -164,7 +164,7 @@ TEST(aql_profile, packet_generation_single)
|
||||
for(const auto& [_, agent] : agents)
|
||||
{
|
||||
auto metrics = rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES"});
|
||||
CounterPacketConstruct pkt(agent, metrics);
|
||||
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
|
||||
auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table());
|
||||
EXPECT_TRUE(test_pkt);
|
||||
}
|
||||
@@ -183,7 +183,7 @@ TEST(aql_profile, packet_generation_multi)
|
||||
{
|
||||
auto metrics =
|
||||
rocprofiler::findDeviceMetrics(agent, {"SQ_WAVES", "TA_FLAT_READ_WAVEFRONTS"});
|
||||
CounterPacketConstruct pkt(agent, metrics);
|
||||
CounterPacketConstruct pkt(agent.get_rocp_agent()->id, metrics);
|
||||
auto test_pkt = pkt.construct_packet(rocprofiler::get_ext_table());
|
||||
EXPECT_TRUE(test_pkt);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,357 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include "lib/rocprofiler-sdk/agent.hpp"
|
||||
#include "lib/rocprofiler-sdk/aql/helpers.hpp"
|
||||
#include "lib/rocprofiler-sdk/aql/packet_construct.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/id_decode.hpp"
|
||||
#include "lib/rocprofiler-sdk/counters/metrics.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue.hpp"
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
|
||||
|
||||
using namespace rocprofiler;
|
||||
|
||||
namespace
|
||||
{
|
||||
/**
 * Returns the process-wide AMD extension table used by these tests.
 *
 * The table is built once, on first use, from a value-initialized AmdExtTable in
 * which only the memory-pool and agent-access entry points are filled in.
 */
AmdExtTable&
get_ext_table()
{
    static AmdExtTable ext_table = [] {
        AmdExtTable tbl{};
        tbl.hsa_amd_memory_pool_get_info_fn       = hsa_amd_memory_pool_get_info;
        tbl.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools;
        tbl.hsa_amd_memory_pool_allocate_fn       = hsa_amd_memory_pool_allocate;
        tbl.hsa_amd_memory_pool_free_fn           = hsa_amd_memory_pool_free;
        tbl.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info;
        tbl.hsa_amd_agents_allow_access_fn        = hsa_amd_agents_allow_access;
        return tbl;
    }();
    return ext_table;
}
|
||||
|
||||
/**
 * Returns the process-wide HSA core API table used by these tests.
 *
 * The table is built once, on first use, from a value-initialized CoreApiTable in
 * which only the agent-iteration, agent-info, queue create/destroy and signal-wait
 * entry points are filled in.
 */
CoreApiTable&
get_api_table()
{
    static CoreApiTable core_table = [] {
        CoreApiTable tbl{};
        tbl.hsa_iterate_agents_fn      = hsa_iterate_agents;
        tbl.hsa_agent_get_info_fn      = hsa_agent_get_info;
        tbl.hsa_queue_create_fn        = hsa_queue_create;
        tbl.hsa_queue_destroy_fn       = hsa_queue_destroy;
        tbl.hsa_signal_wait_relaxed_fn = hsa_signal_wait_relaxed;
        return tbl;
    }();
    return core_table;
}
|
||||
|
||||
auto
|
||||
findDeviceMetrics(const rocprofiler_agent_t& agent, const std::unordered_set<std::string>& metrics)
|
||||
{
|
||||
std::vector<counters::Metric> ret;
|
||||
auto all_counters = counters::getBaseHardwareMetrics();
|
||||
|
||||
ROCP_ERROR << "Looking up counters for " << std::string(agent.name);
|
||||
|
||||
auto gfx_metrics = common::get_val(all_counters, std::string(agent.name));
|
||||
if(!gfx_metrics)
|
||||
{
|
||||
ROCP_ERROR << "No counters found for " << std::string(agent.name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for(auto& counter : *gfx_metrics)
|
||||
{
|
||||
if((metrics.count(counter.name()) > 0 || metrics.empty()) && !counter.block().empty())
|
||||
{
|
||||
ret.push_back(counter);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Legacy (HSA-agent based) block lookup: asks aqlprofile for the block id
 * information of @p metric's block on @p agent.
 *
 * @return the query structure filled in by hsa_ven_amd_aqlprofile_get_info
 * @throws std::runtime_error when the aqlprofile call does not succeed
 *         (DLOG(FATAL) is issued first)
 */
hsa_ven_amd_aqlprofile_id_query_t
v1_get_query_info(hsa_agent_t agent, const counters::Metric& metric)
{
    hsa_ven_amd_aqlprofile_profile_t  aql_profile{.agent = agent};
    hsa_ven_amd_aqlprofile_id_query_t block_query = {metric.block().c_str(), 0, 0};

    const auto status = hsa_ven_amd_aqlprofile_get_info(
        &aql_profile, HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_ID, &block_query);
    if(status == HSA_STATUS_SUCCESS) return block_query;

    const auto message = fmt::format("AQL failed to query info for counter {}", metric);
    DLOG(FATAL) << message;
    throw std::runtime_error(message);
}
|
||||
|
||||
uint32_t
|
||||
v1_get_block_counters(hsa_agent_t agent, const hsa_ven_amd_aqlprofile_event_t& event)
|
||||
{
|
||||
hsa_ven_amd_aqlprofile_profile_t query = {.agent = agent,
|
||||
.type = HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC,
|
||||
.events = &event,
|
||||
.event_count = 1};
|
||||
uint32_t max_block_counters = 0;
|
||||
if(hsa_ven_amd_aqlprofile_get_info(&query,
|
||||
HSA_VEN_AMD_AQLPROFILE_INFO_BLOCK_COUNTERS,
|
||||
&max_block_counters) != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
throw std::runtime_error(fmt::format("AQL failed to max block info for counter {}",
|
||||
static_cast<int64_t>(event.block_name)));
|
||||
}
|
||||
return max_block_counters;
|
||||
}
|
||||
|
||||
/**
 * Legacy (HSA-agent based) dimension query: iterates the event coordinates of
 * @p event for @p sample_id and records each reported (id, extent) pair in @p dims.
 * An id that is already present in @p dims keeps its existing extent.
 *
 * @return ROCPROFILER_STATUS_SUCCESS when the aqlprofile iteration succeeds,
 *         ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD otherwise
 */
rocprofiler_status_t
v1_get_dim_info(hsa_agent_t                    agent,
                hsa_ven_amd_aqlprofile_event_t event,
                uint32_t                       sample_id,
                std::map<int, uint64_t>&       dims)
{
    // Capture-less lambda so it can decay to the C callback pointer; the
    // destination map travels through the userdata pointer.
    auto record_extent =
        [](int, int id, int extent, int, const char*, void* userdata) -> hsa_status_t {
        auto* extents = static_cast<std::map<int, uint64_t>*>(userdata);
        extents->emplace(id, extent);
        return HSA_STATUS_SUCCESS;
    };

    const auto status = hsa_ven_amd_aqlprofile_iterate_event_coord(
        agent, event, sample_id, record_extent, static_cast<void*>(&dims));

    return (status == HSA_STATUS_SUCCESS) ? ROCPROFILER_STATUS_SUCCESS
                                          : ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD;
}
|
||||
|
||||
void
|
||||
test_init()
|
||||
{
|
||||
HsaApiTable table;
|
||||
table.amd_ext_ = &get_ext_table();
|
||||
table.core_ = &get_api_table();
|
||||
agent::construct_agent_cache(&table);
|
||||
ASSERT_TRUE(hsa::get_queue_controller() != nullptr);
|
||||
hsa::get_queue_controller()->init(get_api_table(), get_ext_table());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// For every non-CPU agent, checks that aql::get_query_info() returns a named block
// with a non-zero instance count and an id below the uint32_t maximum for each
// available metric.
TEST(aql_helpers, get_query_info)
{
    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            // Validate the name before it is formatted: passing a null C string
            // to fmt::format is an error, so a null name must fail the test here.
            ASSERT_TRUE(query.name != nullptr);
            ROCP_INFO << fmt::format("{},{},{}", query.id, query.name, query.instance_count);
            EXPECT_TRUE(query.instance_count != 0);
            EXPECT_TRUE(query.id < std::numeric_limits<uint32_t>::max());
        }
    }
}
|
||||
|
||||
// Cross-checks aql::get_query_info() (agent-id based) against the legacy
// HSA-agent based query for every metric of every non-CPU agent.
TEST(aql_helpers, get_query_info_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    test_init();
    auto agents = agent::get_agents();

    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        // Fail the test instead of dereferencing a null pointer when no agent
        // cache entry exists for this agent.
        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);
        const auto hsa_agent = cache->get_hsa_agent();

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            // v1 query with hsa_agent
            auto query_v1 = v1_get_query_info(hsa_agent, metric);

            EXPECT_EQ(query.id, query_v1.id);
            // EXPECT_STREQ compares contents and tolerates null pointers, unlike
            // constructing std::string from a possibly-null name.
            EXPECT_STREQ(query.name, query_v1.name);
            EXPECT_EQ(query.instance_count, query_v1.instance_count);
        }
    }
    hsa_shut_down();
}
|
||||
|
||||
// For every non-CPU agent, metric and block instance, checks that
// aql::get_block_counters() reports a value greater than zero.
TEST(aql_helpers, get_block_counters)
{
    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);

            // Event id and block name are the same for every instance of the block.
            const auto event_id   = static_cast<uint32_t>(std::atoi(metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id);

            for(unsigned instance = 0; instance < query.instance_count; ++instance)
            {
                aqlprofile_pmc_event_t pmc_event = {.block_index = instance,
                                                    .event_id    = event_id,
                                                    .flags = aqlprofile_pmc_event_flags_t{0},
                                                    .block_name = block_name};
                auto counter_limit = aql::get_block_counters(agent->id, pmc_event);
                EXPECT_GT(counter_limit, 0);
            }
        }
    }
}
|
||||
|
||||
// Cross-checks aql::get_block_counters() (agent-id based) against the legacy
// HSA-agent based query for every block instance of every metric.
TEST(aql_helpers, get_block_counters_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    test_init();
    auto agents = agent::get_agents();

    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        // Fail the test instead of dereferencing a null pointer when no agent
        // cache entry exists for this agent.
        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);
        const auto hsa_agent = cache->get_hsa_agent();

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            for(unsigned block_index = 0; block_index < query.instance_count; ++block_index)
            {
                aqlprofile_pmc_event_t event = {
                    .block_index = block_index,
                    .event_id    = static_cast<uint32_t>(std::atoi(metric.event().c_str())),
                    .flags       = aqlprofile_pmc_event_flags_t{0},
                    .block_name  = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id)};

                hsa_ven_amd_aqlprofile_event_t event_v1 = {
                    .block_name  = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id),
                    .block_index = block_index,
                    .counter_id  = static_cast<uint32_t>(std::atoi(metric.event().c_str()))};
                EXPECT_EQ(aql::get_block_counters(agent->id, event),
                          v1_get_block_counters(hsa_agent, event_v1));
            }
        }
    }
    hsa_shut_down();
}
|
||||
|
||||
// For every non-CPU agent, metric and block instance, checks that
// aql::get_dim_info() succeeds and reports at least one dimension.
TEST(aql_helpers, get_dim_info)
{
    auto agents = agent::get_agents();
    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);

            // Event id and block name are the same for every instance of the block.
            const auto event_id   = static_cast<uint32_t>(std::atoi(metric.event().c_str()));
            const auto block_name = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id);

            for(unsigned instance = 0; instance < query.instance_count; ++instance)
            {
                aqlprofile_pmc_event_t pmc_event = {.block_index = instance,
                                                    .event_id    = event_id,
                                                    .flags = aqlprofile_pmc_event_flags_t{0},
                                                    .block_name = block_name};
                std::map<int, uint64_t> extents;
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS,
                          aql::get_dim_info(agent->id, pmc_event, 0, extents));
                EXPECT_GT(extents.size(), 0);
            }
        }
    }
}
|
||||
|
||||
// Cross-checks aql::get_dim_info() (agent-id based) against the legacy HSA-agent
// based coordinate iteration for every block instance of every metric.
TEST(aql_helpers, get_dim_info_compare_v1)
{
    ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
    test_init();
    auto agents = agent::get_agents();

    ASSERT_FALSE(agents.empty());

    for(const auto* agent : agents)
    {
        if(agent->type == ROCPROFILER_AGENT_TYPE_CPU) continue;
        auto metrics = findDeviceMetrics(*agent, {});
        ASSERT_FALSE(metrics.empty());

        // Fail the test instead of dereferencing a null pointer when no agent
        // cache entry exists for this agent.
        const auto* cache = agent::get_agent_cache(agent);
        ASSERT_TRUE(cache != nullptr);
        const auto hsa_agent = cache->get_hsa_agent();

        for(auto& metric : metrics)
        {
            auto query = aql::get_query_info(agent->id, metric);
            for(unsigned block_index = 0; block_index < query.instance_count; ++block_index)
            {
                // Fresh maps per event: the v1 helper fills its map with emplace(),
                // which never overwrites, so maps shared across block indices would
                // keep the first block's extents and hide later mismatches.
                std::map<int, uint64_t> dims;
                std::map<int, uint64_t> dims_v1;

                aqlprofile_pmc_event_t event = {
                    .block_index = block_index,
                    .event_id    = static_cast<uint32_t>(std::atoi(metric.event().c_str())),
                    .flags       = aqlprofile_pmc_event_flags_t{0},
                    .block_name  = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id)};

                hsa_ven_amd_aqlprofile_event_t event_v1 = {
                    .block_name  = static_cast<hsa_ven_amd_aqlprofile_block_name_t>(query.id),
                    .block_index = block_index,
                    .counter_id  = static_cast<uint32_t>(std::atoi(metric.event().c_str()))};
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS, aql::get_dim_info(agent->id, event, 0, dims));
                EXPECT_EQ(ROCPROFILER_STATUS_SUCCESS,
                          v1_get_dim_info(hsa_agent, event_v1, 0, dims_v1));
                EXPECT_EQ(dims.size(), dims_v1.size());
                EXPECT_EQ(dims, dims_v1);
            }
        }
    }
    hsa_shut_down();
}
|
||||
@@ -79,11 +79,6 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
|
||||
|
||||
/**
|
||||
* @brief This call returns the number of instances a specific counter contains.
|
||||
* WARNING: There is a restriction on this call in the alpha/beta release
|
||||
* of rocprof. This call will not return correct instance information in
|
||||
* tool_init and must be called as part of the dispatch callback for accurate
|
||||
* instance counting information. The reason for this restriction is that HSA
|
||||
* is not yet loaded on tool_init.
|
||||
*
|
||||
* @param [in] agent rocprofiler agent
|
||||
* @param [in] counter_id counter id (obtained from iterate_agent_supported_counters)
|
||||
@@ -97,11 +92,6 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_id_t,
|
||||
{
|
||||
*instance_count = 0;
|
||||
|
||||
if(rocprofiler::counters::get_dimension_cache().empty())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED;
|
||||
}
|
||||
|
||||
const auto* dims = rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(),
|
||||
counter_id.handle);
|
||||
if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
|
||||
@@ -174,11 +164,6 @@ rocprofiler_iterate_counter_dimensions(rocprofiler_counter_id_t id,
|
||||
rocprofiler_available_dimensions_cb_t info_cb,
|
||||
void* user_data)
|
||||
{
|
||||
if(rocprofiler::counters::get_dimension_cache().empty())
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED;
|
||||
}
|
||||
|
||||
const auto* dims =
|
||||
rocprofiler::common::get_val(rocprofiler::counters::get_dimension_cache(), id.handle);
|
||||
if(!dims) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND;
|
||||
|
||||
@@ -219,7 +219,7 @@ counter_callback_info::setup_profile_config(const hsa::AgentCache& age
|
||||
}
|
||||
|
||||
profile->pkt_generator = std::make_unique<rocprofiler::aql::CounterPacketConstruct>(
|
||||
agent,
|
||||
agent.get_rocp_agent()->id,
|
||||
std::vector<counters::Metric>{profile->reqired_hw_counters.begin(),
|
||||
profile->reqired_hw_counters.end()});
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
@@ -56,18 +56,17 @@ getBlockDimensions(std::string_view agent, const Metric& metric)
|
||||
|
||||
std::vector<MetricDimension> ret;
|
||||
|
||||
for(const auto& [_, maybe_agent] :
|
||||
CHECK_NOTNULL(hsa::get_queue_controller())->get_supported_agents())
|
||||
for(const auto* maybe_agent : rocprofiler::agent::get_agents())
|
||||
{
|
||||
if(maybe_agent.name() == agent)
|
||||
if(std::string(maybe_agent->name) == agent)
|
||||
{
|
||||
aql::CounterPacketConstruct pkt_gen(maybe_agent, {metric});
|
||||
aql::CounterPacketConstruct pkt_gen(maybe_agent->id, {metric});
|
||||
const auto& events = pkt_gen.get_counter_events(metric);
|
||||
|
||||
for(const auto& event : events)
|
||||
{
|
||||
std::map<int, uint64_t> dims;
|
||||
auto status = aql::get_dim_info(maybe_agent.get_hsa_agent(), event, 0, dims);
|
||||
auto status = aql::get_dim_info(maybe_agent->id, event, 0, dims);
|
||||
CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS)
|
||||
<< rocprofiler_get_status_string(status);
|
||||
|
||||
@@ -103,16 +102,6 @@ get_dimension_cache()
|
||||
common::static_object<std::unordered_map<uint64_t, std::vector<MetricDimension>>>::
|
||||
construct([]() -> std::unordered_map<uint64_t, std::vector<MetricDimension>> {
|
||||
std::unordered_map<uint64_t, std::vector<MetricDimension>> dims;
|
||||
/**
|
||||
* Fails if HSA is not loaded by returning nothing. This should not remain after
|
||||
* AQL is transitioned away from HSA.
|
||||
*/
|
||||
if(CHECK_NOTNULL(rocprofiler::hsa::get_queue_controller())
|
||||
->get_supported_agents()
|
||||
.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto& asts = counters::get_ast_map();
|
||||
for(const auto& [gfx, metrics] : asts)
|
||||
|
||||
@@ -456,11 +456,15 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket
|
||||
{
|
||||
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>>* data;
|
||||
const aql::CounterPacketConstruct* pkt_gen;
|
||||
hsa_agent_t agent;
|
||||
};
|
||||
|
||||
auto agent = CHECK_NOTNULL(rocprofiler::agent::get_agent_cache(
|
||||
CHECK_NOTNULL(rocprofiler::agent::get_agent(pkt_gen->agent()))))
|
||||
->get_hsa_agent();
|
||||
std::unordered_map<uint64_t, std::vector<rocprofiler_record_counter_t>> ret;
|
||||
if(pkt.isEmpty()) return ret;
|
||||
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen};
|
||||
if(pkt.empty) return ret;
|
||||
it_data aql_data{.data = &ret, .pkt_gen = pkt_gen, .agent = agent};
|
||||
;
|
||||
hsa_status_t status = hsa_ven_amd_aqlprofile_iterate_data(
|
||||
&pkt.profile,
|
||||
@@ -477,10 +481,8 @@ EvaluateAST::read_pkt(const aql::CounterPacketConstruct* pkt_gen, hsa::AQLPacket
|
||||
auto& next_rec = vec.emplace_back();
|
||||
set_counter_in_rec(next_rec.id, {.handle = metric->id()});
|
||||
// Actual dimension info needs to be used here in the future
|
||||
auto aql_status = aql::set_dim_id_from_sample(next_rec.id,
|
||||
it.pkt_gen->hsa_agent(),
|
||||
info_data->pmc_data.event,
|
||||
info_data->sample_id);
|
||||
auto aql_status = aql::set_dim_id_from_sample(
|
||||
next_rec.id, it.agent, info_data->pmc_data.event, info_data->sample_id);
|
||||
CHECK_EQ(aql_status, ROCPROFILER_STATUS_SUCCESS)
|
||||
<< rocprofiler_get_status_string(aql_status);
|
||||
|
||||
|
||||
@@ -251,12 +251,12 @@ TEST(dimension, block_dim_test)
|
||||
}
|
||||
else
|
||||
{
|
||||
aql::CounterPacketConstruct pkt_gen(agent, {metric});
|
||||
aql::CounterPacketConstruct pkt_gen(agent.get_rocp_agent()->id, {metric});
|
||||
const auto& events = pkt_gen.get_counter_events(metric);
|
||||
for(const auto& event : events)
|
||||
{
|
||||
std::map<int, uint64_t> dims;
|
||||
auto status = aql::get_dim_info(agent.get_hsa_agent(), event, 0, dims);
|
||||
auto status = aql::get_dim_info(agent.get_rocp_agent()->id, event, 0, dims);
|
||||
CHECK_EQ(status, ROCPROFILER_STATUS_SUCCESS)
|
||||
<< rocprofiler_get_status_string(status);
|
||||
for(const auto& [id, extent] : dims)
|
||||
|
||||
@@ -62,16 +62,20 @@ public:
|
||||
AQLPacket(const AQLPacket&) = delete;
|
||||
AQLPacket& operator=(const AQLPacket&) = delete;
|
||||
|
||||
aqlprofile_handle_t pkt_handle = {.handle = 0};
|
||||
aqlprofile_pmc_aql_packets_t pkts = {.start_packet = null_amd_aql_pm4_packet,
|
||||
.stop_packet = null_amd_aql_pm4_packet,
|
||||
.read_packet = null_amd_aql_pm4_packet};
|
||||
|
||||
bool empty = {true};
|
||||
hsa_ven_amd_aqlprofile_profile_t profile = {};
|
||||
hsa_ext_amd_aql_pm4_packet_t start = null_amd_aql_pm4_packet;
|
||||
hsa_ext_amd_aql_pm4_packet_t stop = null_amd_aql_pm4_packet;
|
||||
hsa_ext_amd_aql_pm4_packet_t read = null_amd_aql_pm4_packet;
|
||||
|
||||
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 3> before_krn_pkt = {};
|
||||
common::container::small_vector<hsa_ext_amd_aql_pm4_packet_t, 2> after_krn_pkt = {};
|
||||
|
||||
bool isEmpty() const { return empty; }
|
||||
bool empty = true;
|
||||
};
|
||||
|
||||
class CounterAQLPacket : public AQLPacket
|
||||
|
||||
@@ -191,7 +191,7 @@ QueueController::init(CoreApiTable& core_table, AmdExtTable& ext_table)
|
||||
// Generate supported agents
|
||||
for(const auto* itr : agents)
|
||||
{
|
||||
auto cached_agent = agent::get_agent_cache(itr);
|
||||
const auto* cached_agent = agent::get_agent_cache(itr);
|
||||
if(cached_agent && cached_agent->get_rocp_agent()->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
{
|
||||
get_supported_agents().emplace(cached_agent->index(), *cached_agent);
|
||||
|
||||
Reference in New Issue
Block a user