Dosyalar
rocm-systems/projects/aqlprofile/src/core/counters.cpp
T
systems-assistant[bot] a68afa42a1 Add 'projects/aqlprofile/' from commit '6f236ffb5f1bd128e11ba0bf09e3c0a52b6527e4'
git-subtree-dir: projects/aqlprofile
git-subtree-mainline: 9011821e05
git-subtree-split: 6f236ffb5f
2025-07-22 22:52:36 +00:00

451 satır
17 KiB
C++

// MIT License
//
// Copyright (c) 2017-2025 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "core/aql_profile.hpp"
#include "aqlprofile-sdk/aql_profile_v2.h"
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <future>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#include "core/counter_dimensions.hpp"
#include "core/logger.h"
#include "core/pm4_factory.h"
#include "pm4/cmd_builder.h"
#include "pm4/pmc_builder.h"
#include "pm4/spm_builder.h"
#include "pm4/sqtt_builder.h"
#include "core/commandbuffermgr.hpp"
#include "memorymanager.hpp"
// Exports a symbol from the shared library (GCC/Clang visibility attribute).
#define PUBLIC_API __attribute__((visibility("default")))
// GCC/Clang attributes for functions that run automatically before main() /
// at library load, and at exit / library unload, respectively.
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
// Logs `msg` through ERR_LOGGING and returns `err` from the enclosing function
// when `cond` is true.
// The body is wrapped in do { } while (0) so that `ERR_CHECK(...);` is exactly
// one statement and stays valid inside an unbraced if/else.
#define ERR_CHECK(cond, err, msg) \
  do {                            \
    if (cond) {                   \
      ERR_LOGGING << msg;         \
      return err;                 \
    }                             \
  } while (0)
// HSA_TRY_WRAP / HSA_CATCH_WRAP bracket a function body so that any
// std::exception escaping it is turned into HSA_STATUS_ERROR.
#define HSA_TRY_WRAP try {
#define HSA_CATCH_WRAP            \
  }                               \
  catch (const std::exception&) { \
    return HSA_STATUS_ERROR;      \
  }
// Out-of-class definitions of EventDimension's static data members.
// aqlprofile_iterate_event_ids() below walks dimension_table as (name, id)
// pairs after calling EventDimension::init().
std::vector<std::string> EventDimension::dimension_list;
std::unordered_map<std::string, size_t> EventDimension::dimension_table;
namespace aql_profile_v2 {
// NOTE(review): the next three comment lines describe a command buffer
// partitioning manager, but no such type is defined in the visible part of
// this file - they look like a leftover and should be confirmed or removed.
// Command buffer partitioning manager
// Supports Pre/Post commands partitioning
// and prefix control partition
// Names imported from aql_profile for unqualified use inside aql_profile_v2.
using aql_profile::event_exception;
using aql_profile::event_t;
using ::aql_profile::Pm4Factory;
// Picks the event id to program for an event that carries SQ accumulation flags.
// Returns the factory's high- or low-resolution accumulate id when the matching
// accumulation mode is requested, and the event's own id otherwise.
uint32_t HandleSQFlagsBlock(Pm4Factory* pm4_factory, const aqlprofile_pmc_event_t& event) {
  auto resolved_id = event.event_id;
  // High resolution is tested first so that it keeps the precedence it has as
  // the later of two sequential overrides.
  if (event.flags.sq_flags.accum == AQLPROFILE_ACCUMULATION_HI_RES) {
    resolved_id = pm4_factory->GetAccumHiID();
  } else if (event.flags.sq_flags.accum == AQLPROFILE_ACCUMULATION_LO_RES) {
    resolved_id = pm4_factory->GetAccumLowID();
  }
  return resolved_id;
}
// Builds the counter descriptor for one requested event and reserves the next
// free counter register of the event's block instance.
//
// pm4_factory - factory queried for block information and accumulate ids.
// event       - the request; its bInternal flag is set when SPISkip() reports
//               that the event must be skipped.
// index_map   - next free register index per block instance; the entry of the
//               event's block is created on first use and incremented when a
//               register is reserved.
//
// Returns {event id to program, register index, block descriptor, block info}.
// Throws hsa_status_t HSA_STATUS_ERROR_INVALID_ARGUMENT when no block
// information is available or raw flags are set on a non-SQ block, and
// std::runtime_error when the block has no free counter register left.
counter_des_t GetCounter(Pm4Factory* pm4_factory, EventRequest& event,
                         std::map<block_des_t, uint32_t, lt_block_des>& index_map) {
  const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(event.block_name);
  // Other call sites in this file treat a null result as "unknown block", so
  // reject it here instead of dereferencing it.
  if (block_info == nullptr) throw HSA_STATUS_ERROR_INVALID_ARGUMENT;

  const block_des_t block_des = {block_info->id, event.block_index};
  // insert() leaves an existing entry untouched, so the first event of a block
  // instance starts at register 0 and later ones continue from the stored index.
  const auto ret = index_map.insert({block_des, 0});
  const auto reg_index = ret.first->second;
  auto visible_id = event.event_id;

  if (pm4_builder::SPISkip(block_info->attr, visible_id)) {
    // Skipped events are only flagged as internal; no register is reserved.
    event.bInternal = true;
    return {visible_id, reg_index, block_des, block_info};
  }

  // Thrown as a std::exception so the public entry points log the message
  // instead of dropping it in their catch (...) handler.
  if (reg_index >= block_info->counter_count)
    throw std::runtime_error("Event is out of block counter registers number limit");

  if (event.flags.raw) {
    // Flags are only understood for the SQ block.
    if (event.block_name == HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_SQ) {
      visible_id = HandleSQFlagsBlock(pm4_factory, event);
    } else {
      throw HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  }

  ret.first->second++;
  return {visible_id, reg_index, block_des, block_info};
}
// Converts the requested events into the counter descriptors used by the PMC
// builder. On GFX10, when none of the collected counters carries the GRBM
// block attribute, one GRBM counter is appended to the result.
pm4_builder::counters_vector CountersVec(std::vector<EventRequest>& events,
                                         Pm4Factory* pm4_factory) {
  // Next free register per block instance, shared by all GetCounter() calls.
  std::map<block_des_t, uint32_t, lt_block_des> next_register;
  pm4_builder::counters_vector counters;

  for (auto& request : events) {
    counters.push_back(GetCounter(pm4_factory, request, next_register));
  }

  if (pm4_factory->IsGFX10()) {
    if ((counters.get_attr() & CounterBlockGRBMAttr) == 0) {
      EventRequest grbm_request{0};
      grbm_request.block_name = HSA_VEN_AMD_AQLPROFILE_BLOCK_NAME_GRBM;
      counters.push_back(GetCounter(pm4_factory, grbm_request, next_register));
    }
  }
  return counters;
}
// Replays the collected counter samples to `callback`, one call per sample.
//
// The output buffer is read as consecutive 64-bit samples. When the agent
// reports more than one XCC, one sample per event whose block carries
// CounterBlockUmcAttr comes first; then, for every XCC, GetNumEvents() samples
// follow for each remaining event. Samples of events flagged bInternal are
// stepped over without invoking the callback.
//
// Returns HSA_STATUS_SUCCESS when every sample was delivered or the callback
// answered HSA_STATUS_INFO_BREAK; any other callback status is returned as is.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null callback or a handle
// that does not refer to a CounterMemoryManager, and HSA_STATUS_ERROR when the
// walk would run past the output buffer or block/dimension data is missing.
hsa_status_t _internal_aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
                                                   aqlprofile_pmc_data_callback_t callback,
                                                   void* userdata) {
  if (callback == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  auto counter_memorymgr = MemoryManager::GetManager(handle.handle);
  // Validate the manager before using it; dynamic_cast also yields null for a
  // null input pointer.
  CounterMemoryManager* memorymgr = dynamic_cast<CounterMemoryManager*>(counter_memorymgr.get());
  if (!memorymgr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  auto agent = counter_memorymgr->AgentHandle();

  aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(memorymgr->AgentHandle());
  const uint32_t xcc_num = pm4_factory->GetXccNumber();
  uint64_t* samples = reinterpret_cast<uint64_t*>(memorymgr->GetOutputBuf());
  uint64_t* buffer_end_location = samples + memorymgr->GetOutputBufSize() / sizeof(uint64_t);
  auto& events = memorymgr->GetEvents();

  // Running index of the UMC samples; only reported by the debug trace.
  size_t umc_sample_id = 0;
  if (xcc_num > 1) {
    for (auto& event : events) {
      if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
      const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(event.block_name);
      if (!block_info) return HSA_STATUS_ERROR;
      if (!(block_info->attr & CounterBlockUmcAttr)) continue;
#if DEBUG_TRACE == 2
      printf("DATA: sample index(%zu) bloc id(%u) index(%u) counter id(%u) res(%lu)\n",
             umc_sample_id, event.block_name, event.block_index, event.event_id, *samples);
#endif
      hsa_status_t status = callback(event, event.block_index, *samples, userdata);
      samples++;
      umc_sample_id++;
      if (status == HSA_STATUS_INFO_BREAK) return HSA_STATUS_SUCCESS;
      if (status != HSA_STATUS_SUCCESS) return status;
    }
  }

  for (uint32_t xcc_index = 0; xcc_index < xcc_num; xcc_index++) {
    for (auto& event : events) {
      if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
      const GpuBlockInfo* block_info = pm4_factory->GetBlockInfo(event.block_name);
      if (!block_info) return HSA_STATUS_ERROR;
      if (block_info->attr & CounterBlockUmcAttr) continue;
      // non-MI300A-AID counter event.
      const uint32_t block_samples_count = pm4_factory->GetNumEvents(event.block_name);
      const EventAttribDimension& attrib = EventAttribDimension::get(agent, event.block_name);
      if (!attrib.get_num()) return HSA_STATUS_ERROR;
      const size_t xcc_sample_count = attrib.get_num_instances() * block_samples_count;
      // Id of this event's first sample within the current XCC.
      const size_t first_sample_id =
          xcc_sample_count * xcc_index +
          static_cast<size_t>(event.block_index) * block_samples_count;

      for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
        // Every sample is bounds-checked: a per-event check alone would let
        // this loop walk past the end of the output buffer.
        if (samples >= buffer_end_location) return HSA_STATUS_ERROR;
#if DEBUG_TRACE == 2
        printf("DATA: xcc(%u) blk(%u) bloc id(%u) index(%u) counter id(%u) res(%lu)\n", xcc_index,
               blk, event.block_name, event.block_index, event.event_id, *samples);
#endif
        const size_t xcc_sample_id = first_sample_id + blk;
        if (!event.bInternal) {
          hsa_status_t status = callback(event, xcc_sample_id, *samples, userdata);
          if (status == HSA_STATUS_INFO_BREAK) return HSA_STATUS_SUCCESS;
          if (status != HSA_STATUS_SUCCESS) return status;
        }
        samples++;
      }
    }
  }
  return HSA_STATUS_SUCCESS;
}
// Builds the start, stop and read AQL packets for a counter profile.
//
// handle     - receives the id of the memory manager created for this profile;
//              written only on success.
// packets    - receives the populated start/stop/read packets.
// profile    - agent and events to collect.
// alloc_cb / dealloc_cb / userdata - forwarded to the CounterMemoryManager.
// memcpy_cb  - used to copy the generated commands into the command buffer.
//
// Returns HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR_INVALID_ARGUMENT for
// a null handle or packets pointer, HSA_STATUS_ERROR when the read commands
// produce no data, and HSA_STATUS_ERROR_OUT_OF_RESOURCES when the output
// buffer is smaller than the data the read commands produce. Exceptions raised
// while building the commands propagate to the caller.
hsa_status_t _internal_aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  if (handle == nullptr || packets == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  auto memorymgr =
      std::make_shared<CounterMemoryManager>(profile.agent, alloc_cb, dealloc_cb, userdata);
  MemoryManager::RegisterManager(memorymgr);

  // Until the handle is published the caller cannot release the registration,
  // so every failure path drops it here, the same way
  // aqlprofile_pmc_delete_packets() does.
  // NOTE(review): assumes GetHandler() already returns the registration id at
  // this point - confirm against MemoryManager.
  const auto unregister = [&memorymgr]() noexcept {
    try {
      aqlprofile_handle_t pending{};
      pending.handle = memorymgr->GetHandler();
      MemoryManager::DeleteManager(pending.handle);
    } catch (...) {
      // Best effort: a cleanup failure must not mask the original error.
    }
  };

  try {
    memorymgr->CopyEvents(profile.events, profile.event_count);

    pm4_builder::CmdBuffer read_cmd;
    pm4_builder::CmdBuffer start_cmd;
    pm4_builder::CmdBuffer stop_cmd;
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile.agent);
    const pm4_builder::counters_vector countersVec =
        CountersVec(memorymgr->GetEvents(), pm4_factory);
    pm4_builder::PmcBuilder* pmc_builder = pm4_factory->GetPmcBuilder();

    // Size the output buffer from the per-block requirements.
    size_t output_bytes = 8;  // Extra space for GRBM block on gfx10
    for (auto& event : memorymgr->GetEvents())
      output_bytes += pm4_factory->GetBytesNeeded(event.block_name);
    memorymgr->CreateOutputBuf(output_bytes);

    // Generate read commands
    const size_t data_size = pmc_builder->Read(&read_cmd, countersVec, memorymgr->GetOutputBuf());
    // Generate start commands
    pmc_builder->Start(&start_cmd, countersVec);
    // Generate stop commands
    pmc_builder->Stop(&stop_cmd, countersVec);

    if (data_size == 0) {
      // The size comes from Read(), so that is what the message names.
      ERR_LOGGING << "PMC Builder Read(): data size set to zero";
      unregister();
      return HSA_STATUS_ERROR;
    }
    if (memorymgr->GetOutputBufSize() < data_size) {
      unregister();
      return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    }

    // Copy generated commands; the command buffer holds read, start and stop
    // back to back, each padded to the command buffer alignment.
    const size_t start_size = aql_profile::CommandBufferMgr::Align(start_cmd.Size());
    const size_t stop_size = aql_profile::CommandBufferMgr::Align(stop_cmd.Size());
    const size_t read_size = aql_profile::CommandBufferMgr::Align(read_cmd.Size());
    memorymgr->CreateCmdBuf(start_size + stop_size + read_size);

    pm4_builder::CmdBuilder* cmd_writer = pm4_factory->GetCmdBuilder();
    uint8_t* cmdbuf = reinterpret_cast<uint8_t*>(memorymgr->GetCmdBuf());

    memcpy_cb(cmdbuf, read_cmd.Data(), read_cmd.Size(), userdata);
    aql_profile::PopulateAql(cmdbuf, read_cmd.Size(), cmd_writer, &packets->read_packet);
    cmdbuf += read_size;

    memcpy_cb(cmdbuf, start_cmd.Data(), start_cmd.Size(), userdata);
    aql_profile::PopulateAql(cmdbuf, start_cmd.Size(), cmd_writer, &packets->start_packet);
    cmdbuf += start_size;

    memcpy_cb(cmdbuf, stop_cmd.Data(), stop_cmd.Size(), userdata);
    aql_profile::PopulateAql(cmdbuf, stop_cmd.Size(), cmd_writer, &packets->stop_packet);

    // Publish the handle last so the caller never sees one for a failed build.
    handle->handle = memorymgr->GetHandler();
    return HSA_STATUS_SUCCESS;
  } catch (...) {
    unregister();
    throw;
  }
}
} // namespace aql_profile_v2
extern "C" {
// C entry point for building counter start/stop/read packets.
// Forwards to aql_profile_v2::_internal_aqlprofile_pmc_create_packets() and
// maps exceptions to status codes: a thrown hsa_status_t is logged and returned
// as is; every other exception yields HSA_STATUS_ERROR (std::exception messages
// are logged first).
PUBLIC_API hsa_status_t aqlprofile_pmc_create_packets(
    aqlprofile_handle_t* handle, aqlprofile_pmc_aql_packets_t* packets,
    aqlprofile_pmc_profile_t profile, aqlprofile_memory_alloc_callback_t alloc_cb,
    aqlprofile_memory_dealloc_callback_t dealloc_cb, aqlprofile_memory_copy_t memcpy_cb,
    void* userdata) {
  // Result for every exception that does not carry its own status.
  hsa_status_t outcome = HSA_STATUS_ERROR;
  try {
    outcome = aql_profile_v2::_internal_aqlprofile_pmc_create_packets(
        handle, packets, profile, alloc_cb, dealloc_cb, memcpy_cb, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    outcome = thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
  } catch (...) {
    // Unknown exception type: nothing to log, report the generic error.
  }
  return outcome;
}
// C entry point that drops the memory manager registered under `handle`.
// Never throws: any exception raised by the deletion is swallowed.
PUBLIC_API void aqlprofile_pmc_delete_packets(aqlprofile_handle_t handle) {
  try {
    MemoryManager::DeleteManager(handle.handle);
  } catch (...) {
    // Covers std::exception and everything else; there is no status to report.
  }
}
// C entry point for replaying collected counter samples to `callback`.
// Forwards to aql_profile_v2::_internal_aqlprofile_pmc_iterate_data() and maps
// exceptions to status codes: a thrown hsa_status_t is logged and returned as
// is; every other exception yields HSA_STATUS_ERROR (std::exception messages
// are logged first).
PUBLIC_API hsa_status_t aqlprofile_pmc_iterate_data(aqlprofile_handle_t handle,
                                                    aqlprofile_pmc_data_callback_t callback,
                                                    void* userdata) {
  // Result for every exception that does not carry its own status.
  hsa_status_t outcome = HSA_STATUS_ERROR;
  try {
    outcome = aql_profile_v2::_internal_aqlprofile_pmc_iterate_data(handle, callback, userdata);
  } catch (hsa_status_t thrown_status) {
    ERR_LOGGING << thrown_status;
    outcome = thrown_status;
  } catch (const std::exception& ex) {
    ERR_LOGGING << ex.what();
  } catch (...) {
    // Unknown exception type: nothing to log, report the generic error.
  }
  return outcome;
}
// Reports every known dimension (id, name) pair to `callback`.
//
// callback  - invoked once per entry of EventDimension::dimension_table; the
//             first result other than HSA_STATUS_SUCCESS stops the iteration
//             and is returned.
// user_data - passed through to the callback unchanged.
//
// The table is an unordered_map, so the order of the callbacks is unspecified.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null callback, a thrown
// hsa_status_t as is, and HSA_STATUS_ERROR for any other exception.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_ids(aqlprofile_eventname_callback_t callback,
                                                     void* user_data) {
  if (callback == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  try {
    EventDimension::init();
    for (const auto& [name, id] : EventDimension::dimension_table) {
      if (auto ret = callback(id, name.c_str(), user_data); ret != HSA_STATUS_SUCCESS) {
        return ret;
      }
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Logged like in the other entry points instead of being dropped silently.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Reports the coordinate of `counter_id` along every dimension of the event's
// block.
//
// agent      - agent whose block dimensions are looked up.
// event      - only its block_name is used.
// counter_id - id that EventAttribDimension::get_coordinates() decomposes.
// callback   - invoked once per dimension with (position, dimension id,
//              extent, coordinate, dimension name, userdata).
//
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null callback,
// HSA_STATUS_ERROR when the block has no dimensions or more than the local
// coordinate buffer can hold, a thrown hsa_status_t as is, and
// HSA_STATUS_ERROR for any other exception.
PUBLIC_API hsa_status_t aqlprofile_iterate_event_coord(aqlprofile_agent_handle_t agent,
                                                       aqlprofile_pmc_event_t event,
                                                       uint64_t counter_id,
                                                       aqlprofile_coordinate_callback_t callback,
                                                       void* userdata) {
  if (callback == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  try {
    const EventAttribDimension& attrib = EventAttribDimension::get(agent, event.block_name);
    const auto dim_count = attrib.get_num();
    if (!dim_count) return HSA_STATUS_ERROR;

    std::array<uint8_t, 32> coord{};
    // Enforced at run time: an assert vanishes under NDEBUG and would let
    // get_coordinates() write past the fixed-size buffer.
    if (!(dim_count < coord.size())) {
      ERR_LOGGING << "Event block has more dimensions than the coordinate buffer holds";
      return HSA_STATUS_ERROR;
    }
    attrib.get_coordinates(coord.data(), counter_id);

    for (size_t i = 0; i < dim_count; i++) {
      EventDimension dim = attrib.get_dim(i);
      callback(i, dim.id, dim.extent, coord.at(i), dim.name.data(), userdata);
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Logged like in the other entry points instead of being dropped silently.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Registers an agent described by the V0 info layout.
// Convenience wrapper around aqlprofile_register_agent_info() with the version
// fixed to AQLPROFILE_AGENT_VERSION_V0; its status is returned unchanged.
PUBLIC_API hsa_status_t aqlprofile_register_agent(aqlprofile_agent_handle_t* agent_id,
                                                  const aqlprofile_agent_info_t* agent_info) {
  // The versioned entry point takes the info block as an untyped pointer.
  const void* const untyped_info = agent_info;
  return aqlprofile_register_agent_info(agent_id, untyped_info, AQLPROFILE_AGENT_VERSION_V0);
}
// Registers an agent from a versioned info block and stores its handle.
//
// agent_id   - receives the handle returned by aql_profile::RegisterAgent().
// agent_info - aqlprofile_agent_info_t for AQLPROFILE_AGENT_VERSION_V0, or
//              aqlprofile_agent_info_v1_t for AQLPROFILE_AGENT_VERSION_V1.
// version    - selects how agent_info is interpreted.
//
// A V0 block is widened to the V1 layout with domain and location_id set to 0.
// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT for a null pointer or any version
// other than V0/V1, a thrown hsa_status_t as is, and HSA_STATUS_ERROR for any
// other exception.
PUBLIC_API hsa_status_t aqlprofile_register_agent_info(aqlprofile_agent_handle_t* agent_id,
                                                       const void* agent_info,
                                                       aqlprofile_agent_version_t version) {
  try {
    if (agent_id == nullptr || agent_info == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    switch (version) {
      case AQLPROFILE_AGENT_VERSION_V0: {
        const auto* info = static_cast<const aqlprofile_agent_info_t*>(agent_info);
        aqlprofile_agent_info_v1_t info_v1 = {
            .agent_gfxip = info->agent_gfxip,
            .xcc_num = info->xcc_num,
            .se_num = info->se_num,
            .cu_num = info->cu_num,
            .shader_arrays_per_se = info->shader_arrays_per_se,
            .domain = 0,
            .location_id = 0,
        };
        *agent_id = aql_profile::RegisterAgent(&info_v1);
      } break;
      case AQLPROFILE_AGENT_VERSION_V1: {
        *agent_id =
            aql_profile::RegisterAgent(static_cast<const aqlprofile_agent_info_v1_t*>(agent_info));
      } break;
      case AQLPROFILE_AGENT_VERSION_NONE:
      case AQLPROFILE_AGENT_VERSION_LAST:
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      default:
        // A value outside the enumerators must not fall through to the success
        // return below with *agent_id left unwritten.
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Logged like in the other entry points instead of being dropped silently.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Check if event is valid for the specific GPU
//
// agent  - agent whose PM4 factory is queried.
// event  - event to validate.
// result - set to true when the factory returns block information for the
//          event, and to false in every other case.
//
// Returns HSA_STATUS_SUCCESS when the query completed (whatever the outcome),
// HSA_STATUS_ERROR_INVALID_ARGUMENT for a null event or result pointer, a
// thrown hsa_status_t as is, and HSA_STATUS_ERROR for any other exception.
PUBLIC_API hsa_status_t aqlprofile_validate_pmc_event(aqlprofile_agent_handle_t agent,
                                                      const aqlprofile_pmc_event_t* event,
                                                      bool* result) {
  if (result == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  *result = false;
  if (event == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(agent);
    if (pm4_factory->GetBlockInfo(event) != nullptr) *result = true;
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Logged like in the other entry points instead of being dropped silently.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
// Answers a query about the counter blocks of the profile's agent.
//
// profile   - supplies the agent and, for AQLPROFILE_INFO_BLOCK_COUNTERS, the
//             first event.
// attribute - selects the query:
//   AQLPROFILE_INFO_BLOCK_ID       - `value` points to a
//       hsa_ven_amd_aqlprofile_id_query_t whose `name` is looked up; `id` and
//       `instance_count` are filled in.
//   AQLPROFILE_INFO_BLOCK_COUNTERS - `value` points to a uint32_t that receives
//       the counter count of the block of profile->events[0].
// value     - in/out storage as described above.
//
// Returns HSA_STATUS_ERROR for a null profile or when block or dimension
// information is unavailable, HSA_STATUS_ERROR_INVALID_ARGUMENT for a null
// value, an unknown attribute or a counters query on a profile without events,
// a thrown hsa_status_t as is, and HSA_STATUS_ERROR for any other exception.
PUBLIC_API hsa_status_t aqlprofile_get_pmc_info(const aqlprofile_pmc_profile_t* profile,
                                                aqlprofile_pmc_info_type_t attribute, void* value) {
  if (!profile) return HSA_STATUS_ERROR;
  if (value == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  try {
    aql_profile::Pm4Factory* pm4_factory = aql_profile::Pm4Factory::Create(profile->agent);
    switch (attribute) {
      case AQLPROFILE_INFO_BLOCK_ID: {
        auto* query = static_cast<hsa_ven_amd_aqlprofile_id_query_t*>(value);
        const uint32_t block = pm4_factory->FindBlock(query->name);
        const GpuBlockInfo* info = pm4_factory->GetBlockInfo(block);
        if (!info) return HSA_STATUS_ERROR;
        const auto& attrib = EventAttribDimension::get(
            profile->agent, static_cast<hsa_ven_amd_aqlprofile_block_name_t>(block));
        if (!attrib.get_num()) return HSA_STATUS_ERROR;
        query->id = block;
        query->instance_count = attrib.get_num_instances();
      } break;
      case AQLPROFILE_INFO_BLOCK_COUNTERS: {
        // The query reads events[0], so an empty profile cannot be answered.
        if (profile->events == nullptr || profile->event_count == 0)
          return HSA_STATUS_ERROR_INVALID_ARGUMENT;
        const GpuBlockInfo* info = pm4_factory->GetBlockInfo(&profile->events[0]);
        if (!info) return HSA_STATUS_ERROR;
        *static_cast<uint32_t*>(value) = info->counter_count;
      } break;
      default:
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
    }
  } catch (hsa_status_t err) {
    ERR_LOGGING << err;
    return err;
  } catch (const std::exception& e) {
    // Logged like in the other entry points instead of being dropped silently.
    ERR_LOGGING << e.what();
    return HSA_STATUS_ERROR;
  } catch (...) {
    return HSA_STATUS_ERROR;
  }
  return HSA_STATUS_SUCCESS;
}
} // extern "C"