Files
Kandula, Venkateshwar reddy 0ff0ffffa2 [SDK] Expose counter dims in rocprofiler_counter_info_v1_t and only show counters being profiled in metadata. (#325)
* expose dimensional info in rocprofiler_counter_info_v1_t.

* add counter_id in dim info.

* address review comments

* format.

* address comments.

* use array of pointers for dimensions_instances.

* format and comments.

* address comments.

* new line.

* Update counter_defs.yaml

* Update counter_defs.yaml

* Update counter_defs.yaml

* counter_defs.

* format counter defs.

* format counter defs.

* format counter defs.

* show only counters being profiled in metadata.

* Format.

* use config for counters and fix warnings.

* add version for rocprofiler_counter_dimension_info_v1_t struct.

* rename rocprofiler_counter_record_dimension_instance_v1_info_t.

* account device id from pmc for counters metadata.

* move dim structs to counters.h.

* address comments to compare value.

* fix tests.

* Address comments. use pointer of arrays for ABI.

* rebase.

* fix build error.

* use separate metadata::init() for rocprofv3.

* also print not found counters.

* precompute all the perf counters needed to be in metadata.

* Misc.

* format

* Format.

* rocprofiler::sdk::container::c_array

* Address comments.

* source/lib/output/metadata.cpp

* lint.

* add unit test for c_array.

* add unit test and serialization support for c_array container.

* Misc.

* Clean files.

* Format.

* clang-tidy.

* add more checks to c_array.

* misc. typo

* Addr comments.

---------

Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com>
Co-authored-by: Jonathan R. Madsen <Jonathan.Madsen@amd.com>

[ROCm/rocprofiler-sdk commit: bf0fad1d54]
2025-07-22 14:24:25 -07:00

489 řádky
19 KiB
C++

// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "client.hpp"
#include <rocprofiler-sdk/registration.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <atomic>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <sstream>
#include <stdexcept>
#include <thread>
#include <unordered_map>
#include <vector>
// Checks the status returned by a rocprofiler API call.
//
// `result` is evaluated once and compared against ROCPROFILER_STATUS_SUCCESS. On any
// other status the macro writes a diagnostic to std::cerr (stringified call expression,
// __FILE__, __LINE__, `msg`, the numeric status and the text returned by
// rocprofiler_get_status_string()) and then throws std::runtime_error with a similar
// message.
//
// `msg` must be a string literal: the exception text is built with `msg " failure ("`,
// which relies on adjacent string-literal concatenation.
// Because `result` is stringified with #result, the text of the call expression is part
// of the emitted diagnostic.
// The expansion is a bare braced block (not do { } while(0)), so be careful when using
// it as the only statement of an if/else branch.
#define ROCPROFILER_CALL(result, msg) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
<< std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
<< status_msg << ")"; \
throw std::runtime_error(errmsg.str()); \
} \
}
// Trivial hook that always reports 1.
// NOTE(review): presumably the function declared in client.hpp - confirm.
int
start()
{
    constexpr int started = 1;
    return started;
}
namespace
{
// Class to sample counter values from the ROCProfiler API
// This class is not thread safe and should not be shared between threads.
// Only a single instance of this class should be created per agent.
class counter_sampler
{
public:
    // Setup system profiling for an agent. Explicit so that an agent id is never
    // silently converted into a sampler (construction registers services with rocprofiler).
    explicit counter_sampler(rocprofiler_agent_id_t agent);

    // The constructor hands `this` to the device counting service as callback user data,
    // so a copied or moved sampler would leave that callback pointing at a different
    // (possibly destroyed) object. Copy and move are therefore disabled.
    counter_sampler(const counter_sampler&) = delete;
    counter_sampler& operator=(const counter_sampler&) = delete;
    counter_sampler(counter_sampler&&)                 = delete;
    counter_sampler& operator=(counter_sampler&&) = delete;

    // Decode the counter name of a record. Returns "UNKNOWN" when the counter id of the
    // record is not among the agent's supported counters.
    std::string decode_record_name(const rocprofiler_counter_record_t& rec) const;

    // Get the dimensions of a record (what CU/SE/etc the counter is for). High cost operation
    // should be cached if possible.
    static std::unordered_map<std::string, size_t> get_record_dimensions(
        const rocprofiler_counter_record_t& rec);

    // Sample the counter values for a set of counters, returns the records in the out parameter.
    rocprofiler_status_t sample_counter_values(const std::vector<std::string>&            counters,
                                               std::vector<rocprofiler_counter_record_t>& out);

    // Get the available agents on the system
    static std::vector<rocprofiler_agent_v0_t> get_available_agents();

    // Flush the sampler's buffer (the returned status is ignored)
    void flush() const { rocprofiler_flush_buffer(buf_); }

    // Stop the sampler's context (the returned status is ignored)
    void stop() const { rocprofiler_stop_context(ctx_); }

private:
    rocprofiler_agent_id_t          agent_   = {};
    rocprofiler_context_id_t        ctx_     = {};
    rocprofiler_buffer_id_t         buf_     = {};
    // Profile handed to the counting service by set_profile(); handle 0 means "none yet"
    rocprofiler_counter_config_id_t profile_ = {.handle = 0};

    // Counter-name list -> counter config created for it
    std::map<std::vector<std::string>, rocprofiler_counter_config_id_t> cached_profiles_;
    // Counter config handle -> number of records a sample of that config is expected to produce
    std::map<uint64_t, uint64_t> profile_sizes_;
    // Counter id handle -> counter name; filled lazily by decode_record_name()
    mutable std::map<uint64_t, std::string> id_to_name_;

    // Internal function used to set the profile for the agent when start_context is called
    void set_profile(rocprofiler_context_id_t ctx, rocprofiler_device_counting_agent_cb_t cb) const;

    // Get the size of a counter in number of records
    static size_t get_counter_size(rocprofiler_counter_id_t counter);

    // Get the supported counters for an agent
    static std::unordered_map<std::string, rocprofiler_counter_id_t> get_supported_counters(
        rocprofiler_agent_id_t agent);

    // Get the dimensions of a counter
    static std::vector<rocprofiler_counter_record_dimension_info_t> get_counter_dimensions(
        rocprofiler_counter_id_t counter);
};
// Builds the rocprofiler objects this sampler needs for `agent`: a context, a buffer, a
// callback thread assigned to that buffer, and the device counting service.
// Every step goes through ROCPROFILER_CALL, so any non-success status is reported on
// std::cerr and surfaces as a std::runtime_error.
counter_sampler::counter_sampler(rocprofiler_agent_id_t agent)
: agent_(agent)
{
// Setup context (should only be done once per agent)
auto client_thread = rocprofiler_callback_thread_t{};
ROCPROFILER_CALL(rocprofiler_create_context(&ctx_), "context creation failed");
// The buffer callback is an empty lambda: whatever is delivered through the buffer is
// ignored here, sampled values are returned by sample_counter_values() instead.
// NOTE(review): 4096 / 2048 are presumably the buffer size and its watermark - confirm
// against the rocprofiler_create_buffer documentation.
ROCPROFILER_CALL(rocprofiler_create_buffer(
ctx_,
4096,
2048,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
[](rocprofiler_context_id_t,
rocprofiler_buffer_id_t,
rocprofiler_record_header_t**,
size_t,
void*,
uint64_t) {},
nullptr,
&buf_),
"buffer creation failed");
// Give the buffer its own callback thread
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
"failure creating callback thread");
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(buf_, client_thread),
"failed to assign thread for buffer");
// Register the counting service. `this` is passed as user data, and the callback
// forwards to set_profile() so the currently selected profile_ is handed to rocprofiler.
// The sampler must therefore stay at the same address for as long as the service exists.
ROCPROFILER_CALL(rocprofiler_configure_device_counting_service(
ctx_,
buf_,
agent,
[](rocprofiler_context_id_t context_id,
rocprofiler_agent_id_t,
rocprofiler_device_counting_agent_cb_t set_config,
void* user_data) {
if(user_data)
{
auto* sampler = static_cast<counter_sampler*>(user_data);
sampler->set_profile(context_id, set_config);
}
},
this),
"Could not setup buffered service");
}
// Returns the counter name for the record `rec`, or "UNKNOWN" (after logging the id to
// std::clog) when the record's counter id is not one of the agent's supported counters.
// The id -> name table is built on first use and reused afterwards.
std::string
counter_sampler::decode_record_name(const rocprofiler_counter_record_t& rec) const
{
    // Populate the reverse lookup table lazily from the name -> id map
    if(id_to_name_.empty())
    {
        for(const auto& entry : counter_sampler::get_supported_counters(agent_))
        {
            id_to_name_.emplace(entry.second.handle, entry.first);
        }
    }

    // Resolve which counter produced this record (the query status is not checked)
    auto counter_id = rocprofiler_counter_id_t{.handle = 0};
    rocprofiler_query_record_counter_id(rec.id, &counter_id);

    const auto known = id_to_name_.find(counter_id.handle);
    if(known != id_to_name_.end()) return known->second;

    std::clog << "Unknown counter id = " << counter_id.handle << "\n";
    return "UNKNOWN";
}
// Maps each dimension name of the record's counter to the record's position within that
// dimension. The statuses returned by the rocprofiler queries are not checked here.
std::unordered_map<std::string, size_t>
counter_sampler::get_record_dimensions(const rocprofiler_counter_record_t& rec)
{
    // Identify the counter behind this record
    auto counter_id = rocprofiler_counter_id_t{.handle = 0};
    rocprofiler_query_record_counter_id(rec.id, &counter_id);

    std::unordered_map<std::string, size_t> positions;
    for(const auto& dimension : get_counter_dimensions(counter_id))
    {
        size_t index = 0;
        rocprofiler_query_record_dimension_position(rec.id, dimension.id, &index);
        positions.emplace(dimension.name, index);
    }
    return positions;
}
// Samples the counters named in `counters` and stores the resulting records in `out`.
//
// The counter config for a given name list is created on first use and cached; names the
// agent does not support are reported on std::cerr and left out of the config.
// Returns the status of the sampling call, or ROCPROFILER_STATUS_ERROR if `out` could not
// be sized. Throws std::runtime_error (via ROCPROFILER_CALL) if the config cannot be created.
rocprofiler_status_t
counter_sampler::sample_counter_values(const std::vector<std::string>&            counters,
                                       std::vector<rocprofiler_counter_record_t>& out)
{
    auto cached = cached_profiles_.find(counters);
    if(cached == cached_profiles_.end())
    {
        // First request for this list: resolve names to ids and add up how many records
        // one sample is expected to produce
        const auto                            supported = get_supported_counters(agent_);
        std::vector<rocprofiler_counter_id_t> gpu_counters;
        size_t                                record_count = 0;
        for(const auto& name : counters)
        {
            const auto match = supported.find(name);
            if(match != supported.end())
            {
                gpu_counters.push_back(match->second);
                record_count += get_counter_size(match->second);
            }
            else
            {
                std::cerr << "Counter " << name << " not found\n";
            }
        }

        rocprofiler_counter_config_id_t profile = {};
        ROCPROFILER_CALL(rocprofiler_create_counter_config(
                             agent_, gpu_counters.data(), gpu_counters.size(), &profile),
                         "Could not create profile");

        cached = cached_profiles_.emplace(counters, profile).first;
        profile_sizes_.emplace(profile.handle, record_count);
    }

    const auto& selected = cached->second;
    try
    {
        // Make room for every record the selected config is expected to return
        out.resize(profile_sizes_.at(selected.handle));
    } catch(const std::exception& e)
    {
        std::cerr << "Caught exception: " << e.what() << "\n";
        return ROCPROFILER_STATUS_ERROR;
    }

    // set_profile() hands profile_ to rocprofiler, so select it before starting the context
    profile_ = selected;
    rocprofiler_start_context(ctx_);
    // NOTE(review): fixed 50 ms wait between starting the context and reading the
    // counters - presumably to let the counters accumulate; confirm.
    std::this_thread::sleep_for(std::chrono::milliseconds(50));

    size_t     out_size = out.size();
    const auto status   = rocprofiler_sample_device_counting_service(
        ctx_, {}, ROCPROFILER_COUNTER_FLAG_NONE, out.data(), &out_size);
    rocprofiler_stop_context(ctx_);

    // Shrink to the number of records reported back through out_size
    out.resize(out_size);
    return status;
}
// Returns a copy of every agent reported by rocprofiler whose type is
// ROCPROFILER_AGENT_TYPE_GPU. Throws std::runtime_error if the query fails or if the
// callback is invoked with an agent version other than ROCPROFILER_AGENT_INFO_VERSION_0.
std::vector<rocprofiler_agent_v0_t>
counter_sampler::get_available_agents()
{
    std::vector<rocprofiler_agent_v0_t> agents;

    // Callback invoked by rocprofiler with the agent array; `sink` is the vector above
    rocprofiler_query_available_agents_cb_t iterate_cb =
        [](rocprofiler_agent_version_t version, const void** entries, size_t count, void* sink) {
            if(version != ROCPROFILER_AGENT_INFO_VERSION_0)
            {
                throw std::runtime_error{"unexpected rocprofiler agent version"};
            }

            auto& gpus = *static_cast<std::vector<rocprofiler_agent_v0_t>*>(sink);
            for(size_t idx = 0; idx < count; ++idx)
            {
                const auto& candidate = *static_cast<const rocprofiler_agent_v0_t*>(entries[idx]);
                if(candidate.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
                gpus.emplace_back(candidate);
            }
            return ROCPROFILER_STATUS_SUCCESS;
        };

    ROCPROFILER_CALL(
        rocprofiler_query_available_agents(ROCPROFILER_AGENT_INFO_VERSION_0,
                                           iterate_cb,
                                           sizeof(rocprofiler_agent_t),
                                           const_cast<void*>(static_cast<const void*>(&agents))),
        "query available agents");
    return agents;
}
// Hands the currently selected profile to rocprofiler through `cb` for context `ctx`.
// Does nothing while no profile has been selected (profile_ still has handle 0).
void
counter_sampler::set_profile(rocprofiler_context_id_t               ctx,
                             rocprofiler_device_counting_agent_cb_t cb) const
{
    const bool no_profile_selected = (profile_.handle == 0);
    if(no_profile_selected) return;

    cb(ctx, profile_);
}
// Returns the number of records one sample of `counter` produces, taken from the
// dimensions_instances_count field of the counter's version-1 info.
// Throws std::runtime_error (via ROCPROFILER_CALL) if the info query fails.
size_t
counter_sampler::get_counter_size(rocprofiler_counter_id_t counter)
{
// Filled in by rocprofiler_query_counter_info below; only read after a successful query
rocprofiler_counter_info_v1_t info;
ROCPROFILER_CALL(rocprofiler_query_counter_info(
counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
"Could not query info for counter");
return info.dimensions_instances_count;
}
// Builds a counter-name -> counter-id map for every counter `agent` supports.
// Runs in two passes: collect the ids via rocprofiler_iterate_agent_supported_counters,
// then query the version-0 info of each id to obtain its name.
// Throws std::runtime_error (via ROCPROFILER_CALL) if either rocprofiler call fails.
std::unordered_map<std::string, rocprofiler_counter_id_t>
counter_sampler::get_supported_counters(rocprofiler_agent_id_t agent)
{
std::unordered_map<std::string, rocprofiler_counter_id_t> out;
std::vector<rocprofiler_counter_id_t> gpu_counters;
// Pass 1: the callback appends every reported counter id to gpu_counters (passed as user data)
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent,
[](rocprofiler_agent_id_t,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
std::vector<rocprofiler_counter_id_t>* vec =
static_cast<std::vector<rocprofiler_counter_id_t>*>(user_data);
for(size_t i = 0; i < num_counters; i++)
{
vec->push_back(counters[i]);
}
return ROCPROFILER_STATUS_SUCCESS;
},
static_cast<void*>(&gpu_counters)),
"Could not fetch supported counters");
// Pass 2: look up each id's name; emplace keeps the first id if two counters share a name
for(auto& counter : gpu_counters)
{
rocprofiler_counter_info_v0_t info;
ROCPROFILER_CALL(
rocprofiler_query_counter_info(
counter, ROCPROFILER_COUNTER_INFO_VERSION_0, static_cast<void*>(&info)),
"Could not query info for counter");
out.emplace(info.name, counter);
}
return out;
}
// Returns a copy of the dimension descriptors listed in the version-1 info of `counter`.
// Throws std::runtime_error (via ROCPROFILER_CALL) if the info query fails.
std::vector<rocprofiler_counter_record_dimension_info_t>
counter_sampler::get_counter_dimensions(rocprofiler_counter_id_t counter)
{
    rocprofiler_counter_info_v1_t info;
    ROCPROFILER_CALL(rocprofiler_query_counter_info(
                         counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
                     "Could not query info for counter");

    // NOTE(review): treats *info.dimensions as the start of dimensions_count contiguous
    // entries - confirm against the rocprofiler_counter_info_v1_t definition.
    auto first = *info.dimensions;
    auto last  = first + info.dimensions_count;
    return std::vector<rocprofiler_counter_record_dimension_info_t>(first, last);
}
// Accessor for the process-wide stop flag shared by the sampling thread and tool_fini().
// The flag is a function-local static that starts out false; every call returns the same object.
std::atomic<bool>&
exit_toggle()
{
    static std::atomic<bool> stop_requested{false};
    return stop_requested;
}
// Finalizer received in tool_init(); called from the atexit handler registered there
rocprofiler_client_finalize_t finalize{nullptr};
// Client id received in rocprofiler_configure(); reset to nullptr by tool_fini()
rocprofiler_client_id_t* client_id{nullptr};
// Sampler created in tool_init() for the first GPU agent; released in tool_fini()
std::shared_ptr<counter_sampler> sampler{};
// Background sampling thread; allocated in tool_init(), joined and deleted in tool_fini()
std::thread* sampler_thread{nullptr};
} // namespace
// Tool initialization hook registered through rocprofiler_configure().
//
// Stores the finalize callback, registers an atexit handler that finalizes the client if
// it is still registered, creates a counter_sampler for the first GPU agent and starts the
// background thread that samples "SQ_WAVES" until exit_toggle() becomes true.
// Returns 0 on success and -1 when no GPU agent is available.
int
tool_init(rocprofiler_client_finalize_t fini_func, void*)
{
    finalize = fini_func;
    std::atexit([]() {
        if(client_id) finalize(*client_id);
    });

    // Get the agents available on the device; nothing can be sampled without one
    const auto gpu_agents = counter_sampler::get_available_agents();
    if(gpu_agents.empty())
    {
        std::cerr << "No agents found\n";
        return -1;
    }

    // Use the first agent found
    sampler = std::make_shared<counter_sampler>(gpu_agents.front().id);

    // Body of the sampling thread: works on the file-level `sampler`, so nothing is captured
    auto sampling_loop = []() {
        constexpr auto                            poll_interval = std::chrono::milliseconds(50);
        const std::vector<std::string>            requested{"SQ_WAVES"};
        std::vector<rocprofiler_counter_record_t> records;
        size_t                                    sample_no = 1;

        while(sampler && !exit_toggle().load())
        {
            const auto status = sampler->sample_counter_values(requested, records);
            if(status == ROCPROFILER_STATUS_ERROR_HSA_NOT_LOADED)
            {
                // Retry without consuming a sample number
                std::clog << "HSA not loaded yet....\n";
                std::this_thread::sleep_for(poll_interval);
                continue;
            }

            std::clog << "Sample " << sample_no << ":\n";
            if(status == ROCPROFILER_STATUS_SUCCESS)
            {
                // Dimension positions are only printed for the first sample
                const bool print_dimensions = (sample_no == 1);
                for(const auto& record : records)
                {
                    if(!sampler) break;
                    const auto counter_name = sampler->decode_record_name(record);
                    std::clog << "\tCounter: " << record.id << " Name: " << counter_name
                              << " Value: " << record.counter_value
                              << " User data: " << record.user_data.value << "\n";

                    if(!print_dimensions) continue;
                    if(!sampler) break;
                    for(const auto& [dim_name, position] : sampler->get_record_dimensions(record))
                    {
                        std::clog << "\t\tDimension Name: " << dim_name << ": " << position
                                  << "\n";
                    }
                }
            }

            ++sample_no;
            std::this_thread::sleep_for(poll_interval);
        }

        // Acknowledge the stop request so tool_fini() can proceed
        exit_toggle().store(false);
    };
    sampler_thread = new std::thread{sampling_loop};

    // no errors
    return 0;
}
// Tool finalization hook registered through rocprofiler_configure().
//
// Stops the sampling thread, stops and flushes the sampler, flushes the output stream
// passed as `user_data` (deleting it unless it is std::cout or std::cerr) and releases
// the sampler and the thread object.
//
// Every step is guarded: tool_init() can return before the sampler or its thread exist
// (no GPU agent found), in which case there is nothing to signal, join or stop.
void
tool_fini(void* user_data)
{
    std::clog << "In tool fini\n" << std::flush;

    // Prevents the atexit handler registered in tool_init() from finalizing again
    client_id = nullptr;

    // Ask the sampling loop to stop and wait for the thread to finish. join() blocks
    // until the loop has exited, so no busy-wait on the flag is needed and there is no
    // hang when the thread was never started.
    if(sampler_thread)
    {
        exit_toggle().store(true);
        if(sampler_thread->joinable()) sampler_thread->join();
    }

    // The sampling thread is gone, so the sampler can be shut down safely
    if(sampler)
    {
        sampler->stop();
        sampler->flush();
    }

    if(auto* output_stream = static_cast<std::ostream*>(user_data); output_stream)
    {
        *output_stream << std::flush;
        // Only streams allocated by rocprofiler_configure() are owned here
        if(output_stream != &std::cout && output_stream != &std::cerr) delete output_stream;
    }

    sampler.reset();
    delete sampler_thread;
    sampler_thread = nullptr;

    std::clog << "Completed tool fini\n" << std::flush;
}
// Entry point through which rocprofiler configures this tool.
//
// Names the client, remembers its id, logs a banner with the decoded SDK version, opens
// the output stream selected by ROCPROFILER_SAMPLE_OUTPUT_FILE ("stdout", "stderr" or a
// file name; default "counter_collection.log") and returns the configure result that
// points at tool_init / tool_fini, with the stream as their tool data.
extern "C" rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t                 version,
                      const char*              runtime_version,
                      uint32_t                 priority,
                      rocprofiler_client_id_t* id)
{
    // set the client name and keep the id for finalization
    id->name  = "CounterClientSample";
    client_id = id;

    // `version` is decoded as major * 10000 + minor * 100 + patch
    const uint32_t major = version / 10000;
    const uint32_t minor = (version % 10000) / 100;
    const uint32_t patch = version % 100;

    // announce which SDK version the client is running against
    std::stringstream banner;
    banner << id->name << " (priority=" << priority << ") is using rocprofiler-sdk v" << major
           << "." << minor << "." << patch << " (" << runtime_version << ")";
    std::clog << banner.str() << std::endl;

    // pick the output destination; the environment variable overrides the default file
    const char*       requested = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE");
    const std::string filename  = requested ? requested : "counter_collection.log";

    auto open_output = [&filename]() -> std::ostream* {
        if(filename == "stdout") return &std::cout;
        if(filename == "stderr") return &std::cerr;
        // ownership passes to tool_fini(), which deletes file streams
        return new std::ofstream{filename};
    };
    std::ostream* output_stream = open_output();

    // create configure data (static: initialized on the first call only)
    static auto cfg =
        rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
                                            &tool_init,
                                            &tool_fini,
                                            static_cast<void*>(output_stream)};

    // return pointer to configure data
    return &cfg;
}