[SDK] Expose counter dims in rocprofiler_counter_info_v1_t and only show counters being profiled in metadata. (#325)

* expose dimensional info in rocprofiler_counter_info_v1_t.

* add counter_id in dim info.

* address review comments

* format.

* address comments.

* use array of pointers for dimensions_instances.

* format and comments.

* address comments.

* new line.

* Update counter_defs.yaml

* Update counter_defs.yaml

* Update counter_defs.yaml

* counter_defs.

* format counter defs.

* format counter defs.

* format counter defs.

* show only counters being profiled in metadata.

* Format.

* use config for counters and fix warnings.

* add version for rocprofiler_counter_dimension_info_v1_t struct.

* rename rocprofiler_counter_record_dimension_instance_v1_info_t.

* account device id from pmc for counters metadata.

* move dim structs to counters.h.

* address comments to compare value.

* fix tests.

* Address comments. use pointer of arrays for ABI.

* rebase.

* fix build error.

* use separate metadata::init() for rocprofv3.

* also print not found counters.

* precompute all the perf counters needed to be in metadata.

* Misc.

* format

* Format.

* rocprofiler::sdk::container::c_array

* Address comments.

* source/lib/output/metadata.cpp

* lint.

* add unit test for c_array.

* add unit test and serialization support for c_array container.

* Misc.

* Clean files.

* Format.

* clang-tidy.

* add more checks to c_array.

* misc. typo

* Addr comments.

---------

Co-authored-by: Venkateshwar Reddy Kandula <vkandula@amd.com>
Co-authored-by: Jonathan R. Madsen <Jonathan.Madsen@amd.com>

[ROCm/rocprofiler-sdk commit: bf0fad1d54]
Tento commit je obsažen v:
Kandula, Venkateshwar reddy
2025-07-22 16:24:25 -05:00
odevzdal GitHub
rodič b88018d24d
revize 0ff0ffffa2
19 změnil soubory, kde provedl 796 přidání a 103 odebrání
+1 -1
Zobrazit soubor
@@ -68,7 +68,7 @@ cmake \
-DCMAKE_PREFIX_PATH=/opt/rocm \
rocprofiler-sdk-source
cmake --build rocprofiler-sdk-build --target all --parallel $(nproc)
cmake --build rocprofiler-sdk-build --target all --parallel $(nproc)
```
To install ROCprofiler, run:
@@ -112,7 +112,7 @@ fill_dimension_cache(rocprofiler_counter_id_t counter)
(*dimension_cache())
->emplace(counter.handle,
std::vector<rocprofiler_counter_record_dimension_info_t>{
info.dimensions, info.dimensions + info.dimensions_count});
*info.dimensions, *info.dimensions + info.dimensions_count});
}
/**
@@ -294,7 +294,7 @@ counter_sampler::get_counter_size(rocprofiler_counter_id_t counter)
ROCPROFILER_CALL(rocprofiler_query_counter_info(
counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
"Could not query info for counter");
return info.instance_ids_count;
return info.dimensions_instances_count;
}
std::unordered_map<std::string, rocprofiler_counter_id_t>
@@ -339,7 +339,7 @@ counter_sampler::get_counter_dimensions(rocprofiler_counter_id_t counter)
counter, ROCPROFILER_COUNTER_INFO_VERSION_1, static_cast<void*>(&info)),
"Could not query info for counter");
return std::vector<rocprofiler_counter_record_dimension_info_t>{
info.dimensions, info.dimensions + info.dimensions_count};
*info.dimensions, *info.dimensions + info.dimensions_count};
}
std::atomic<bool>&
@@ -292,7 +292,7 @@ dispatch_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data,
"Could not query counter_id");
cap.expected_counter_names.emplace(found_counter.handle, std::string(info.name));
cap.remaining.push_back(found_counter);
cap.expected.emplace(found_counter.handle, info.instance_ids_count);
cap.expected.emplace(found_counter.handle, info.dimensions_instances_count);
auto& info_vector =
cap.expected_data_dims.emplace(found_counter.handle, validate_dim_presence{})
@@ -300,7 +300,7 @@ dispatch_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data,
for(uint64_t i = 0; i < info.dimensions_count; i++)
{
info_vector.maybe_forward(info.dimensions[i]);
info_vector.maybe_forward(*info.dimensions[i]);
}
}
if(cap.expected.empty())
+70 -10
Zobrazit soubor
@@ -50,24 +50,83 @@ typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_counter_info_v0_t
uint8_t is_derived : 1; ///< If this counter is a derived counter
} rocprofiler_counter_info_v0_t;
/**
* @brief (experimental) Represents metadata about a single dimension of a counter instance.
*
* This structure provides the name of the dimension (e.g., "XCC", "SE", etc.)
* and the index indicating the position of a specific counter instance within that dimension.
* Entries of this type are referenced through the `dimensions` pointer array of
* ::rocprofiler_counter_record_dimension_instance_info_t.
*/
typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_counter_dimension_info_t
{
uint64_t size; ///< Size of this structure. Used for versioning and validation.
const char* dimension_name;
size_t index;
/**
* @var dimension_name
* @brief Name of the dimension this instance belongs to.
*
* @var index
* @brief Position (zero-based) of the instance within the specified dimension.
*/
} rocprofiler_counter_dimension_info_t;
/**
* @brief (experimental) Describes a specific counter instance and its position across multiple
* dimensions.
*
* This structure provides the unique instance ID, associated counter ID, number of dimensions for
* the instance, and a pointer to an array of pointers, each referring to the metadata (name and
* index) of one dimension.
*/
typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_counter_record_dimension_instance_info_t
{
uint64_t size; ///< Size of this structure. Used for versioning and validation.
rocprofiler_counter_instance_id_t instance_id;
uint64_t counter_id;
uint64_t dimensions_count;
const rocprofiler_counter_dimension_info_t** dimensions;
/**
* @var instance_id
* @brief Encoded identifier for the instance, which includes the counter ID and all dimension
* positions.
*
* @var counter_id
* @brief Identifier of the counter associated with this instance.
*
* @var dimensions_count
* @brief Number of dimensions associated with this instance.
*
* @var dimensions
* @brief Array of pointers to dimension info structures, each representing one dimension
* and the position of this instance within that dimension.
*
* The array has `dimensions_count` elements, and each element is a pointer to a
* `rocprofiler_counter_dimension_info_t` structure.
*/
} rocprofiler_counter_record_dimension_instance_info_t;
/**
* @brief (experimental) Counter info struct version 1. Combines information from
* ::rocprofiler_counter_info_v0_t with the dimension information.
*/
typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_counter_info_v1_t
{
rocprofiler_counter_id_t id; ///< Id of this counter
const char* name; ///< Name of the counter
uint64_t size; ///< Size of this structure. Used for versioning and validation.
rocprofiler_counter_id_t id; ///< Id of this counter
const char* name; ///< Name of the counter
const char* description; ///< Description of the counter
const char* block; ///< Block of the counter (non-derived only)
const char* expression; ///< Counter expression (derived counters only)
uint8_t is_constant : 1; ///< If this counter is HW constant
uint8_t is_derived : 1; ///< If this counter is a derived counter
uint64_t dimensions_count;
const rocprofiler_counter_record_dimension_info_t* dimensions;
uint64_t instance_ids_count;
const rocprofiler_counter_instance_id_t* instance_ids;
uint64_t dimensions_count;
const rocprofiler_counter_record_dimension_info_t** dimensions;
uint64_t dimensions_instances_count;
const rocprofiler_counter_record_dimension_instance_info_t** dimensions_instances;
/// @var dimensions_count
/// @brief Number of dimensions for the counter
@@ -75,11 +134,12 @@ typedef struct ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_counter_info_v1_t
/// @var dimensions
/// @brief Dimension information of the counter
///
/// @var instance_ids_count
/// @brief Number of instance ids for the counter
/// @var dimensions_instances_count
/// @brief Number of unique instances for this counter, across all dimension combinations.
///
/// @var instance_ids
/// @brief Instance ids that can be generated by the counter
/// @var dimensions_instances
/// @brief Array of pointers to instance info structs, each describing a unique instance
/// and its specific dimension mapping.
} rocprofiler_counter_info_v1_t;
/**
@@ -14,3 +14,4 @@ install(
add_subdirectory(codeobj)
add_subdirectory(details)
add_subdirectory(serialization)
add_subdirectory(container)
@@ -0,0 +1,11 @@
#
#
# Installation of public C++ headers (implementations)
#
#
set(ROCPROFILER_CXX_CONTAINERS_HEADER_FILES c_array.hpp)
install(
FILES ${ROCPROFILER_CXX_CONTAINERS_HEADER_FILES}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/cxx/container
COMPONENT development)
@@ -0,0 +1,136 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#pragma once
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <typeinfo>
namespace rocprofiler
{
namespace sdk
{
namespace container
{
/// @brief Non-owning view over a contiguous C array described by a base pointer and a length.
///
/// The wrapper never allocates or frees memory; the caller must keep the underlying storage
/// alive for as long as the view (or any slice / iterator derived from it) is in use.
///
/// @tparam Tp Element type of the wrapped array.
template <typename Tp>
struct c_array
{
    /// Construct an array wrapper from a base pointer and array size
    c_array(Tp* _base, size_t _size)
    : m_base{_base}
    , m_size{_size}
    {}

    ~c_array()                  = default;
    c_array(const c_array&)     = default;
    c_array(c_array&&) noexcept = default;
    c_array& operator=(const c_array&) = default;
    c_array& operator=(c_array&&) noexcept = default;

    /// Get the size of the wrapped array
    size_t size() const { return m_size; }

    /// True when the view contains no elements
    bool empty() const { return m_size == 0; }

    /// Access an element by index (no bounds check)
    Tp& operator[](size_t i) { return m_base[i]; }

    /// Access an element by index (no bounds check)
    const Tp& operator[](size_t i) const { return m_base[i]; }

    /// Access an element by index with bounds check
    /// @throws std::out_of_range if @p i is not smaller than size()
    Tp& at(size_t i)
    {
        if(i < m_size) return m_base[i];
        throw_index_out_of_range(i);
    }

    /// Access an element by index with bounds check
    /// @throws std::out_of_range if @p i is not smaller than size()
    const Tp& at(size_t i) const
    {
        if(i < m_size) return m_base[i];
        throw_index_out_of_range(i);
    }

    /// Get a slice of this array, from a start index (inclusive) to end index (exclusive)
    /// @throws std::out_of_range if the requested range is inverted or extends past size()
    c_array<Tp> slice(size_t start, size_t end) const
    {
        if(start > end || end > m_size)
        {
            throw std::out_of_range(std::string{typeid(c_array).name()} + ": slice [" +
                                    std::to_string(start) + ", " + std::to_string(end) +
                                    ") exceeds size " + std::to_string(m_size));
        }
        // pointer arithmetic (instead of &m_base[start]) stays valid for an empty, null-based view
        return c_array<Tp>(m_base + start, end - start);
    }

    /// Drop the first element from the view. No-op on an empty view (avoids size underflow).
    void pop_front()
    {
        if(m_size == 0) return;
        ++m_base;
        --m_size;
    }

    /// Drop the last element from the view. No-op on an empty view (avoids size underflow).
    void pop_back()
    {
        if(m_size == 0) return;
        --m_size;
    }

    /// Implicit access to the underlying base pointer
    operator Tp*() const { return m_base; }

    /// Iterator class for convenient range-based for loop support.
    /// @tparam Up Element type exposed on dereference (Tp or const Tp)
    template <typename Up>
    struct iterator
    {
        /// Start the iterator at a given pointer
        iterator(Tp* p)
        : m_ptr{p}
        {}

        /// Advance to the next element (pre-increment)
        iterator& operator++()
        {
            ++m_ptr;
            return *this;
        }

        /// Advance to the next element (post-increment), returning the previous position
        iterator operator++(int)
        {
            auto _prev = *this;
            ++m_ptr;
            return _prev;
        }

        /// Get the current element
        Up& operator*() const { return *m_ptr; }

        /// Member access to the current element
        Up* operator->() const { return m_ptr; }

        /// Compare iterators
        bool operator==(const iterator& rhs) const { return m_ptr == rhs.m_ptr; }
        bool operator!=(const iterator& rhs) const { return m_ptr != rhs.m_ptr; }

    private:
        Tp* m_ptr = nullptr;
    };

    /// Get an iterator positioned at the beginning of the wrapped array
    iterator<Tp>       begin() { return iterator<Tp>{m_base}; }
    iterator<const Tp> begin() const { return iterator<const Tp>{m_base}; }

    /// Get an iterator positioned at the end of the wrapped array
    iterator<Tp>       end() { return iterator<Tp>{m_base + m_size}; }
    iterator<const Tp> end() const { return iterator<const Tp>{m_base + m_size}; }

private:
    // Shared failure path of at(): builds "<type>: index <i> exceeds size <n>" and throws
    [[noreturn]] void throw_index_out_of_range(size_t i) const
    {
        throw std::out_of_range(std::string{typeid(c_array).name()} + ": index " +
                                std::to_string(i) + " exceeds size " + std::to_string(m_size));
    }

    Tp*    m_base = nullptr;
    size_t m_size = 0;
};
// Convenience factory: lets the element type be deduced from the pointer argument so that
// callers do not have to spell out c_array<Tp> themselves.
template <typename Tp>
c_array<Tp>
make_c_array(Tp* base, size_t size)
{
    auto _view = c_array<Tp>{base, size};
    return _view;
}
} // namespace container
} // namespace sdk
} // namespace rocprofiler
+102 -6
Zobrazit soubor
@@ -24,11 +24,15 @@
#pragma once
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/counters.h>
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/internal_threading.h>
#include "rocprofiler-sdk/cxx/details/mpl.hpp"
#include <string_view>
#include <tuple>
#define ROCPROFILER_CXX_DECLARE_OPERATORS(TYPE) \
@@ -88,6 +92,37 @@ less(Tp lhs, Tp rhs)
static_assert(sizeof(Tp) == sizeof(uint64_t), "error! only for opaque handle types");
return lhs.handle < rhs.handle;
}
namespace detail
{
/// @brief Maps a value to the representation used when building comparison tuples.
///
/// String-like types (as classified by mpl::is_string_type) are converted to
/// std::string_view so that they compare by content rather than by address;
/// every other type is passed through as a reference.
template <typename Tp>
struct sv_trait
{
    /// True when Tp (with qualifiers stripped) is classified as a string type
    static constexpr auto is_string_type() noexcept
    {
        return mpl::is_string_type<mpl::unqualified_identity_t<Tp>>::value;
    }

    /// Result type of the conversion: std::string_view for strings, Tp& otherwise
    using type = std::conditional_t<is_string_type(), std::string_view, Tp&>;

    constexpr type operator()(Tp& v) noexcept
    {
        if constexpr(is_string_type())
        {
            if constexpr(std::is_pointer_v<std::remove_reference_t<Tp>>)
            {
                // constructing a string_view from a null C string is undefined behaviour;
                // treat a null pointer as an empty string instead
                return (v != nullptr) ? std::string_view{v} : std::string_view{};
            }
            else
            {
                return std::string_view{v};
            }
        }
        else
        {
            return v;
        }
    }
};
}  // namespace detail
// tie_sv(): deduces each Tp from the lvalue arguments and runs every argument through
// detail::sv_trait, so string-like values are converted to std::string_view while all
// other values are passed through unchanged. The results are packed with
// std::make_tuple, which stores decayed copies of the non-string values (not references).
// Intended for building comparison keys in operator== / operator< implementations.
template <typename... Tp>
constexpr auto
tie_sv(Tp&... vs) noexcept
{
return std::make_tuple(detail::sv_trait<Tp>{}(vs)...);
}
} // namespace operators
} // namespace sdk
} // namespace rocprofiler
@@ -110,6 +145,8 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_dim3_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(hsa_region_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(hsa_amd_memory_pool_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_info_t&)
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_instance_info_t&)
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_dimension_info_t&)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_version_triplet_t)
// definitions of operator==
@@ -141,11 +178,43 @@ operator==(rocprofiler_dim3_t lhs, rocprofiler_dim3_t rhs)
}
inline bool
operator==(rocprofiler_counter_record_dimension_info_t lhs,
rocprofiler_counter_record_dimension_info_t rhs)
operator==(const rocprofiler_counter_record_dimension_info_t& lhs,
const rocprofiler_counter_record_dimension_info_t& rhs)
{
return std::tie(lhs.id, lhs.instance_size, lhs.name) ==
std::tie(rhs.id, rhs.instance_size, rhs.name);
namespace op = ::rocprofiler::sdk::operators;
return op::tie_sv(lhs.id, lhs.instance_size, lhs.name) ==
op::tie_sv(rhs.id, rhs.instance_size, rhs.name);
}
// Equality of two dimension-info entries: the dimension names are compared by content
// (via tie_sv), together with the index and the struct size field.
inline bool
operator==(const rocprofiler_counter_dimension_info_t& lhs,
           const rocprofiler_counter_dimension_info_t& rhs)
{
    namespace op = ::rocprofiler::sdk::operators;

    const auto _lhs_key = op::tie_sv(lhs.dimension_name, lhs.index, lhs.size);
    const auto _rhs_key = op::tie_sv(rhs.dimension_name, rhs.index, rhs.size);
    return _lhs_key == _rhs_key;
}
// Equality of two instance-info records: the scalar header fields must match and every
// dimension entry must either be null on both sides or compare equal by value.
inline bool
operator==(const rocprofiler_counter_record_dimension_instance_info_t& lhs,
           const rocprofiler_counter_record_dimension_instance_info_t& rhs)
{
    const bool _same_header = (lhs.instance_id == rhs.instance_id) &&
                              (lhs.counter_id == rhs.counter_id) &&
                              (lhs.dimensions_count == rhs.dimensions_count);
    if(!_same_header) return false;

    for(uint64_t idx = 0; idx < lhs.dimensions_count; ++idx)
    {
        const auto* _lhs_dim = lhs.dimensions[idx];
        const auto* _rhs_dim = rhs.dimensions[idx];

        // two missing entries are considered equal
        if(_lhs_dim == nullptr && _rhs_dim == nullptr) continue;
        // exactly one missing entry means the records differ
        if(_lhs_dim == nullptr || _rhs_dim == nullptr) return false;
        if(*_lhs_dim != *_rhs_dim) return false;
    }

    return true;
}
inline bool
@@ -193,8 +262,35 @@ inline bool
operator<(const rocprofiler_counter_record_dimension_info_t& lhs,
const rocprofiler_counter_record_dimension_info_t& rhs)
{
return std::tie(lhs.id, lhs.instance_size, lhs.name) <
std::tie(rhs.id, rhs.instance_size, rhs.name);
namespace op = ::rocprofiler::sdk::operators;
return op::tie_sv(lhs.id, lhs.instance_size, lhs.name) <
op::tie_sv(rhs.id, rhs.instance_size, rhs.name);
}
// Lexicographic ordering of dimension-info entries by (dimension name content, index,
// struct size field).
inline bool
operator<(const rocprofiler_counter_dimension_info_t& lhs,
          const rocprofiler_counter_dimension_info_t& rhs)
{
    namespace op = ::rocprofiler::sdk::operators;

    const auto _lhs_key = op::tie_sv(lhs.dimension_name, lhs.index, lhs.size);
    const auto _rhs_key = op::tie_sv(rhs.dimension_name, rhs.index, rhs.size);
    return _lhs_key < _rhs_key;
}
// Strict weak ordering of instance-info records, consistent with operator==:
// records are ordered by instance id, then counter id, then dimension count, and finally
// by the first dimension entry that differs. A null dimension entry sorts before any
// non-null entry; two null entries are treated as equal.
inline bool
operator<(const rocprofiler_counter_record_dimension_instance_info_t& lhs,
          const rocprofiler_counter_record_dimension_instance_info_t& rhs)
{
    if(lhs.instance_id != rhs.instance_id) return lhs.instance_id < rhs.instance_id;
    if(lhs.counter_id != rhs.counter_id) return lhs.counter_id < rhs.counter_id;
    if(lhs.dimensions_count != rhs.dimensions_count)
        return lhs.dimensions_count < rhs.dimensions_count;

    for(uint64_t i = 0; i < lhs.dimensions_count; ++i)
    {
        const auto* lhs_dim = lhs.dimensions[i];
        const auto* rhs_dim = rhs.dimensions[i];

        // same entry on both sides (including both null): nothing to order here
        if(lhs_dim == rhs_dim) continue;
        // never dereference a null entry: the side with the missing entry sorts first
        if(lhs_dim == nullptr || rhs_dim == nullptr) return (lhs_dim == nullptr);
        // both present: the first entry that differs decides the ordering
        if(*lhs_dim < *rhs_dim) return true;
        if(*rhs_dim < *lhs_dim) return false;
    }

    return false;
}
inline bool
@@ -40,6 +40,7 @@
#include <rocprofiler-sdk/internal_threading.h>
#include <rocprofiler-sdk/marker.h>
#include <rocprofiler-sdk/pc_sampling.h>
#include <rocprofiler-sdk/cxx/container/c_array.hpp>
#include <rocprofiler-sdk/cxx/name_info.hpp>
#include <rocprofiler-sdk/cxx/utility.hpp>
@@ -1236,16 +1237,33 @@ save(ArchiveT& ar, rocprofiler_counter_info_v1_t data)
ROCP_SDK_SAVE_DATA_CSTR(block);
ROCP_SDK_SAVE_DATA_CSTR(expression);
auto convert = [](const auto* val, uint64_t sz) {
using data_type = std::remove_cv_t<std::remove_pointer_t<decltype(val)>>;
auto retval = std::vector<data_type>{};
for(uint64_t i = 0; i < sz; ++i)
retval.emplace_back(val[i]);
return retval;
};
ROCP_SDK_SAVE_VALUE(
"dimensions",
rocprofiler::sdk::container::make_c_array(data.dimensions, data.dimensions_count));
ROCP_SDK_SAVE_VALUE("dims", convert(data.dimensions, data.dimensions_count));
ROCP_SDK_SAVE_VALUE("instances", convert(data.instance_ids, data.instance_ids_count));
ROCP_SDK_SAVE_VALUE("instances",
rocprofiler::sdk::container::make_c_array(data.dimensions_instances,
data.dimensions_instances_count));
}
// Serializes a rocprofiler_counter_dimension_info_t into the archive. Only the
// dimension name and the index are written; the struct's `size` field is not emitted.
// NOTE(review): ROCP_SDK_SAVE_DATA_CSTR presumably writes data.dimension_name as a
// named C-string entry and ROCP_SDK_SAVE_DATA_FIELD writes data.index as a named value;
// confirm against the macro definitions earlier in this header.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_counter_dimension_info_t data)
{
ROCP_SDK_SAVE_DATA_CSTR(dimension_name);
ROCP_SDK_SAVE_DATA_FIELD(index);
}
// Serializes a rocprofiler_counter_record_dimension_instance_info_t into the archive:
// the instance id, the counter id, and a "dimensions" entry built from the
// pointer array data.dimensions (data.dimensions_count elements) wrapped in a c_array.
// The struct's `size` field is not emitted.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_counter_record_dimension_instance_info_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(instance_id);
ROCP_SDK_SAVE_DATA_FIELD(counter_id);
// wrap the raw (pointer, count) pair so the c_array save() overloads handle the sequence
ROCP_SDK_SAVE_VALUE(
"dimensions",
rocprofiler::sdk::container::make_c_array(data.dimensions, data.dimensions_count));
}
template <typename ArchiveT>
@@ -1303,6 +1321,28 @@ save(ArchiveT& ar, const rocprofiler::sdk::utility::name_info_impl<EnumT, ValueT
ar(cereal::make_nvp("operations", _ops));
}
// Serializes a c_array of values as a sized sequence: first the element count (as a
// size tag), then each element in order.
template <typename ArchiveT, typename Tp>
void
save(ArchiveT& ar, const rocprofiler::sdk::container::c_array<Tp>& data)
{
    ar(make_size_tag(data.size()));
    // iterate by const reference: the elements are only read, so avoid a copy per element
    for(const auto& itr : data)
        ar(itr);
}
// Serializes a c_array of pointers as a sized sequence of the pointed-to values.
// Null entries are skipped: they are neither counted in the size tag nor written, so the
// emitted element count always matches the number of serialized elements.
template <typename ArchiveT, typename Tp>
void
save(ArchiveT& ar, const rocprofiler::sdk::container::c_array<Tp*>& data)
{
    // first pass: count only the entries that will actually be written
    size_type _sz = 0;
    for(auto* itr : data)
        if(itr) ++_sz;

    ar(make_size_tag(_sz));

    // second pass: write the non-null entries (a null entry must never be dereferenced)
    for(auto* itr : data)
        if(itr) ar(*itr);
}
ROCPROFILER_SDK_CEREAL_NAMESPACE_END
#undef ROCP_SDK_SAVE_DATA_FIELD
+165 -35
Zobrazit soubor
@@ -46,8 +46,10 @@
#include <dlfcn.h>
#include <unistd.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
namespace rocprofiler
@@ -71,6 +73,81 @@ query_pc_sampling_configuration(const rocprofiler_pc_sampling_configuration_t* c
}
return ROCPROFILER_STATUS_SUCCESS;
}
/**
 * @brief Collects counter information for every counter supported by one agent.
 *
 * The agent's supported counters are enumerated through
 * rocprofiler_iterate_agent_supported_counters. For each counter the version-1 counter
 * info is queried and, unless the counter is rejected by the filter, appended to the
 * agent's entry in @p output_map together with its dimension ids and dimension info.
 * An (initially empty) entry for the agent is created in @p output_map even when no
 * counter passes the filter.
 *
 * @param agent_id   The ID of the agent whose counters are being processed.
 * @param filter_set Optional set of counter names. When non-null, only counters whose name
 *                   is contained in the set are stored; when null, all counters are stored.
 * @param output_map Map receiving the collected counter information, keyed by agent id.
 *
 * @return The status returned by rocprofiler_iterate_agent_supported_counters.
 */
rocprofiler_status_t
process_agent_counters(rocprofiler_agent_id_t    agent_id,
                       std::set<std::string>*    filter_set,
                       agent_counter_info_map_t& output_map)
{
    // Payload passed through the C callback's void* user-data argument
    struct callback_data_t
    {
        std::set<std::string>*    counters_set       = nullptr;
        agent_counter_info_map_t* agent_counter_info = nullptr;
    };

    // Invoked by the SDK with the list of counters supported by the agent
    auto collect_counters = [](rocprofiler_agent_id_t    id,
                               rocprofiler_counter_id_t* counters,
                               size_t                    num_counters,
                               void*                     user_data) {
        auto* payload = static_cast<callback_data_t*>(user_data);
        auto* wanted  = payload->counters_set;
        auto* sink    = payload->agent_counter_info;

        // make sure the agent has an entry, even if no counter ends up being stored
        sink->emplace(id, counter_info_vec_t{});

        for(size_t i = 0; i < num_counters; ++i)
        {
            auto _info = rocprofiler_counter_info_v1_t{};

            ROCPROFILER_CHECK(rocprofiler_query_counter_info(
                counters[i], ROCPROFILER_COUNTER_INFO_VERSION_1, &_info));

            // with a filter present, drop counters that were not requested
            const bool rejected = (wanted != nullptr) && (wanted->count(_info.name) == 0);
            if(rejected) continue;

            auto dim_ids  = std::vector<rocprofiler_counter_dimension_id_t>{};
            auto dim_info = std::vector<rocprofiler_counter_record_dimension_info_t>{};

            for(uint64_t dim_idx = 0; dim_idx < _info.dimensions_count; ++dim_idx)
            {
                const auto* dim = _info.dimensions[dim_idx];
                if(dim != nullptr)
                {
                    dim_ids.emplace_back(dim->id);
                    dim_info.emplace_back(*dim);
                }
                else
                {
                    ROCP_WARNING << fmt::format(
                        "nullptr dimension encountered for counter '{}' at index {}",
                        _info.name,
                        dim_idx);
                }
            }

            sink->at(id).emplace_back(id, _info, std::move(dim_ids), std::move(dim_info));
        }

        return ROCPROFILER_STATUS_SUCCESS;
    };

    auto cb_data = callback_data_t{filter_set, &output_map};

    return rocprofiler_iterate_agent_supported_counters(agent_id, collect_counters, &cb_data);
}
} // namespace
kernel_symbol_info::kernel_symbol_info()
@@ -155,52 +232,105 @@ metadata::metadata(inprocess)
add_kernel_symbol(std::move(info));
}
/**
* @brief Initializes the metadata by loading all counters supported on GPU agents.
*
* This method is used by the `rocprofv3-avail` tool to enumerate all available counters
* for each GPU agent in the system.
*
* For each non-CPU agent, this function calls `process_agent_counters` with a null filter,
* ensuring that all counters supported by that agent are queried and stored in metadata.
*/
void metadata::init(inprocess)
{
if(inprocess_init) return;
inprocess_init = true;
for(auto itr : agents)
for(const auto& agent : agents)
{
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) continue;
if(agent.type == ROCPROFILER_AGENT_TYPE_CPU) continue;
auto status = rocprofiler_iterate_agent_supported_counters(
itr.id,
[](rocprofiler_agent_id_t id,
rocprofiler_counter_id_t* counters,
size_t num_counters,
void* user_data) {
auto* data_v = static_cast<agent_counter_info_map_t*>(user_data);
data_v->emplace(id, counter_info_vec_t{});
auto status = process_agent_counters(agent.id, nullptr, agent_counter_info);
ROCP_WARNING_IF(status != ROCPROFILER_STATUS_SUCCESS) << fmt::format(
"rocprofiler_iterate_agent_supported_counters failed for agent {} ({}) :: {}",
agent.node_id,
agent.name,
rocprofiler_get_status_string(status));
}
}
for(size_t i = 0; i < num_counters; ++i)
{
auto _info = rocprofiler_counter_info_v1_t{};
auto _dim_ids = std::vector<rocprofiler_counter_dimension_id_t>{};
auto _dim_info = std::vector<rocprofiler_counter_record_dimension_info_t>{};
/**
* @brief Initializes the metadata by loading only selected counters for GPU agents.
*
* This method is used by the `rocprofv3` tool when the user requests a specific
* list of performance counters to profile.
*
* This filtering reduces runtime overhead and significantly shrinks the size of
* generated JSON metadata in profiling output.
*/
void
metadata::init(inprocess_with_counters&& data)
{
if(inprocess_init) return;
inprocess_init = true;
ROCPROFILER_CHECK(rocprofiler_query_counter_info(
counters[i],
ROCPROFILER_COUNTER_INFO_VERSION_1,
&static_cast<rocprofiler_counter_info_v1_t&>(_info)));
// No counters to process, exit early. Kernel trace doesn't have to iterate counters.
if(data.counters.empty()) return;
for(uint64_t j = 0; j < _info.dimensions_count; ++j)
{
_dim_ids.emplace_back(_info.dimensions[j].id);
_dim_info.emplace_back(_info.dimensions[j]);
}
data_v->at(id).emplace_back(
id, _info, std::move(_dim_ids), std::move(_dim_info));
}
return ROCPROFILER_STATUS_SUCCESS;
},
&agent_counter_info);
auto gpu_index_to_counters_map = std::map<int, std::set<std::string>>{};
for(const auto& agent : agents)
{
gpu_index_to_counters_map[agent.gpu_index] = {};
}
// Used to parse counters like "SQ_WAVES:device=0".
constexpr auto device_qualifier = std::string_view{":device="};
for(const auto& pmc_counter : data.counters)
{
auto name_v = pmc_counter;
if(auto pos = pmc_counter.find(device_qualifier); pos != std::string::npos)
{
name_v = pmc_counter.substr(0, pos);
auto device_id_s = pmc_counter.substr(pos + device_qualifier.length());
ROCP_FATAL_IF(device_id_s.empty() ||
device_id_s.find_first_not_of("0123456789") != std::string::npos)
<< fmt::format("Invalid device qualifier format (expected ':device=N') in '{}'",
pmc_counter);
auto device_id_l = std::stol(device_id_s);
if(gpu_index_to_counters_map.find(device_id_l) == gpu_index_to_counters_map.end())
{
ROCP_WARNING << fmt::format("Device ID not found for PMC counter '{}'",
pmc_counter);
continue;
}
// Add the counter to the corresponding device.
gpu_index_to_counters_map[device_id_l].emplace(name_v);
}
else
{
// No device qualifier — add to all devices.
for(auto& [_, counters] : gpu_index_to_counters_map)
counters.emplace(name_v);
}
}
// Process selected counters for each GPU agent.
for(const auto& agent : agents)
{
if(agent.type == ROCPROFILER_AGENT_TYPE_CPU) continue;
auto* filter = &gpu_index_to_counters_map[agent.gpu_index];
auto status = process_agent_counters(agent.id, filter, agent_counter_info);
ROCP_WARNING_IF(status != ROCPROFILER_STATUS_SUCCESS) << fmt::format(
"rocprofiler_iterate_agent_supported_counters returned {} for agent {} ({}) :: {}",
rocprofiler_get_status_name(status),
itr.node_id,
itr.name,
"rocprofiler_iterate_agent_supported_counters failed for agent {} ({}) :: {}",
agent.node_id,
agent.name,
rocprofiler_get_status_string(status));
}
}
+9
Zobrazit soubor
@@ -139,6 +139,11 @@ struct metadata
struct inprocess
{};
// Tag type selecting the counter-filtered initialization used by the rocprofv3 tool.
// Carries the names of the counters that should be loaded into the metadata.
struct inprocess_with_counters
{
    std::set<std::string> counters{};
};
pid_t process_id = 0;
pid_t parent_process_id = 0;
@@ -177,8 +182,12 @@ struct metadata
metadata& operator=(const metadata&) = delete;
metadata& operator=(metadata&&) noexcept = delete;
// Loads all counters supported on agents. Used by the 'rocprofv3-avail' tool.
void init(inprocess);
// Loads only selected counters into metadata to reduce JSON size. Used by the 'rocprofv3' tool.
void init(inprocess_with_counters&&);
const agent_info* get_agent(rocprofiler_agent_id_t _val) const;
const code_object_info* get_code_object(uint64_t code_obj_id) const;
const kernel_symbol_info* get_kernel_symbol(uint64_t kernel_id) const;
+27 -3
Zobrazit soubor
@@ -1128,6 +1128,7 @@ construct_counter_collection_profile(rocprofiler_agent_id_t agent_id,
auto profile = std::optional<rocprofiler_counter_config_id_t>{};
auto counters_v = counter_vec_t{};
auto found_v = std::vector<std::string_view>{};
auto not_found_counters_v = std::vector<std::string_view>{};
const auto* agent_v = tool_metadata->get_agent(agent_id);
auto expected_v = counters.size();
@@ -1158,24 +1159,31 @@ construct_counter_collection_profile(rocprofiler_agent_id_t agent_id,
}
// search the gpu agent counter info for a counter with a matching name
bool counter_found = false;
for(const auto& citr : gpu_agents_counter_info.at(agent_id))
{
if(name_v == std::string_view{citr.name})
{
counters_v.emplace_back(citr.id);
found_v.emplace_back(itr);
counter_found = true;
}
}
if(!counter_found) not_found_counters_v.emplace_back(itr);
}
if(expected_v != counters_v.size())
{
auto requested_counters =
fmt::format("{}", fmt::join(counters.begin(), counters.end(), ", "));
auto found_counters = fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", "));
auto found_counters = fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", "));
auto missing_counters = fmt::format(
"{}", fmt::join(not_found_counters_v.begin(), not_found_counters_v.end(), ", "));
ROCP_WARNING << "Unable to find all counters for agent " << agent_v->node_id << " (gpu-"
<< agent_v->gpu_index << ", " << agent_v->name << ") in ["
<< requested_counters << "]. Found: [" << found_counters << "]";
<< requested_counters << "]. Found: [" << found_counters << "]. Missing: ["
<< missing_counters << "]";
}
if(!counters_v.empty())
@@ -1246,6 +1254,22 @@ get_instruction_index(rocprofiler_pc_t pc)
return CHECK_NOTNULL(tool_metadata)->get_instruction_index(pc);
}
// Builds the de-duplicated set of counter names referenced by the tool configuration:
// every counter of every configured counter group, plus the counter name of each
// ATT perf-counter parameter.
std::set<std::string>
get_config_perf_counters()
{
    auto names = std::set<std::string>{};

    const auto& counter_groups = tool::config().counters;
    for(const auto& group : counter_groups)
    {
        for(const auto& counter_name : group)
        {
            names.emplace(counter_name);
        }
    }

    const auto& att_counters = tool::config().att_param_perfcounters;
    for(const auto& att_entry : att_counters)
    {
        names.emplace(att_entry.counter_name);
    }

    return names;
}
} // namespace
std::vector<rocprofiler_thread_trace_parameter_t>
@@ -1679,7 +1703,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
const uint64_t buffer_size = 16 * common::units::get_page_size();
const uint64_t buffer_watermark = 15 * common::units::get_page_size();
tool_metadata->init(tool::metadata::inprocess{});
tool_metadata->init(tool::metadata::inprocess_with_counters{get_config_perf_counters()});
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed");
+65 -19
Zobrazit soubor
@@ -25,6 +25,7 @@
#include "lib/common/static_object.hpp"
#include "lib/common/string_entry.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/counters/dimensions.hpp"
#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp"
@@ -53,25 +54,37 @@ get_static_string(std::string_view str)
return common::get_string_entry(str)->c_str();
}
template <class T>
const std::vector<T>*
get_static_ptr(const std::vector<T>& vec)
template <typename Tp>
const Tp**
get_static_ptr_array(const std::vector<Tp>& vec)
{
// The use of std::map is purposeful. Keys can be vectors in map and cannot be in unordered_map.
// Simplifying the code to create these static objects. Given that they are not created often (
// or looked up often), the performance difference between map and unordered_map is negligible.
using static_ptr_map = std::map<std::vector<T>, std::unique_ptr<std::vector<T>>>;
using static_ptr_map = std::map<std::vector<Tp>, std::vector<const Tp*>>;
static auto*& static_ptrs =
common::static_object<common::Synchronized<static_ptr_map>>::construct();
return static_ptrs->wlock([&](auto& data) {
return static_ptrs->wlock([&](auto& data) -> const Tp** {
if(auto it = data.find(vec); it != data.end())
{
return it->second.get();
return it->second.data();
}
data[vec] = std::make_unique<std::vector<T>>(vec);
return data[vec].get();
auto [inserted_it, success] = data.emplace(vec, std::vector<const Tp*>{});
const std::vector<Tp>& stored_vec = inserted_it->first;
auto& ptr_vec = inserted_it->second;
ptr_vec.reserve(stored_vec.size());
for(const auto& item : stored_vec)
{
ptr_vec.push_back(&item);
}
return ptr_vec.data();
});
}
} // namespace
} // namespace counters
} // namespace rocprofiler
@@ -139,7 +152,7 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
return true;
}
out_struct.dimensions = counters::get_static_ptr(_dim_info)->data();
out_struct.dimensions = counters::get_static_ptr_array(_dim_info);
out_struct.dimensions_count = _dim_info.size();
return true;
};
@@ -152,20 +165,33 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
const auto* dims = common::get_val(dim_ptr->id_to_dim, counter_id.handle);
if(!dims) return false;
std::vector<rocprofiler_counter_instance_id_t> instances;
auto instances = std::vector<rocprofiler_counter_record_dimension_instance_info_t>{};
auto _dim_info = std::vector<rocprofiler_counter_record_dimension_info_t>{};
constexpr auto rocprofiler_counter_record_dimension_instance_v1_info_t_rt_size =
common::compute_runtime_sizeof<rocprofiler_counter_record_dimension_instance_info_t>();
constexpr auto rocprofiler_counter_dimension_info_v1_t_rt_size =
common::compute_runtime_sizeof<rocprofiler_counter_dimension_info_t>();
for(const auto& metric_dim : *dims)
{
if(metric_dim.size() == 0) continue;
std::vector<rocprofiler_counter_instance_id_t> tmp;
_dim_info.emplace_back(rocprofiler_counter_record_dimension_info_t{
.name = counters::get_static_string(metric_dim.name()),
.instance_size = metric_dim.size(),
.id = static_cast<rocprofiler_counter_dimension_id_t>(metric_dim.type())});
auto tmp = std::vector<rocprofiler_counter_record_dimension_instance_info_t>{};
// If no instances are found, create the first set of instances
if(instances.empty())
{
for(size_t i = 0; i < metric_dim.size(); i++)
{
auto& rec = instances.emplace_back();
counters::set_dim_in_rec(rec, metric_dim.type(), i);
counters::set_counter_in_rec(rec, counter_id);
counters::set_dim_in_rec(rec.instance_id, metric_dim.type(), i);
counters::set_counter_in_rec(rec.instance_id, counter_id);
}
}
else
@@ -177,8 +203,8 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
for(const auto& instance : instances)
{
auto& rec = tmp.emplace_back(instance);
counters::set_dim_in_rec(rec, metric_dim.type(), i);
counters::set_counter_in_rec(rec, counter_id);
counters::set_dim_in_rec(rec.instance_id, metric_dim.type(), i);
counters::set_counter_in_rec(rec.instance_id, counter_id);
}
}
instances = tmp;
@@ -186,13 +212,33 @@ rocprofiler_query_counter_info(rocprofiler_counter_id_t counter_id,
}
if(instances.empty())
{
out_struct.instance_ids = nullptr;
out_struct.instance_ids_count = 0;
out_struct.dimensions_instances = nullptr;
out_struct.dimensions_instances_count = 0;
return true;
}
out_struct.instance_ids = counters::get_static_ptr(instances)->data();
out_struct.instance_ids_count = instances.size();
for(auto& instance : instances)
{
auto dimensions = std::vector<rocprofiler_counter_dimension_info_t>{};
auto instance_id = instance.instance_id;
for(const auto& dimension : _dim_info)
{
auto& curr = dimensions.emplace_back(rocprofiler_counter_dimension_info_t{});
curr.index = counters::rec_to_dim_pos(
instance_id,
static_cast<counters::rocprofiler_profile_counter_instance_types>(
dimension.id));
curr.dimension_name = dimension.name;
curr.size = rocprofiler_counter_dimension_info_v1_t_rt_size;
}
instance.dimensions = counters::get_static_ptr_array(dimensions);
instance.dimensions_count = std::size(_dim_info);
instance.counter_id = counters::rec_to_counter_id(instance_id).handle;
instance.size = rocprofiler_counter_record_dimension_instance_v1_info_t_rt_size;
}
out_struct.dimensions_instances = counters::get_static_ptr_array(instances);
out_struct.dimensions_instances_count = instances.size();
out_struct.size = sizeof(rocprofiler_counter_info_v1_t);
return true;
};
@@ -241,9 +241,9 @@ TEST(metrics, check_public_api_query)
for(size_t i = 0; i < info.dimensions_count; i++)
{
const auto& dim = dims->at(i);
EXPECT_EQ(dim.size(), info.dimensions[i].instance_size);
EXPECT_EQ(dim.type(), info.dimensions[i].id);
EXPECT_EQ(std::string(info.dimensions[i].name), dim.name());
EXPECT_EQ(dim.size(), info.dimensions[i]->instance_size);
EXPECT_EQ(dim.type(), info.dimensions[i]->id);
EXPECT_EQ(std::string(info.dimensions[i]->name), dim.name());
}
size_t instance_count = 0;
@@ -256,18 +256,20 @@ TEST(metrics, check_public_api_query)
instance_count = metric_dim.size() * instance_count;
}
EXPECT_EQ(info.instance_ids_count, instance_count);
EXPECT_EQ(info.dimensions_instances_count, instance_count);
std::set<std::vector<size_t>> dim_permutations;
for(size_t i = 0; i < info.instance_ids_count; i++)
for(size_t i = 0; i < info.dimensions_instances_count; i++)
{
std::vector<size_t> dim_ids;
ASSERT_EQ(rocprofiler::counters::rec_to_counter_id(info.instance_ids[i]).handle,
metric.id());
ASSERT_EQ(
rocprofiler::counters::rec_to_counter_id(info.dimensions_instances[i]->instance_id)
.handle,
metric.id());
for(const auto& metric_dim : *dims)
{
dim_ids.push_back(
rocprofiler::counters::rec_to_dim_pos(info.instance_ids[i], metric_dim.type()));
dim_ids.push_back(rocprofiler::counters::rec_to_dim_pos(
info.dimensions_instances[i]->instance_id, metric_dim.type()));
}
// Ensure that the permutation is unique
ASSERT_EQ(dim_permutations.insert(dim_ids).second, true);
@@ -262,7 +262,7 @@ pc_sample_config(uint64_t agent_handle,
}
bool
is_counter_set(uint64_t* counter_handles, uint64_t agent_handle, size_t num_counters)
is_counter_set(const uint64_t* counter_handles, uint64_t agent_handle, size_t num_counters)
{
rocprofiler_profile_config_id_t cfg_id = {.handle = 0};
for(size_t itr = 0; itr < num_counters; itr++)
@@ -74,9 +74,9 @@ pc_sample_config(uint64_t agent_handle,
uint64_t* max_interval,
uint64_t* flags) ROCPROFILER_EXPORT;
bool
is_counter_set(uint64_t* counter_handles,
uint64_t agent_handle,
size_t num_counters) ROCPROFILER_EXPORT;
is_counter_set(const uint64_t* counter_handles,
uint64_t agent_handle,
size_t num_counters) ROCPROFILER_EXPORT;
void
agent_info(uint64_t agent_handle, const char** agent_info_str) ROCPROFILER_EXPORT;
+2 -2
Zobrazit soubor
@@ -5,8 +5,8 @@ project(rocprofiler-sdk-tests-common LANGUAGES C CXX)
include(GoogleTest)
set(common_sources demangling.cpp environment.cpp md5sum.cpp mpl.cpp parse.cpp sha256.cpp
uuid_v7.cpp)
set(common_sources c_array.cpp demangling.cpp environment.cpp md5sum.cpp mpl.cpp
parse.cpp sha256.cpp uuid_v7.cpp)
add_executable(common-tests)
target_sources(common-tests PRIVATE ${common_sources})
+138
Zobrazit soubor
@@ -0,0 +1,138 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler-sdk/cxx/container/c_array.hpp>
#include <rocprofiler-sdk/cxx/serialization.hpp>
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
namespace
{
template <typename Tp>
void
compare_serialization_of_vector_and_c_array(std::vector<Tp>& vec)
{
std::stringstream ss_vec;
{
cereal::BinaryOutputArchive oarchive(ss_vec);
save(oarchive, vec);
}
std::stringstream ss_arr;
{
cereal::BinaryOutputArchive oarchive(ss_arr);
auto arr = rocprofiler::sdk::container::make_c_array(vec.data(), vec.size());
save(oarchive, arr);
}
ASSERT_EQ(ss_vec.str(), ss_arr.str());
}
template <typename Tp>
void
compare_serialization_of_vector_and_c_array_of_pointers(std::vector<Tp>& vec)
{
std::vector<Tp*> ptrs;
ptrs.reserve(vec.size());
for(auto& val : vec)
ptrs.emplace_back(&val);
std::stringstream ss_vec;
{
cereal::BinaryOutputArchive oarchive(ss_vec);
save(oarchive, vec);
}
std::stringstream ss_arr;
{
cereal::BinaryOutputArchive oarchive(ss_arr);
auto arr = rocprofiler::sdk::container::make_c_array(ptrs.data(), ptrs.size());
save(oarchive, arr);
}
ASSERT_EQ(ss_vec.str(), ss_arr.str());
}
} // namespace
// Exercises a c_array created with make_c_array() over the storage of a std::vector<int>:
// size, operator[] / at(), out-of-range behaviour of at(), slice(), pop_front() / pop_back(),
// range-for iteration (const and non-const), implicit conversion to a raw pointer, and finally
// that serializing a c_array matches serializing the equivalent std::vector.
// The checks are order dependent: pop_front()/pop_back() shrink `arr`, and every assertion after
// them expects the remaining elements {2, 3, 4}.
TEST(common, c_array)
{
std::vector<int> vec{1, 2, 3, 4, 5};
auto arr = rocprofiler::sdk::container::make_c_array(vec.data(), vec.size());
// Validate size
ASSERT_EQ(arr.size(), vec.size());
// Test operator[] and at()
for(size_t i = 0; i < arr.size(); ++i)
{
EXPECT_EQ(arr[i], vec[i]);
EXPECT_EQ(arr.at(i), vec.at(i));
}
// Bounds checking on at(): the first index past the end is expected to throw std::out_of_range
EXPECT_THROW(arr.at(arr.size()), std::out_of_range);
// Test slice: the assertions below expect the half-open index range [1, 4)
auto sub_arr = arr.slice(1, 4); // should contain 2, 3, 4
ASSERT_EQ(sub_arr.size(), 3);
EXPECT_EQ(sub_arr[0], 2);
EXPECT_EQ(sub_arr[2], 4);
// Test pop_front()
arr.pop_front(); // drops 1
ASSERT_EQ(arr.size(), 4);
EXPECT_EQ(arr[0], 2);
// Test pop_back()
arr.pop_back(); // drops 5
ASSERT_EQ(arr.size(), 3);
EXPECT_EQ(arr[2], 4);
// Test iterators: after the two pops only the middle three values should be visited
int expected[] = {2, 3, 4};
int idx = 0;
for(auto v : arr)
{
EXPECT_EQ(v, expected[idx++]);
}
// Const iterator: same traversal through a const reference (idx is reset first)
const auto& const_arr = arr;
idx = 0;
for(auto v : const_arr)
{
EXPECT_EQ(v, expected[idx++]);
}
// Test implicit cast to pointer: ptr[0] is expected to be the first remaining element (2)
int* ptr = arr;
EXPECT_EQ(ptr[0], 2);
EXPECT_EQ(ptr[2], 4);
// Test Serialization.
// Both helpers receive the full, unmodified `vec` (all five values), not the shrunken `arr`.
// NOTE(review): presumably c_array is a non-owning view so the pops above left `vec` intact --
// confirm against c_array.hpp.
compare_serialization_of_vector_and_c_array(vec);
compare_serialization_of_vector_and_c_array_of_pointers(vec);
}