[Rocprofiler-systems] : Refactor papi enumeration to fix a hang on Intel systems (#1672)

* Refactor papi enumeration to fix a hang on Intel systems

- Add an exclude argument to available_events_info() so that the
  perf_event_uncore component can be skipped; enumerating it causes a
  hang-like stall on Intel systems with a large number of uncore events.
- During early initialization, enumerate the available PAPI events only
  when the user has specified PAPI events
- Move the query of available PAPI events for the
  ROCPROFSYS_SAMPLING_OVERFLOW_EVENT config setting into the avail
  component, so that this heavy logic no longer runs during initialization.
- Make the category option of rocprof-sys-avail -H -c case-insensitive
- Provide a way to query the available overflow events that can be
  specified for ROCPROFSYS_SAMPLING_OVERFLOW_EVENT, using the new command
  option rocprof-sys-avail -H -c overflow

* Update projects/rocprofiler-systems/source/bin/rocprof-sys-avail/common.cpp

Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com>

* Update timemory submodule pointer

Signed-off-by: David Galiffi <David.Galiffi@amd.com>

* Fix errors on compile

* Change 1: Optimization for the category matching lambda

Optimization changes.

* Modify the rocprof-sys-avail -c option for overflow

Overflow should not be displayed as a device in rocprof-sys-avail -H -c CPU.

Users can instead apply a regex to the summary, where "overflow" is appended to the description.

For example: rocprof-sys-avail -H -c CPU -d -r overflow

* Revert change to column width

---------

Signed-off-by: David Galiffi <David.Galiffi@amd.com>
Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com>
Co-authored-by: David Galiffi <David.Galiffi@amd.com>
This commit is contained in:
Sajina PK
2025-11-21 00:19:58 -05:00
gecommit door GitHub
bovenliggende 4f4352acd0
commit d77b245730
6 gewijzigde bestanden met toevoegingen van 106 en 69 verwijderingen
@@ -1075,19 +1075,33 @@ write_hw_counter_info(std::ostream& os, const array_t<bool, N>& options,
using width_bool = array_t<bool, N>;
using hwcounter_info_t = std::vector<tim::hardware_counters::info>;
auto _papi_events = tim::papi::available_events_info();
auto _papi_events = tim::papi::available_events_info({ "perf_event_uncore" });
auto _rocm_events =
(gpu_count > 0) ? rocprofsys::rocm::rocm_events() : hwcounter_info_t{};
if(alphabetical)
// Tag overflow events by modifying both short and long descriptions upfront
{
auto _sorter = [](const auto& lhs, const auto& rhs) {
return (lhs.symbol() < rhs.symbol());
};
std::sort(_papi_events.begin(), _papi_events.end(), _sorter);
std::sort(_rocm_events.begin(), _rocm_events.end(), _sorter);
namespace regex_const = ::std::regex_constants;
auto _regex =
std::regex{ "^(perf::|)PERF_COUNT_(HW|SW|HW_CACHE)_([A-Z_]+)(|:[A-Z]+)$",
regex_const::optimize };
for(auto& itr : _papi_events)
{
if(std::regex_match(itr.symbol(), _regex))
{
itr.short_description() += " (overflow event)";
itr.long_description() += " (overflow event)";
}
}
}
// sort the events alphabetically
auto _sorter = [](const auto& lhs, const auto& rhs) {
return (lhs.symbol() < rhs.symbol());
};
std::sort(_papi_events.begin(), _papi_events.end(), _sorter);
std::sort(_rocm_events.begin(), _rocm_events.end(), _sorter);
auto _process_counters = [](auto& _events_v, int32_t _offset_v) {
for(auto& iitr : _events_v)
iitr.offset() += _offset_v;
@@ -26,8 +26,12 @@
#include <timemory/settings/settings.hpp>
#include <timemory/variadic/macros.hpp>
#include <algorithm>
#include <array>
#include <string>
#include <string_view>
#include <sys/stat.h>
#include <unordered_map>
using settings = ::tim::settings;
@@ -307,29 +311,73 @@ process_categories(parser_t& p, const str_set_t& _category_options)
{
category_view = p.get<str_set_t>("categories");
std::vector<std::function<void()>> _shorthand_patches{};
// Helper to do case-insensitive string comparison
auto _tolower = [](std::string_view in) {
std::string out(in);
std::transform(out.begin(), out.end(), out.begin(),
[](unsigned char c) { return std::tolower(c); });
return out;
};
// Cache lowercase -> original category mapping to avoid repeated string conversions
// Also pre-compute shorthand mappings (e.g., "wallclock" -> "component::WallClock")
std::unordered_map<std::string, std::string> _category_map;
constexpr std::array<std::string_view, 3> _prefixes = { "component::", "settings::",
"hw_counters::" };
for(const auto& opt : _category_options)
{
auto opt_lower = _tolower(opt);
_category_map[opt_lower] = opt;
// Add shorthand mappings if the option starts with a known prefix
for(auto prefix : _prefixes)
{
if(opt_lower.size() > prefix.size() &&
opt_lower.compare(0, prefix.size(), _tolower(prefix)) == 0)
{
// Map the shorthand (without prefix) to the full canonical form
auto shorthand = opt_lower.substr(prefix.size());
_category_map[shorthand] = opt;
break;
}
}
}
// Helper to find case-insensitive match in category options
auto _find_category = [&_category_map,
&_tolower](std::string_view input) -> std::string_view {
auto input_lower = _tolower(input);
auto it = _category_map.find(input_lower);
if(it != _category_map.end()) return it->second;
return "";
};
// Process categories - now handles both full names and shorthands via the pre-built
// map
for(const auto& itr : category_view)
{
auto _is_shorthand = [&_shorthand_patches, &_category_options,
itr](const std::string& _prefix) {
auto _opt = TIMEMORY_JOIN("::", _prefix, itr);
if(_category_options.count(_opt) > 0)
{
_shorthand_patches.emplace_back([itr, _opt]() {
category_view.erase(itr);
category_view.emplace(_opt);
});
return true;
}
return false;
};
if(_category_options.count(itr) == 0)
auto _matched = _find_category(itr);
if(!_matched.empty())
{
if(!_is_shorthand("component") && !_is_shorthand("settings") &&
!_is_shorthand("hw_counters"))
throw std::runtime_error(
itr + " is not a valid category. Use --list-categories to view "
"valid categories");
// Only create patch if the matched form differs from input (normalization
// needed)
if(_matched != itr)
{
// Explicitly convert string_view to string for safe capture
std::string _matched_str(_matched);
_shorthand_patches.emplace_back([itr, _matched_str]() {
category_view.erase(itr);
category_view.emplace(_matched_str);
});
}
}
else
{
throw std::runtime_error(
itr + " is not a valid category. Use --list-categories to view "
"valid categories");
}
}
for(auto&& itr : _shorthand_patches)
@@ -623,11 +623,12 @@ configure_settings(bool _init)
"the same signal (SIGRTMIN + 1)",
SIGRTMIN + 1, "sampling", "advanced");
ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_SAMPLING_OVERFLOW_EVENT",
"Metric for overflow sampling",
std::string{ "perf::PERF_COUNT_HW_CACHE_REFERENCES" },
"sampling", "hardware_counters")
->set_choices(perf::get_config_choices());
ROCPROFSYS_CONFIG_SETTING(
std::string, "ROCPROFSYS_SAMPLING_OVERFLOW_EVENT",
"Metric for overflow sampling. Defaults to perf::PERF_COUNT_HW_CACHE_REFERENCES. "
"For full list of events see: rocprof-sys-avail -H -c CPU -r overflow",
std::string{ "perf::PERF_COUNT_HW_CACHE_REFERENCES" }, "sampling",
"hardware_counters");
rocprofiler_sdk::config_settings(_config);
amd_smi::config_settings(_config);
@@ -942,12 +943,18 @@ configure_settings(bool _init)
{
auto _papi_events = _config->find("ROCPROFSYS_PAPI_EVENTS");
_add_rocprofsys_category(_papi_events);
std::vector<std::string> _papi_choices = {};
for(auto itr : tim::papi::available_events_info())
// Only enumerate PAPI events if the user has specified them
if(_papi_events->second->get_config_updated() ||
!_config->get_papi_events().empty())
{
if(itr.available()) _papi_choices.emplace_back(itr.symbol());
std::vector<std::string> _papi_choices = {};
for(const auto& itr :
tim::papi::available_events_info({ "perf_event_uncore" }))
{
if(itr.available()) _papi_choices.emplace_back(itr.symbol());
}
_papi_events->second->set_choices(_papi_choices);
}
_papi_events->second->set_choices(_papi_choices);
}
#else
_config->find("ROCPROFSYS_PAPI_EVENTS")->second->set_hidden(true);
@@ -31,35 +31,6 @@ namespace perf
{
namespace units = ::tim::units;
/// Build the list of perf event names offered as configuration choices.
///
/// Queries tim::papi::available_events_info() and keeps every symbol of the
/// form [perf::]PERF_COUNT_{HW,SW,HW_CACHE}_<NAME>[:<MODIFIER>]. A leading
/// "perf::" is removed from each kept symbol.
///
/// @return The kept names, sorted ascending with duplicates removed.
std::vector<std::string>
get_config_choices()
{
    const auto perf_prefix = std::string_view{ "perf::" };
    const auto available   = tim::papi::available_events_info();
    const auto event_pattern =
        std::regex{ "^(perf::|)PERF_COUNT_(HW|SW|HW_CACHE)_([A-Z_]+)(|:[A-Z]+)$",
                    ::std::regex_constants::optimize };

    auto choices = std::vector<std::string>{};
    for(const auto& event : available)
    {
        // Ignore anything that is not a generic PERF_COUNT_* event.
        if(!std::regex_match(event.symbol(), event_pattern)) continue;

        auto name = event.symbol();
        // Drop the "perf::" qualifier when the symbol starts with it, so the
        // prefixed and unprefixed spellings collapse to a single entry below.
        if(name.find(perf_prefix) == 0) name = name.substr(perf_prefix.length());
        choices.emplace_back(name);
    }

    // Sort first so that std::unique can remove adjacent duplicates.
    std::sort(choices.begin(), choices.end());
    const auto unique_end = std::unique(choices.begin(), choices.end());
    choices.erase(unique_end, choices.end());
    return choices;
}
event_type
get_event_type(std::string_view _v)
{
@@ -277,9 +277,6 @@ enum class record_type
#endif
};
std::vector<std::string>
get_config_choices();
event_type get_event_type(std::string_view);
hw_config get_hw_config(std::string_view);
sw_config get_sw_config(std::string_view);