[Rocprofiler-systems] : Refactor papi enumeration to fix a hang on Intel systems (#1672)
* Refactor papi enumeration to fix a hang on Intel systems - Add an exclude argument to available_events_info() for perf_event_uncore, which causes a hang-like condition on Intel systems with a large number of uncore events. - Enumerate papi available events only when papi events are specified by users inside early initialization logic - Move papi available event query for ROCPROFSYS_SAMPLING_OVERFLOW_EVENT config setting to the avail component, to move the heavy logic outside initialization. - Make category option for rocprof-sys-avail -H -c case insensitive - Provide new option to query available overflow events that can be specified for ROCPROFSYS_SAMPLING_OVERFLOW_EVENT using new command option rocprof-sys-avail -H -c overflow * Update projects/rocprofiler-systems/source/bin/rocprof-sys-avail/common.cpp Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com> * Update timemory submodule pointer Signed-off-by: David Galiffi <David.Galiffi@amd.com> * Fix errors on compile * Change 1: Optimization for the category matching lambda Optimization changes. * Modify the rocprof-sys-avail -c option for overflow. Overflow should not be displayed as a device in rocprof-sys-avail -H -c CPU. Users can instead do a regex search on the summary, where overflow is appended to the description. Users can run rocprof-sys-avail -H -c CPU -d -r overflow * Revert change to column width --------- Signed-off-by: David Galiffi <David.Galiffi@amd.com> Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com> Co-authored-by: David Galiffi <David.Galiffi@amd.com>
This commit is contained in:
gecommit door
GitHub
bovenliggende
4f4352acd0
commit
d77b245730
+1
-1
Submodule projects/rocprofiler-systems/external/timemory updated: b5e41aa9e4...4daa81b7a4
@@ -1075,19 +1075,33 @@ write_hw_counter_info(std::ostream& os, const array_t<bool, N>& options,
|
||||
using width_bool = array_t<bool, N>;
|
||||
using hwcounter_info_t = std::vector<tim::hardware_counters::info>;
|
||||
|
||||
auto _papi_events = tim::papi::available_events_info();
|
||||
auto _papi_events = tim::papi::available_events_info({ "perf_event_uncore" });
|
||||
auto _rocm_events =
|
||||
(gpu_count > 0) ? rocprofsys::rocm::rocm_events() : hwcounter_info_t{};
|
||||
|
||||
if(alphabetical)
|
||||
// Tag overflow events by modifying both short and long descriptions upfront
|
||||
{
|
||||
auto _sorter = [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.symbol() < rhs.symbol());
|
||||
};
|
||||
std::sort(_papi_events.begin(), _papi_events.end(), _sorter);
|
||||
std::sort(_rocm_events.begin(), _rocm_events.end(), _sorter);
|
||||
namespace regex_const = ::std::regex_constants;
|
||||
auto _regex =
|
||||
std::regex{ "^(perf::|)PERF_COUNT_(HW|SW|HW_CACHE)_([A-Z_]+)(|:[A-Z]+)$",
|
||||
regex_const::optimize };
|
||||
for(auto& itr : _papi_events)
|
||||
{
|
||||
if(std::regex_match(itr.symbol(), _regex))
|
||||
{
|
||||
itr.short_description() += " (overflow event)";
|
||||
itr.long_description() += " (overflow event)";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort the events alphabetically
|
||||
auto _sorter = [](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.symbol() < rhs.symbol());
|
||||
};
|
||||
std::sort(_papi_events.begin(), _papi_events.end(), _sorter);
|
||||
std::sort(_rocm_events.begin(), _rocm_events.end(), _sorter);
|
||||
|
||||
auto _process_counters = [](auto& _events_v, int32_t _offset_v) {
|
||||
for(auto& iitr : _events_v)
|
||||
iitr.offset() += _offset_v;
|
||||
|
||||
@@ -26,8 +26,12 @@
|
||||
#include <timemory/settings/settings.hpp>
|
||||
#include <timemory/variadic/macros.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <sys/stat.h>
|
||||
#include <unordered_map>
|
||||
|
||||
using settings = ::tim::settings;
|
||||
|
||||
@@ -307,29 +311,73 @@ process_categories(parser_t& p, const str_set_t& _category_options)
|
||||
{
|
||||
category_view = p.get<str_set_t>("categories");
|
||||
std::vector<std::function<void()>> _shorthand_patches{};
|
||||
|
||||
// Helper to do case-insensitive string comparison
|
||||
auto _tolower = [](std::string_view in) {
|
||||
std::string out(in);
|
||||
std::transform(out.begin(), out.end(), out.begin(),
|
||||
[](unsigned char c) { return std::tolower(c); });
|
||||
return out;
|
||||
};
|
||||
|
||||
// Cache lowercase -> original category mapping to avoid repeated string conversions
|
||||
// Also pre-compute shorthand mappings (e.g., "wallclock" -> "component::WallClock")
|
||||
std::unordered_map<std::string, std::string> _category_map;
|
||||
constexpr std::array<std::string_view, 3> _prefixes = { "component::", "settings::",
|
||||
"hw_counters::" };
|
||||
|
||||
for(const auto& opt : _category_options)
|
||||
{
|
||||
auto opt_lower = _tolower(opt);
|
||||
_category_map[opt_lower] = opt;
|
||||
|
||||
// Add shorthand mappings if the option starts with a known prefix
|
||||
for(auto prefix : _prefixes)
|
||||
{
|
||||
if(opt_lower.size() > prefix.size() &&
|
||||
opt_lower.compare(0, prefix.size(), _tolower(prefix)) == 0)
|
||||
{
|
||||
// Map the shorthand (without prefix) to the full canonical form
|
||||
auto shorthand = opt_lower.substr(prefix.size());
|
||||
_category_map[shorthand] = opt;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper to find case-insensitive match in category options
|
||||
auto _find_category = [&_category_map,
|
||||
&_tolower](std::string_view input) -> std::string_view {
|
||||
auto input_lower = _tolower(input);
|
||||
auto it = _category_map.find(input_lower);
|
||||
if(it != _category_map.end()) return it->second;
|
||||
return "";
|
||||
};
|
||||
|
||||
// Process categories - now handles both full names and shorthands via the pre-built
|
||||
// map
|
||||
for(const auto& itr : category_view)
|
||||
{
|
||||
auto _is_shorthand = [&_shorthand_patches, &_category_options,
|
||||
itr](const std::string& _prefix) {
|
||||
auto _opt = TIMEMORY_JOIN("::", _prefix, itr);
|
||||
if(_category_options.count(_opt) > 0)
|
||||
{
|
||||
_shorthand_patches.emplace_back([itr, _opt]() {
|
||||
category_view.erase(itr);
|
||||
category_view.emplace(_opt);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
if(_category_options.count(itr) == 0)
|
||||
auto _matched = _find_category(itr);
|
||||
if(!_matched.empty())
|
||||
{
|
||||
if(!_is_shorthand("component") && !_is_shorthand("settings") &&
|
||||
!_is_shorthand("hw_counters"))
|
||||
throw std::runtime_error(
|
||||
itr + " is not a valid category. Use --list-categories to view "
|
||||
"valid categories");
|
||||
// Only create patch if the matched form differs from input (normalization
|
||||
// needed)
|
||||
if(_matched != itr)
|
||||
{
|
||||
// Explicitly convert string_view to string for safe capture
|
||||
std::string _matched_str(_matched);
|
||||
_shorthand_patches.emplace_back([itr, _matched_str]() {
|
||||
category_view.erase(itr);
|
||||
category_view.emplace(_matched_str);
|
||||
});
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error(
|
||||
itr + " is not a valid category. Use --list-categories to view "
|
||||
"valid categories");
|
||||
}
|
||||
}
|
||||
for(auto&& itr : _shorthand_patches)
|
||||
|
||||
@@ -623,11 +623,12 @@ configure_settings(bool _init)
|
||||
"the same signal (SIGRTMIN + 1)",
|
||||
SIGRTMIN + 1, "sampling", "advanced");
|
||||
|
||||
ROCPROFSYS_CONFIG_SETTING(std::string, "ROCPROFSYS_SAMPLING_OVERFLOW_EVENT",
|
||||
"Metric for overflow sampling",
|
||||
std::string{ "perf::PERF_COUNT_HW_CACHE_REFERENCES" },
|
||||
"sampling", "hardware_counters")
|
||||
->set_choices(perf::get_config_choices());
|
||||
ROCPROFSYS_CONFIG_SETTING(
|
||||
std::string, "ROCPROFSYS_SAMPLING_OVERFLOW_EVENT",
|
||||
"Metric for overflow sampling. Defaults to perf::PERF_COUNT_HW_CACHE_REFERENCES. "
|
||||
"For full list of events see: rocprof-sys-avail -H -c CPU -r overflow",
|
||||
std::string{ "perf::PERF_COUNT_HW_CACHE_REFERENCES" }, "sampling",
|
||||
"hardware_counters");
|
||||
|
||||
rocprofiler_sdk::config_settings(_config);
|
||||
amd_smi::config_settings(_config);
|
||||
@@ -942,12 +943,18 @@ configure_settings(bool _init)
|
||||
{
|
||||
auto _papi_events = _config->find("ROCPROFSYS_PAPI_EVENTS");
|
||||
_add_rocprofsys_category(_papi_events);
|
||||
std::vector<std::string> _papi_choices = {};
|
||||
for(auto itr : tim::papi::available_events_info())
|
||||
// Only enumerate PAPI events if the user has specified them
|
||||
if(_papi_events->second->get_config_updated() ||
|
||||
!_config->get_papi_events().empty())
|
||||
{
|
||||
if(itr.available()) _papi_choices.emplace_back(itr.symbol());
|
||||
std::vector<std::string> _papi_choices = {};
|
||||
for(const auto& itr :
|
||||
tim::papi::available_events_info({ "perf_event_uncore" }))
|
||||
{
|
||||
if(itr.available()) _papi_choices.emplace_back(itr.symbol());
|
||||
}
|
||||
_papi_events->second->set_choices(_papi_choices);
|
||||
}
|
||||
_papi_events->second->set_choices(_papi_choices);
|
||||
}
|
||||
#else
|
||||
_config->find("ROCPROFSYS_PAPI_EVENTS")->second->set_hidden(true);
|
||||
|
||||
@@ -31,35 +31,6 @@ namespace perf
|
||||
{
|
||||
namespace units = ::tim::units;
|
||||
|
||||
std::vector<std::string>
|
||||
get_config_choices()
|
||||
{
|
||||
namespace regex_const = ::std::regex_constants;
|
||||
|
||||
auto _data = std::vector<std::string>{};
|
||||
auto _papi_events = tim::papi::available_events_info();
|
||||
const auto _prefix = std::string_view{ "perf::" };
|
||||
auto _regex =
|
||||
std::regex{ "^(perf::|)PERF_COUNT_(HW|SW|HW_CACHE)_([A-Z_]+)(|:[A-Z]+)$",
|
||||
regex_const::optimize };
|
||||
|
||||
for(const auto& itr : _papi_events)
|
||||
{
|
||||
if(std::regex_match(itr.symbol(), _regex))
|
||||
{
|
||||
auto _symbol = itr.symbol();
|
||||
auto _pos = _symbol.find(_prefix);
|
||||
if(_pos == 0) _symbol = _symbol.substr(_prefix.length());
|
||||
_data.emplace_back(_symbol);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(_data.begin(), _data.end());
|
||||
_data.erase(std::unique(_data.begin(), _data.end()), _data.end());
|
||||
|
||||
return _data;
|
||||
}
|
||||
|
||||
event_type
|
||||
get_event_type(std::string_view _v)
|
||||
{
|
||||
|
||||
@@ -277,9 +277,6 @@ enum class record_type
|
||||
#endif
|
||||
};
|
||||
|
||||
std::vector<std::string>
|
||||
get_config_choices();
|
||||
|
||||
event_type get_event_type(std::string_view);
|
||||
hw_config get_hw_config(std::string_view);
|
||||
sw_config get_sw_config(std::string_view);
|
||||
|
||||
Verwijs in nieuw issue
Block a user