[SDK] Remove std::regex usage from rocprofiler-sdk library and common library (#421)

* Remove std::regex usage from rocprofiler-sdk and common library

- See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118408
- std::regex usage produces exceptions or segfaults when used when on applications compiled with dual ABI
- Add code restrictions workflow
  - simple workflow ensuring code restrictions (such as std::regex) are not used

* Update CHANGELOG

* Explicitly set permissions for restrictions workflow

* Fix handling of /proc/cpuinfo entries with no info

- e.g. "power_management:" (colon is last character)

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Este commit está contenido en:
Madsen, Jonathan
2025-05-29 23:11:13 -05:00
cometido por GitHub
padre 7f22b66a9b
commit dbb2e52216
Se han modificado 12 ficheros con 212 adiciones y 47 borrados
@@ -6,11 +6,13 @@ on:
branches: [ amd-staging, amd-mainline ]
paths-ignore:
- '*.md'
- '**/README.md'
- 'source/docs/**'
- 'CODEOWNERS'
pull_request:
paths-ignore:
- '*.md'
- '**/README.md'
- 'source/docs/**'
- 'CODEOWNERS'
+2
Ver fichero
@@ -8,6 +8,8 @@ on:
- '.github/workflows/pull_*.yml'
- '.github/workflows/linting.yml'
- '.github/workflows/markdown_lint.yml'
- '*.md'
- '**/README.md'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
+53
Ver fichero
@@ -0,0 +1,53 @@
name: Code Restrictions
permissions:
contents: read
# This workflow ensures that certain code restrictions are applied.
# For examples, rocprofiler-sdk cannot use std::regex because of issues
# when loaded into an application compiled with C++ dual ABI support because
# while std::regex itself (and std::regex_traits) being ABI-tagged,
# std::__detail::_Scanner is not. Applications compiled with dual ABI support
# will either throw an exception or produce a segfault and thus, this workflow
# attempts to catch usage of std::regex during CI.
on:
workflow_dispatch:
pull_request:
paths-ignore:
- '.github/workflows/pull_*.yml'
- '.github/workflows/linting.yml'
- '.github/workflows/markdown_lint.yml'
- '*.md'
- '**/README.md'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
regex:
runs-on: ubuntu-latest
env:
FOLDERS: "source/lib/common source/lib/rocprofiler-sdk source/lib/rocprofiler-sdk-roctx"
steps:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y python3-pip
python3 -m pip install -U cmake-format
- name: Apply restriction
run: |
set +e
FILES="$(find ${FOLDERS} -type f)"
GREP="$(grep -E -n 'std::regex|<regex>' ${FILES})"
if [ "${GREP}" != "" ]; then
echo -e "\nError! std::regex is not allowed in ${FOLDERS}...\n"
echo -e "\nResults:\n"
echo "${GREP}"
exit 1
fi
+1
Ver fichero
@@ -187,6 +187,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- Fixed missing callbacks around internal thread creation within counter collection service
- Fixed potential data race in rocprofiler-sdk double buffering scheme
- Usage of std::regex in core rocprofiler-sdk library which causes segfaults/exceptions when used under dual ABI
### Removed
+3 -4
Ver fichero
@@ -34,7 +34,6 @@
#include <iomanip>
#include <iostream>
#include <map>
#include <regex>
#include <set>
#include <string>
#include <vector>
@@ -78,16 +77,16 @@ ElfInfo::ElfInfo(std::string _fname)
{}
bool
ElfInfo::has_symbol(std::regex&& _re) const
ElfInfo::has_symbol(const std::function<bool(std::string_view)>& _checker) const
{
for(const auto& itr : symbol_entries)
{
if(!itr.name.empty() && std::regex_search(itr.name, _re)) return true;
if(!itr.name.empty() && _checker(itr.name)) return true;
}
// For stripped binaries
for(const auto& itr : dynamic_symbol_entries)
{
if(!itr.name.empty() && std::regex_search(itr.name, _re)) return true;
if(!itr.name.empty() && _checker(itr.name)) return true;
}
return false;
+2 -2
Ver fichero
@@ -25,9 +25,9 @@
#include <elfio/elfio.hpp>
#include <cstdint>
#include <functional>
#include <map>
#include <ostream>
#include <regex>
#include <set>
#include <sstream>
#include <string>
@@ -94,7 +94,7 @@ struct ElfInfo
std::vector<DynamicEntry> dynamic_entries = {};
std::vector<RelocationEntry> reloc_entries = {};
bool has_symbol(std::regex&&) const;
bool has_symbol(const std::function<bool(std::string_view)>&) const;
friend bool operator==(const ElfInfo& lhs, const ElfInfo& rhs)
{
+72 -36
Ver fichero
@@ -46,11 +46,12 @@
#include <iomanip>
#include <limits>
#include <random>
#include <regex>
#include <set>
#include <shared_mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -112,6 +113,7 @@ parse_cpu_info()
break;
}
line = sdk::parse::strip(std::move(line), " \t\n\v\f\r");
if(line.empty())
{
if(!current_block.empty()) blocks.emplace_back(std::move(current_block));
@@ -134,46 +136,68 @@ parse_cpu_info()
auto info_v = cpu_info{};
for(const auto& itr : bitr)
{
auto match = std::smatch{};
const std::regex re{".*: (.*)$"};
if(std::regex_match(itr, match, re))
auto match = sdk::parse::tokenize(itr, std::vector<std::string_view>{": "});
if(match.size() == 2)
{
if(match.size() == 2)
{
std::ssub_match value = match[1];
if(itr.find("vendor_id") == 0)
info_v.vendor_id = value.str();
else if(itr.find("model name") == 0)
auto get_stol = [_label = std::string_view{itr}](const auto& _value) -> long {
try
{
info_v.model_name = value.str();
size_t first_colon_pos = match.str().find(':');
// This handles the case where the model name has multiple colons
// Example "model name : AMD EPYC : 100-000000248"
if(first_colon_pos != std::string::npos)
{
// Extract the model name after the first colon
info_v.model_name = match.str().substr(first_colon_pos + 1);
// Remove leading and trailing whitespaces
info_v.model_name = std::regex_replace(
info_v.model_name, std::regex("^\\s+|\\s+$"), "");
}
return std::stol(_value);
} catch(std::exception& e)
{
ROCP_CI_LOG(WARNING) << fmt::format("rocprofiler-sdk agent encountered "
"error while parsing CPU info '{}': {}",
_label,
e.what());
}
return 0;
};
auto value = match.back();
if(itr.find("vendor_id") == 0)
info_v.vendor_id = value;
else if(itr.find("model name") == 0)
{
info_v.model_name = value;
size_t first_colon_pos = value.find(':');
// This handles the case where the model name has multiple colons
// Example "model name : AMD EPYC : 100-000000248"
if(first_colon_pos != std::string::npos)
{
// Extract the model name after the first colon
info_v.model_name = value.substr(first_colon_pos + 1);
// Remove leading and trailing whitespaces
info_v.model_name =
sdk::parse::strip(std::string{info_v.model_name}, " \t\n\v\f\r");
}
else if(itr.find("processor") == 0)
info_v.processor = std::stol(value.str());
else if(itr.find("cpu family") == 0)
info_v.family = std::stol(value.str());
else if(itr.find("model") == 0 && itr.find("model name") != 0)
info_v.model = std::stol(value.str());
else if(itr.find("physical id") == 0)
info_v.physical_id = std::stol(value.str());
else if(itr.find("core id") == 0)
info_v.core_id = std::stol(value.str());
else if(itr.find("apicid") == 0)
info_v.apicid = std::stol(value.str());
}
else if(itr.find("processor") == 0)
info_v.processor = get_stol(value);
else if(itr.find("cpu family") == 0)
info_v.family = get_stol(value);
else if(itr.find("model") == 0 && itr.find("model name") != 0)
info_v.model = get_stol(value);
else if(itr.find("physical id") == 0)
info_v.physical_id = get_stol(value);
else if(itr.find("core id") == 0)
info_v.core_id = get_stol(value);
else if(itr.find("apicid") == 0)
info_v.apicid = get_stol(value);
}
else
{
// Each processor_block is grouped by the presence of an empty line in /proc/cpuinfo
// so no checks for empty lines are performed inside this loop. If an empty line is
// found, that should be considered an error. Entries like "power management:" with
// no info (i.e. where the ":" is the last character on the line) can be ignored
auto last_colon_pos = itr.find_last_of(':');
ROCP_CI_LOG_IF(
INFO, last_colon_pos < itr.length() && (last_colon_pos + 1) != itr.length())
<< fmt::format("Encountered unexpected /proc/cpuinfo line format: '{}'", itr);
}
}
if(info_v.is_valid())
processor_info.emplace_back(info_v);
else
@@ -633,7 +657,19 @@ read_topology()
else
agent_info.model_name = "";
if(!gpu_id_prop.empty()) agent_info.gpu_id = std::stoull(gpu_id_prop.front());
if(!gpu_id_prop.empty())
{
try
{
agent_info.gpu_id = std::stoull(gpu_id_prop.front());
} catch(std::exception& e)
{
ROCP_CI_LOG(WARNING) << fmt::format("rocprofiler-sdk agent encountered error while "
"parsing gpu id property '{}': {}",
gpu_id_prop.front(),
e.what());
}
}
read_property(properties, "cpu_cores_count", agent_info.cpu_cores_count);
read_property(properties, "simd_count", agent_info.simd_count);
@@ -33,7 +33,6 @@
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <regex>
#include <string_view>
#include <vector>
@@ -33,7 +33,6 @@
#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <regex>
#include <string_view>
#include <vector>
+6 -2
Ver fichero
@@ -310,7 +310,9 @@ find_clients()
if(fs::exists(itr) && resolved_exists(itr))
{
auto elfinfo = common::elf_utils::read(itr);
if(!elfinfo.has_symbol(std::regex{"^rocprofiler_configure$"}))
if(!elfinfo.has_symbol([](std::string_view symname) {
return (symname == "rocprofiler_configure");
}))
{
ROCP_CI_LOG(WARNING) << fmt::format(
"[ROCP_TOOL_LIBRARIES] rocprofiler-sdk tool library '{}' did not "
@@ -379,7 +381,9 @@ find_clients()
if(fs::exists(itr) && resolved_exists(itr))
{
auto elfinfo = common::elf_utils::read(itr);
if(!elfinfo.has_symbol(std::regex{"^rocprofiler_configure$"}))
if(!elfinfo.has_symbol([](std::string_view symname) {
return (symname == "rocprofiler_configure");
}))
{
ROCP_INFO << fmt::format(
"Shared library '{}' did not contain the 'rocprofiler_configure' symbol "
+1 -1
Ver fichero
@@ -5,7 +5,7 @@ project(rocprofiler-sdk-tests-common LANGUAGES C CXX)
include(GoogleTest)
set(common_sources demangling.cpp environment.cpp md5sum.cpp mpl.cpp)
set(common_sources demangling.cpp environment.cpp md5sum.cpp mpl.cpp parse.cpp)
add_executable(common-tests)
target_sources(common-tests PRIVATE ${common_sources})
+70
Ver fichero
@@ -0,0 +1,70 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
#include <fmt/format.h>
#include <gtest/gtest.h>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <string>
namespace sdk = ::rocprofiler::sdk;
TEST(parse, tokenize_characters)
{
auto tokenized = sdk::parse::tokenize(std::string_view{"0, GPU-DEADBEEFDEADBEEF,, -1"}, ", ");
ASSERT_EQ(tokenized.size(), 3)
<< fmt::format("TOKENIZED: '{}'", fmt::join(tokenized.begin(), tokenized.end(), "' | '"));
EXPECT_EQ(tokenized.at(0), "0");
EXPECT_EQ(tokenized.at(1), "GPU-DEADBEEFDEADBEEF");
EXPECT_EQ(tokenized.at(2), "-1");
}
TEST(parse, tokenize_strings)
{
auto tokenized = sdk::parse::tokenize(std::string_view{"0, GPU-DEADBEEFDEADBEEF, -1, 8"},
std::vector<std::string_view>{", "});
ASSERT_EQ(tokenized.size(), 4)
<< fmt::format("TOKENIZED: '{}'", fmt::join(tokenized.begin(), tokenized.end(), "' | '"));
EXPECT_EQ(tokenized.at(0), "0");
EXPECT_EQ(tokenized.at(1), "GPU-DEADBEEFDEADBEEF");
EXPECT_EQ(tokenized.at(2), "-1");
EXPECT_EQ(tokenized.at(3), "8");
}
TEST(parse, strip)
{
auto test_message = std::string{" \t\v\f TEST MESSAGE\n\r\n "};
auto stripped_message = sdk::parse::strip(std::string{test_message}, " \t\n\v\f\r");
auto expected_message = std::string{"TEST MESSAGE"};
EXPECT_EQ(stripped_message, expected_message)
<< fmt::format("Expected '{}' after stripping '{}'\nResult: '{}'",
expected_message,
test_message,
stripped_message);
}