Change how cache manager handles child process trace cache for rocpd (#1033)

* Change how cache manager handles child process trace cache

* Sampling and backtrace metrics to cache

* Apply cmake formatting

* Fix parsing of metadata json

* Code clean up

* Fix build nlohmann json from source

* Fix storage parsed finished callback

* Revert sampling for child process

* Change cache file name generation

* Fix thread start stop

* Fix process start end timestamp

* Applied suggestions from code review

* Try with late start of flushing task thread

* Change dockerfiles for ci

* Revert changes on github workflows

* Remove json_fwd.hpp include

* fix dump

* Build nlohmann/json by default

Signed-off-by: David Galiffi <David.Galiffi@amd.com>

* Update location of build artifacts for nlohmann/json

Signed-off-by: David Galiffi <David.Galiffi@amd.com>

* Revert use_output_suffix

* Remove unused logs

* Fix cache store inside counter due to structure change

* Remove decode tests from debian ci

* Fix issue where all databases have the same UUID (#1499)

Co-authored-by: Aleksandar Djordjevic <adjordje@amd.com>

* Removing the cpack and install steps to save space

* Revert "Remove decode tests from debian ci"

This reverts commit ddabf6dd142dcf438e6b8997b8abe86f2c868468.

* Revert "Removing the cpack and install steps to save space"

This reverts commit 973da3a1ba99d99d529af5269d30e177092f9bfa.

* Add prepare-runner job as dependency to clean up the space

* Fix formatting

* Free up even more space

* Remove verbose for workflows

* remove hw_counters from ext_data

* move space clean up inside container

* try to remove external folder to free up space

* Check space

* Refactor Cleanup to its own step

---------

Signed-off-by: David Galiffi <David.Galiffi@amd.com>
Co-authored-by: David Galiffi <David.Galiffi@amd.com>
Co-authored-by: Aleksandar Djordjevic <aleksandar.djordjevic@amd.com>
Co-authored-by: Aleksandar Djordjevic <adjordje@amd.com>
Этот коммит содержится в:
Milan Radosavljevic
2025-10-24 17:47:15 +02:00
коммит произвёл GitHub
родитель 4f075902fc
Коммит 8806be162c
46 изменённых файлов: 1502 добавлений и 1014 удалений
+12 -1
Просмотреть файл
@@ -127,18 +127,29 @@ jobs:
-DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs \
-DROCPROFSYS_PYTHON_ENVS="py3.8;py3.9;py3.10;py3.11;py3.12;py3.13" \
-DROCPROFSYS_MAX_THREADS=64 \
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv-offload" \
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv-offload;videodecode;jpegdecode;network" \
-DROCPROFSYS_BUILD_NUMBER=1 \
-DUSE_CLANG_OMP=OFF \
$CMAKE_PREFIX_PATH_ARG \
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
set -v
du /tmp -d 1 -h
du build/rocprof-sys-tests-output -d 1 -h
df -h
rm -fr /tmp/* build/rocprof-sys-tests-output/*
- name: Install
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
cmake --build build --target install --parallel 2
rm -rf /opt/rocprofiler-systems
- name: CPack and Install
working-directory: projects/rocprofiler-systems/
+9
Просмотреть файл
@@ -136,6 +136,15 @@ jobs:
--
-LE "transpose|rccl|videodecode|jpegdecode|network"
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
du /tmp -d 1 -h
du build/rocprof-sys-tests-output -d 1 -h
df -h
rm -fr /tmp/* build/rocprof-sys-tests-output/*
- name: Install
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
+13 -2
Просмотреть файл
@@ -311,17 +311,28 @@ jobs:
-DROCPROFSYS_PYTHON_ENVS="py3.7;py3.8;py3.9;py3.10;py3.11" \
-DROCPROFSYS_STRIP_LIBRARIES=${{ matrix.strip }} \
-DROCPROFSYS_MAX_THREADS=64 \
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv" \
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv;videodecode;jpegdecode;network" \
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }} \
-DUSE_CLANG_OMP=OFF \
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
set -v
du /tmp -d 1 -h
du build/rocprof-sys-tests-output -d 1 -h
df -h
rm -fr /tmp/* build/rocprof-sys-tests-output/*
- name: Install
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run:
run: |
cmake --build build --target install --parallel 2
rm -fr /opt/rocprofiler-systems-dev/
- name: CPack and Install
working-directory: projects/rocprofiler-systems/
+11
Просмотреть файл
@@ -133,11 +133,22 @@ jobs:
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
set -v
du /tmp -d 1 -h
du build/rocprof-sys-tests-output -d 1 -h
df -h
rm -fr /tmp/* build/rocprof-sys-tests-output/*
- name: Install
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
run: |
cmake --build build --target install --parallel 2
rm -fr /opt/rocprofiler-systems/
- name: CPack and Install
working-directory: projects/rocprofiler-systems/
+3
Просмотреть файл
@@ -217,6 +217,9 @@ rocprofiler_systems_add_option(ROCPROFSYS_BUILD_DYNINST "Build dyninst from subm
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_LIBUNWIND
"Build libunwind from submodule" ON
)
# When ON, nlohmann/json is downloaded and built as part of this project; when OFF, an
# installed copy is expected to be found instead (see the NlohmannJson module). The
# dependency is not a git submodule, so the description says "from source".
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_NLOHMANN_JSON
"Build nlohmann/json from source (downloaded via FetchContent)" ON
)
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CODECOV "Build for code coverage" OFF)
rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS
"Install perfetto tools (i.e. traced, perfetto, etc.)" OFF
+29
Просмотреть файл
@@ -0,0 +1,29 @@
# Provides nlohmann/json to the project through the `rocprofiler-systems-json`
# interface target. That target must already exist when this file is included,
# because it is only modified here, never created.
#
# ROCPROFSYS_BUILD_NLOHMANN_JSON selects where the library comes from:
#   ON  - fetch the upstream repository with FetchContent and build it in-tree
#   OFF - use an already installed package located with find_package()

# Process this file at most once per configure, regardless of the including directory.
include_guard(GLOBAL)
if(ROCPROFSYS_BUILD_NLOHMANN_JSON)
message(STATUS "Building nlohmann/json from source")
include(FetchContent)
# Pin the download to a fixed release tag and keep the source, build and sub-build
# trees together under <build>/external/nlohmann.
FetchContent_Declare(
nlohmann_json
GIT_REPOSITORY https://github.com/nlohmann/json.git
GIT_TAG v3.11.3
SOURCE_DIR
${PROJECT_BINARY_DIR}/external/nlohmann/src
BINARY_DIR
${PROJECT_BINARY_DIR}/external/nlohmann/lib
SUBBUILD_DIR
${PROJECT_BINARY_DIR}/external/nlohmann/subdir
)
FetchContent_MakeAvailable(nlohmann_json)
# Re-export the fetched target's include directories as SYSTEM includes
# (presumably so that warnings from the third-party headers are suppressed).
target_include_directories(
rocprofiler-systems-json
SYSTEM
INTERFACE $<TARGET_PROPERTY:nlohmann_json,INTERFACE_INCLUDE_DIRECTORIES>
)
target_link_libraries(rocprofiler-systems-json INTERFACE nlohmann_json)
else()
message(STATUS "Using system nlohmann/json library")
# REQUIRED: configuration stops here if no installed nlohmann_json package is found.
find_package(nlohmann_json REQUIRED)
target_link_libraries(rocprofiler-systems-json INTERFACE nlohmann_json::nlohmann_json)
endif()
+11
Просмотреть файл
@@ -56,6 +56,9 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-perfetto
rocprofiler_systems_add_interface_library(rocprofiler-systems-sqlite3
"Use SQLite3 for rocpd data storage"
)
rocprofiler_systems_add_interface_library(rocprofiler-systems-json
"Use nlohmann/json for json data handling"
)
rocprofiler_systems_add_interface_library(rocprofiler-systems-timemory
"Provides timemory libraries"
)
@@ -543,6 +546,14 @@ include(Perfetto)
include(SQLite3)
# ----------------------------------------------------------------------------------------#
#
# NlohmannJson
#
# ----------------------------------------------------------------------------------------#
include(NlohmannJson)
# ----------------------------------------------------------------------------------------#
#
# ELFIO
+1
Просмотреть файл
@@ -41,6 +41,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-definitions>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-json>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-elfutils>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-bfd>
+1
Просмотреть файл
@@ -123,6 +123,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-options>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-json>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-mpi>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-rocm>
-3
Просмотреть файл
@@ -55,9 +55,6 @@ struct agent
size_t device_type_index{ 0 };
size_t base_id{ 0 };
#if ROCPROFSYS_USE_ROCM > 0
amdsmi_processor_handle smi_handle = nullptr;
#endif
};
} // namespace rocprofsys
+7 -3
Просмотреть файл
@@ -29,12 +29,16 @@ namespace rocprofsys
{
agent_manager&
agent_manager::get_instance()
get_agent_manager_instance()
{
static agent_manager instance;
return instance;
static agent_manager _instance;
return _instance;
}
/// Constructs a manager that takes ownership of an existing list of agents.
/// The vector is taken by value and moved into `_agents`.
///
/// NOTE(review): `_gpu_agents_cnt` and `_cpu_agents_cnt` keep their in-class default
/// of 0 here; they are not derived from `agents`. Confirm whether callers of
/// get_gpu_agents_count() on a manager built this way expect the counts to be
/// recomputed.
agent_manager::agent_manager(std::vector<std::shared_ptr<agent>> agents)
: _agents(std::move(agents))
{}
void
agent_manager::insert_agent(agent& _agent)
{
+5 -3
Просмотреть файл
@@ -32,8 +32,8 @@ namespace rocprofsys
struct agent_manager
{
static agent_manager& get_instance();
agent_manager() = default;
agent_manager(std::vector<std::shared_ptr<agent>> agents);
agent_manager(const agent_manager&) = delete;
agent_manager& operator=(const agent_manager&) = delete;
agent_manager(agent_manager&&) = delete;
@@ -57,7 +57,9 @@ private:
std::vector<std::shared_ptr<agent>> _agents;
size_t _gpu_agents_cnt{ 0 };
size_t _cpu_agents_cnt{ 0 };
agent_manager() = default;
};
agent_manager&
get_agent_manager_instance();
} // namespace rocprofsys
+5 -5
Просмотреть файл
@@ -2365,14 +2365,13 @@ get_tmpdir()
}
std::string
get_database_absolute_path(std::string_view database_name)
get_database_absolute_path(std::string_view database_name, std::string_view suffix)
{
const auto* _existing_path = std::getenv("ROCPROFSYS_DATABASE_DIR");
auto _dir = _existing_path ? std::string{ _existing_path } : std::string{};
auto _ext = std::string{ "db" };
auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(),
settings::default_process_suffix(),
auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(), suffix,
false, _dir };
const auto get_path = [](const std::string& path) {
@@ -2381,8 +2380,9 @@ get_database_absolute_path(std::string_view database_name)
: std::string{};
};
auto _val = settings::compose_output_filename(std::string(database_name), _ext, _cfg);
_dir = get_path(_val);
auto _val =
settings::compose_output_filename(std::string{ database_name }, _ext, _cfg);
_dir = get_path(_val);
setenv("ROCPROFSYS_DATABASE_DIR", _dir.c_str(), 1);
+1 -1
Просмотреть файл
@@ -362,7 +362,7 @@ std::string
get_tmpdir();
std::string
get_database_absolute_path(std::string_view database_name);
get_database_absolute_path(std::string_view database_name, std::string_view tag);
bool&
get_use_rocpd() ROCPROFSYS_HOT;
+1 -1
Просмотреть файл
@@ -166,7 +166,7 @@ query_cpu_agents()
return;
}
auto& _agent_manager = agent_manager::get_instance();
auto& _agent_manager = get_agent_manager_instance();
auto cpu_data = get_cpu_info();
for(auto& cpu : cpu_data)
+2 -2
Просмотреть файл
@@ -121,7 +121,7 @@ query_rocm_agents()
auto iterator = []([[maybe_unused]] rocprofiler_agent_version_t version,
const void** agents, size_t num_agents,
[[maybe_unused]] void* user_data) -> rocprofiler_status_t {
auto& _agent_manager = agent_manager::get_instance();
auto& _agent_manager = get_agent_manager_instance();
for(size_t i = 0; i < num_agents; ++i)
{
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
@@ -153,7 +153,7 @@ query_rocm_agents()
1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%ld\n", _e.what(),
_dev_cnt);
}
_dev_cnt = agent_manager::get_instance().get_gpu_agents_count();
_dev_cnt = get_agent_manager_instance().get_gpu_agents_count();
#endif
return _dev_cnt;
}
+2 -8
Просмотреть файл
@@ -20,15 +20,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set(rocpd_sources
${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp
${CMAKE_CURRENT_LIST_DIR}/json.cpp
)
set(rocpd_sources ${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp)
set(rocpd_headers
${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp
${CMAKE_CURRENT_LIST_DIR}/json.hpp
)
set(rocpd_headers ${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp)
target_sources(rocprofiler-systems-core-library PRIVATE ${rocpd_sources} ${rocpd_headers})
+79 -95
Просмотреть файл
@@ -24,17 +24,23 @@
#include "core/rocpd/data_storage/database.hpp"
#include "core/rocpd/data_storage/table_insert_query.hpp"
#include "debug.hpp"
#include <memory>
#include <stdexcept>
namespace rocprofsys
{
namespace rocpd
{
data_processor::data_processor()
data_processor::data_processor(std::shared_ptr<data_storage::database> database)
: _database(std::move(database))
{
data_storage::database::get_instance().initialize_schema();
_upid = data_storage::database::get_instance().get_upid();
if(!_database)
{
throw std::invalid_argument("Provided pointer to a non-existing database!");
}
_database->initialize_schema();
_upid = _database->get_upid();
// Initialize event statement
initialize_event_stmt();
initialize_pmc_event_stmt();
initialize_sample_stmt();
@@ -48,22 +54,14 @@ data_processor::data_processor()
initialize_memory_alloc_stmt();
}
data_processor&
data_processor::get_instance()
{
static data_processor _instance;
return _instance;
}
void
data_processor::initialize_metadata()
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_metadata_" + _upid)
.set_columns("tag", "value")
.set_values("upid", _upid)
.get_query_string());
_database->execute_query(query.set_table_name("rocpd_metadata_" + _upid)
.set_columns("tag", "value")
.set_values("upid", _upid)
.get_query_string());
}
size_t
@@ -73,13 +71,12 @@ data_processor::insert_string(const char* str)
if(it != _string_map.end()) return _string_map.at(str);
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_string_" + _upid)
.set_columns("guid", "string")
.set_values(_upid, str)
.get_query_string());
_database->execute_query(query.set_table_name("rocpd_string_" + _upid)
.set_columns("guid", "string")
.set_values(_upid, str)
.get_query_string());
const auto string_id = data_storage::database::get_instance().get_last_insert_id();
const auto string_id = _database->get_last_insert_id();
_string_map.emplace(str, string_id);
return string_id;
}
@@ -91,7 +88,7 @@ data_processor::insert_node_info(size_t node_id, size_t hash, const char* machin
const char* hardware_name, const char* domain_name)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
_database->execute_query(
query.set_table_name("rocpd_info_node_" + _upid)
.set_columns("id", "guid", "hash", "machine_id", "system_name", "hostname",
"release", "version", "hardware_name", "domain_name")
@@ -107,13 +104,13 @@ data_processor::insert_process_info(size_t nid, size_t ppid, size_t pid, size_t
const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_process_" + _upid)
.set_columns("id", "guid", "nid", "ppid", "pid", "init", "fini", "start",
"end", "command", "environment", "extdata")
.set_values(pid, _upid, nid, ppid, pid, init, fini, start, end, command,
environment, extdata)
.get_query_string());
_database->execute_query(query.set_table_name("rocpd_info_process_" + _upid)
.set_columns("id", "guid", "nid", "ppid", "pid", "init",
"fini", "start", "end", "command",
"environment", "extdata")
.set_values(pid, _upid, nid, ppid, pid, init, fini,
start, end, command, environment, extdata)
.get_query_string());
}
size_t
@@ -125,7 +122,7 @@ data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type,
const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
_database->execute_query(
query.set_table_name("rocpd_info_agent_" + _upid)
.set_columns("guid", "nid", "pid", "type", "absolute_index", "logical_index",
"type_index", "uuid", "name", "model_name", "vendor_name",
@@ -135,7 +132,7 @@ data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type,
user_name, extdata)
.get_query_string());
return data_storage::database::get_instance().get_last_insert_id();
return _database->get_last_insert_id();
}
void
@@ -151,13 +148,13 @@ data_processor::insert_track(const char* track_name, size_t node_id, size_t proc
auto name_id = insert_string(track_name);
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
_database->execute_query(
query.set_table_name("rocpd_track_" + _upid)
.set_columns("guid", "nid", "pid", "tid", "name_id", "extdata")
.set_values(_upid, node_id, process_id, thread_id, name_id, extdata)
.get_query_string());
auto track_id = data_storage::database::get_instance().get_last_insert_id();
auto track_id = _database->get_last_insert_id();
_tracks[track_name] = track_name_map{ track_id, name_id };
}
@@ -191,9 +188,9 @@ data_processor::insert_pmc_description(
component, units, value_type, block, expression, is_constant,
is_derived, extdata)
.get_query_string();
data_storage::database::get_instance().execute_query(query);
_database->execute_query(query);
auto pmc_id = data_storage::database::get_instance().get_last_insert_id();
auto pmc_id = _database->get_last_insert_id();
_pmc_descriptor_map.emplace(
std::pair<pmc_identifier, size_t>{ { agent_id, name }, pmc_id });
}
@@ -248,7 +245,7 @@ data_processor::insert_event(size_t string_primary_key, size_t stack_id,
{
_insert_event_statement(_upid.c_str(), string_primary_key, stack_id, parent_stack_id,
correlation_id, call_stack, line_info, extdata);
return data_storage::database::get_instance().get_last_insert_id();
return _database->get_last_insert_id();
}
void
@@ -261,9 +258,9 @@ data_processor::initialize_event_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_event_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
const char*, const char*, const char*>(query);
_database->create_statement_executor<const char*, size_t, size_t, size_t, size_t,
const char*, const char*, const char*>(
query);
}
void
@@ -275,8 +272,8 @@ data_processor::initialize_pmc_event_stmt()
.set_values('?', '?', '?', '?', '?')
.get_query_string();
_insert_pmc_event_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, double, const char*>(
_database
->create_statement_executor<const char*, size_t, size_t, double, const char*>(
query);
}
@@ -289,9 +286,8 @@ data_processor::initialize_sample_stmt()
.set_values('?', '?', '?', '?', '?')
.get_query_string();
_insert_sample_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, uint64_t, size_t,
const char*>(query);
_database->create_statement_executor<const char*, size_t, uint64_t, size_t,
const char*>(query);
}
void
@@ -304,9 +300,9 @@ data_processor::initialize_region_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_region_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
uint64_t, size_t, size_t, const char*>(query);
_database
->create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
uint64_t, size_t, size_t, const char*>(query);
}
void
@@ -323,13 +319,10 @@ data_processor::initialize_kernel_dispatch_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_kernel_dispatch_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, uint64_t, uint64_t,
size_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, const char*>(
query);
_insert_kernel_dispatch_statement = _database->create_statement_executor<
const char*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, const char*>(query);
}
void
@@ -344,12 +337,9 @@ data_processor::initialize_memory_copy_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_memory_copy_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, uint64_t,
uint64_t, size_t, size_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, size_t,
const char*>(query);
_insert_memory_copy_statement = _database->create_statement_executor<
const char*, size_t, size_t, size_t, uint64_t, uint64_t, size_t, size_t, size_t,
size_t, size_t, size_t, size_t, size_t, size_t, size_t, const char*>(query);
}
void
@@ -367,11 +357,11 @@ data_processor::initialize_kernel_symbol_stmt()
'?', '?', '?')
.get_query_string();
_insert_kernel_symbol_statement =
data_storage::database::get_instance()
.create_statement_executor<size_t, const char*, size_t, size_t, uint64_t,
const char*, const char*, uint64_t, uint32_t,
uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t, const char*>(query);
_database->create_statement_executor<size_t, const char*, size_t, size_t,
uint64_t, const char*, const char*, uint64_t,
uint32_t, uint32_t, uint32_t, uint32_t,
uint32_t, uint32_t, uint32_t, const char*>(
query);
}
void
@@ -385,10 +375,9 @@ data_processor::initialize_code_object_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_code_object_statement =
data_storage::database::get_instance()
.create_statement_executor<size_t, const char*, size_t, size_t, size_t,
const char*, uint64_t, uint64_t, uint64_t,
const char*, const char*>(query);
_database->create_statement_executor<size_t, const char*, size_t, size_t, size_t,
const char*, uint64_t, uint64_t, uint64_t,
const char*, const char*>(query);
}
void
@@ -401,9 +390,9 @@ data_processor::initialize_args_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?')
.get_query_string();
_insert_args_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, const char*,
const char*, const char*, const char*>(query);
_database->create_statement_executor<const char*, size_t, size_t, const char*,
const char*, const char*, const char*>(
query);
}
void
@@ -417,12 +406,9 @@ data_processor::initialize_memory_alloc_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?', '?', '?')
.get_query_string();
_insert_memory_alloc_statement =
data_storage::database::get_instance()
.create_statement_executor<
const char*, size_t, size_t, size_t, size_t, const char*, const char*,
uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>(
query);
_insert_memory_alloc_statement = _database->create_statement_executor<
const char*, size_t, size_t, size_t, size_t, const char*, const char*, uint64_t,
uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>(query);
// Statement without agent_id
query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid)
@@ -432,11 +418,9 @@ data_processor::initialize_memory_alloc_stmt()
.set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
'?', '?')
.get_query_string();
_insert_memory_alloc_no_agent_statement =
data_storage::database::get_instance()
.create_statement_executor<const char*, size_t, size_t, size_t, const char*,
const char*, uint64_t, uint64_t, size_t, size_t,
size_t, size_t, size_t, const char*>(query);
_insert_memory_alloc_no_agent_statement = _database->create_statement_executor<
const char*, size_t, size_t, size_t, const char*, const char*, uint64_t, uint64_t,
size_t, size_t, size_t, size_t, size_t, const char*>(query);
}
void
@@ -451,7 +435,7 @@ data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t proc
const char* name, const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
_database->execute_query(
query.set_table_name("rocpd_info_stream_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(stream_id, _upid, node_id, process_id, name, extdata)
@@ -463,7 +447,7 @@ data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t proces
const char* name, const char* extdata)
{
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
_database->execute_query(
query.set_table_name("rocpd_info_queue_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(queue_id, _upid, node_id, process_id, name, extdata)
@@ -575,15 +559,15 @@ data_processor::insert_thread_info(size_t node_id, size_t parent_process_id,
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_thread_" + _upid)
.set_columns("guid", "nid", "ppid", "pid", "tid", "name", "start", "end",
"extdata")
.set_values(_upid.c_str(), node_id, parent_process_id, process_id, thread_id,
name, start, end, extdata)
.get_query_string());
_database->execute_query(query.set_table_name("rocpd_info_thread_" + _upid)
.set_columns("guid", "nid", "ppid", "pid", "tid", "name",
"start", "end", "extdata")
.set_values(_upid.c_str(), node_id, parent_process_id,
process_id, thread_id, name, start, end,
extdata)
.get_query_string());
auto thread_idx = data_storage::database::get_instance().get_last_insert_id();
auto thread_idx = _database->get_last_insert_id();
_thread_id_map.emplace(thread_id, thread_idx);
return thread_idx;
}
@@ -604,7 +588,7 @@ void
data_processor::flush()
{
// Flush all pending data to the database
data_storage::database::get_instance().flush();
_database->flush();
}
} // namespace rocpd
+10 -7
Просмотреть файл
@@ -22,11 +22,11 @@
#pragma once
#include "core/rocpd/data_storage/database.hpp"
#include <cstdint>
#include <functional>
#include <mutex>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
@@ -102,7 +102,13 @@ private:
};
public:
static data_processor& get_instance();
explicit data_processor(std::shared_ptr<data_storage::database> database);
data_processor() = delete;
data_processor(const data_processor&) = delete;
data_processor& operator=(const data_processor&) = delete;
data_processor(const data_processor&&) = delete;
data_processor& operator=(const data_processor&&) = delete;
size_t insert_string(const char* str);
@@ -203,10 +209,6 @@ public:
void flush();
private:
data_processor();
data_processor(data_processor&) = delete;
data_processor& operator=(const data_processor&) = delete;
void initialize_pmc_event_stmt();
void initialize_event_stmt();
void initialize_sample_stmt();
@@ -220,6 +222,7 @@ private:
void initialize_memory_alloc_stmt();
private:
std::shared_ptr<data_storage::database> _database;
std::unordered_map<std::string, track_name_map> _tracks;
std::unordered_map<pmc_identifier, size_t, pmc_identifier_hash, pmc_identifier_equal>
_pmc_descriptor_map;
+19 -18
Просмотреть файл
@@ -28,6 +28,7 @@
#include <config.hpp>
#include <fstream>
#include <regex>
#include <string>
#include <timemory/environment/types.hpp>
#include <timemory/utility/filepath.hpp>
#include <unistd.h>
@@ -50,17 +51,11 @@ namespace rocpd
{
namespace data_storage
{
database&
database::get_instance()
database::database(int pid, int ppid)
{
static database _instance;
return _instance;
}
database::database()
{
auto db_name = std::string_view{ "rocpd.db" };
auto abs_db_path = rocprofsys::get_database_absolute_path(db_name);
auto _tag = std::to_string(pid);
auto db_name = std::string{ "rocpd" };
auto abs_db_path = rocprofsys::get_database_absolute_path(db_name, _tag);
create_directory_for_database_file(abs_db_path);
ROCPROFSYS_VERBOSE(0, "Database: %s\r\n", abs_db_path.c_str());
@@ -68,6 +63,7 @@ database::database()
"database open failed!");
validate_sqlite3_result(sqlite3_open(abs_db_path.c_str(), &_sqlite3_db), "",
"database open failed!");
m_upid = generate_upid(pid, ppid);
}
database::~database()
@@ -120,8 +116,10 @@ database::initialize_schema()
std::regex guid_pattern("\\{\\{guid\\}\\}");
std::regex view_upid_pattern("\\{\\{view_upid\\}\\}");
query = std::regex_replace(query, upid_pattern, "_" + get_upid());
query = std::regex_replace(query, guid_pattern, get_upid());
auto upid = get_upid();
query = std::regex_replace(query, upid_pattern, "_" + upid);
query = std::regex_replace(query, guid_pattern, upid);
query = std::regex_replace(query, view_upid_pattern, "");
validate_sqlite3_result(
@@ -141,12 +139,15 @@ database::execute_query(const std::string& query)
std::string
database::get_upid()
{
static std::string _upid = []() {
auto n_info = node_info::get_instance();
auto guid = common::md5sum{ n_info.id, getpid(), getppid() };
return guid.hexdigest();
}();
return _upid;
return m_upid;
}
/// Builds the identifier string stored in `m_upid` for this database.
///
/// The value is the hex digest of an md5sum computed over the node id reported by
/// node_info together with the supplied process and parent-process ids.
///
/// @param pid   process id to mix into the digest
/// @param ppid  parent process id to mix into the digest
/// @return hex digest string produced by common::md5sum
std::string
database::generate_upid(const int pid, const int ppid)
{
auto n_info = node_info::get_instance();
auto guid = common::md5sum{ n_info.id, pid, ppid };
return guid.hexdigest();
}
size_t
+29 -23
Просмотреть файл
@@ -38,27 +38,29 @@ static std::mutex _mutex;
class database
{
public:
static database& get_instance();
database(database&) = delete;
database& operator=(database&) = delete;
/// Opens the database for the process identified by @p pid; @p ppid is used
/// together with @p pid to generate the identifier returned by get_upid().
explicit database(int pid, int ppid);
// No default construction and no copying: a pid/ppid pair is always required.
database() = delete;
database(database&) = delete;
database& operator=(database&) = delete;
// NOTE(review): the move operations are defaulted while the class holds raw
// `sqlite3*` members and declares its own destructor. A defaulted move copies the
// raw pointers without clearing the source, so if ~database() closes those handles
// a moved-from object would close them a second time. Confirm whether moves are
// needed at all, or whether they should null out the source handles.
database(database&&) = default;
database& operator=(database&&) = default;
void flush();
~database();
private:
database();
template <typename... Args>
inline void validate_sqlite3_result(int sqlite3_error_code, const char* query,
Args&&... args)
void validate_sqlite3_result(int sqlite3_error_code, const char* query,
Args&&... args)
{
std::stringstream ss;
ss << "\n===========================================================\n";
ss << "Database Error\n";
((ss << args << " "), ...);
ss << "\nQuery: " << query << "\n";
// Fetch error message of last sqlite3_* call
const auto* error_message = sqlite3_errstr(sqlite3_error_code);
switch(sqlite3_error_code)
{
case SQLITE_OK:
@@ -98,7 +100,7 @@ private:
}
break;
}
ss << " [Sqlite3 error: " << sqlite3_errstr(sqlite3_error_code);
ss << " [Sqlite3 error: " << error_message;
ss << " (Extended error message: " << sqlite3_errmsg(_sqlite3_db_temp) << ")]";
throw std::runtime_error(ss.str());
}
@@ -110,17 +112,16 @@ private:
std::is_same_v<std::decay_t<T>, int32_t> ||
std::is_same_v<std::decay_t<T>, uint32_t>),
int> = 0>
inline void bind_value([[maybe_unused]] sqlite3_stmt* stmt,
[[maybe_unused]] int position, [[maybe_unused]] T& _value,
[[maybe_unused]] const std::string& query)
void bind_value([[maybe_unused]] sqlite3_stmt* stmt, [[maybe_unused]] int position,
[[maybe_unused]] T& _value, [[maybe_unused]] const std::string& query)
{
throw std::runtime_error("Unsupported type for binding!");
}
template <typename T,
std::enable_if_t<common::traits::is_string_literal<T>(), int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(
sqlite3_bind_text(stmt, position, _value, -1, SQLITE_STATIC), query.c_str(),
@@ -129,8 +130,8 @@ private:
template <typename T,
std::enable_if_t<std::is_floating_point_v<std::decay_t<T>>, int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(
sqlite3_bind_double(stmt, position, _value), query.c_str(),
@@ -140,8 +141,8 @@ private:
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int64_t> ||
std::is_same_v<std::decay_t<T>, uint64_t>,
int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(sqlite3_bind_int64(stmt, position, _value), query.c_str(),
"Failed to bind int64_t/uint64_t! Position: ", position,
@@ -151,8 +152,8 @@ private:
template <typename T, std::enable_if_t<std::is_same_v<std::decay_t<T>, int32_t> ||
std::is_same_v<std::decay_t<T>, uint32_t>,
int> = 0>
inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
void bind_value(sqlite3_stmt* stmt, int position, T&& _value,
const std::string& query)
{
validate_sqlite3_result(sqlite3_bind_int(stmt, position, _value), query.c_str(),
"Failed to bind int32_t/uint32_t! Position: ", position,
@@ -192,11 +193,16 @@ public:
};
}
static std::string get_upid();
std::string get_upid();
private:
sqlite3* _sqlite3_db{ nullptr };
sqlite3* _sqlite3_db_temp{ nullptr };
static std::string generate_upid(const int pid, const int ppid);
private:
sqlite3* _sqlite3_db{ nullptr };
sqlite3* _sqlite3_db_temp{ nullptr };
std::string m_tag;
std::string m_upid;
};
} // namespace data_storage
-99
Просмотреть файл
@@ -1,99 +0,0 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "json.hpp"
#include <sstream>
namespace rocpd
{
// Factory for json objects: returns a freshly default-constructed instance
// owned by a std::shared_ptr.
std::shared_ptr<json>
json::create()
{
// NOTE(review): plain `new` is presumably used instead of std::make_shared
// because the json constructor is private -- confirm against json.hpp.
return std::shared_ptr<json>(new json());
}
// Associates a heap-allocated copy of `value` with `key`; an entry that already
// exists under the same key is replaced.
void
json::set(const std::string& key, const json_value& value)
{
    auto _stored = std::make_shared<json_value>(value);
    data[key]    = _stored;
}
namespace
{
// Returns `text` with every character that may not appear verbatim inside a
// JSON string literal replaced by its escape sequence (quote, backslash and
// control characters below 0x20).
std::string
escape_json_string(const std::string& text)
{
    static constexpr char hex_digits[] = "0123456789abcdef";

    std::string escaped;
    escaped.reserve(text.size());
    for(const char ch : text)
    {
        switch(ch)
        {
            case '"': escaped += "\\\""; break;
            case '\\': escaped += "\\\\"; break;
            case '\b': escaped += "\\b"; break;
            case '\f': escaped += "\\f"; break;
            case '\n': escaped += "\\n"; break;
            case '\r': escaped += "\\r"; break;
            case '\t': escaped += "\\t"; break;
            default:
            {
                const auto code = static_cast<unsigned char>(ch);
                if(code < 0x20)
                {
                    // Remaining control characters need the \u00XX form.
                    escaped += "\\u00";
                    escaped += hex_digits[(code >> 4) & 0xF];
                    escaped += hex_digits[code & 0xF];
                }
                else
                {
                    escaped += ch;
                }
                break;
            }
        }
    }
    return escaped;
}
}  // namespace

// Serializes this object as `{"key": value, ...}`. Entries are emitted in the
// iteration order of the underlying unordered_map, so the order is unspecified.
// Keys are escaped so the output stays valid JSON for arbitrary key text.
std::string
json::to_string() const
{
    std::ostringstream oss;
    oss << "{";
    bool first = true;
    for(const auto& [key, value] : data)
    {
        if(!first) oss << ", ";
        first = false;
        oss << "\"" << escape_json_string(key) << "\": " << stringify(value);
    }
    oss << "}";
    return oss.str();
}

// Serializes a single value of the json_value variant.
//   - strings are quoted and escaped
//   - bool -> true/false, nullptr -> null
//   - vector<json> -> JSON array of objects
//   - shared_ptr<json> -> nested object (a null pointer is written as null)
//   - int / long long / double -> streamed with the default ostream formatting
// A null `value` pointer is written as null instead of being dereferenced.
std::string
json::stringify(const std::shared_ptr<json_value>& value)
{
    if(!value) return "null";

    std::ostringstream oss;
    std::visit(
        [&oss](auto&& arg) {
            using T = std::decay_t<decltype(arg)>;
            if constexpr(std::is_same_v<T, std::string>)
                oss << "\"" << escape_json_string(arg) << "\"";
            else if constexpr(std::is_same_v<T, bool>)
                oss << (arg ? "true" : "false");
            else if constexpr(std::is_same_v<T, std::nullptr_t>)
                oss << "null";
            else if constexpr(std::is_same_v<T, std::vector<json>>)
            {
                oss << "[";
                bool first = true;
                for(const auto& item : arg)
                {
                    if(!first) oss << ", ";
                    first = false;
                    oss << item.to_string();
                }
                oss << "]";
            }
            else if constexpr(std::is_same_v<T, std::shared_ptr<json>>)
            {
                if(arg)
                    oss << arg->to_string();
                else
                    oss << "null";
            }
            else
            {
                // handle int + long long + double
                oss << arg;
            }
        },
        *value);
    return oss.str();
}
} // namespace rocpd
-57
Просмотреть файл
@@ -1,57 +0,0 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>
namespace rocpd
{
/// Minimal key/value JSON object builder.
///
/// Instances can only be obtained through create() because the constructor is
/// private. Values are stored per key as shared pointers to a json_value
/// variant and the whole object is rendered with to_string().
class json
{
public:
    /// Every value type an entry may hold (scalars, null, arrays of objects
    /// and nested objects).
    using json_value =
        std::variant<std::string, int, double, long long, bool, std::vector<json>,
                     std::nullptr_t, std::shared_ptr<json>>;

    /// Creates a new, empty object.
    static std::shared_ptr<json> create();

    /// Stores `value` under `key`.
    void set(const std::string& key, const json_value& value);

    /// Renders the object as JSON text.
    std::string to_string() const;

private:
    json() = default;

    /// Renders a single stored value.
    static std::string stringify(const std::shared_ptr<json_value>& value);

    std::unordered_map<std::string, std::shared_ptr<json_value>> data;
};
} // namespace rocpd
+30 -4
Просмотреть файл
@@ -30,6 +30,7 @@
#include <memory>
#include <mutex>
#include <stdexcept>
#include <thread>
#include <unistd.h>
using namespace std::chrono_literals;
@@ -45,20 +46,27 @@ constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms;
constexpr auto NUM_OF_THREADS = 1;
} // namespace
buffer_storage::buffer_storage(pid_t _pid)
buffer_storage::buffer_storage()
{
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
m_thread_pool = std::make_unique<PTL::ThreadPool>(NUM_OF_THREADS);
m_thread_pool->initialize_threadpool(NUM_OF_THREADS);
m_task_group = std::make_unique<PTL::TaskGroup<void>>(m_thread_pool.get());
}
void
buffer_storage::start_flushing_thread(pid_t _pid)
{
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
m_task_group->exec([this, _pid]() {
std::ofstream _ofs(filename, std::ios::binary | std::ios::out);
auto filepath = get_buffered_storage_filename(get_root_process_id(), getpid());
std::ofstream _ofs(filepath, std::ios::binary | std::ios::out);
if(!_ofs)
{
std::stringstream _ss;
_ss << "Error opening file for writing: " << filename;
_ss << "Error opening file for writing: " << filepath;
throw std::runtime_error(_ss.str());
}
@@ -114,9 +122,23 @@ buffer_storage::buffer_storage(pid_t _pid)
});
}
// Stops the storage via shutdown() and then tears down the thread pool if it
// exists and still reports itself as alive.
buffer_storage::~buffer_storage()
{
    shutdown();

    const bool _pool_alive = m_thread_pool && m_thread_pool->is_alive();
    if(!_pool_alive)
    {
        return;
    }
    m_thread_pool->destroy_threadpool();
}
void
buffer_storage::shutdown()
{
if(!m_running)
{
return;
}
ROCPROFSYS_DEBUG("Buffer storage shutting down..");
m_running = false;
m_shutdown_condition.notify_all();
@@ -131,7 +153,11 @@ buffer_storage::shutdown()
std::mutex _exit_mutex;
std::unique_lock _exit_lock{ _exit_mutex };
m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; });
m_thread_pool->destroy_threadpool();
if(m_thread_pool && m_thread_pool->is_alive())
{
m_thread_pool->destroy_threadpool();
}
}
void
+9 -6
Просмотреть файл
@@ -36,7 +36,6 @@
#include <stdexcept>
#include <stdint.h>
#include <string.h>
#include <thread>
#include <type_traits>
#include <unistd.h>
@@ -62,9 +61,10 @@ public:
}
constexpr bool is_supported_type = (supported_types::is_supported<T> && ...);
static_assert(is_supported_type, "Supported types are const char*, char*, "
"unsigned long, unsigned int, long, unsigned "
"char, std::vector<unsigned char> and int.");
static_assert(is_supported_type,
"Supported types are const char*, char*, "
"unsigned long, unsigned int, long, unsigned "
"char, std::vector<unsigned char>, double, and int.");
auto arg_size = get_size(values...);
auto total_size = arg_size + sizeof(type) + sizeof(size_t);
@@ -102,9 +102,12 @@ public:
(store_value(values), ...);
}
void start_flushing_thread(pid_t pid);
~buffer_storage();
private:
friend class cache_manager;
buffer_storage(pid_t _pid);
buffer_storage();
void shutdown();
bool is_running() const;
void fragment_memory();
@@ -119,7 +122,7 @@ private:
};
using supported_types = typelist<const char*, char*, uint64_t, int32_t, uint32_t,
std::vector<uint8_t>, uint8_t, int64_t>;
std::vector<uint8_t>, uint8_t, int64_t, double>;
template <typename T>
static constexpr bool is_string_literal_v =
+147 -24
Просмотреть файл
@@ -21,15 +21,91 @@
// SOFTWARE.
#include "cache_manager.hpp"
#include "agent_manager.hpp"
#include "core/config.hpp"
#include "core/trace_cache/storage_parser.hpp"
#include "debug.hpp"
#include "library/runtime.hpp"
#include "trace_cache/cache_utility.hpp"
#include "trace_cache/metadata_registry.hpp"
#include "trace_cache/rocpd_post_processing.hpp"
#include <algorithm>
#include <memory>
#include <vector>
namespace rocprofsys
{
namespace trace_cache
{
namespace
{
// Returns the names (not full paths) of all entries in directory `path`,
// excluding the "." and ".." pseudo-entries.
//
// Throws (via ROCPROFSYS_THROW) when the directory cannot be opened.
std::vector<std::string>
list_dir_files(const std::string& path)
{
    // Own the DIR handle through unique_ptr so closedir() runs on every exit
    // path, including an exception thrown while growing `result`.
    struct dir_closer
    {
        void operator()(DIR* _dir) const { closedir(_dir); }
    };
    std::unique_ptr<DIR, dir_closer> dir{ opendir(path.c_str()) };

    if(dir == nullptr)
    {
        ROCPROFSYS_THROW("Error opening directory: %s", path.c_str());
    }

    std::vector<std::string> result{};
    while(dirent* entry = readdir(dir.get()))
    {
        // Build the std::string once and move it into the result.
        std::string name{ entry->d_name };
        if(name != "." && name != "..")
        {
            result.emplace_back(std::move(name));
        }
    }

    return result;
}
// Pair of cache file paths collected for one process id by get_cache_files().
// A member stays empty when no matching file was found for that process.
struct cache_files
{
// Path of the "buffered_storage_<ppid>_<pid>.bin" file.
std::string buff_storage;
// Path of the "metadata_<ppid>_<pid>.json" file.
std::string metadata;
};
std::map<pid_t, cache_files>
get_cache_files()
{
const auto root_pid = get_root_process_id();
const auto tmp_files = list_dir_files("/tmp/");
std::map<int, cache_files> cache_map{};
auto parse_and_fill_cache = [&](const std::string& filename) {
const std::regex buff_regex(R"(buffered_storage_(\d+)_(\d+)\.bin)");
const std::regex meta_regex(R"(metadata_(\d+)_(\d+)\.json)");
std::smatch match;
if(std::regex_match(filename, match, buff_regex))
{
int parent_pid = std::stoi(match[1]);
int pid = std::stoi(match[2]);
if(parent_pid == root_pid)
{
cache_map[pid].buff_storage = "/tmp/" + filename;
}
}
else if(std::regex_match(filename, match, meta_regex))
{
int parent_pid = std::stoi(match[1]);
int pid = std::stoi(match[2]);
if(parent_pid == root_pid)
{
cache_map[pid].metadata = "/tmp/" + filename;
}
}
};
std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache);
return cache_map;
}
} // namespace
cache_manager&
cache_manager::get_instance()
@@ -38,35 +114,82 @@ cache_manager::get_instance()
return instance;
}
cache_manager::cache_manager()
: m_postprocessing{ m_metadata }
{
m_postprocessing.register_parser_callback(m_parser);
}
void
cache_manager::post_process()
cache_manager::post_process_bulk()
{
if(m_storage.is_running())
if(is_root_process())
{
ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down "
"cache storage. Calling shutdown explicitly..\n");
shutdown();
}
if(m_storage.is_running())
{
ROCPROFSYS_WARNING(2,
"Postprocessing called without previously shutting down "
"cache storage. Calling shutdown explicitly..\n");
shutdown();
}
if(get_use_rocpd())
{
ROCPROFSYS_PRINT(
"Generating rocpd with collected data. This may take a while..\n");
}
post_process_metadata();
m_parser.consume_storage();
}
if(get_use_rocpd())
{
ROCPROFSYS_PRINT(
"Generating rocpd with collected data. This may take a while..\n");
void
cache_manager::post_process_metadata()
{
m_postprocessing.post_process_metadata();
auto _cache_files = get_cache_files();
std::vector<std::thread> rocpd_threads;
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
rocpd_threads.emplace_back([this]() {
auto pid = getpid();
auto ppid = get_root_process_id();
rocpd_post_processing _post_processing(
m_metadata, get_agent_manager_instance(), pid, ppid);
storage_parser _parser(
get_buffered_storage_filename(get_root_process_id(), getpid()));
_post_processing.register_parser_callback(_parser);
_post_processing.post_process_metadata();
_parser.consume_storage();
});
for(const auto& [pid, files] : _cache_files)
{
if(!files.buff_storage.empty() && !files.metadata.empty())
{
rocpd_threads.emplace_back([pid = pid, files = files]() {
ROCPROFSYS_DEBUG(
"Creating database for [%d] from buffered storage "
"file: %s and from metadata file: %s\n",
pid, files.buff_storage.c_str(), files.metadata.c_str());
std::vector<std::shared_ptr<agent>> _agents;
metadata_registry _metadata;
auto res = _metadata.load_from_file(files.metadata, _agents);
if(!res)
{
ROCPROFSYS_WARNING(0,
"Load from file for metadata failed: %s\n",
files.metadata.c_str());
return;
}
agent_manager _agent_manager{ _agents };
auto ppid = get_root_process_id();
rocpd_post_processing _post_processing(_metadata, _agent_manager,
pid, ppid);
storage_parser _parser(files.buff_storage);
_post_processing.register_parser_callback(_parser);
_post_processing.post_process_metadata();
_parser.consume_storage();
std::remove(files.metadata.c_str()); // Remove metadata file
});
}
}
for(auto& thread : rocpd_threads)
{
thread.join();
}
}
}
}
void
+6 -9
Просмотреть файл
@@ -23,7 +23,6 @@
#pragma once
#include "buffer_storage.hpp"
#include "core/trace_cache/rocpd_post_processing.hpp"
#include "metadata_registry.hpp"
#include "storage_parser.hpp"
@@ -37,24 +36,22 @@ class cache_manager
public:
static cache_manager& get_instance();
buffer_storage& get_buffer_storage() { return m_storage; }
metadata_registry& get_metadata_regsitry() { return m_metadata; }
metadata_registry& get_metadata_registry() { return m_metadata; }
void shutdown();
void post_process();
void post_process_bulk();
private:
void post_process_metadata();
cache_manager();
cache_manager() = default;
buffer_storage m_storage{ getpid() };
metadata_registry m_metadata;
storage_parser m_parser{ getpid() };
rocpd_post_processing m_postprocessing;
buffer_storage m_storage;
metadata_registry m_metadata;
};
inline metadata_registry&
get_metadata_registry()
{
return cache_manager::get_instance().get_metadata_regsitry();
return cache_manager::get_instance().get_metadata_registry();
}
inline buffer_storage&
+13 -1
Просмотреть файл
@@ -21,6 +21,7 @@
// SOFTWARE.
#pragma once
#include "library/runtime.hpp"
#include "sample_type.hpp"
#include <array>
#include <string>
@@ -33,7 +34,18 @@ namespace trace_cache
{
constexpr size_t buffer_size = 100 * tim::units::megabyte;
constexpr size_t flush_threshold = 80 * tim::units::megabyte;
const auto filename = "/tmp/buffered_storage_" + std::to_string(getpid()) + ".bin";
// Directory (with trailing slash) in which all trace-cache files are placed.
const std::string tmp_directory = "/tmp/";

// Builds "<tmp_directory>buffered_storage_<ppid>_<pid>.bin".
const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) {
    std::string _path = tmp_directory;
    _path += "buffered_storage_";
    _path += std::to_string(ppid);
    _path += "_";
    _path += std::to_string(pid);
    _path += ".bin";
    return _path;
};

// Builds "<tmp_directory>metadata_<ppid>_<pid>.json".
const auto get_metadata_filepath = [](const int& ppid, const int& pid) {
    std::string _path = tmp_directory;
    _path += "metadata_";
    _path += std::to_string(ppid);
    _path += "_";
    _path += std::to_string(pid);
    _path += ".json";
    return _path;
};
constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t);
using buffer_array_t = std::array<uint8_t, buffer_size>;
+477 -2
Просмотреть файл
@@ -21,10 +21,15 @@
// SOFTWARE.
#include "metadata_registry.hpp"
#include "agent_manager.hpp"
#include "core/debug.hpp"
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <nlohmann/json.hpp>
namespace rocprofsys
{
namespace trace_cache
@@ -51,6 +56,428 @@ assign_set_to_vector(T& result)
{
return [&result](const auto& _data) { result.assign(_data.cbegin(), _data.cend()); };
}
// Serializes the process record (pid, ppid, command, start, end) into a JSON
// object using the member names as keys.
nlohmann::json
to_json(const info::process& process)
{
    nlohmann::json _obj = {
        { "pid", process.pid },
        { "ppid", process.ppid },
        { "command", process.command },
        { "start", process.start },
        { "end", process.end },
    };
    return _obj;
}
// Rebuilds an info::process from the object produced by to_json(info::process).
//
// Uses at() rather than operator[]: on a const json, operator[] with a missing
// key is undefined behaviour, whereas at() throws an exception the caller can
// handle. start/end are read as uint32_t to match the struct members.
info::process
from_json_process(const nlohmann::json& _json)
{
    info::process p;
    p.pid     = _json.at("pid").get<pid_t>();
    p.ppid    = _json.at("ppid").get<pid_t>();
    p.command = _json.at("command").get<std::string>();
    p.start   = _json.at("start").get<uint32_t>();
    p.end     = _json.at("end").get<uint32_t>();
    return p;
}
// Serializes a PMC (counter) description. The enum `type` is stored as int32_t
// and agent_type_index / event_code / instance_id as int; all other members
// are stored as-is.
nlohmann::json
to_json(const info::pmc& pmc)
{
    nlohmann::json _obj = {
        { "type", static_cast<int32_t>(pmc.type) },
        { "agent_type_index", static_cast<int>(pmc.agent_type_index) },
        { "target_arch", pmc.target_arch },
        { "event_code", static_cast<int>(pmc.event_code) },
        { "instance_id", static_cast<int>(pmc.instance_id) },
        { "name", pmc.name },
        { "symbol", pmc.symbol },
        { "description", pmc.description },
        { "long_description", pmc.long_description },
        { "component", pmc.component },
        { "units", pmc.units },
        { "value_type", pmc.value_type },
        { "block", pmc.block },
        { "expression", pmc.expression },
        { "is_constant", pmc.is_constant },
        { "is_derived", pmc.is_derived },
        { "extdata", pmc.extdata },
    };
    return _obj;
}
// Rebuilds an info::pmc from the object produced by to_json(info::pmc).
//
// Every field is fetched with at() so that a missing key raises an exception
// instead of the undefined behaviour of operator[] on a const json.
info::pmc
from_json_pmc(const nlohmann::json& _json)
{
    info::pmc p;
    p.type             = static_cast<agent_type>(_json.at("type").get<int32_t>());
    p.agent_type_index = _json.at("agent_type_index").get<int32_t>();
    p.target_arch      = _json.at("target_arch").get<std::string>();
    p.event_code       = _json.at("event_code").get<int32_t>();
    p.instance_id      = _json.at("instance_id").get<int32_t>();
    p.name             = _json.at("name").get<std::string>();
    p.symbol           = _json.at("symbol").get<std::string>();
    p.description      = _json.at("description").get<std::string>();
    p.long_description = _json.at("long_description").get<std::string>();
    p.component        = _json.at("component").get<std::string>();
    p.units            = _json.at("units").get<std::string>();
    p.value_type       = _json.at("value_type").get<std::string>();
    p.block            = _json.at("block").get<std::string>();
    p.expression       = _json.at("expression").get<std::string>();
    p.is_constant      = _json.at("is_constant").get<int32_t>();
    p.is_derived       = _json.at("is_derived").get<int32_t>();
    p.extdata          = _json.at("extdata").get<std::string>();
    return p;
}
// Serializes a thread record; thread_id is narrowed to int32_t, the remaining
// members are stored unchanged.
nlohmann::json
to_json(const info::thread& thread)
{
    nlohmann::json _obj = {
        { "parent_process_id", thread.parent_process_id },
        { "process_id", thread.process_id },
        { "thread_id", static_cast<int32_t>(thread.thread_id) },
        { "start", thread.start },
        { "end", thread.end },
        { "extdata", thread.extdata },
    };
    return _obj;
}
// Rebuilds an info::thread from the object produced by to_json(info::thread).
//
// at() is used so that a missing key throws instead of triggering the
// undefined behaviour of operator[] on a const json.
// NOTE(review): start/end are read as int32_t as before; confirm this matches
// the width of info::thread::start/end.
info::thread
from_json_thread(const nlohmann::json& _json)
{
    info::thread t;
    t.parent_process_id = _json.at("parent_process_id").get<int32_t>();
    t.process_id        = _json.at("process_id").get<int32_t>();
    t.thread_id         = _json.at("thread_id").get<int32_t>();
    t.start             = _json.at("start").get<int32_t>();
    t.end               = _json.at("end").get<int32_t>();
    t.extdata           = _json.at("extdata").get<std::string>();
    return t;
}
// Serializes a track record. An unset optional thread_id is written as JSON
// null, otherwise as an int32_t.
nlohmann::json
to_json(const info::track& track)
{
    nlohmann::json _obj;
    _obj["track_name"] = track.track_name;
    _obj["thread_id"]  = track.thread_id.has_value()
                             ? nlohmann::json(static_cast<int32_t>(*track.thread_id))
                             : nlohmann::json(nullptr);
    _obj["extdata"]    = track.extdata;
    return _obj;
}
// Rebuilds an info::track from the object produced by to_json(info::track).
// A JSON null thread_id maps back to an empty optional.
//
// at() is used so that a missing key throws instead of triggering the
// undefined behaviour of operator[] on a const json.
info::track
from_json_track(const nlohmann::json& _json)
{
    info::track t;
    t.track_name = _json.at("track_name").get<std::string>();

    const auto& thread_id = _json.at("thread_id");
    if(thread_id.is_null())
    {
        t.thread_id = std::nullopt;
    }
    else
    {
        t.thread_id = thread_id.get<int32_t>();
    }

    t.extdata = _json.at("extdata").get<std::string>();
    return t;
}
#if ROCPROFSYS_USE_ROCM
// Serializes a code-object load record. 64-bit ids, addresses and sizes are
// stored as long long, the storage type as int and the URI as a string. The
// agent handle comes from `agent_id` or `rocp_agent` depending on the
// rocprofiler version.
nlohmann::json
to_json(const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
{
#    if(ROCPROFILER_VERSION >= 600)
    const auto _agent_handle = static_cast<long long>(code_object.agent_id.handle);
#    else
    const auto _agent_handle = static_cast<long long>(code_object.rocp_agent.handle);
#    endif

    nlohmann::json _obj = {
        { "code_object_id", static_cast<long long>(code_object.code_object_id) },
        { "uri", std::string(code_object.uri) },
        { "load_base", static_cast<long long>(code_object.load_base) },
        { "load_size", static_cast<long long>(code_object.load_size) },
        { "load_delta", static_cast<long long>(code_object.load_delta) },
        { "storage_type", static_cast<int>(code_object.storage_type) },
        { "agent_id_handle", _agent_handle },
    };
    return _obj;
}
// Rebuilds a code-object load record from the object produced by the matching
// to_json overload.
//
// at() is used so that a missing key throws instead of triggering the
// undefined behaviour of operator[] on a const json.
//
// The URI is copied into a buffer allocated with new[] because the target
// struct only holds a `const char*`. Nothing in this function releases it.
// NOTE(review): ownership of that buffer is presumably taken over by whoever
// stores the record -- confirm, otherwise it leaks.
rocprofiler_callback_tracing_code_object_load_data_t
from_json_code_object(const nlohmann::json& _json)
{
    rocprofiler_callback_tracing_code_object_load_data_t co = {};
    co.code_object_id = _json.at("code_object_id").get<long long>();

    const auto uri_str = _json.at("uri").get<std::string>();
    auto*      uri_buf = new char[uri_str.size() + 1];
    // size() + 1 copies the terminating '\0' as well.
    strncpy(uri_buf, uri_str.c_str(), uri_str.size() + 1);
    co.uri = uri_buf;

    co.load_base    = _json.at("load_base").get<long long>();
    co.load_size    = _json.at("load_size").get<long long>();
    co.load_delta   = _json.at("load_delta").get<long long>();
    co.storage_type = static_cast<rocprofiler_code_object_storage_type_t>(
        _json.at("storage_type").get<int>());

    auto handle = _json.at("agent_id_handle").get<long long>();
#    if(ROCPROFILER_VERSION >= 600)
    co.agent_id.handle = handle;
#    else
    co.rocp_agent.handle = handle;
#    endif
    return co;
}
// Serializes a kernel-symbol registration record. 64-bit ids and the kernel
// object are stored as long long, the size/count fields as int and the kernel
// name as a string.
nlohmann::json
to_json(const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
            kernel_symbol)
{
    nlohmann::json _obj = {
        { "kernel_id", static_cast<long long>(kernel_symbol.kernel_id) },
        { "code_object_id", static_cast<long long>(kernel_symbol.code_object_id) },
        { "kernel_name", std::string(kernel_symbol.kernel_name) },
        { "kernel_object", static_cast<long long>(kernel_symbol.kernel_object) },
        { "kernarg_segment_size", static_cast<int>(kernel_symbol.kernarg_segment_size) },
        { "kernarg_segment_alignment",
          static_cast<int>(kernel_symbol.kernarg_segment_alignment) },
        { "group_segment_size", static_cast<int>(kernel_symbol.group_segment_size) },
        { "private_segment_size", static_cast<int>(kernel_symbol.private_segment_size) },
        { "sgpr_count", static_cast<int>(kernel_symbol.sgpr_count) },
        { "arch_vgpr_count", static_cast<int>(kernel_symbol.arch_vgpr_count) },
        { "accum_vgpr_count", static_cast<int>(kernel_symbol.accum_vgpr_count) },
    };
    return _obj;
}
// Rebuilds a kernel-symbol registration record from the object produced by the
// matching to_json overload.
//
// Fixes relative to the previous version:
//  * The kernel name was copied with a length of sizeof(ks.kernel_name) + 1,
//    i.e. the size of a pointer plus one. Longer names were truncated without
//    a terminating '\0'; shorter names made strncpy write past the end of the
//    new[] buffer. The copy length is now the string length plus terminator.
//  * at() replaces operator[], whose behaviour on a const json with a missing
//    key is undefined; at() throws instead.
//
// The name buffer is allocated with new[] because the target struct only holds
// a `const char*`. Nothing in this function releases it.
// NOTE(review): ownership is presumably taken over by whoever stores the
// record -- confirm, otherwise it leaks.
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t
from_json_kernel_symbol(const nlohmann::json& _json)
{
    rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t ks = {};
    ks.kernel_id      = _json.at("kernel_id").get<long long>();
    ks.code_object_id = _json.at("code_object_id").get<long long>();

    const auto kernel_name_str = _json.at("kernel_name").get<std::string>();
    auto*      kernel_name_buf = new char[kernel_name_str.size() + 1];
    // size() + 1 copies the terminating '\0' as well.
    strncpy(kernel_name_buf, kernel_name_str.c_str(), kernel_name_str.size() + 1);
    ks.kernel_name = kernel_name_buf;

    ks.kernel_object             = _json.at("kernel_object").get<long long>();
    ks.kernarg_segment_size      = _json.at("kernarg_segment_size").get<int>();
    ks.kernarg_segment_alignment = _json.at("kernarg_segment_alignment").get<int>();
    ks.group_segment_size        = _json.at("group_segment_size").get<int>();
    ks.private_segment_size      = _json.at("private_segment_size").get<int>();
    ks.sgpr_count                = _json.at("sgpr_count").get<int>();
    ks.arch_vgpr_count           = _json.at("arch_vgpr_count").get<int>();
    ks.accum_vgpr_count          = _json.at("accum_vgpr_count").get<int>();
    return ks;
}
#endif
// Serializes an agent description; every member is stored under its own name
// without conversion.
nlohmann::json
to_json(const agent& _agent)
{
    nlohmann::json _obj = {
        { "type", _agent.type },
        { "handle", _agent.handle },
        { "device_id", _agent.device_id },
        { "node_id", _agent.node_id },
        { "logical_node_id", _agent.logical_node_id },
        { "logical_node_type_id", _agent.logical_node_type_id },
        { "name", _agent.name },
        { "model_name", _agent.model_name },
        { "vendor_name", _agent.vendor_name },
        { "product_name", _agent.product_name },
        { "device_type_index", _agent.device_type_index },
    };
    return _obj;
}
// Rebuilds an agent from the object produced by to_json(const agent&) and
// returns it in a shared_ptr.
//
// at() is used so that a missing key throws instead of triggering the
// undefined behaviour of operator[] on a const json.
std::shared_ptr<agent>
from_json_agent(const nlohmann::json& _json)
{
    auto a                  = std::make_shared<agent>();
    a->type                 = _json.at("type").get<agent_type>();
    a->handle               = _json.at("handle").get<uint64_t>();
    a->device_id            = _json.at("device_id").get<int32_t>();
    a->node_id              = _json.at("node_id").get<int32_t>();
    a->logical_node_id      = _json.at("logical_node_id").get<int32_t>();
    a->logical_node_type_id = _json.at("logical_node_type_id").get<int32_t>();
    a->name                 = _json.at("name").get<std::string>();
    a->model_name           = _json.at("model_name").get<std::string>();
    a->vendor_name          = _json.at("vendor_name").get<std::string>();
    a->product_name         = _json.at("product_name").get<std::string>();
    a->device_type_index    = _json.at("device_type_index").get<int32_t>();
    return a;
}
// Serializes the whole metadata registry plus the given agents into one JSON
// document with the keys: process, pmc_infos, threads, tracks, queues,
// streams, strings, (ROCm builds only) code_objects, kernel_symbols, and
// agents. Null entries in `_agents` are skipped.
//
// Every list key is always written, as an empty array when there is nothing to
// store. Previously "queues", "streams", "strings" and "agents" were only
// created by the first push_back, so a registry with e.g. no queues produced a
// document without that key, which the reader then accessed unconditionally.
nlohmann::json
to_json(const metadata_registry&                   _registry,
        const std::vector<std::shared_ptr<agent>>& _agents)
{
    nlohmann::json result;

    auto process_info = _registry.get_process_info();
    result["process"] = to_json(process_info);

    auto           pmc_list  = _registry.get_pmc_info_list();
    nlohmann::json pmc_array = nlohmann::json::array();
    for(const auto& pmc : pmc_list)
    {
        pmc_array.push_back(to_json(pmc));
    }
    result["pmc_infos"] = pmc_array;

    auto           thread_list  = _registry.get_thread_info_list();
    nlohmann::json thread_array = nlohmann::json::array();
    for(const auto& thread : thread_list)
    {
        thread_array.push_back(to_json(thread));
    }
    result["threads"] = thread_array;

    auto           track_list  = _registry.get_track_info_list();
    nlohmann::json track_array = nlohmann::json::array();
    for(const auto& track : track_list)
    {
        track_array.push_back(to_json(track));
    }
    result["tracks"] = track_array;

    // Handles are stored as signed 64-bit values.
    auto           queue_list  = _registry.get_queue_list();
    nlohmann::json queue_array = nlohmann::json::array();
    for(const auto& queue : queue_list)
    {
        queue_array.push_back(static_cast<long long>(queue));
    }
    result["queues"] = queue_array;

    auto           stream_list  = _registry.get_stream_list();
    nlohmann::json stream_array = nlohmann::json::array();
    for(const auto& stream : stream_list)
    {
        stream_array.push_back(static_cast<long long>(stream));
    }
    result["streams"] = stream_array;

    auto           string_list  = _registry.get_string_list();
    nlohmann::json string_array = nlohmann::json::array();
    for(const auto& str : string_list)
    {
        string_array.push_back(str);
    }
    result["strings"] = string_array;

#if ROCPROFSYS_USE_ROCM
    auto           code_object_list  = _registry.get_code_object_list();
    nlohmann::json code_object_array = nlohmann::json::array();
    for(const auto& code_object : code_object_list)
    {
        code_object_array.push_back(to_json(code_object));
    }
    result["code_objects"] = code_object_array;

    auto           kernel_symbol_list  = _registry.get_kernel_symbol_list();
    nlohmann::json kernel_symbol_array = nlohmann::json::array();
    for(const auto& kernel_symbol : kernel_symbol_list)
    {
        kernel_symbol_array.push_back(to_json(kernel_symbol));
    }
    result["kernel_symbols"] = kernel_symbol_array;
#endif

    nlohmann::json agent_array = nlohmann::json::array();
    for(const auto& agent : _agents)
    {
        if(agent == nullptr)
        {
            continue;
        }
        agent_array.push_back(to_json(*agent));
    }
    result["agents"] = agent_array;

    return result;
}
// Populates `_registry` and `_agents` from a document produced by
// to_json(metadata_registry, agents).
//
// "process", "pmc_infos", "threads" and "tracks" are required and fetched with
// at(), which throws when a key is missing. All other lists are optional: the
// writer may leave out "queues", "streams", "strings" or "agents" when they
// are empty, and the code-object keys only exist in ROCm builds. Previously
// the optional keys were read with operator[] on a const json, which is
// undefined behaviour for a missing key.
//
// `_agents` is cleared (with a warning) if it is not empty on entry.
void
from_json(metadata_registry& _registry, std::vector<std::shared_ptr<agent>>& _agents,
          const nlohmann::json& _json)
{
    // Calls `_fn` for every element stored under `_key`; a missing key is
    // treated as an empty list.
    auto for_each_optional = [&_json](const char* _key, auto&& _fn) {
        const auto itr = _json.find(_key);
        if(itr == _json.end())
        {
            return;
        }
        for(const auto& _item : *itr)
        {
            _fn(_item);
        }
    };

    auto process = from_json_process(_json.at("process"));
    _registry.set_process(process);

    for(const auto& pmc_json : _json.at("pmc_infos"))
    {
        auto pmc = from_json_pmc(pmc_json);
        _registry.add_pmc_info(pmc);
    }

    for(const auto& thread_json : _json.at("threads"))
    {
        auto thread = from_json_thread(thread_json);
        _registry.add_thread_info(thread);
    }

    for(const auto& track_json : _json.at("tracks"))
    {
        auto track = from_json_track(track_json);
        _registry.add_track(track);
    }

    for_each_optional("queues", [&_registry](const nlohmann::json& queue_json) {
        auto handle = queue_json.get<long long>();
        _registry.add_queue(static_cast<uint64_t>(handle));
    });

    for_each_optional("streams", [&_registry](const nlohmann::json& stream_json) {
        auto handle = stream_json.get<long long>();
        _registry.add_stream(static_cast<uint64_t>(handle));
    });

    for_each_optional("strings", [&_registry](const nlohmann::json& string_json) {
        auto str = string_json.get<std::string>();
        _registry.add_string(str);
    });

#if ROCPROFSYS_USE_ROCM
    for_each_optional("code_objects",
                      [&_registry](const nlohmann::json& code_object_json) {
                          auto code_object = from_json_code_object(code_object_json);
                          _registry.add_code_object(code_object);
                      });

    for_each_optional("kernel_symbols",
                      [&_registry](const nlohmann::json& kernel_symbol_json) {
                          auto kernel_symbol =
                              from_json_kernel_symbol(kernel_symbol_json);
                          _registry.add_kernel_symbol(kernel_symbol);
                      });
#endif

    if(!_agents.empty())
    {
        ROCPROFSYS_WARNING(0, "Given agents vector is not empty. Clearing it..");
        _agents.clear();
    }

    for_each_optional("agents", [&_agents](const nlohmann::json& agent_json) {
        _agents.push_back(from_json_agent(agent_json));
    });
}
} // namespace
void
@@ -279,8 +706,8 @@ metadata_registry::get_kernel_symbol_list() const
return result;
}
// As the underlying implementation of callback_name_info_t resizes the category storage
// during emplace, this special method is required
// As the underlying implementation of callback_name_info_t resizes the category
// storage during emplace, this special method is required
void
metadata_registry::overwrite_callback_names(
std::initializer_list<
@@ -404,5 +831,53 @@ metadata_registry::metadata_registry()
#endif
}
// Writes this registry and `_agents` as a JSON document to `filepath`.
//
// Returns true only when the file was opened AND the whole document was
// written and closed without a stream error. Previously a failed write (for
// example a full disk) was still reported as success. Any exception raised
// while serializing or writing results in false.
bool
metadata_registry::save_to_file(const std::string&                         filepath,
                                const std::vector<std::shared_ptr<agent>>& _agents) const
{
    try
    {
        auto json        = to_json(*this, _agents);
        auto json_string = json.dump();

        std::ofstream file(filepath);
        if(!file.is_open())
        {
            return false;
        }

        file << json_string;
        // close() flushes the buffer and sets failbit if that fails.
        file.close();

        return !file.fail();
    } catch(const std::exception&)
    {
        return false;
    }
}
bool
metadata_registry::load_from_file(const std::string& filepath,
std::vector<std::shared_ptr<agent>>& _agents)
{
try
{
std::ifstream file(filepath);
if(!file.is_open())
{
return false;
}
nlohmann::json json;
file >> json;
file.close();
rocprofsys::trace_cache::from_json(*this, _agents, json);
return true;
} catch(const std::exception& e)
{
return false;
}
}
} // namespace trace_cache
} // namespace rocprofsys
+8
Просмотреть файл
@@ -30,6 +30,7 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <optional>
#if ROCPROFSYS_USE_ROCM > 0
# include <rocprofiler-sdk/callback_tracing.h>
@@ -56,6 +57,8 @@ struct process
pid_t pid; // < Unique
pid_t ppid;
std::string command;
uint32_t start;
uint32_t end;
};
template <typename Category>
@@ -197,6 +200,11 @@ struct metadata_registry
std::vector<uint64_t> get_stream_list() const;
std::vector<std::string_view> get_string_list() const;
bool save_to_file(const std::string& filepath,
const std::vector<std::shared_ptr<agent>>& _agents) const;
bool load_from_file(const std::string& filepath,
std::vector<std::shared_ptr<agent>>& _agents);
#if ROCPROFSYS_USE_ROCM > 0
void add_code_object(
const rocprofiler_callback_tracing_code_object_load_data_t& code_object);
+152 -113
Просмотреть файл
@@ -22,17 +22,18 @@
#include "trace_cache/rocpd_post_processing.hpp"
#include "agent_manager.hpp"
#include "common.hpp"
#include "config.hpp"
#include "debug.hpp"
#include "library/thread_info.hpp"
#include "node_info.hpp"
#include "rocpd/data_processor.hpp"
#include "rocpd/data_storage/database.hpp"
#include "trace_cache/metadata_registry.hpp"
#include "trace_cache/sample_type.hpp"
#include "trace_cache/storage_parser.hpp"
#include <cstdint>
#include <limits>
#include <memory>
#include <sstream>
#include <stdexcept>
#include <string>
@@ -49,11 +50,6 @@ namespace trace_cache
{
namespace
{
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
#if ROCPROFSYS_USE_ROCM > 0
auto
@@ -69,6 +65,12 @@ get_handle_from_code_object(
#endif
} // namespace
// Returns the data processor held by this post-processing instance
// (a copy of the m_data_processor shared_ptr).
std::shared_ptr<rocpd::data_processor>
rocpd_post_processing::get_data_processor() const
{
return m_data_processor;
}
postprocessing_callback
rocpd_post_processing::get_kernel_dispatch_callback() const
{
@@ -76,17 +78,16 @@ rocpd_post_processing::get_kernel_dispatch_callback() const
#if ROCPROFSYS_USE_ROCM > 0
auto _kds = static_cast<const struct kernel_dispatch_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto agent_primary_key =
agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id;
m_agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id;
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_kds.thread_id);
data_processor->map_thread_id_to_primary_key(_kds.thread_id);
auto category_id = data_processor.insert_string(
auto category_id = data_processor->insert_string(
trait::name<category::rocm_kernel_dispatch>::value);
auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id);
@@ -97,17 +98,17 @@ rocpd_post_processing::get_kernel_dispatch_callback() const
return;
}
auto region_name_primary_key = data_processor.insert_string(
auto region_name_primary_key = data_processor->insert_string(
tim::demangle(kernel_symbol->kernel_name).c_str());
auto stack_id = _kds.correlation_id_internal;
auto parent_stack_id = _kds.correlation_id_ancestor;
auto correlation_id = 0;
auto event_id = data_processor.insert_event(category_id, stack_id,
parent_stack_id, correlation_id);
auto event_id = data_processor->insert_event(category_id, stack_id,
parent_stack_id, correlation_id);
data_processor.insert_kernel_dispatch(
data_processor->insert_kernel_dispatch(
n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id,
_kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle,
_kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size,
@@ -125,36 +126,35 @@ rocpd_post_processing::get_memory_copy_callback() const
#if ROCPROFSYS_USE_ROCM > 0
auto _mcs = static_cast<const struct memory_copy_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto _name = std::string{ m_metadata.get_buffer_name_info().at(
static_cast<rocprofiler_buffer_tracing_kind_t>(_mcs.kind),
static_cast<rocprofiler_tracing_operation_t>(_mcs.operation)) };
auto name_primary_key = data_processor.insert_string(_name.c_str());
auto name_primary_key = data_processor->insert_string(_name.c_str());
auto category_primary_key =
data_processor.insert_string(trait::name<category::rocm_memory_copy>::value);
data_processor->insert_string(trait::name<category::rocm_memory_copy>::value);
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_mcs.thread_id);
data_processor->map_thread_id_to_primary_key(_mcs.thread_id);
auto dst_agent_primary_key =
agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
m_agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
auto src_agent_primary_key =
agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
m_agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
auto stack_id = _mcs.correlation_id_internal;
auto parent_stack_id = _mcs.correlation_id_ancestor;
auto correlation_id = 0;
auto queue_id = 0;
auto event_primary_key = data_processor.insert_event(
auto event_primary_key = data_processor->insert_event(
category_primary_key, stack_id, parent_stack_id, correlation_id);
data_processor.insert_memory_copy(
data_processor->insert_memory_copy(
n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp,
_mcs.end_timestamp, name_primary_key, dst_agent_primary_key,
_mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value,
@@ -213,12 +213,11 @@ rocpd_post_processing::get_memory_allocate_callback() const
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
# if ROCPROFSYS_USE_ROCM > 0
auto _mas = static_cast<const struct memory_allocate_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_mas.thread_id);
data_processor->map_thread_id_to_primary_key(_mas.thread_id);
auto agent_primary_key = std::optional<uint64_t>{};
const auto invalid_context = ROCPROFILER_CONTEXT_NONE;
@@ -226,7 +225,7 @@ rocpd_post_processing::get_memory_allocate_callback() const
{
{
agent_primary_key =
agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id;
m_agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id;
}
const auto* _name = m_metadata.get_buffer_name_info().at(
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
@@ -239,13 +238,13 @@ rocpd_post_processing::get_memory_allocate_callback() const
auto correlation_id = 0;
auto queue_id = 0;
auto category_primary_key = data_processor.insert_string(
auto category_primary_key = data_processor->insert_string(
trait::name<category::rocm_memory_allocate>::value);
auto event_primary_key = data_processor.insert_event(
auto event_primary_key = data_processor->insert_event(
category_primary_key, stack_id, parent_stack_id, correlation_id);
data_processor.insert_memory_alloc(
data_processor->insert_memory_alloc(
n_info.id, process.pid, thread_primary_key, agent_primary_key,
type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp,
_mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle,
@@ -306,73 +305,100 @@ rocpd_post_processing::get_region_callback() const
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
#if ROCPROFSYS_USE_ROCM > 0
auto _rs = static_cast<const struct region_sample&>(parsed);
auto& data_processor = get_data_processor();
auto data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_rs.thread_id);
data_processor->map_thread_id_to_primary_key(_rs.thread_id);
auto name_primary_key = data_processor.insert_string(_rs.name.c_str());
auto category_primary_key = data_processor.insert_string(_rs.category.c_str());
auto name_primary_key = data_processor->insert_string(_rs.name.c_str());
auto category_primary_key = data_processor->insert_string(_rs.category.c_str());
size_t stack_id = _rs.correlation_id_internal;
size_t parent_stack_id = _rs.correlation_id_ancestor;
size_t correlation_id = 0;
auto event_primary_key =
data_processor.insert_event(category_primary_key, stack_id, parent_stack_id,
correlation_id, _rs.call_stack.c_str());
data_processor->insert_event(category_primary_key, stack_id, parent_stack_id,
correlation_id, _rs.call_stack.c_str());
auto args = parse_args(_rs.args_str);
for(const auto& arg : args)
{
data_processor.insert_args(event_primary_key, arg.arg_number,
arg.arg_type.c_str(), arg.arg_name.c_str(),
arg.arg_value.c_str());
data_processor->insert_args(event_primary_key, arg.arg_number,
arg.arg_type.c_str(), arg.arg_name.c_str(),
arg.arg_value.c_str());
}
data_processor.insert_region(n_info.id, process.pid, thread_primary_key,
_rs.start_timestamp, _rs.end_timestamp,
name_primary_key, event_primary_key);
data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
_rs.start_timestamp, _rs.end_timestamp,
name_primary_key, event_primary_key);
#endif
};
}
/// Builds the post-processing callback for backtrace region samples.
///
/// The returned callable expects `parsed` to actually be a
/// `backtrace_region_sample`; it is down-cast without a runtime check. For every
/// sample it inserts the name/category strings, an event carrying the call stack,
/// line info and extra data, a region spanning the sample's start/end timestamps,
/// and a sample on the sample's track at the start timestamp.
///
/// @return Callable that writes one backtrace region sample through this object's
///         data processor.
postprocessing_callback
rocpd_post_processing::get_backtrace_sample_callback() const
{
    return [&](const storage_parsed_type_base& parsed) {
        // Bind by reference: copying the sample would deep-copy six std::string
        // members per callback, and every use below only reads from it.
        const auto& _bts = static_cast<const struct backtrace_region_sample&>(parsed);

        const auto  data_processor = get_data_processor();
        const auto& n_info         = node_info::get_instance();
        const auto  process        = m_metadata.get_process_info();

        const auto thread_primary_key =
            data_processor->map_thread_id_to_primary_key(_bts.thread_id);
        const auto name_primary_key = data_processor->insert_string(_bts.name.c_str());
        const auto category_primary_key =
            data_processor->insert_string(_bts.category.c_str());

        // Stack id, parent stack id and correlation id are all written as 0 here.
        const auto event_primary_key = data_processor->insert_event(
            category_primary_key, 0, 0, 0, _bts.call_stack.c_str(),
            _bts.line_info.c_str(), _bts.extdata.c_str());

        data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
                                      _bts.start_timestamp, _bts.end_timestamp,
                                      name_primary_key, event_primary_key);
        data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp,
                                      event_primary_key);
    };
}
postprocessing_callback
rocpd_post_processing::get_in_time_sample_callback() const
{
return [&](const storage_parsed_type_base& parsed) {
auto _its = static_cast<const struct in_time_sample&>(parsed);
auto& data_processor = get_data_processor();
auto track_primary_key = data_processor.insert_string(_its.track_name.c_str());
auto _its = static_cast<const struct in_time_sample&>(parsed);
auto data_processor = get_data_processor();
auto track_primary_key = data_processor->insert_string(_its.track_name.c_str());
auto event_id = data_processor.insert_event(
auto event_id = data_processor->insert_event(
track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id,
_its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str());
data_processor.insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id,
"{}");
data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns,
event_id, "{}");
};
}
postprocessing_callback
rocpd_post_processing::get_pmc_event_with_sample_callback() const
{
return [&](const storage_parsed_type_base& parsed) {
auto _pmc = static_cast<const struct pmc_event_with_sample&>(parsed);
auto& data_processor = get_data_processor();
auto track_primary_key = data_processor.insert_string(_pmc.track_name.c_str());
auto _pmc = static_cast<const struct pmc_event_with_sample&>(parsed);
auto data_processor = get_data_processor();
auto track_primary_key = data_processor->insert_string(_pmc.track_name.c_str());
auto& agent_manager = agent_manager::get_instance();
auto agent_primary_key =
agent_manager.get_agent_by_handle(_pmc.agent_handle).base_id;
auto agent_primary_key =
m_agent_manager
.get_agent_by_id(_pmc.device_id,
static_cast<agent_type>(_pmc.device_type))
.base_id;
auto event_id = data_processor.insert_event(
auto event_id = data_processor->insert_event(
track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id,
_pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str());
data_processor.insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id,
"{}");
data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns,
event_id, "{}");
data_processor.insert_pmc_event(event_id, agent_primary_key,
_pmc.pmc_info_name.c_str(), _pmc.value);
data_processor->insert_pmc_event(event_id, agent_primary_key,
_pmc.pmc_info_name.c_str(), _pmc.value);
};
}
@@ -450,22 +476,21 @@ rocpd_post_processing::get_amd_smi_sample_callback() const
return [&](const storage_parsed_type_base& parsed) {
auto _amd_smi = static_cast<const struct amd_smi_sample&>(parsed);
auto& data_processor = get_data_processor();
auto data_processor = get_data_processor();
const auto* _name = trait::name<category::amd_smi>::value;
auto name_primary_key = data_processor.insert_string(_name);
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
auto name_primary_key = data_processor->insert_string(_name);
auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0);
auto& _agent_manager = agent_manager::get_instance();
auto base_id =
_agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU)
auto base_id =
m_agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU)
.base_id;
auto insert_event_and_sample = [&](bool enabled, const char* pmc_name,
const char* track_name, double value) {
if(!enabled) return;
data_processor.insert_pmc_event(event_id, base_id, pmc_name, value);
data_processor.insert_sample(track_name, _amd_smi.timestamp, event_id);
data_processor->insert_pmc_event(event_id, base_id, pmc_name, value);
data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id);
};
using pos = trace_cache::amd_smi_sample::settings_positions;
@@ -591,20 +616,19 @@ rocpd_post_processing::get_cpu_freq_sample_callback() const
return [&](const storage_parsed_type_base& parsed) {
auto _cpu_freq_sample = static_cast<const struct cpu_freq_sample&>(parsed);
auto& data_processor = get_data_processor();
auto data_processor = get_data_processor();
const auto* _name = trait::name<category::cpu_freq>::value;
auto name_primary_key = data_processor.insert_string(_name);
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
auto name_primary_key = data_processor->insert_string(_name);
auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0);
auto device_id = 0;
auto& agent_mngr = agent_manager::get_instance();
auto base_id =
agent_mngr.get_agent_by_type_index(device_id, agent_type::CPU).base_id;
auto base_id =
m_agent_manager.get_agent_by_type_index(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* name, double value) {
data_processor.insert_pmc_event(event_id, base_id, name, value);
data_processor.insert_sample(name, _cpu_freq_sample.timestamp, event_id);
data_processor->insert_pmc_event(event_id, base_id, name, value);
data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id);
};
insert_event_and_sample(trait::name<category::process_page>::value,
@@ -635,8 +659,12 @@ rocpd_post_processing::get_cpu_freq_sample_callback() const
};
}
rocpd_post_processing::rocpd_post_processing(metadata_registry& md)
rocpd_post_processing::rocpd_post_processing(metadata_registry& md,
agent_manager& agent_mngr, int pid, int ppid)
: m_metadata(md)
, m_agent_manager(agent_mngr)
, m_data_processor(std::make_shared<rocpd::data_processor>(
std::make_shared<rocpd::data_storage::database>(pid, ppid)))
{}
void
@@ -663,7 +691,17 @@ rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser&
get_amd_smi_sample_callback());
parser.register_type_callback(entry_type::cpu_freq_sample,
get_cpu_freq_sample_callback());
ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..");
parser.register_type_callback(entry_type::backtrace_region_sample,
get_backtrace_sample_callback());
ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..\n");
parser.register_on_finished_callback(
std::make_unique<std::function<void()>>([this]() {
if(m_data_processor != nullptr)
{
m_data_processor->flush();
}
}));
#endif
}
@@ -675,25 +713,25 @@ rocpd_post_processing::post_process_metadata()
{
return;
}
ROCPROFSYS_DEBUG("Post processing metadata..");
auto& data_processor = get_data_processor();
auto& agent_mngr = agent_manager::get_instance();
auto n_info = node_info::get_instance();
ROCPROFSYS_DEBUG("Post processing metadata..\n");
auto data_processor = get_data_processor();
auto n_info = node_info::get_instance();
data_processor.insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(),
n_info.system_name.c_str(), n_info.node_name.c_str(),
n_info.release.c_str(), n_info.version.c_str(),
n_info.machine.c_str(), n_info.domain_name.c_str());
data_processor->insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(),
n_info.system_name.c_str(), n_info.node_name.c_str(),
n_info.release.c_str(), n_info.version.c_str(),
n_info.machine.c_str(), n_info.domain_name.c_str());
auto process_info = m_metadata.get_process_info();
data_processor.insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0,
0, 0, 0, process_info.command.c_str(), "{}");
data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0,
0, process_info.start, process_info.end,
process_info.command.c_str(), "{}");
const auto& agents = agent_mngr.get_agents();
const auto& agents = m_agent_manager.get_agents();
int counter = 0;
for(const auto& rocpd_agent : agents)
{
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
auto _base_id = data_processor->insert_agent(
n_info.id, process_info.pid,
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++,
rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id,
@@ -705,7 +743,7 @@ rocpd_post_processing::post_process_metadata()
auto _string_list = m_metadata.get_string_list();
for(auto& _string : _string_list)
{
data_processor.insert_string(std::string(_string).c_str());
data_processor->insert_string(std::string(_string).c_str());
}
auto _thread_info_list = m_metadata.get_thread_info_list();
@@ -719,18 +757,18 @@ rocpd_post_processing::post_process_metadata()
{
auto thread_id =
track.thread_id.has_value()
? std::make_optional<size_t>(data_processor.map_thread_id_to_primary_key(
? std::make_optional<size_t>(data_processor->map_thread_id_to_primary_key(
track.thread_id.value()))
: std::nullopt;
data_processor.insert_track(track.track_name.c_str(), n_info.id, process_info.pid,
thread_id);
data_processor->insert_track(track.track_name.c_str(), n_info.id,
process_info.pid, thread_id);
}
auto _code_object_list = m_metadata.get_code_object_list();
for(const auto& code_object : _code_object_list)
{
auto dev_id =
agent_mngr.get_agent_by_handle(get_handle_from_code_object(code_object))
m_agent_manager.get_agent_by_handle(get_handle_from_code_object(code_object))
.base_id;
const char* strg_type = "UNKNOWN";
@@ -740,17 +778,17 @@ rocpd_post_processing::post_process_metadata()
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break;
default: break;
}
data_processor.insert_code_object(code_object.code_object_id, n_info.id,
process_info.pid, dev_id, code_object.uri,
code_object.load_base, code_object.load_size,
code_object.load_delta, strg_type);
data_processor->insert_code_object(code_object.code_object_id, n_info.id,
process_info.pid, dev_id, code_object.uri,
code_object.load_base, code_object.load_size,
code_object.load_delta, strg_type);
}
auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list();
for(const auto& kernel_symbol : _kernel_symbols_list)
{
auto kernel_name = tim::demangle(kernel_symbol.kernel_name);
data_processor.insert_kernel_symbol(
data_processor->insert_kernel_symbol(
kernel_symbol.kernel_id, n_info.id, process_info.pid,
kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(),
kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size,
@@ -758,7 +796,7 @@ rocpd_post_processing::post_process_metadata()
kernel_symbol.private_segment_size, kernel_symbol.sgpr_count,
kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count);
data_processor.insert_string(kernel_name.c_str());
data_processor->insert_string(kernel_name.c_str());
}
auto _queue_list = m_metadata.get_queue_list();
@@ -766,8 +804,8 @@ rocpd_post_processing::post_process_metadata()
{
std::stringstream ss;
ss << "Queue " << queue_handle;
data_processor.insert_queue_info(queue_handle, n_info.id, process_info.pid,
ss.str().c_str());
data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid,
ss.str().c_str());
}
auto _stream_list = m_metadata.get_stream_list();
@@ -775,8 +813,8 @@ rocpd_post_processing::post_process_metadata()
{
std::stringstream ss;
ss << "Stream " << stream_handle;
data_processor.insert_stream_info(stream_handle, n_info.id, process_info.pid,
ss.str().c_str());
data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid,
ss.str().c_str());
}
auto buffer_info_list = m_metadata.get_buffer_name_info();
@@ -784,7 +822,7 @@ rocpd_post_processing::post_process_metadata()
{
for(const auto& item : buffer_info.items())
{
data_processor.insert_string(*item.second);
data_processor->insert_string(*item.second);
}
}
@@ -793,7 +831,7 @@ rocpd_post_processing::post_process_metadata()
{
for(const auto& item : cb_info.items())
{
data_processor.insert_string(*item.second);
data_processor->insert_string(*item.second);
}
}
@@ -801,10 +839,11 @@ rocpd_post_processing::post_process_metadata()
for(const auto& pmc_info : pmc_info_list)
{
const auto agent_primary_key =
agent_mngr.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
m_agent_manager
.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
.base_id;
data_processor.insert_pmc_description(
data_processor->insert_pmc_description(
n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(),
pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(),
pmc_info.symbol.c_str(), pmc_info.description.c_str(),
@@ -820,7 +859,7 @@ rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info,
const node_info& n_info,
const info::process& process_info) const
{
const auto& extended_info = thread_info::get(t_info.thread_id, SequentTID);
const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID);
if(extended_info.has_value())
{
t_info.start = extended_info->get_start();
@@ -829,9 +868,9 @@ rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info,
std::stringstream ss;
ss << "Thread " << t_info.thread_id;
get_data_processor().insert_thread_info(n_info.id, process_info.ppid,
process_info.pid, t_info.thread_id,
ss.str().c_str(), t_info.start, t_info.end);
get_data_processor()->insert_thread_info(n_info.id, process_info.ppid,
process_info.pid, t_info.thread_id,
ss.str().c_str(), t_info.start, t_info.end);
}
} // namespace trace_cache
+10 -2
Просмотреть файл
@@ -21,7 +21,9 @@
// SOFTWARE.
#pragma once
#include "agent_manager.hpp"
#include "core/node_info.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "core/trace_cache/storage_parser.hpp"
@@ -33,11 +35,14 @@ namespace trace_cache
class rocpd_post_processing
{
public:
rocpd_post_processing(metadata_registry& metadata);
rocpd_post_processing(metadata_registry& metadata, agent_manager& agent_mngr, int pid,
int ppid);
void register_parser_callback(storage_parser& parser);
void post_process_metadata();
std::shared_ptr<rocpd::data_processor> get_data_processor() const;
private:
using primary_key = size_t;
@@ -54,8 +59,11 @@ private:
postprocessing_callback get_pmc_event_with_sample_callback() const;
postprocessing_callback get_amd_smi_sample_callback() const;
postprocessing_callback get_cpu_freq_sample_callback() const;
postprocessing_callback get_backtrace_sample_callback() const;
metadata_registry& m_metadata;
metadata_registry& m_metadata;
agent_manager& m_agent_manager;
std::shared_ptr<rocpd::data_processor> m_data_processor;
};
} // namespace trace_cache
+41 -9
Просмотреть файл
@@ -26,7 +26,6 @@
#include <string>
#include <unistd.h>
#include <utility>
#include <variant>
#include <vector>
#if ROCPROFSYS_USE_ROCM > 0
@@ -149,15 +148,12 @@ struct region_sample : storage_parsed_type_base
uint64_t thread_id;
std::string name;
// Correlation fields
uint64_t correlation_id_internal;
uint64_t correlation_id_ancestor;
// Timing fields
uint64_t start_timestamp;
uint64_t end_timestamp;
// Additional fields
std::string call_stack;
std::string args_str;
std::string category;
@@ -177,9 +173,10 @@ struct in_time_sample : storage_parsed_type_base
struct pmc_event_with_sample : in_time_sample
{
size_t agent_handle;
uint32_t device_id;
uint8_t device_type;
std::string pmc_info_name;
size_t value;
double value;
};
struct amd_smi_sample : storage_parsed_type_base
@@ -219,6 +216,40 @@ struct cpu_freq_sample : storage_parsed_type_base
std::vector<uint8_t> freqs;
};
/// Parsed representation of one backtrace region entry read back from the trace
/// cache.
///
/// Scalar members carry default member initializers so that a default-constructed
/// instance never holds indeterminate values, even if it is inspected before (or
/// without) being fully populated by the parser.
struct backtrace_region_sample : storage_parsed_type_base
{
    backtrace_region_sample() = default;

    /// Constructs a fully populated sample; string arguments are moved into place.
    backtrace_region_sample(uint32_t _type, uint64_t _thread_id, std::string _track_name,
                            std::string _name, uint64_t _start_timestamp,
                            uint64_t _end_timestamp, std::string _category,
                            std::string _call_stack, std::string _line_info,
                            std::string _extdata)
    : type(_type)
    , thread_id(_thread_id)
    , track_name(std::move(_track_name))
    , name(std::move(_name))
    , start_timestamp(_start_timestamp)
    , end_timestamp(_end_timestamp)
    , category(std::move(_category))
    , call_stack(std::move(_call_stack))
    , line_info(std::move(_line_info))
    , extdata(std::move(_extdata))
    {}

    // NOTE(review): the meaning of `type` is not visible here -- confirm with the
    // code that writes these entries.
    uint32_t    type{ 0 };
    uint64_t    thread_id{ 0 };
    std::string track_name;
    std::string name;
    // NOTE(review): timestamp units are not visible here -- presumably
    // nanoseconds; confirm against the producer.
    uint64_t    start_timestamp{ 0 };
    uint64_t    end_timestamp{ 0 };
    std::string category;
    std::string call_stack;
    std::string line_info;
    std::string extdata;
};
enum class entry_type : uint32_t
{
in_time_sample = 0x0000,
@@ -229,9 +260,10 @@ enum class entry_type : uint32_t
#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600)
memory_alloc = 0x0005,
#endif
amd_smi_sample = 0x0006,
cpu_freq_sample = 0x0007,
fragmented_space = 0xFFFF
amd_smi_sample = 0x0006,
cpu_freq_sample = 0x0007,
backtrace_region_sample = 0x0008,
fragmented_space = 0xFFFF
};
} // namespace trace_cache
} // namespace rocprofsys
+43 -20
Просмотреть файл
@@ -26,6 +26,8 @@
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <functional>
#include <memory>
#include <sstream>
#include <string>
@@ -34,8 +36,8 @@ namespace rocprofsys
namespace trace_cache
{
storage_parser::storage_parser(pid_t _pid)
: m_pid(_pid)
storage_parser::storage_parser(std::string _filename)
: m_filename(std::move(_filename))
{}
void
@@ -46,22 +48,24 @@ storage_parser::register_type_callback(
m_callbacks[type].push_back(callback);
}
/// Installs the callback to run once storage consumption has finished.
///
/// Ownership of `callback` is transferred to the parser; a previously registered
/// callback (if any) is replaced. Passing a null pointer clears the callback.
auto
storage_parser::register_on_finished_callback(
    std::unique_ptr<std::function<void()>> callback) -> void
{
    m_on_finished_callback = std::move(callback);
}
void
storage_parser::consume_storage()
{
ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s", filename.c_str());
if(m_pid != getpid())
{
ROCPROFSYS_DEBUG(
"Storage parser is not created in same process as shutting down..");
return;
}
ROCPROFSYS_VERBOSE(0, "Consuming buffered storage with filename: %s\n",
m_filename.c_str());
std::ifstream ifs(filename, std::ios::binary);
std::ifstream ifs(m_filename, std::ios::binary);
if(!ifs)
{
std::stringstream ss;
ss << "Error opening file for reading: " << filename << "\n";
ss << "Error opening file for reading: " << m_filename << "\n";
throw std::runtime_error(ss.str());
}
@@ -90,10 +94,10 @@ storage_parser::consume_storage()
if(ifs.bad())
{
ROCPROFSYS_WARNING(
1,
"Bad read while consuming buffered storage. Filename: %s. Bytes read: %d",
filename.c_str(), static_cast<int>(ifs.tellg()));
ROCPROFSYS_WARNING(1,
"Bad read while consuming buffered storage. Filename: %s. "
"Bytes read: %d\n",
m_filename.c_str(), static_cast<int>(ifs.tellg()));
continue;
}
@@ -196,7 +200,7 @@ storage_parser::consume_storage()
_pmc_event_with_sample.parent_stack_id,
_pmc_event_with_sample.correlation_id,
_pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info,
_pmc_event_with_sample.agent_handle,
_pmc_event_with_sample.device_id, _pmc_event_with_sample.device_type,
_pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value);
invoke_callbacks(header.type, _pmc_event_with_sample);
break;
@@ -225,14 +229,33 @@ storage_parser::consume_storage()
invoke_callbacks(header.type, _cpu_freq_sample);
break;
}
case entry_type::backtrace_region_sample:
{
    // Deserialize the fields in the order they appear in the entry payload and
    // hand the populated sample to the registered callbacks.
    backtrace_region_sample _backtrace_region_sample;
    parse_data(
        sample.data(), _backtrace_region_sample.type,
        _backtrace_region_sample.thread_id,
        _backtrace_region_sample.track_name, _backtrace_region_sample.name,
        _backtrace_region_sample.start_timestamp,
        _backtrace_region_sample.end_timestamp,
        _backtrace_region_sample.category,
        _backtrace_region_sample.call_stack,
        _backtrace_region_sample.line_info, _backtrace_region_sample.extdata);
    invoke_callbacks(header.type, _backtrace_region_sample);
    // Terminate the case explicitly so it does not fall through into whichever
    // label follows it in the switch.
    break;
}
default: break;
}
}
ifs.close();
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system",
filename.c_str());
std::remove(filename.c_str());
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system\n",
m_filename.c_str());
std::remove(m_filename.c_str());
if(m_on_finished_callback != nullptr)
{
(*m_on_finished_callback)();
}
}
void
@@ -241,7 +264,7 @@ storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base
auto _callback_list = m_callbacks.find(type);
if(_callback_list == m_callbacks.end())
{
ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing");
ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing\n");
return;
}
+7 -4
Просмотреть файл
@@ -49,10 +49,12 @@ public:
const postprocessing_callback& callback);
void consume_storage();
void register_on_finished_callback(std::unique_ptr<std::function<void()>> callback);
private:
friend class cache_manager;
storage_parser(pid_t _pid);
storage_parser(std::string _filename);
template <typename T>
static void process_arg(const uint8_t*& data_pos, T& arg)
{
@@ -82,10 +84,11 @@ private:
(process_arg(data_pos, args), ...);
}
private:
pid_t m_pid;
void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed);
void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed);
std::string m_filename;
std::map<entry_type, std::vector<postprocessing_callback>> m_callbacks;
std::unique_ptr<std::function<void()>> m_on_finished_callback{ nullptr };
};
} // namespace trace_cache
+39 -22
Просмотреть файл
@@ -45,6 +45,8 @@
#include "core/rocpd/data_processor.hpp"
#include "core/timemory.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/cache_utility.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "core/utility.hpp"
#include "library/causal/data.hpp"
#include "library/causal/experiment.hpp"
@@ -117,6 +119,22 @@ namespace
auto _timemory_manager = tim::manager::instance();
auto _timemory_settings = tim::settings::shared_instance();
/// Stores `_ts` as the start timestamp of the process entry held by the trace
/// cache metadata registry.
///
/// The current process info is read, its `start` field is overwritten, and the
/// updated record is written back via `set_process`.
///
/// NOTE(review): a `struct process` in the metadata registry header declares
/// `start` as `uint32_t`; if that is the type returned here, an `int64_t`
/// timestamp is narrowed on assignment -- confirm and widen the field if so.
void
set_metadata_process_start_timestamp(int64_t _ts)
{
    auto&& registry = trace_cache::get_metadata_registry();
    auto   updated  = registry.get_process_info();
    updated.start   = _ts;
    registry.set_process(updated);
}
/// Stores `_ts` as the end timestamp of the process entry held by the trace
/// cache metadata registry.
///
/// The current process info is read, its `end` field is overwritten, and the
/// updated record is written back via `set_process`.
///
/// NOTE(review): a `struct process` in the metadata registry header declares
/// `end` as `uint32_t`; if that is the type returned here, an `int64_t`
/// timestamp is narrowed on assignment -- confirm and widen the field if so.
void
set_metadata_process_end_timestamp(int64_t _ts)
{
    auto&& registry = trace_cache::get_metadata_registry();
    auto   updated  = registry.get_process_info();
    updated.end     = _ts;
    registry.set_process(updated);
}
bool
ensure_initialization(bool _offset, int64_t _glob_n, int64_t _offset_n)
{
@@ -559,6 +577,8 @@ rocprofsys_init_tooling_hidden(void)
get_main_bundle()->start();
ROCPROFSYS_DEBUG_F("State: %s -> State::Active\n",
std::to_string(get_state()).c_str());
trace_cache::get_buffer_storage().start_flushing_thread(getpid());
set_state(State::Active); // set to active as very last operation
} };
@@ -701,6 +721,8 @@ rocprofsys_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _
if(get_state() == State::Active) rocprofsys_finalize_hidden();
});
set_metadata_process_start_timestamp(comp::wall_clock::record());
ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F(
get_debug_env() || get_verbose_env() > 2,
"mode: %s | is binary rewrite: %s | command: %s\n", _mode,
@@ -757,8 +779,13 @@ rocprofsys_finalize_hidden(void)
std::to_string(get_state()).c_str());
return;
}
else if(_is_child)
set_metadata_process_end_timestamp(comp::wall_clock::record());
if(_is_child)
{
set_state(State::Finalized);
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
// Flush buffered traces in case of child process
if(get_use_rocm())
@@ -767,17 +794,13 @@ rocprofsys_finalize_hidden(void)
rocprofiler_sdk::shutdown();
}
#endif
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
const auto _agents = get_agent_manager_instance().get_agents();
_manager.shutdown();
_manager.post_process();
const auto metadata_filepath =
trace_cache::get_metadata_filepath(get_root_process_id(), getpid());
_manager.get_metadata_registry().save_to_file(metadata_filepath, _agents);
#if ROCPROFSYS_USE_ROCM > 0
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
#endif
set_state(State::Finalized);
std::quick_exit(EXIT_SUCCESS);
return;
}
@@ -871,12 +894,6 @@ rocprofsys_finalize_hidden(void)
}
#endif
{
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
_manager.shutdown();
_manager.post_process();
}
ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n");
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
{
@@ -1013,6 +1030,12 @@ rocprofsys_finalize_hidden(void)
_perfetto_output_error);
}
{
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
_manager.shutdown();
_manager.post_process_bulk();
}
if(_timemory_manager && _timemory_manager != nullptr)
{
_timemory_manager->add_metadata([](auto& ar) {
@@ -1067,12 +1090,6 @@ rocprofsys_finalize_hidden(void)
[](int) {});
common::destroy_static_objects();
#if ROCPROFSYS_USE_ROCM > 0
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
#endif
}
//======================================================================================//
+104 -162
Просмотреть файл
@@ -21,21 +21,21 @@
// SOFTWARE.
#include "library/components/backtrace_metrics.hpp"
#include "core/agent.hpp"
#include "core/agent_manager.hpp"
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/cache_utility.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "library/components/ensure_storage.hpp"
#include "library/ptl.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"
#include "library/tracing.hpp"
#include <functional>
#include <timemory/backends/papi.hpp>
#include <timemory/backends/threading.hpp>
#include <timemory/components/data_tracker/components.hpp>
@@ -142,12 +142,6 @@ backtrace_metrics::get_hw_counter_labels(int64_t _tid)
return (_v) ? *_v : std::vector<std::string>{};
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
backtrace_metrics::start()
{}
@@ -170,38 +164,31 @@ get_enabled(tim::type_list<Tp...>)
}
void
rocpd_init_categories()
metadata_init_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
auto& data_processor = get_data_processor();
data_processor.insert_string(trait::name<category::thread_cpu_time>::value);
data_processor.insert_string(trait::name<category::thread_peak_memory>::value);
data_processor.insert_string(trait::name<category::thread_context_switch>::value);
data_processor.insert_string(trait::name<category::thread_page_fault>::value);
data_processor.insert_string(trait::name<category::thread_hardware_counter>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::thread_cpu_time>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::thread_peak_memory>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::thread_context_switch>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::thread_page_fault>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::thread_hardware_counter>::value);
_is_initialized = true;
}
template <typename Category>
void
rocpd_init_tracks(int64_t _tid)
apply_for_all_thread_names(int64_t _tid, std::function<void(const std::string&)> _apply)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto& t_info = thread_info::get(_tid, SequentTID);
auto _tid_name = JOIN("", '[', _tid, ']');
auto thread_idx = data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
// Initialize hw_counter_tracks and create one track for each hardware counter
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
@@ -209,97 +196,98 @@ rocpd_init_tracks(int64_t _tid)
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
thread_idx, "{}");
std::stringstream track_name_ss;
track_name_ss << "Thread " << _desc << " [" << _tid << "] (S)";
_apply(track_name_ss.str());
}
}
else
data_processor.insert_track(
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
getpid(), thread_idx, "{}");
{
std::stringstream track_name_ss;
track_name_ss << trait::name<Category>::value << " [" << _tid << "]";
_apply(track_name_ss.str());
}
}
template <typename Category>
void
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
metadata_init_tracks(int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
const auto& t_info = thread_info::get(_tid, SequentTID);
auto thread_id = static_cast<uint64_t>(t_info->index_data->system_value);
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
auto ni = node_info::get_instance();
const auto TARGET_ARCH = "CPU";
trace_cache::get_metadata_registry().add_thread_info(
{ getppid(), getpid(), thread_id, static_cast<uint32_t>(t_info->get_start()),
static_cast<uint32_t>(t_info->get_stop()), "{}" });
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(dev_id, agent_type::CPU).base_id;
apply_for_all_thread_names<Category>(_tid, [&](const std::string& _track_name) {
trace_cache::get_metadata_registry().add_track({ _track_name, thread_id, "{}" });
});
}
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
template <typename Category>
void
metadata_initialize_backtrace_metrics_pmc(size_t dev_id, const char* _units, int64_t _tid)
{
constexpr size_t EVENT_CODE = 0;
constexpr size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
const char* TARGET_ARCH = "CPU";
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
track_name.c_str(), trait::name<Category>::value,
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
"ABS", BLOCK, EXPRESSION, 0, 0);
}
}
else
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
trait::name<Category>::value, trait::name<Category>::description,
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
apply_for_all_thread_names<Category>(_tid, [&](const std::string& _track_name) {
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, _track_name,
trait::name<Category>::value, trait::name<Category>::description,
LONG_DESCRIPTION, COMPONENT, _units, trace_cache::ABSOLUTE, BLOCK,
EXPRESSION, 0, 0, "{}" });
});
}
template <typename Category, typename Value>
void
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
Value value, int64_t _tid)
cache_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp_ns,
Value value, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
auto _tid_name = JOIN("", '[', _tid, ']');
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
size_t stack_id = 0;
size_t parent_stack_id = 0;
size_t correlation_id = 0;
const auto* event_metadata = "";
const auto* call_stack = "";
const auto* line_info = "";
auto insert_event_and_sample = [&](const char* _name, double _value) {
data_processor.insert_pmc_event(event_id, base_id, _name, _value);
data_processor.insert_sample(_name, timestamp, event_id);
auto insert_event_and_sample = [&](const char* _track_name, double _value) {
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::pmc_event_with_sample, _track_name, timestamp_ns,
event_metadata, stack_id, parent_stack_id, correlation_id, call_stack,
line_info, device_id, static_cast<uint8_t>(agent_type::CPU), _track_name,
_value);
};
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
const auto& hw_counters =
static_cast<backtrace_metrics::hw_counter_data_t>(value);
for(size_t i = 0; i < _hw_cnt_labels.size() && i < hw_counters.size(); ++i)
{
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
if(_desc.empty()) _desc = _hw_cnt_labels[i];
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
insert_event_and_sample(track_name.c_str(), hw_counters.at(i));
}
size_t idx = 0;
apply_for_all_thread_names<Category>(_tid, [&](const std::string& _track_name) {
if(idx < hw_counters.size())
{
insert_event_and_sample(_track_name.c_str(), hw_counters.at(idx));
}
++idx;
});
}
else
insert_event_and_sample(
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
{
apply_for_all_thread_names<Category>(_tid, [&](const std::string& _track_name) {
insert_event_and_sample(_track_name.c_str(), value);
});
}
}
} // namespace
@@ -473,84 +461,39 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid)
}
void
backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid)
backtrace_metrics::init_cache(int64_t _tid, valid_array_t _valid)
{
rocpd_init_categories();
metadata_init_categories();
if(get_valid(category::thread_cpu_time{}, _valid))
{
rocpd_init_tracks<category::thread_cpu_time>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_cpu_time>(0, "sec", _tid);
}
if(get_valid(category::thread_peak_memory{}, _valid))
{
rocpd_init_tracks<category::thread_peak_memory>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_peak_memory>(0, "MB",
metadata_init_tracks<category::thread_cpu_time>(_tid);
metadata_initialize_backtrace_metrics_pmc<category::thread_cpu_time>(0, "sec",
_tid);
}
if(get_valid(category::thread_context_switch{}, _valid))
{
rocpd_init_tracks<category::thread_context_switch>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_context_switch>(0, "",
_tid);
}
if(get_valid(category::thread_page_fault{}, _valid))
{
rocpd_init_tracks<category::thread_page_fault>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_page_fault>(0, "", _tid);
}
if(get_valid(type_list<hw_counters>{}, _valid) &&
get_valid(category::thread_hardware_counter{}, _valid))
{
rocpd_init_tracks<category::thread_hardware_counter>(_tid);
rocpd_initialize_backtrace_metrics_pmc<category::thread_hardware_counter>(0, "",
_tid);
}
}
void
backtrace_metrics::fini_rocpd(int64_t _tid, valid_array_t _valid)
{
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "Error! missing thread info for tid=%li\n", _tid);
if(!_thread_info) return;
uint64_t _ts = _thread_info->get_stop();
if(get_valid(category::thread_cpu_time{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
0, _ts, 0, _tid);
}
if(get_valid(category::thread_peak_memory{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
0, _ts, 0, _tid);
metadata_init_tracks<category::thread_peak_memory>(_tid);
metadata_initialize_backtrace_metrics_pmc<category::thread_peak_memory>(0, "MB",
_tid);
}
if(get_valid(category::thread_context_switch{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
0, _ts, 0, _tid);
metadata_init_tracks<category::thread_context_switch>(_tid);
metadata_initialize_backtrace_metrics_pmc<category::thread_context_switch>(0, "",
_tid);
}
if(get_valid(category::thread_page_fault{}, _valid))
{
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
0, _ts, 0, _tid);
metadata_init_tracks<category::thread_page_fault>(_tid);
metadata_initialize_backtrace_metrics_pmc<category::thread_page_fault>(0, "",
_tid);
}
if(get_valid(type_list<hw_counters>{}, _valid) &&
get_valid(category::thread_hardware_counter{}, _valid))
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
hw_counter_data_t zero_counters{};
zero_counters.fill(0.0);
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
hw_counter_data_t>(0, _ts, zero_counters,
_tid);
metadata_init_tracks<category::thread_hardware_counter>(_tid);
metadata_initialize_backtrace_metrics_pmc<category::thread_hardware_counter>(
0, "", _tid);
}
}
@@ -636,41 +579,40 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const
}
void
backtrace_metrics::post_process_rocpd([[maybe_unused]] int64_t _tid,
[[maybe_unused]] uint64_t _ts) const
backtrace_metrics::cache_backtrace_data([[maybe_unused]] int64_t _tid,
[[maybe_unused]] uint64_t _ts) const
{
#if ROCPROFSYS_USE_ROCM > 0
auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); };
if(is_category_enabled(category::thread_cpu_time{}))
{
rocpd_process_backtrace_metrics_events<category::thread_cpu_time, double>(
cache_backtrace_metrics_events<category::thread_cpu_time, double>(
0, _ts, m_cpu / units::sec, _tid);
}
if(is_category_enabled(category::thread_peak_memory{}))
{
rocpd_process_backtrace_metrics_events<category::thread_peak_memory, double>(
cache_backtrace_metrics_events<category::thread_peak_memory, double>(
0, _ts, m_mem_peak / units::megabyte, _tid);
}
if(is_category_enabled(category::thread_context_switch{}))
{
rocpd_process_backtrace_metrics_events<category::thread_context_switch, int64_t>(
cache_backtrace_metrics_events<category::thread_context_switch, int64_t>(
0, _ts, m_ctx_swch, _tid);
}
if(is_category_enabled(category::thread_page_fault{}))
{
rocpd_process_backtrace_metrics_events<category::thread_page_fault, int64_t>(
cache_backtrace_metrics_events<category::thread_page_fault, int64_t>(
0, _ts, m_page_flt, _tid);
}
if(is_category_enabled(type_list<hw_counters>{}) &&
is_category_enabled(category::thread_hardware_counter{}))
{
rocpd_process_backtrace_metrics_events<category::thread_hardware_counter,
hw_counter_data_t>(0, _ts, m_hw_counter,
_tid);
cache_backtrace_metrics_events<category::thread_hardware_counter,
hw_counter_data_t>(0, _ts, m_hw_counter, _tid);
}
#endif
}
+2 -3
Просмотреть файл
@@ -83,8 +83,7 @@ struct backtrace_metrics : comp::empty_base
static void configure(bool, int64_t _tid = threading::get_id());
static void init_perfetto(int64_t _tid, valid_array_t);
static void fini_perfetto(int64_t _tid, valid_array_t);
static void init_rocpd(int64_t _tid, valid_array_t);
static void fini_rocpd(int64_t _tid, valid_array_t);
static void init_cache(int64_t _tid, valid_array_t);
static std::vector<std::string> get_hw_counter_labels(int64_t);
template <typename Tp>
@@ -115,7 +114,7 @@ struct backtrace_metrics : comp::empty_base
const auto& get_hw_counters() const { return m_hw_counter; }
void post_process_perfetto(int64_t _tid, uint64_t _ts) const;
void post_process_rocpd(int64_t _tid, uint64_t _ts) const;
void cache_backtrace_data(int64_t _tid, uint64_t _ts) const;
backtrace_metrics& operator-=(const backtrace_metrics&);
+3 -6
Просмотреть файл
@@ -21,7 +21,6 @@
// SOFTWARE.
#include "library/components/comm_data.hpp"
#include "core/agent_manager.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/node_info.hpp"
@@ -135,9 +134,6 @@ template <typename Track>
void
cache_comm_data_events(const uint32_t device_id, int bytes)
{
auto& agents = agent_manager::get_instance();
auto agent = agents.get_agent_by_type_index(device_id, agent_type::CPU);
static std::mutex _mutex{};
static uint64_t value = 0;
uint64_t _now = 0;
@@ -154,12 +150,13 @@ cache_comm_data_events(const uint32_t device_id, int bytes)
const size_t correlation_id = 0;
const std::string call_stack = "{}";
const std::string line_info = "{}";
const size_t agent_handle = agent.handle;
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns,
event_metadata.c_str(), stack_id, parent_stack_id, correlation_id,
call_stack.c_str(), line_info.c_str(), agent_handle, track_name.c_str(), value);
call_stack.c_str(), line_info.c_str(), device_id,
static_cast<uint8_t>(agent_type::CPU), track_name.c_str(),
static_cast<double>(value));
}
} // namespace
+1 -1
Просмотреть файл
@@ -244,7 +244,7 @@ config()
// the agents seems to be assigned per device basis not per core.
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
// implementation.
auto cpu_agents = agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
auto cpu_agents = get_agent_manager_instance().get_agents_by_type(agent_type::CPU);
for(auto& agent : cpu_agents)
{
metadata_initialize_cpu_freq_pmc(agent->device_id);
+7 -6
Просмотреть файл
@@ -31,7 +31,6 @@
#include "core/defines.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/json.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/sample_type.hpp"
#include "library/components/category_region.hpp"
@@ -46,6 +45,8 @@
#include <timemory/utility/procfs/maps.hpp>
#include <timemory/utility/types.hpp>
#include <nlohmann/json.hpp>
#include <cstdlib>
#include <sstream>
#include <string>
@@ -177,11 +178,11 @@ void
cache_kokkos_event(const char* name, const char* event_type, const char* target,
uint64_t timestamp_ns)
{
auto event_metadata = rocpd::json::create();
nlohmann::json event_metadata;
event_metadata->set("name", name);
event_metadata->set("event_type", event_type);
event_metadata->set("target", target);
event_metadata["name"] = name;
event_metadata["event_type"] = event_type;
event_metadata["target"] = target;
const size_t stack_id = 0;
const size_t parent_stack_id = 0;
@@ -192,7 +193,7 @@ cache_kokkos_event(const char* name, const char* event_type, const char* target,
rocprofsys::trace_cache::get_buffer_storage().store(
rocprofsys::trace_cache::entry_type::in_time_sample,
rocprofsys::trait::name<category::kokkos>::value, timestamp_ns,
event_metadata->to_string().c_str(), stack_id, parent_stack_id, correlation_id,
event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id,
call_stack, line_info);
}
+12 -30
Просмотреть файл
@@ -29,7 +29,6 @@
#include "core/debug.hpp"
#include "core/gpu.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/json.hpp"
#include "core/state.hpp"
#include "core/trace_cache/buffer_storage.hpp"
#include "core/trace_cache/cache_manager.hpp"
@@ -65,6 +64,8 @@
#include <timemory/utility/demangle.hpp>
#include <timemory/utility/types.hpp>
#include <nlohmann/json.hpp>
#include <atomic>
#include <cctype>
#include <cstdint>
@@ -368,7 +369,8 @@ consume_args(Tp&&...)
auto
get_backtrace(std::optional<std::vector<tim::unwind::processed_entry>>& _bt_data)
{
auto backtrace = ::rocpd::json::create();
auto backtrace = nlohmann::json();
if(_bt_data && !_bt_data->empty())
{
const std::string _unk = "??";
@@ -384,9 +386,9 @@ get_backtrace(std::optional<std::vector<tim::unwind::processed_entry>>& _bt_data
(_linfo && _linfo.line > 0)
? join("", _linfo.line)
: ((itr.lineno == 0) ? std::string{ "?" } : join("", itr.lineno));
auto _entry = join("", demangle(*_func), " @ ",
join(':', ::basename(_loc->c_str()), _line));
backtrace->set(join("", "frame#", _bt_cnt++), _entry);
auto _entry = join("", demangle(*_func), " @ ",
join(':', ::basename(_loc->c_str()), _line));
backtrace[join("", "frame#", _bt_cnt++)] = _entry;
}
}
return backtrace;
@@ -410,26 +412,6 @@ get_parent_stack_id([[maybe_unused]] const CorrelationIdType& correlation_id)
#endif
}
auto
get_extdata(const rocprofiler_callback_tracing_record_t& record)
{
constexpr auto message_key = "message";
auto args = callback_arg_array_t{};
auto extdata = ::rocpd::json::create();
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, &args);
for(auto [key, val] : args)
{
if(key == message_key)
{
extdata->set(key, val);
}
}
return extdata;
}
struct scope_destructor
{
/// \fn scope_destructor(FuncT&& _fini, InitT&& _init)
@@ -867,7 +849,7 @@ tool_tracing_callback_stop(
cache_category<CategoryT>();
cache_add_thread_info(record.thread_id);
std::string args_str = get_args_string(args);
cache_region(&record, _beg_ts, _end_ts, call_stack->to_string(), args_str,
cache_region(&record, _beg_ts, _end_ts, call_stack.dump(), args_str,
trait::name<CategoryT>::value);
}
}
@@ -952,7 +934,7 @@ ompt_cache_instant_event(
cache_category<category::rocm_ompt_api>();
cache_add_thread_info(record.thread_id);
cache_region(&record, _instant_ts, _instant_ts, call_stack->to_string(),
cache_region(&record, _instant_ts, _instant_ts, call_stack.dump(),
get_args_string(args), trait::name<category::rocm_ompt_api>::value);
}
@@ -966,7 +948,7 @@ ompt_cache_orphan_event(
cache_category<category::rocm_ompt_api>();
cache_add_thread_info(stored_data.record.thread_id);
cache_region(&stored_data.record, stored_data._beg_ts, stored_data._beg_ts,
call_stack->to_string(), get_args_string(stored_data.args),
call_stack.dump(), get_args_string(stored_data.args),
trait::name<category::rocm_ompt_api>::value);
}
@@ -1034,7 +1016,7 @@ ompt_pop_standard_callback(
auto call_stack = get_backtrace(_bt_data);
cache_category<category::rocm_ompt_api>();
cache_add_thread_info(record.thread_id);
cache_region(&record, stored_data._beg_ts, _end_ts, call_stack->to_string(),
cache_region(&record, stored_data._beg_ts, _end_ts, call_stack.dump(),
get_args_string(stored_data.args),
trait::name<category::rocm_ompt_api>::value);
}
@@ -1084,7 +1066,7 @@ ompt_pop_parallel_callback(
cache_category<category::rocm_ompt_api>();
cache_add_thread_info(record.thread_id);
cache_region(&record, stored_data._beg_ts, _end_ts, call_stack->to_string(),
cache_region(&record, stored_data._beg_ts, _end_ts, call_stack.dump(),
get_args_string(stored_data.args),
trait::name<category::rocm_ompt_api>::value);
}
+5 -1
Просмотреть файл
@@ -21,6 +21,7 @@
// SOFTWARE.
#include "library/rocprofiler-sdk/counters.hpp"
#include "core/agent_manager.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "library/rocprofiler-sdk/fwd.hpp"
@@ -117,10 +118,13 @@ counter_event::operator()(const client_data* tool_data, ::perfetto::CounterTrack
const size_t agent_handle = record.record_counter.agent_id.handle;
const size_t value = record.record_counter.counter_value;
auto agent = get_agent_manager_instance().get_agent_by_handle(agent_handle);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(),
_timing.start, event_metadata.c_str(), stack_id, parent_stack_id,
correlation_id, call_stack.c_str(), line_info.c_str(), agent_handle,
correlation_id, call_stack.c_str(), line_info.c_str(),
static_cast<uint32_t>(agent.device_id), static_cast<uint8_t>(agent.type),
track_name.c_str(), value);
}
}
+2 -2
Просмотреть файл
@@ -131,7 +131,7 @@ client_data::initialize()
void
client_data::initialize_event_info()
{
if(agent_manager::get_instance().get_agents().empty())
if(get_agent_manager_instance().get_agents().empty())
{
initialize();
}
@@ -231,7 +231,7 @@ client_data::initialize_event_info()
void
client_data::set_agents()
{
auto& agent_mngr = agent_manager::get_instance();
auto& agent_mngr = get_agent_manager_instance();
auto fill_agents = [&](agent_type type, std::vector<tool_agent>& out) {
const auto& _agents = agent_mngr.get_agents_by_type(type);
+1 -1
Просмотреть файл
@@ -198,7 +198,7 @@ client_data::get_buffers() const
inline const rocprofsys_agent_t*
client_data::get_agent(rocprofiler_agent_id_t _id) const
{
const auto& agent = agent_manager::get_instance().get_agent_by_handle(_id.handle);
const auto& agent = get_agent_manager_instance().get_agent_by_handle(_id.handle);
return &agent;
}
+133 -258
Просмотреть файл
@@ -29,7 +29,6 @@
#include "core/node_info.hpp"
#include "core/perf.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/rocpd/json.hpp"
#include "core/state.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/utility.hpp"
@@ -73,6 +72,8 @@
#include <timemory/utility/types.hpp>
#include <timemory/variadic.hpp>
#include <nlohmann/json.hpp>
#include <array>
#include <chrono>
#include <condition_variable>
@@ -168,21 +169,21 @@ get_category_track_name(uint64_t tid)
std::string
generate_call_stack_json(const tim::unwind::processed_entry& stack_entry)
{
auto call_stack = ::rocpd::json::create();
nlohmann::json call_stack;
call_stack->set("name", std::string(demangle(stack_entry.name)));
call_stack->set("pc", as_hex(stack_entry.address));
call_stack->set("file", std::string(stack_entry.location));
call_stack["name"] = std::string(demangle(stack_entry.name));
call_stack["pc"] = as_hex(stack_entry.address);
call_stack["file"] = std::string(stack_entry.location);
return call_stack->to_string();
return call_stack.dump();
}
std::string
generate_line_info_json(const tim::unwind::processed_entry& line_info_entry)
{
auto line_info = ::rocpd::json::create();
line_info->set("line_address", as_hex(line_info_entry.line_address));
line_info->set("name", std::string(demangle(line_info_entry.name)));
nlohmann::json line_info;
line_info["line_address"] = as_hex(line_info_entry.line_address);
line_info["name"] = std::string(demangle(line_info_entry.name));
if(line_info_entry.lineinfo && !line_info_entry.lineinfo.lines.empty())
{
@@ -190,43 +191,15 @@ generate_line_info_json(const tim::unwind::processed_entry& line_info_entry)
std::reverse(_lines.begin(), _lines.end());
for(const auto& line : _lines)
{
auto inlined = ::rocpd::json::create();
inlined->set("name", std::string(demangle(line.name)));
inlined->set("location", std::string(line.location));
inlined->set("line", std::to_string(line.line));
line_info->set("inlined", inlined);
nlohmann::json inlined;
inlined["name"] = std::string(demangle(line.name));
inlined["location"] = std::string(line.location);
inlined["line"] = std::to_string(line.line);
line_info["inlined"] = inlined;
}
}
return line_info->to_string();
}
std::string
generate_hw_counter_json(int64_t _tid, const backtrace_metrics& metrics)
{
auto extdata = ::rocpd::json::create();
if(!metrics.get_hw_counters().empty())
{
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
auto _hw_cnt_vals = metrics.get_hw_counters();
auto hw_counters = ::rocpd::json::create();
for(size_t i = 0; i < _labels.size(); ++i)
{
hw_counters->set(_labels.at(i), _hw_cnt_vals.at(i));
}
extdata->set("hw_counters", hw_counters);
}
return extdata->to_string();
}
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
return line_info.dump();
}
template <typename Category>
@@ -237,11 +210,13 @@ get_track_name(const thread_info& _thread_info)
size_t sequent_value = _thread_info.index_data->sequent_value;
constexpr auto sample_type =
std::is_same_v<Category, category::timer_sampling> ? "Timer" : "Overflow";
return JOIN(" ", "Thread", sequent_value, sample_type, "(S)", thread_id);
std::stringstream name_ss;
name_ss << "Thread " << sequent_value << " " << sample_type << " (S) " << thread_id;
return name_ss.str();
}
void
rocpd_initialize_sampling_category()
metadata_initialize_sampling_category()
{
static bool _is_initialized = false;
if(_is_initialized) return;
@@ -257,7 +232,7 @@ rocpd_initialize_sampling_category()
}
void
rocpd_initialize_thread_info(size_t tid)
metadata_initialize_thread_info(size_t tid)
{
const auto& _thread_info = thread_info::get(tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
@@ -271,7 +246,7 @@ rocpd_initialize_thread_info(size_t tid)
}
void
rocpd_init_track(int64_t tid)
metadata_initialize_track(int64_t tid)
{
const auto& _thread_info = thread_info::get(tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
@@ -290,22 +265,98 @@ rocpd_init_track(int64_t tid)
{ _overflow_track_name, thread_id, "{}" });
}
template <typename Category>
void
rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t name_id,
const char* track, const char* call_stack = "{}",
const char* line_info = "{}", const char* extdata = "{}")
// Added
struct timer_sampling_data
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
int64_t m_tid = -1;
uint64_t m_beg = 0;
uint64_t m_end = 0;
std::vector<tim::unwind::processed_entry> m_stack = {};
backtrace_metrics m_metrics = {};
};
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0, call_stack,
line_info, extdata);
// One parsed overflow sample, as consumed by cache_sampling_data().
struct overflow_sampling_data
{
    // Thread id of the sample; defaults to -1.
    // NOTE(review): presumably the same sequential tid that is passed alongside the
    // data to cache_sampling_data() -- confirm against parse_overflow_data().
    int64_t m_tid{ -1 };
    // Begin / end values of the sample. cache_sampling_data() hands the pair to
    // thread_info::is_valid_lifetime() and stores both with every frame.
    uint64_t m_beg{ 0 };
    uint64_t m_end{ 0 };
    // Processed stack frames of the sample; one cache entry is written per frame.
    std::vector<tim::unwind::processed_entry> m_stack{};
};
data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time,
name_id, event_id);
data_processor.insert_sample(track, start_time, event_id);
std::vector<timer_sampling_data>
parse_timer_data(int64_t _tid, const bundle_t* _init,
const std::vector<bundle_t*>& _data);
std::vector<overflow_sampling_data>
parse_overflow_data(int64_t _tid, const bundle_t*, const std::vector<bundle_t*>& _data);
// TODO: should we remove _tid? it's inside timer_data and overflow_data
void
cache_sampling_data(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data)
{
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
"[%li] Storing sampling data to trace cache...\n", _tid);
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
if(!_thread_info) return;
// Store timer sampling data
for(const auto& itr : _timer_data)
{
if(!_thread_info->is_valid_lifetime({ itr.m_beg, itr.m_end })) continue;
for(const auto& iitr : itr.m_stack)
{
auto _name = std::string(demangle(iitr.name));
auto _track_name = get_track_name<category::timer_sampling>(*_thread_info);
auto _call_stack = generate_call_stack_json(iitr);
auto _line_info = generate_line_info_json(iitr);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::backtrace_region_sample,
static_cast<uint32_t>(ROCPROFSYS_CATEGORY_TIMER_SAMPLING),
static_cast<uint64_t>(_thread_info->index_data->system_value),
_track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end,
trait::name<category::timer_sampling>::value, _call_stack.c_str(),
_line_info.c_str(), "{}");
}
}
auto _overflow_event =
get_setting_value<std::string>("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or("");
if(!_overflow_event.empty())
{
const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" };
const auto _overflow_pos = _overflow_event.find(_overflow_prefix);
if(_overflow_pos != std::string::npos)
_overflow_event =
_overflow_event.substr(_overflow_pos + _overflow_prefix.length());
}
for(const auto& itr : _overflow_data)
{
if(!_thread_info->is_valid_lifetime({ itr.m_beg, itr.m_end })) continue;
for(const auto& iitr : itr.m_stack)
{
auto _name = std::string(demangle(iitr.name));
auto _track_name = get_track_name<category::overflow_sampling>(*_thread_info);
auto _call_stack = generate_call_stack_json(iitr);
auto _line_info = generate_line_info_json(iitr);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::backtrace_region_sample,
static_cast<uint32_t>(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING),
static_cast<uint64_t>(_thread_info->index_data->system_value),
_track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end,
trait::name<category::overflow_sampling>::value, _call_stack.c_str(),
_line_info.c_str(), "{}");
}
}
}
auto&
@@ -871,9 +922,9 @@ configure(bool _setup, int64_t _tid)
}
}
}
rocpd_initialize_sampling_category();
rocpd_initialize_thread_info(_tid);
rocpd_init_track(_tid);
metadata_initialize_sampling_category();
metadata_initialize_thread_info(_tid);
metadata_initialize_track(_tid);
*_running = true;
sampling::get_sampler_init(_tid)->sample();
@@ -937,28 +988,11 @@ configure(bool _setup, int64_t _tid)
return (_signal_types) ? *_signal_types : std::set<int>{};
}
// One timer-driven sample taken on a thread: the interval it covers, the
// unwound call stack, and the metrics recorded alongside it.
struct timer_sampling_data
{
    // Thread identifier; stays at -1 until a producer fills it in.
    int64_t m_tid{ -1 };
    // Begin / end of the sample interval. Consumers in this file pass the pair
    // to thread_info::is_valid_lifetime() and use it as the region bounds.
    // NOTE(review): presumably nanosecond timestamps -- confirm with producer.
    uint64_t m_beg{ 0 };
    uint64_t m_end{ 0 };
    // Unwound frames for this sample; consumers read `name` and `lineinfo`
    // from each entry.
    std::vector<tim::unwind::processed_entry> m_stack{};
    // Metrics captured with the sample (e.g. queried via get_hw_counters()).
    backtrace_metrics m_metrics{};
};
// One overflow-driven sample taken on a thread: the interval it covers and the
// unwound call stack. Unlike timer_sampling_data it carries no metrics member.
struct overflow_sampling_data
{
    // Thread identifier; stays at -1 until a producer fills it in.
    int64_t m_tid{ -1 };
    // Begin / end of the sample interval. Consumers in this file pass the pair
    // to thread_info::is_valid_lifetime() and use it as the region bounds.
    // NOTE(review): presumably nanosecond timestamps -- confirm with producer.
    uint64_t m_beg{ 0 };
    uint64_t m_end{ 0 };
    // Unwound frames for this sample; consumers read `name` from each entry.
    std::vector<tim::unwind::processed_entry> m_stack{};
};
std::vector<timer_sampling_data>
post_process_timer_data(int64_t, const bundle_t*, const std::vector<bundle_t*>&);
parse_timer_data(int64_t, const bundle_t*, const std::vector<bundle_t*>&);
std::vector<overflow_sampling_data>
post_process_overflow_data(int64_t, const bundle_t*, const std::vector<bundle_t*>&);
parse_overflow_data(int64_t, const bundle_t*, const std::vector<bundle_t*>&);
void
post_process_perfetto(int64_t, const std::vector<timer_sampling_data>&,
@@ -969,8 +1003,9 @@ post_process_timemory(int64_t, const std::vector<timer_sampling_data>&,
const std::vector<overflow_sampling_data>&);
void
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data);
store_sampling_data_in_cache(int64_t _tid,
const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data);
// Interning pool for strings whose `c_str()` pointers are handed to other APIs
// in this file (see the `static_strings.emplace(...).first->c_str()` call sites).
// std::set never relocates its elements on insertion, so a pointer obtained this
// way stays valid for as long as the element remains in the set.
auto static_strings = std::set<std::string>{};
@@ -1145,12 +1180,12 @@ post_process()
"Sampler data for thread %lu has %zu valid entries...\n",
i, _data.size());
auto _timer_data = post_process_timer_data(i, _init, _data);
auto _overflow_data = post_process_overflow_data(i, _init, _data);
auto _timer_data = parse_timer_data(i, _init, _data);
auto _overflow_data = parse_overflow_data(i, _init, _data);
if(get_use_perfetto()) post_process_perfetto(i, _timer_data, _overflow_data);
if(get_use_timemory()) post_process_timemory(i, _timer_data, _overflow_data);
if(get_use_rocpd()) post_process_rocpd(i, _timer_data, _overflow_data);
store_sampling_data_in_cache(i, _timer_data, _overflow_data);
}
else
{
@@ -1191,8 +1226,7 @@ post_process()
namespace
{
std::vector<timer_sampling_data>
post_process_timer_data(int64_t _tid, const bundle_t* _init,
const std::vector<bundle_t*>& _data)
parse_timer_data(int64_t _tid, const bundle_t* _init, const std::vector<bundle_t*>& _data)
{
auto _results = std::vector<timer_sampling_data>{};
@@ -1238,8 +1272,7 @@ post_process_timer_data(int64_t _tid, const bundle_t* _init,
}
std::vector<overflow_sampling_data>
post_process_overflow_data(int64_t _tid, const bundle_t*,
const std::vector<bundle_t*>& _data)
parse_overflow_data(int64_t _tid, const bundle_t*, const std::vector<bundle_t*>& _data)
{
auto _results = std::vector<overflow_sampling_data>{};
@@ -1704,66 +1737,7 @@ post_process_timemory(int64_t _tid, const std::vector<timer_sampling_data>& _tim
}
void
rocpd_post_process_overflow_data(
int64_t _tid, const std::vector<overflow_sampling_data>& _overflow_data)
{
auto& data_processor = get_data_processor();
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
if(!_thread_info) return;
auto _overflow_event =
get_setting_value<std::string>("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or("");
if(!_overflow_event.empty() && !_overflow_data.empty())
{
auto _beg_ns = std::max(_overflow_data.front().m_beg, _thread_info->get_start());
auto _end_ns = std::min(_overflow_data.back().m_end, _thread_info->get_stop());
const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" };
const auto _overflow_pos = _overflow_event.find(_overflow_prefix);
if(_overflow_pos != std::string::npos)
_overflow_event =
_overflow_event.substr(_overflow_pos + _overflow_prefix.length());
const auto* _main_name =
static_strings.emplace(join(" ", _overflow_event, "samples [rocprof-sys]"))
.first->c_str();
auto main_name_id = data_processor.insert_string(_main_name);
size_t thread_id = _thread_info->index_data->system_value;
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(thread_id);
const auto _track_name =
get_track_name<category::overflow_sampling>(*_thread_info);
rocpd_insert_region<category::overflow_sampling>(
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
for(const auto& itr : _overflow_data)
{
auto _beg = itr.m_beg;
auto _end = itr.m_end;
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
for(const auto& iitr : itr.m_stack)
{
const auto* _name =
static_strings.emplace(demangle(iitr.name)).first->c_str();
auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::overflow_sampling>(
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str());
}
}
}
}
void
rocpd_post_process_backtrace_metrics(
cache_backtrace_metrics(
[[maybe_unused]] int64_t _tid,
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
{
@@ -1779,121 +1753,22 @@ rocpd_post_process_backtrace_metrics(
{
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
"[%li] Post-processing metrics for rocpd...\n", _tid);
backtrace_metrics::init_rocpd(_tid, _valid_metrics); // move to setup
backtrace_metrics::init_cache(_tid, _valid_metrics); // move to setup
for(const auto& itr : _timer_data)
itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end));
backtrace_metrics::fini_rocpd(_tid, _valid_metrics);
itr.m_metrics.cache_backtrace_data(_tid, 0.5 * (itr.m_beg + itr.m_end));
}
#endif
}
// Inserts the timer samples of thread `_tid` into the rocpd data processor:
// one enclosing "samples [rocprof-sys]" region plus one region per stack frame
// (or per inlined line entry when inline reporting is enabled).
//
// NOTE(review): two function headers appear back-to-back ahead of this single
// body (`rocpd_post_process_timer_data` and `store_sampling_data_in_cache`).
// The body only reads `_tid` and `_timer_data`; this looks like interleaved
// old/new lines -- confirm which header belongs to this body.
void
rocpd_post_process_timer_data(
[[maybe_unused]] int64_t _tid,
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
store_sampling_data_in_cache(
[[maybe_unused]] int64_t _tid,
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data,
[[maybe_unused]] const std::vector<overflow_sampling_data>& _overflow_data)
{
// The whole body is compiled only when ROCPROFSYS_USE_ROCM > 0; otherwise the
// function is empty (hence the [[maybe_unused]] parameters).
#if ROCPROFSYS_USE_ROCM > 0
auto& data_processor = get_data_processor();
const auto& _thread_info = thread_info::get(_tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid);
// Without thread info there is no thread key or lifetime to work with.
if(!_thread_info) return;
if(!_timer_data.empty())
{
// Backtrace metrics are handled first, before any region is inserted.
rocpd_post_process_backtrace_metrics(_tid, _timer_data);
// Bounds of the enclosing region: first sample begin .. last sample end,
// clamped to the thread's own start/stop.
auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start());
auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop());
const auto _track_name = get_track_name<category::timer_sampling>(*_thread_info);
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(
_thread_info->index_data->system_value);
const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]");
rocpd_insert_region<category::timer_sampling>(
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
// NOTE(review): `_labels` is not referenced again in this function.
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
for(const auto& itr : _timer_data)
{
// Position of the current frame within this sample's stack.
size_t _ncount = 0;
uint64_t _beg = itr.m_beg;
uint64_t _end = itr.m_end;
// Samples outside the thread's lifetime are skipped entirely.
if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue;
for(const auto& iitr : itr.m_stack)
{
auto _ncur = _ncount++;
// the begin/end + HW counters will be same for entire call-stack so only
// annotate the top and the bottom functions to keep the data consumption
// low
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
// Only annotate HW counters when first or last and HW counters are not
// empty
bool _include_hw =
_include_common && !itr.m_metrics.get_hw_counters().empty();
// "{}" (empty JSON object) is used whenever counters are not attached.
std::string hw_counter_json = "{}";
if(_include_hw)
{
// current values when read
hw_counter_json = generate_hw_counter_json(_tid, itr.m_metrics);
}
if(get_sampling_include_inlines() && iitr.lineinfo)
{
// Iterate over a reversed copy of the frame's line entries; the
// original `iitr.lineinfo.lines` is left untouched.
auto _lines = iitr.lineinfo.lines;
std::reverse(_lines.begin(), _lines.end());
// NOTE(review): `_n` is never incremented inside the loop below, so
// `(_n == 0)` always holds and every inlined entry receives
// `hw_counter_json` -- this contradicts the "first inlined function"
// comment on the call. Confirm whether a `++_n` is missing.
size_t _n = 0;
for(const auto& line : _lines)
{
const auto* _name =
static_strings.emplace(demangle(line.name)).first->c_str();
auto inlined_name_id = data_processor.insert_string(_name);
// Per-entry JSON carrying name, location, line and an "inlined" flag.
auto inlined_call_stack = ::rocpd::json::create();
inlined_call_stack->set("name", std::string(demangle(line.name)));
inlined_call_stack->set("location", std::string(line.location));
inlined_call_stack->set("line", std::to_string(line.line));
inlined_call_stack->set("inlined", "true");
rocpd_insert_region<category::timer_sampling>(
thread_primary_key, _beg, _end, inlined_name_id,
_track_name.c_str(), inlined_call_stack->to_string().c_str(),
"{}",
// Only include HW counters for first inlined function
(_n == 0) ? hw_counter_json.c_str() : "{}");
}
}
else
{
// NOTE(review): unlike the inlined branch above, `iitr.name` is stored
// here without demangle() -- confirm this is intentional.
const auto* _name = static_strings.emplace(iitr.name).first->c_str();
const auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::timer_sampling>(
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str(), hw_counter_json.c_str());
}
}
}
}
#endif
}
// Entry point for rocpd output of one thread's sampling data.
// When ROCPROFSYS_USE_ROCM > 0 it forwards the overflow samples and then the
// timer samples to their dedicated rocpd_post_process_* handlers.
void
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data)
{
#if ROCPROFSYS_USE_ROCM > 0
rocpd_post_process_overflow_data(_tid, _overflow_data);
rocpd_post_process_timer_data(_tid, _timer_data);
#else
// NOTE(review): this branch first discards all three parameters with (void)
// casts and then passes them to the cache_* calls below. That combination
// looks like interleaved old/new lines -- confirm which statements are meant
// to live in this branch (and in this function).
(void) _tid;
(void) _timer_data;
(void) _overflow_data;
cache_sampling_data(_tid, _timer_data, _overflow_data);
cache_backtrace_metrics(_tid, _timer_data);
#endif
}