diff --git a/.github/workflows/rocprofiler-systems-debian.yml b/.github/workflows/rocprofiler-systems-debian.yml index ae0de579e3..541b3617f9 100644 --- a/.github/workflows/rocprofiler-systems-debian.yml +++ b/.github/workflows/rocprofiler-systems-debian.yml @@ -127,18 +127,29 @@ jobs: -DROCPROFSYS_PYTHON_PREFIX=/opt/conda/envs \ -DROCPROFSYS_PYTHON_ENVS="py3.8;py3.9;py3.10;py3.11;py3.12;py3.13" \ -DROCPROFSYS_MAX_THREADS=64 \ - -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv-offload" \ + -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv-offload;videodecode;jpegdecode;network" \ -DROCPROFSYS_BUILD_NUMBER=1 \ -DUSE_CLANG_OMP=OFF \ $CMAKE_PREFIX_PATH_ARG \ -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Test Clean Up + timeout-minutes: 10 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + du /tmp -d 1 -h + du build/rocprof-sys-tests-output -d 1 -h + df -h + rm -fr /tmp/* build/rocprof-sys-tests-output/* + - name: Install timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ run: | cmake --build build --target install --parallel 2 + rm -rf /opt/rocprofiler-systems - name: CPack and Install working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-redhat.yml b/.github/workflows/rocprofiler-systems-redhat.yml index 4cce255caa..b1a51808eb 100644 --- a/.github/workflows/rocprofiler-systems-redhat.yml +++ b/.github/workflows/rocprofiler-systems-redhat.yml @@ -136,6 +136,15 @@ jobs: -- -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Test Clean Up + timeout-minutes: 10 + working-directory: projects/rocprofiler-systems/ + run: | + du /tmp -d 1 -h + du build/rocprof-sys-tests-output -d 1 -h + df -h + rm -fr /tmp/* build/rocprof-sys-tests-output/* + - name: Install timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml index 556dc208bf..77d2a5f66e 100644 --- a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml +++ b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml @@ -311,17 +311,28 @@ jobs: -DROCPROFSYS_PYTHON_ENVS="py3.7;py3.8;py3.9;py3.10;py3.11" \ -DROCPROFSYS_STRIP_LIBRARIES=${{ matrix.strip }} \ -DROCPROFSYS_MAX_THREADS=64 \ - -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv" \ + -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;openmp-target;openmp-vv;videodecode;jpegdecode;network" \ -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }} \ -DUSE_CLANG_OMP=OFF \ -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Test Clean Up + timeout-minutes: 10 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + du /tmp -d 1 -h + du build/rocprof-sys-tests-output -d 1 -h + df -h + rm -fr /tmp/* build/rocprof-sys-tests-output/* + - name: Install timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ - run: + run: | cmake --build build --target install --parallel 2 + rm -fr /opt/rocprofiler-systems-dev/ - name: CPack and Install working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-ubuntu-noble.yml b/.github/workflows/rocprofiler-systems-ubuntu-noble.yml index 3c750e6d9f..e9f142ca69 100644 --- a/.github/workflows/rocprofiler-systems-ubuntu-noble.yml +++ b/.github/workflows/rocprofiler-systems-ubuntu-noble.yml @@ -133,11 +133,22 @@ jobs: -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Test Clean Up + timeout-minutes: 10 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + du /tmp -d 1 -h + du build/rocprof-sys-tests-output -d 1 -h + df -h + rm -fr /tmp/* build/rocprof-sys-tests-output/* + - name: Install timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ run: | cmake --build build --target install --parallel 2 + rm -fr /opt/rocprofiler-systems/ - name: CPack and Install working-directory: projects/rocprofiler-systems/ diff --git a/projects/rocprofiler-systems/CMakeLists.txt b/projects/rocprofiler-systems/CMakeLists.txt index 07e99458ea..861df97f38 100644 --- a/projects/rocprofiler-systems/CMakeLists.txt +++ b/projects/rocprofiler-systems/CMakeLists.txt @@ -217,6 +217,9 @@ rocprofiler_systems_add_option(ROCPROFSYS_BUILD_DYNINST "Build dyninst from subm rocprofiler_systems_add_option(ROCPROFSYS_BUILD_LIBUNWIND "Build libunwind from submodule" ON ) +rocprofiler_systems_add_option(ROCPROFSYS_BUILD_NLOHMANN_JSON + "Build nlohmann/json from submodule" ON +) rocprofiler_systems_add_option(ROCPROFSYS_BUILD_CODECOV "Build for code coverage" OFF) rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS "Install perfetto tools (i.e. traced, perfetto, etc.)" OFF diff --git a/projects/rocprofiler-systems/cmake/NlohmannJson.cmake b/projects/rocprofiler-systems/cmake/NlohmannJson.cmake new file mode 100644 index 0000000000..7faed38e72 --- /dev/null +++ b/projects/rocprofiler-systems/cmake/NlohmannJson.cmake @@ -0,0 +1,29 @@ +include_guard(GLOBAL) + +if(ROCPROFSYS_BUILD_NLOHMANN_JSON) + message(STATUS "Building nlohmann/json from source") + include(FetchContent) + FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG v3.11.3 + SOURCE_DIR + ${PROJECT_BINARY_DIR}/external/nlohmann/src + BINARY_DIR + ${PROJECT_BINARY_DIR}/external/nlohmann/lib + SUBBUILD_DIR + ${PROJECT_BINARY_DIR}/external/nlohmann/subdir + ) + FetchContent_MakeAvailable(nlohmann_json) + + target_include_directories( + rocprofiler-systems-json + SYSTEM + INTERFACE $ + ) + target_link_libraries(rocprofiler-systems-json INTERFACE nlohmann_json) +else() + message(STATUS "Using system nlohmann/json library") + find_package(nlohmann_json REQUIRED) + target_link_libraries(rocprofiler-systems-json INTERFACE nlohmann_json::nlohmann_json) +endif() diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake b/projects/rocprofiler-systems/cmake/Packages.cmake index 75c93858c0..29a1607db2 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -56,6 +56,9 @@ rocprofiler_systems_add_interface_library(rocprofiler-systems-perfetto rocprofiler_systems_add_interface_library(rocprofiler-systems-sqlite3 "Use SQLite3 for rocpd data storage" ) +rocprofiler_systems_add_interface_library(rocprofiler-systems-json + "Use nlohmann/json for json data handling" +) rocprofiler_systems_add_interface_library(rocprofiler-systems-timemory "Provides timemory libraries" ) @@ -543,6 +546,14 @@ include(Perfetto) include(SQLite3) +# ----------------------------------------------------------------------------------------# +# +# NlohmannJson +# +# ----------------------------------------------------------------------------------------# + +include(NlohmannJson) + # ----------------------------------------------------------------------------------------# # # ELFIO diff --git a/projects/rocprofiler-systems/source/lib/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/CMakeLists.txt index e6699c833d..d0f17dfe41 100644 --- a/projects/rocprofiler-systems/source/lib/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/CMakeLists.txt @@ -41,6 +41,7 @@ target_link_libraries( $ $ $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt index 51eb738b79..0d78fa985d 100644 --- a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt @@ -123,6 +123,7 @@ target_link_libraries( $ $ $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/core/agent.hpp b/projects/rocprofiler-systems/source/lib/core/agent.hpp index 6e8babd41b..ba31693dea 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent.hpp +++ b/projects/rocprofiler-systems/source/lib/core/agent.hpp @@ -55,9 +55,6 @@ struct agent size_t device_type_index{ 0 }; size_t base_id{ 0 }; -#if ROCPROFSYS_USE_ROCM > 0 - amdsmi_processor_handle smi_handle = nullptr; -#endif }; } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp b/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp index cfe11b512e..0957ab7713 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp +++ b/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp @@ -29,12 +29,16 @@ namespace rocprofsys { agent_manager& -agent_manager::get_instance() +get_agent_manager_instance() { - static agent_manager instance; - return instance; + static agent_manager _instance; + return _instance; } +agent_manager::agent_manager(std::vector> agents) +: _agents(std::move(agents)) +{} + void agent_manager::insert_agent(agent& _agent) { diff --git a/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp b/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp index ee3b556514..7ca2d19d48 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp +++ b/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp @@ -32,8 +32,8 @@ namespace rocprofsys struct agent_manager { - static agent_manager& get_instance(); - + agent_manager() = default; + agent_manager(std::vector> agents); agent_manager(const agent_manager&) = delete; agent_manager& operator=(const agent_manager&) = delete; agent_manager(agent_manager&&) = delete; @@ -57,7 +57,9 @@ private: std::vector> _agents; size_t _gpu_agents_cnt{ 0 }; size_t _cpu_agents_cnt{ 0 }; - agent_manager() = default; }; +agent_manager& +get_agent_manager_instance(); + } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index b98cd3f4f1..afee8ca4d4 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -2365,14 +2365,13 @@ get_tmpdir() } std::string -get_database_absolute_path(std::string_view database_name) +get_database_absolute_path(std::string_view database_name, std::string_view suffix) { const auto* _existing_path = std::getenv("ROCPROFSYS_DATABASE_DIR"); auto _dir = _existing_path ? std::string{ _existing_path } : std::string{}; auto _ext = std::string{ "db" }; - auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(), - settings::default_process_suffix(), + auto _cfg = settings::compose_filename_config{ settings::use_output_suffix(), suffix, false, _dir }; const auto get_path = [](const std::string& path) { @@ -2381,8 +2380,9 @@ get_database_absolute_path(std::string_view database_name) : std::string{}; }; - auto _val = settings::compose_output_filename(std::string(database_name), _ext, _cfg); - _dir = get_path(_val); + auto _val = + settings::compose_output_filename(std::string{ database_name }, _ext, _cfg); + _dir = get_path(_val); setenv("ROCPROFSYS_DATABASE_DIR", _dir.c_str(), 1); diff --git a/projects/rocprofiler-systems/source/lib/core/config.hpp b/projects/rocprofiler-systems/source/lib/core/config.hpp index 14ac0894a7..2213160002 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.hpp +++ b/projects/rocprofiler-systems/source/lib/core/config.hpp @@ -362,7 +362,7 @@ std::string get_tmpdir(); std::string -get_database_absolute_path(std::string_view database_name); +get_database_absolute_path(std::string_view database_name, std::string_view tag); bool& get_use_rocpd() ROCPROFSYS_HOT; diff --git a/projects/rocprofiler-systems/source/lib/core/cpu.cpp b/projects/rocprofiler-systems/source/lib/core/cpu.cpp index 2a484eccfd..9e824b1c18 100644 --- a/projects/rocprofiler-systems/source/lib/core/cpu.cpp +++ b/projects/rocprofiler-systems/source/lib/core/cpu.cpp @@ -166,7 +166,7 @@ query_cpu_agents() return; } - auto& _agent_manager = agent_manager::get_instance(); + auto& _agent_manager = get_agent_manager_instance(); auto cpu_data = get_cpu_info(); for(auto& cpu : cpu_data) diff --git a/projects/rocprofiler-systems/source/lib/core/gpu.cpp b/projects/rocprofiler-systems/source/lib/core/gpu.cpp index e35e15312f..cc19d64a3f 100644 --- a/projects/rocprofiler-systems/source/lib/core/gpu.cpp +++ b/projects/rocprofiler-systems/source/lib/core/gpu.cpp @@ -121,7 +121,7 @@ query_rocm_agents() auto iterator = []([[maybe_unused]] rocprofiler_agent_version_t version, const void** agents, size_t num_agents, [[maybe_unused]] void* user_data) -> rocprofiler_status_t { - auto& _agent_manager = agent_manager::get_instance(); + auto& _agent_manager = get_agent_manager_instance(); for(size_t i = 0; i < num_agents; ++i) { const auto* _agent = static_cast(agents[i]); @@ -153,7 +153,7 @@ query_rocm_agents() 1, "Exception thrown getting the rocm agents: %s. _dev_cnt=%ld\n", _e.what(), _dev_cnt); } - _dev_cnt = agent_manager::get_instance().get_gpu_agents_count(); + _dev_cnt = get_agent_manager_instance().get_gpu_agents_count(); #endif return _dev_cnt; } diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt index db789f7254..9deca07388 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt @@ -20,15 +20,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -set(rocpd_sources - ${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp - ${CMAKE_CURRENT_LIST_DIR}/json.cpp -) +set(rocpd_sources ${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp) -set(rocpd_headers - ${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp - ${CMAKE_CURRENT_LIST_DIR}/json.hpp -) +set(rocpd_headers ${CMAKE_CURRENT_LIST_DIR}/data_processor.hpp) target_sources(rocprofiler-systems-core-library PRIVATE ${rocpd_sources} ${rocpd_headers}) diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp index b1ab7cc350..00174c4be0 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp @@ -24,17 +24,23 @@ #include "core/rocpd/data_storage/database.hpp" #include "core/rocpd/data_storage/table_insert_query.hpp" #include "debug.hpp" +#include +#include namespace rocprofsys { namespace rocpd { -data_processor::data_processor() +data_processor::data_processor(std::shared_ptr database) +: _database(std::move(database)) { - data_storage::database::get_instance().initialize_schema(); - _upid = data_storage::database::get_instance().get_upid(); + if(!_database) + { + throw std::invalid_argument("Provided pointer to a non-existing database!"); + } + _database->initialize_schema(); + _upid = _database->get_upid(); - // Initialize event statement initialize_event_stmt(); initialize_pmc_event_stmt(); initialize_sample_stmt(); @@ -48,22 +54,14 @@ data_processor::data_processor() initialize_memory_alloc_stmt(); } -data_processor& -data_processor::get_instance() -{ - static data_processor _instance; - return _instance; -} - void data_processor::initialize_metadata() { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( - query.set_table_name("rocpd_metadata_" + _upid) - .set_columns("tag", "value") - .set_values("upid", _upid) - .get_query_string()); + _database->execute_query(query.set_table_name("rocpd_metadata_" + _upid) + .set_columns("tag", "value") + .set_values("upid", _upid) + .get_query_string()); } size_t @@ -73,13 +71,12 @@ data_processor::insert_string(const char* str) if(it != _string_map.end()) return _string_map.at(str); data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( - query.set_table_name("rocpd_string_" + _upid) - .set_columns("guid", "string") - .set_values(_upid, str) - .get_query_string()); + _database->execute_query(query.set_table_name("rocpd_string_" + _upid) + .set_columns("guid", "string") + .set_values(_upid, str) + .get_query_string()); - const auto string_id = data_storage::database::get_instance().get_last_insert_id(); + const auto string_id = _database->get_last_insert_id(); _string_map.emplace(str, string_id); return string_id; } @@ -91,7 +88,7 @@ data_processor::insert_node_info(size_t node_id, size_t hash, const char* machin const char* hardware_name, const char* domain_name) { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( + _database->execute_query( query.set_table_name("rocpd_info_node_" + _upid) .set_columns("id", "guid", "hash", "machine_id", "system_name", "hostname", "release", "version", "hardware_name", "domain_name") @@ -107,13 +104,13 @@ data_processor::insert_process_info(size_t nid, size_t ppid, size_t pid, size_t const char* extdata) { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( - query.set_table_name("rocpd_info_process_" + _upid) - .set_columns("id", "guid", "nid", "ppid", "pid", "init", "fini", "start", - "end", "command", "environment", "extdata") - .set_values(pid, _upid, nid, ppid, pid, init, fini, start, end, command, - environment, extdata) - .get_query_string()); + _database->execute_query(query.set_table_name("rocpd_info_process_" + _upid) + .set_columns("id", "guid", "nid", "ppid", "pid", "init", + "fini", "start", "end", "command", + "environment", "extdata") + .set_values(pid, _upid, nid, ppid, pid, init, fini, + start, end, command, environment, extdata) + .get_query_string()); } size_t @@ -125,7 +122,7 @@ data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type, const char* extdata) { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( + _database->execute_query( query.set_table_name("rocpd_info_agent_" + _upid) .set_columns("guid", "nid", "pid", "type", "absolute_index", "logical_index", "type_index", "uuid", "name", "model_name", "vendor_name", @@ -135,7 +132,7 @@ data_processor::insert_agent(size_t node_id, size_t pid, const char* agent_type, user_name, extdata) .get_query_string()); - return data_storage::database::get_instance().get_last_insert_id(); + return _database->get_last_insert_id(); } void @@ -151,13 +148,13 @@ data_processor::insert_track(const char* track_name, size_t node_id, size_t proc auto name_id = insert_string(track_name); data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( + _database->execute_query( query.set_table_name("rocpd_track_" + _upid) .set_columns("guid", "nid", "pid", "tid", "name_id", "extdata") .set_values(_upid, node_id, process_id, thread_id, name_id, extdata) .get_query_string()); - auto track_id = data_storage::database::get_instance().get_last_insert_id(); + auto track_id = _database->get_last_insert_id(); _tracks[track_name] = track_name_map{ track_id, name_id }; } @@ -191,9 +188,9 @@ data_processor::insert_pmc_description( component, units, value_type, block, expression, is_constant, is_derived, extdata) .get_query_string(); - data_storage::database::get_instance().execute_query(query); + _database->execute_query(query); - auto pmc_id = data_storage::database::get_instance().get_last_insert_id(); + auto pmc_id = _database->get_last_insert_id(); _pmc_descriptor_map.emplace( std::pair{ { agent_id, name }, pmc_id }); } @@ -248,7 +245,7 @@ data_processor::insert_event(size_t string_primary_key, size_t stack_id, { _insert_event_statement(_upid.c_str(), string_primary_key, stack_id, parent_stack_id, correlation_id, call_stack, line_info, extdata); - return data_storage::database::get_instance().get_last_insert_id(); + return _database->get_last_insert_id(); } void @@ -261,9 +258,9 @@ data_processor::initialize_event_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); _insert_event_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database->create_statement_executor( + query); } void @@ -275,8 +272,8 @@ data_processor::initialize_pmc_event_stmt() .set_values('?', '?', '?', '?', '?') .get_query_string(); _insert_pmc_event_statement = - data_storage::database::get_instance() - .create_statement_executor( + _database + ->create_statement_executor( query); } @@ -289,9 +286,8 @@ data_processor::initialize_sample_stmt() .set_values('?', '?', '?', '?', '?') .get_query_string(); _insert_sample_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database->create_statement_executor(query); } void @@ -304,9 +300,9 @@ data_processor::initialize_region_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); _insert_region_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database + ->create_statement_executor(query); } void @@ -323,13 +319,10 @@ data_processor::initialize_kernel_dispatch_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); - _insert_kernel_dispatch_statement = - data_storage::database::get_instance() - .create_statement_executor( - query); + _insert_kernel_dispatch_statement = _database->create_statement_executor< + const char*, size_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, + uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, size_t, size_t, + size_t, size_t, size_t, const char*>(query); } void @@ -344,12 +337,9 @@ data_processor::initialize_memory_copy_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); - _insert_memory_copy_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _insert_memory_copy_statement = _database->create_statement_executor< + const char*, size_t, size_t, size_t, uint64_t, uint64_t, size_t, size_t, size_t, + size_t, size_t, size_t, size_t, size_t, size_t, size_t, const char*>(query); } void @@ -367,11 +357,11 @@ data_processor::initialize_kernel_symbol_stmt() '?', '?', '?') .get_query_string(); _insert_kernel_symbol_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database->create_statement_executor( + query); } void @@ -385,10 +375,9 @@ data_processor::initialize_code_object_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); _insert_code_object_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database->create_statement_executor(query); } void @@ -401,9 +390,9 @@ data_processor::initialize_args_stmt() .set_values('?', '?', '?', '?', '?', '?', '?') .get_query_string(); _insert_args_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _database->create_statement_executor( + query); } void @@ -417,12 +406,9 @@ data_processor::initialize_memory_alloc_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); - _insert_memory_alloc_statement = - data_storage::database::get_instance() - .create_statement_executor< - const char*, size_t, size_t, size_t, size_t, const char*, const char*, - uint64_t, uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>( - query); + _insert_memory_alloc_statement = _database->create_statement_executor< + const char*, size_t, size_t, size_t, size_t, const char*, const char*, uint64_t, + uint64_t, size_t, size_t, size_t, size_t, size_t, const char*>(query); // Statement without agent_id query = query_builder.set_table_name("rocpd_memory_allocate_" + _upid) @@ -432,11 +418,9 @@ data_processor::initialize_memory_alloc_stmt() .set_values('?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?') .get_query_string(); - _insert_memory_alloc_no_agent_statement = - data_storage::database::get_instance() - .create_statement_executor(query); + _insert_memory_alloc_no_agent_statement = _database->create_statement_executor< + const char*, size_t, size_t, size_t, const char*, const char*, uint64_t, uint64_t, + size_t, size_t, size_t, size_t, size_t, const char*>(query); } void @@ -451,7 +435,7 @@ data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t proc const char* name, const char* extdata) { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( + _database->execute_query( query.set_table_name("rocpd_info_stream_" + _upid) .set_columns("id", "guid", "nid", "pid", "name", "extdata") .set_values(stream_id, _upid, node_id, process_id, name, extdata) @@ -463,7 +447,7 @@ data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t proces const char* name, const char* extdata) { data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( + _database->execute_query( query.set_table_name("rocpd_info_queue_" + _upid) .set_columns("id", "guid", "nid", "pid", "name", "extdata") .set_values(queue_id, _upid, node_id, process_id, name, extdata) @@ -575,15 +559,15 @@ data_processor::insert_thread_info(size_t node_id, size_t parent_process_id, } data_storage::queries::table_insert_query query; - data_storage::database::get_instance().execute_query( - query.set_table_name("rocpd_info_thread_" + _upid) - .set_columns("guid", "nid", "ppid", "pid", "tid", "name", "start", "end", - "extdata") - .set_values(_upid.c_str(), node_id, parent_process_id, process_id, thread_id, - name, start, end, extdata) - .get_query_string()); + _database->execute_query(query.set_table_name("rocpd_info_thread_" + _upid) + .set_columns("guid", "nid", "ppid", "pid", "tid", "name", + "start", "end", "extdata") + .set_values(_upid.c_str(), node_id, parent_process_id, + process_id, thread_id, name, start, end, + extdata) + .get_query_string()); - auto thread_idx = data_storage::database::get_instance().get_last_insert_id(); + auto thread_idx = _database->get_last_insert_id(); _thread_id_map.emplace(thread_id, thread_idx); return thread_idx; } @@ -604,7 +588,7 @@ void data_processor::flush() { // Flush all pending data to the database - data_storage::database::get_instance().flush(); + _database->flush(); } } // namespace rocpd diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp index 0eeb9a533f..2a38c357f4 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp @@ -22,11 +22,11 @@ #pragma once +#include "core/rocpd/data_storage/database.hpp" #include #include -#include +#include #include -#include #include #include @@ -102,7 +102,13 @@ private: }; public: - static data_processor& get_instance(); + explicit data_processor(std::shared_ptr database); + + data_processor() = delete; + data_processor(const data_processor&) = delete; + data_processor& operator=(const data_processor&) = delete; + data_processor(const data_processor&&) = delete; + data_processor& operator=(const data_processor&&) = delete; size_t insert_string(const char* str); @@ -203,10 +209,6 @@ public: void flush(); private: - data_processor(); - data_processor(data_processor&) = delete; - data_processor& operator=(const data_processor&) = delete; - void initialize_pmc_event_stmt(); void initialize_event_stmt(); void initialize_sample_stmt(); @@ -220,6 +222,7 @@ private: void initialize_memory_alloc_stmt(); private: + std::shared_ptr _database; std::unordered_map _tracks; std::unordered_map _pmc_descriptor_map; diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp index 9cd99925ff..f8c76e2a54 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,17 +51,11 @@ namespace rocpd { namespace data_storage { -database& -database::get_instance() +database::database(int pid, int ppid) { - static database _instance; - return _instance; -} - -database::database() -{ - auto db_name = std::string_view{ "rocpd.db" }; - auto abs_db_path = rocprofsys::get_database_absolute_path(db_name); + auto _tag = std::to_string(pid); + auto db_name = std::string{ "rocpd" }; + auto abs_db_path = rocprofsys::get_database_absolute_path(db_name, _tag); create_directory_for_database_file(abs_db_path); ROCPROFSYS_VERBOSE(0, "Database: %s\r\n", abs_db_path.c_str()); @@ -68,6 +63,7 @@ database::database() "database open failed!"); validate_sqlite3_result(sqlite3_open(abs_db_path.c_str(), &_sqlite3_db), "", "database open failed!"); + m_upid = generate_upid(pid, ppid); } database::~database() @@ -120,8 +116,10 @@ database::initialize_schema() std::regex guid_pattern("\\{\\{guid\\}\\}"); std::regex view_upid_pattern("\\{\\{view_upid\\}\\}"); - query = std::regex_replace(query, upid_pattern, "_" + get_upid()); - query = std::regex_replace(query, guid_pattern, get_upid()); + auto upid = get_upid(); + + query = std::regex_replace(query, upid_pattern, "_" + upid); + query = std::regex_replace(query, guid_pattern, upid); query = std::regex_replace(query, view_upid_pattern, ""); validate_sqlite3_result( @@ -141,12 +139,15 @@ database::execute_query(const std::string& query) std::string database::get_upid() { - static std::string _upid = []() { - auto n_info = node_info::get_instance(); - auto guid = common::md5sum{ n_info.id, getpid(), getppid() }; - return guid.hexdigest(); - }(); - return _upid; + return m_upid; +} + +std::string +database::generate_upid(const int pid, const int ppid) +{ + auto n_info = node_info::get_instance(); + auto guid = common::md5sum{ n_info.id, pid, ppid }; + return guid.hexdigest(); } size_t diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.hpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.hpp index 2fdcf80825..503108d061 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.hpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.hpp @@ -38,27 +38,29 @@ static std::mutex _mutex; class database { public: - static database& get_instance(); - - database(database&) = delete; - database& operator=(database&) = delete; + explicit database(int pid, int ppid); + database() = delete; + database(database&) = delete; + database& operator=(database&) = delete; + database(database&&) = default; + database& operator=(database&&) = default; void flush(); ~database(); private: - database(); - template - inline void validate_sqlite3_result(int sqlite3_error_code, const char* query, - Args&&... args) + void validate_sqlite3_result(int sqlite3_error_code, const char* query, + Args&&... args) { std::stringstream ss; ss << "\n===========================================================\n"; ss << "Database Error\n"; ((ss << args << " "), ...); ss << "\nQuery: " << query << "\n"; + // Fetch error message of last sqlite3_* call + const auto* error_message = sqlite3_errstr(sqlite3_error_code); switch(sqlite3_error_code) { case SQLITE_OK: @@ -98,7 +100,7 @@ private: } break; } - ss << " [Sqlite3 error: " << sqlite3_errstr(sqlite3_error_code); + ss << " [Sqlite3 error: " << error_message; ss << " (Extended error message: " << sqlite3_errmsg(_sqlite3_db_temp) << ")]"; throw std::runtime_error(ss.str()); } @@ -110,17 +112,16 @@ private: std::is_same_v, int32_t> || std::is_same_v, uint32_t>), int> = 0> - inline void bind_value([[maybe_unused]] sqlite3_stmt* stmt, - [[maybe_unused]] int position, [[maybe_unused]] T& _value, - [[maybe_unused]] const std::string& query) + void bind_value([[maybe_unused]] sqlite3_stmt* stmt, [[maybe_unused]] int position, + [[maybe_unused]] T& _value, [[maybe_unused]] const std::string& query) { throw std::runtime_error("Unsupported type for binding!"); } template (), int> = 0> - inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value, - const std::string& query) + void bind_value(sqlite3_stmt* stmt, int position, T&& _value, + const std::string& query) { validate_sqlite3_result( sqlite3_bind_text(stmt, position, _value, -1, SQLITE_STATIC), query.c_str(), @@ -129,8 +130,8 @@ private: template >, int> = 0> - inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value, - const std::string& query) + void bind_value(sqlite3_stmt* stmt, int position, T&& _value, + const std::string& query) { validate_sqlite3_result( sqlite3_bind_double(stmt, position, _value), query.c_str(), @@ -140,8 +141,8 @@ private: template , int64_t> || std::is_same_v, uint64_t>, int> = 0> - inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value, - const std::string& query) + void bind_value(sqlite3_stmt* stmt, int position, T&& _value, + const std::string& query) { validate_sqlite3_result(sqlite3_bind_int64(stmt, position, _value), query.c_str(), "Failed to bind int64_t/uint64_t! Position: ", position, @@ -151,8 +152,8 @@ private: template , int32_t> || std::is_same_v, uint32_t>, int> = 0> - inline void bind_value(sqlite3_stmt* stmt, int position, T&& _value, - const std::string& query) + void bind_value(sqlite3_stmt* stmt, int position, T&& _value, + const std::string& query) { validate_sqlite3_result(sqlite3_bind_int(stmt, position, _value), query.c_str(), "Failed to bind int32_t/uint32_t! Position: ", position, @@ -192,11 +193,16 @@ public: }; } - static std::string get_upid(); + std::string get_upid(); private: - sqlite3* _sqlite3_db{ nullptr }; - sqlite3* _sqlite3_db_temp{ nullptr }; + static std::string generate_upid(const int pid, const int ppid); + +private: + sqlite3* _sqlite3_db{ nullptr }; + sqlite3* _sqlite3_db_temp{ nullptr }; + std::string m_tag; + std::string m_upid; }; } // namespace data_storage diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/json.cpp b/projects/rocprofiler-systems/source/lib/core/rocpd/json.cpp deleted file mode 100644 index aa7a81bb3c..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/json.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// MIT License -// -// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "json.hpp" -#include - -namespace rocpd -{ - -std::shared_ptr -json::create() -{ - return std::shared_ptr(new json()); -} - -void -json::set(const std::string& key, const json_value& value) -{ - data[key] = std::make_shared(value); -} - -std::string -json::to_string() const -{ - std::ostringstream oss; - oss << "{"; - bool first = true; - - for(const auto& [key, value] : data) - { - if(!first) oss << ", "; - first = false; - - oss << "\"" << key << "\": " << stringify(value); - } - - oss << "}"; - return oss.str(); -} - -std::string -json::stringify(const std::shared_ptr& value) -{ - std::ostringstream oss; - std::visit( - [&oss](auto&& arg) { - using T = std::decay_t; - if constexpr(std::is_same_v) - oss << "\"" << arg << "\""; - else if constexpr(std::is_same_v) - oss << (arg ? "true" : "false"); - else if constexpr(std::is_same_v) - oss << "null"; - else if constexpr(std::is_same_v>) - { - oss << "["; - bool first = true; - for(const auto& item : arg) - { - if(!first) oss << ", "; - first = false; - oss << item.to_string(); - } - oss << "]"; - } - else if constexpr(std::is_same_v>) - { - oss << arg->to_string(); - } - else - { - // handle int + double - oss << arg; - } - }, - *value); - return oss.str(); -} - -} // namespace rocpd diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/json.hpp b/projects/rocprofiler-systems/source/lib/core/rocpd/json.hpp deleted file mode 100644 index d47eaf4bb0..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/json.hpp +++ /dev/null @@ -1,57 +0,0 @@ -// MIT License -// -// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include -#include -#include -#include -#include - -namespace rocpd -{ - -class json -{ -public: - static std::shared_ptr create(); - - using json_value = - std::variant, - std::nullptr_t, std::shared_ptr>; - - void set(const std::string& key, const json_value& value); - - std::string to_string() const; - -private: - json() = default; - -private: - static std::string stringify(const std::shared_ptr& value); - -private: - std::unordered_map> data; -}; - -} // namespace rocpd diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp index 33241a4f19..9239457891 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include using namespace std::chrono_literals; @@ -45,20 +46,27 @@ constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms; constexpr auto NUM_OF_THREADS = 1; } // namespace -buffer_storage::buffer_storage(pid_t _pid) +buffer_storage::buffer_storage() { ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); m_thread_pool = std::make_unique(NUM_OF_THREADS); m_thread_pool->initialize_threadpool(NUM_OF_THREADS); m_task_group = std::make_unique>(m_thread_pool.get()); +} + +void +buffer_storage::start_flushing_thread(pid_t _pid) +{ + ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); m_task_group->exec([this, _pid]() { - std::ofstream _ofs(filename, std::ios::binary | std::ios::out); + auto filepath = get_buffered_storage_filename(get_root_process_id(), getpid()); + std::ofstream _ofs(filepath, std::ios::binary | std::ios::out); if(!_ofs) { std::stringstream _ss; - _ss << "Error opening file for writing: " << filename; + _ss << "Error opening file for writing: " << filepath; throw std::runtime_error(_ss.str()); } @@ -114,9 +122,23 @@ buffer_storage::buffer_storage(pid_t _pid) }); } +buffer_storage::~buffer_storage() +{ + shutdown(); + if(m_thread_pool && m_thread_pool->is_alive()) + { + m_thread_pool->destroy_threadpool(); + } +} + void buffer_storage::shutdown() { + if(!m_running) + { + return; + } + ROCPROFSYS_DEBUG("Buffer storage shutting down.."); m_running = false; m_shutdown_condition.notify_all(); @@ -131,7 +153,11 @@ buffer_storage::shutdown() std::mutex _exit_mutex; std::unique_lock _exit_lock{ _exit_mutex }; m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; }); - m_thread_pool->destroy_threadpool(); + + if(m_thread_pool && m_thread_pool->is_alive()) + { + m_thread_pool->destroy_threadpool(); + } } void diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp index fdddf14d93..c72a6313a7 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -62,9 +61,10 @@ public: } constexpr bool is_supported_type = (supported_types::is_supported && ...); - static_assert(is_supported_type, "Supported types are const char*, char*, " - "unsigned long, unsigned int, long, unsigned " - "char, std::vector and int."); + static_assert(is_supported_type, + "Supported types are const char*, char*, " + "unsigned long, unsigned int, long, unsigned " + "char, std::vector, double, and int."); auto arg_size = get_size(values...); auto total_size = arg_size + sizeof(type) + sizeof(size_t); @@ -102,9 +102,12 @@ public: (store_value(values), ...); } + void start_flushing_thread(pid_t pid); + ~buffer_storage(); + private: friend class cache_manager; - buffer_storage(pid_t _pid); + buffer_storage(); void shutdown(); bool is_running() const; void fragment_memory(); @@ -119,7 +122,7 @@ private: }; using supported_types = typelist, uint8_t, int64_t>; + std::vector, uint8_t, int64_t, double>; template static constexpr bool is_string_literal_v = diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp index dbecaf01ca..73aa345bf7 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp @@ -21,15 +21,91 @@ // SOFTWARE. #include "cache_manager.hpp" +#include "agent_manager.hpp" #include "core/config.hpp" #include "core/trace_cache/storage_parser.hpp" #include "debug.hpp" +#include "library/runtime.hpp" +#include "trace_cache/cache_utility.hpp" +#include "trace_cache/metadata_registry.hpp" #include "trace_cache/rocpd_post_processing.hpp" +#include +#include +#include namespace rocprofsys { namespace trace_cache { +namespace +{ +std::vector +list_dir_files(const std::string& path) +{ + DIR* dir = opendir(path.c_str()); + if(dir == nullptr) + { + ROCPROFSYS_THROW("Error opening directory: %s", path.c_str()); + } + + std::vector result{}; + dirent* entry; + + while((entry = readdir(dir)) != nullptr) + { + if(std::string(entry->d_name) != "." && std::string(entry->d_name) != "..") + { + result.emplace_back(entry->d_name); + } + } + + closedir(dir); + return result; +} + +struct cache_files +{ + std::string buff_storage; + std::string metadata; +}; + +std::map +get_cache_files() +{ + const auto root_pid = get_root_process_id(); + const auto tmp_files = list_dir_files("/tmp/"); + + std::map cache_map{}; + + auto parse_and_fill_cache = [&](const std::string& filename) { + const std::regex buff_regex(R"(buffered_storage_(\d+)_(\d+)\.bin)"); + const std::regex meta_regex(R"(metadata_(\d+)_(\d+)\.json)"); + std::smatch match; + + if(std::regex_match(filename, match, buff_regex)) + { + int parent_pid = std::stoi(match[1]); + int pid = std::stoi(match[2]); + if(parent_pid == root_pid) + { + cache_map[pid].buff_storage = "/tmp/" + filename; + } + } + else if(std::regex_match(filename, match, meta_regex)) + { + int parent_pid = std::stoi(match[1]); + int pid = std::stoi(match[2]); + if(parent_pid == root_pid) + { + cache_map[pid].metadata = "/tmp/" + filename; + } + } + }; + + std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache); + return cache_map; +} +} // namespace cache_manager& cache_manager::get_instance() @@ -38,35 +114,82 @@ cache_manager::get_instance() return instance; } -cache_manager::cache_manager() -: m_postprocessing{ m_metadata } -{ - m_postprocessing.register_parser_callback(m_parser); -} - void -cache_manager::post_process() +cache_manager::post_process_bulk() { - if(m_storage.is_running()) + if(is_root_process()) { - ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down " - "cache storage. Calling shutdown explicitly..\n"); - shutdown(); - } + if(m_storage.is_running()) + { + ROCPROFSYS_WARNING(2, + "Postprocessing called without previously shutting down " + "cache storage. Calling shutdown explicitly..\n"); + shutdown(); + } - if(get_use_rocpd()) - { - ROCPROFSYS_PRINT( - "Generating rocpd with collected data. This may take a while..\n"); - } - post_process_metadata(); - m_parser.consume_storage(); -} + if(get_use_rocpd()) + { + ROCPROFSYS_PRINT( + "Generating rocpd with collected data. This may take a while..\n"); -void -cache_manager::post_process_metadata() -{ - m_postprocessing.post_process_metadata(); + auto _cache_files = get_cache_files(); + + std::vector rocpd_threads; + ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + + rocpd_threads.emplace_back([this]() { + auto pid = getpid(); + auto ppid = get_root_process_id(); + rocpd_post_processing _post_processing( + m_metadata, get_agent_manager_instance(), pid, ppid); + storage_parser _parser( + get_buffered_storage_filename(get_root_process_id(), getpid())); + _post_processing.register_parser_callback(_parser); + _post_processing.post_process_metadata(); + _parser.consume_storage(); + }); + + for(const auto& [pid, files] : _cache_files) + { + if(!files.buff_storage.empty() && !files.metadata.empty()) + { + rocpd_threads.emplace_back([pid = pid, files = files]() { + ROCPROFSYS_DEBUG( + "Creating database for [%d] from buffered storage " + "file: %s and from metadata file: %s\n", + pid, files.buff_storage.c_str(), files.metadata.c_str()); + + std::vector> _agents; + metadata_registry _metadata; + + auto res = _metadata.load_from_file(files.metadata, _agents); + if(!res) + { + ROCPROFSYS_WARNING(0, + "Load from file for metadata failed: %s\n", + files.metadata.c_str()); + return; + } + + agent_manager _agent_manager{ _agents }; + auto ppid = get_root_process_id(); + rocpd_post_processing _post_processing(_metadata, _agent_manager, + pid, ppid); + storage_parser _parser(files.buff_storage); + _post_processing.register_parser_callback(_parser); + _post_processing.post_process_metadata(); + _parser.consume_storage(); + std::remove(files.metadata.c_str()); // Remove metadata file + }); + } + } + + for(auto& thread : rocpd_threads) + { + thread.join(); + } + } + } } void diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp index acbc447a34..86953f0454 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp @@ -23,7 +23,6 @@ #pragma once #include "buffer_storage.hpp" -#include "core/trace_cache/rocpd_post_processing.hpp" #include "metadata_registry.hpp" #include "storage_parser.hpp" @@ -37,24 +36,22 @@ class cache_manager public: static cache_manager& get_instance(); buffer_storage& get_buffer_storage() { return m_storage; } - metadata_registry& get_metadata_regsitry() { return m_metadata; } + metadata_registry& get_metadata_registry() { return m_metadata; } void shutdown(); - void post_process(); + void post_process_bulk(); private: void post_process_metadata(); - cache_manager(); + cache_manager() = default; - buffer_storage m_storage{ getpid() }; - metadata_registry m_metadata; - storage_parser m_parser{ getpid() }; - rocpd_post_processing m_postprocessing; + buffer_storage m_storage; + metadata_registry m_metadata; }; inline metadata_registry& get_metadata_registry() { - return cache_manager::get_instance().get_metadata_regsitry(); + return cache_manager::get_instance().get_metadata_registry(); } inline buffer_storage& diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp index d59cf8c3e5..b8a1bedd95 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp @@ -21,6 +21,7 @@ // SOFTWARE. #pragma once +#include "library/runtime.hpp" #include "sample_type.hpp" #include #include @@ -33,7 +34,18 @@ namespace trace_cache { constexpr size_t buffer_size = 100 * tim::units::megabyte; constexpr size_t flush_threshold = 80 * tim::units::megabyte; -const auto filename = "/tmp/buffered_storage_" + std::to_string(getpid()) + ".bin"; + +const auto tmp_directory = std::string{ "/tmp/" }; + +const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) { + return std::string{ tmp_directory + "buffered_storage_" + std::to_string(ppid) + "_" + + std::to_string(pid) + ".bin" }; +}; + +const auto get_metadata_filepath = [](const int& ppid, const int& pid) { + return std::string{ tmp_directory + "metadata_" + std::to_string(ppid) + "_" + + std::to_string(pid) + ".json" }; +}; constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t); using buffer_array_t = std::array; diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp index 0284f3f585..9eed5c9d5e 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp @@ -21,10 +21,15 @@ // SOFTWARE. #include "metadata_registry.hpp" +#include "agent_manager.hpp" #include "core/debug.hpp" #include #include +#include + +#include + namespace rocprofsys { namespace trace_cache @@ -51,6 +56,428 @@ assign_set_to_vector(T& result) { return [&result](const auto& _data) { result.assign(_data.cbegin(), _data.cend()); }; } + +nlohmann::json +to_json(const info::process& process) +{ + nlohmann::json result; + result["pid"] = process.pid; + result["ppid"] = process.ppid; + result["command"] = process.command; + result["start"] = process.start; + result["end"] = process.end; + return result; +} + +info::process +from_json_process(const nlohmann::json& _json) +{ + info::process p; + p.pid = _json["pid"].get(); + p.ppid = _json["ppid"].get(); + p.command = _json["command"].get(); + p.start = _json["start"].get(); + p.end = _json["end"].get(); + return p; +} + +nlohmann::json +to_json(const info::pmc& pmc) +{ + nlohmann::json result; + result["type"] = static_cast(pmc.type); + result["agent_type_index"] = static_cast(pmc.agent_type_index); + result["target_arch"] = pmc.target_arch; + result["event_code"] = static_cast(pmc.event_code); + result["instance_id"] = static_cast(pmc.instance_id); + result["name"] = pmc.name; + result["symbol"] = pmc.symbol; + result["description"] = pmc.description; + result["long_description"] = pmc.long_description; + result["component"] = pmc.component; + result["units"] = pmc.units; + result["value_type"] = pmc.value_type; + result["block"] = pmc.block; + result["expression"] = pmc.expression; + result["is_constant"] = pmc.is_constant; + result["is_derived"] = pmc.is_derived; + result["extdata"] = pmc.extdata; + return result; +} + +info::pmc +from_json_pmc(const nlohmann::json& _json) +{ + info::pmc p; + p.type = static_cast(_json["type"].get()); + p.agent_type_index = _json["agent_type_index"].get(); + p.target_arch = _json["target_arch"].get(); + p.event_code = _json["event_code"].get(); + p.instance_id = _json["instance_id"].get(); + p.name = _json["name"].get(); + p.symbol = _json["symbol"].get(); + p.description = _json["description"].get(); + p.long_description = _json["long_description"].get(); + p.component = _json["component"].get(); + p.units = _json["units"].get(); + p.value_type = _json["value_type"].get(); + p.block = _json["block"].get(); + p.expression = _json["expression"].get(); + p.is_constant = _json["is_constant"].get(); + p.is_derived = _json["is_derived"].get(); + p.extdata = _json["extdata"].get(); + return p; +} + +nlohmann::json +to_json(const info::thread& thread) +{ + nlohmann::json result; + result["parent_process_id"] = thread.parent_process_id; + result["process_id"] = thread.process_id; + result["thread_id"] = static_cast(thread.thread_id); + result["start"] = thread.start; + result["end"] = thread.end; + result["extdata"] = thread.extdata; + return result; +} + +info::thread +from_json_thread(const nlohmann::json& _json) +{ + info::thread t; + t.parent_process_id = _json["parent_process_id"].get(); + t.process_id = _json["process_id"].get(); + t.thread_id = _json["thread_id"].get(); + t.start = _json["start"].get(); + t.end = _json["end"].get(); + t.extdata = _json["extdata"].get(); + return t; +} + +nlohmann::json +to_json(const info::track& track) +{ + nlohmann::json result; + result["track_name"] = track.track_name; + if(track.thread_id.has_value()) + { + result["thread_id"] = static_cast(track.thread_id.value()); + } + else + { + result["thread_id"] = nullptr; + } + result["extdata"] = track.extdata; + return result; +} + +info::track +from_json_track(const nlohmann::json& _json) +{ + info::track t; + t.track_name = _json["track_name"].get(); + if(_json["thread_id"].is_null()) + { + t.thread_id = std::nullopt; + } + else + { + t.thread_id = _json["thread_id"].get(); + } + t.extdata = _json["extdata"].get(); + return t; +} + +#if ROCPROFSYS_USE_ROCM + +nlohmann::json +to_json(const rocprofiler_callback_tracing_code_object_load_data_t& code_object) +{ + nlohmann::json result; + result["code_object_id"] = static_cast(code_object.code_object_id); + result["uri"] = std::string(code_object.uri); + result["load_base"] = static_cast(code_object.load_base); + result["load_size"] = static_cast(code_object.load_size); + result["load_delta"] = static_cast(code_object.load_delta); + result["storage_type"] = static_cast(code_object.storage_type); +# if(ROCPROFILER_VERSION >= 600) + result["agent_id_handle"] = static_cast(code_object.agent_id.handle); +# else + result["agent_id_handle"] = static_cast(code_object.rocp_agent.handle); +# endif + return result; +} + +rocprofiler_callback_tracing_code_object_load_data_t +from_json_code_object(const nlohmann::json& _json) +{ + rocprofiler_callback_tracing_code_object_load_data_t co = {}; + co.code_object_id = _json["code_object_id"].get(); + auto uri_str = _json["uri"].get(); + co.uri = new char[uri_str.size() + 1]; + strncpy(const_cast(co.uri), uri_str.c_str(), uri_str.size() + 1); + co.load_base = _json["load_base"].get(); + co.load_size = _json["load_size"].get(); + co.load_delta = _json["load_delta"].get(); + co.storage_type = static_cast( + _json["storage_type"].get()); + auto handle = _json["agent_id_handle"].get(); +# if(ROCPROFILER_VERSION >= 600) + co.agent_id.handle = handle; +# else + co.rocp_agent.handle = handle; +# endif + return co; +} + +nlohmann::json +to_json(const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& + kernel_symbol) +{ + nlohmann::json result; + result["kernel_id"] = static_cast(kernel_symbol.kernel_id); + result["code_object_id"] = static_cast(kernel_symbol.code_object_id); + result["kernel_name"] = std::string(kernel_symbol.kernel_name); + result["kernel_object"] = static_cast(kernel_symbol.kernel_object); + result["kernarg_segment_size"] = static_cast(kernel_symbol.kernarg_segment_size); + result["kernarg_segment_alignment"] = + static_cast(kernel_symbol.kernarg_segment_alignment); + result["group_segment_size"] = static_cast(kernel_symbol.group_segment_size); + result["private_segment_size"] = static_cast(kernel_symbol.private_segment_size); + result["sgpr_count"] = static_cast(kernel_symbol.sgpr_count); + result["arch_vgpr_count"] = static_cast(kernel_symbol.arch_vgpr_count); + result["accum_vgpr_count"] = static_cast(kernel_symbol.accum_vgpr_count); + return result; +} + +rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t +from_json_kernel_symbol(const nlohmann::json& _json) +{ + rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t ks = {}; + ks.kernel_id = _json["kernel_id"].get(); + ks.code_object_id = _json["code_object_id"].get(); + auto kernel_name_str = _json["kernel_name"].get(); + ks.kernel_name = new char[kernel_name_str.size() + 1]; + strncpy(const_cast(ks.kernel_name), kernel_name_str.c_str(), + sizeof(ks.kernel_name) + 1); + ks.kernel_object = _json["kernel_object"].get(); + ks.kernarg_segment_size = _json["kernarg_segment_size"].get(); + ks.kernarg_segment_alignment = _json["kernarg_segment_alignment"].get(); + ks.group_segment_size = _json["group_segment_size"].get(); + ks.private_segment_size = _json["private_segment_size"].get(); + ks.sgpr_count = _json["sgpr_count"].get(); + ks.arch_vgpr_count = _json["arch_vgpr_count"].get(); + ks.accum_vgpr_count = _json["accum_vgpr_count"].get(); + return ks; +} +#endif + +nlohmann::json +to_json(const agent& _agent) +{ + nlohmann::json result; + result["type"] = _agent.type; + result["handle"] = _agent.handle; + result["device_id"] = _agent.device_id; + result["node_id"] = _agent.node_id; + result["logical_node_id"] = _agent.logical_node_id; + result["logical_node_type_id"] = _agent.logical_node_type_id; + result["name"] = _agent.name; + result["model_name"] = _agent.model_name; + result["vendor_name"] = _agent.vendor_name; + result["product_name"] = _agent.product_name; + result["device_type_index"] = _agent.device_type_index; + return result; +} + +std::shared_ptr +from_json_agent(const nlohmann::json& _json) +{ + auto a = std::make_shared(); + a->type = _json["type"].get(); + a->handle = _json["handle"].get(); + a->device_id = _json["device_id"].get(); + a->node_id = _json["node_id"].get(); + a->logical_node_id = _json["logical_node_id"].get(); + a->logical_node_type_id = _json["logical_node_type_id"].get(); + a->name = _json["name"].get(); + a->model_name = _json["model_name"].get(); + a->vendor_name = _json["vendor_name"].get(); + a->product_name = _json["product_name"].get(); + a->device_type_index = _json["device_type_index"].get(); + return a; +} + +nlohmann::json +to_json(const metadata_registry& _registry, + const std::vector>& _agents) +{ + nlohmann::json result; + + auto process_info = _registry.get_process_info(); + result["process"] = to_json(process_info); + + auto pmc_list = _registry.get_pmc_info_list(); + nlohmann::json pmc_array = nlohmann::json::array(); + for(const auto& pmc : pmc_list) + { + pmc_array.push_back(to_json(pmc)); + } + result["pmc_infos"] = pmc_array; + + auto thread_list = _registry.get_thread_info_list(); + nlohmann::json thread_array = nlohmann::json::array(); + for(const auto& thread : thread_list) + { + thread_array.push_back(to_json(thread)); + } + result["threads"] = thread_array; + + auto track_list = _registry.get_track_info_list(); + nlohmann::json track_array = nlohmann::json::array(); + for(const auto& track : track_list) + { + track_array.push_back(to_json(track)); + } + result["tracks"] = track_array; + + auto queue_list = _registry.get_queue_list(); + for(const auto& queue : queue_list) + { + result["queues"].push_back(static_cast(queue)); + } + + auto stream_list = _registry.get_stream_list(); + for(const auto& stream : stream_list) + { + result["streams"].push_back(static_cast(stream)); + } + + auto string_list = _registry.get_string_list(); + for(const auto& str : string_list) + { + result["strings"].push_back(str); + } + +#if ROCPROFSYS_USE_ROCM + auto code_object_list = _registry.get_code_object_list(); + nlohmann::json code_object_array = nlohmann::json::array(); + for(const auto& code_object : code_object_list) + { + code_object_array.push_back(to_json(code_object)); + } + result["code_objects"] = code_object_array; + + auto kernel_symbol_list = _registry.get_kernel_symbol_list(); + nlohmann::json kernel_symbol_array = nlohmann::json::array(); + for(const auto& kernel_symbol : kernel_symbol_list) + { + kernel_symbol_array.push_back(to_json(kernel_symbol)); + } + result["kernel_symbols"] = kernel_symbol_array; +#endif + + for(const auto& agent : _agents) + { + if(agent == nullptr) + { + continue; + } + result["agents"].push_back(to_json(*agent)); + } + + return result; +} + +void +from_json(metadata_registry& _registry, std::vector>& _agents, + const nlohmann::json& _json) +{ + const auto& process_json = _json["process"]; + auto process = from_json_process(process_json); + _registry.set_process(process); + + const auto& pmc_array = _json["pmc_infos"]; + for(const auto& pmc_json : pmc_array) + { + auto pmc = from_json_pmc(pmc_json); + _registry.add_pmc_info(pmc); + } + + const auto& thread_array = _json["threads"]; + for(const auto& thread_json : thread_array) + { + auto thread = from_json_thread(thread_json); + _registry.add_thread_info(thread); + } + + const auto& track_array = _json["tracks"]; + for(const auto& track_json : track_array) + { + auto track = from_json_track(track_json); + _registry.add_track(track); + } + + const auto& queue_array = _json["queues"]; + for(const auto& queue_json : queue_array) + { + auto handle = queue_json.get(); + _registry.add_queue(static_cast(handle)); + } + + const auto& stream_array = _json["streams"]; + for(const auto& stream_json : stream_array) + { + auto handle = stream_json.get(); + _registry.add_stream(static_cast(handle)); + } + + const auto& string_array = _json["strings"]; + for(const auto& string_json : string_array) + { + auto str = string_json.get(); + _registry.add_string(str); + } + +#if ROCPROFSYS_USE_ROCM + if(_json.contains("code_objects")) + { + const auto& code_object_array = _json["code_objects"]; + for(const auto& code_object_json : code_object_array) + { + auto code_object = from_json_code_object(code_object_json); + _registry.add_code_object(code_object); + } + } + + if(_json.contains("kernel_symbols")) + { + const auto& kernel_symbol_array = _json["kernel_symbols"]; + for(const auto& kernel_symbol_json : kernel_symbol_array) + { + auto kernel_symbol = from_json_kernel_symbol(kernel_symbol_json); + _registry.add_kernel_symbol(kernel_symbol); + } + } +#endif + + if(!_agents.empty()) + { + ROCPROFSYS_WARNING(0, "Given agents vector is not empty. Clearing it.."); + _agents.clear(); + } + + if(_json.contains("agents")) + { + const auto& agents_array = _json["agents"]; + for(const auto& agent_json : agents_array) + { + _agents.push_back(from_json_agent(agent_json)); + } + } +} + } // namespace void @@ -279,8 +706,8 @@ metadata_registry::get_kernel_symbol_list() const return result; } -// As the underlying implementation of callback_name_info_t resizes the category storage -// during emplace, this special method is required +// As the underlying implementation of callback_name_info_t resizes the category +// storage during emplace, this special method is required void metadata_registry::overwrite_callback_names( std::initializer_list< @@ -404,5 +831,53 @@ metadata_registry::metadata_registry() #endif } +bool +metadata_registry::save_to_file(const std::string& filepath, + const std::vector>& _agents) const +{ + try + { + auto json = to_json(*this, _agents); + auto json_string = json.dump(); + + std::ofstream file(filepath); + if(!file.is_open()) + { + return false; + } + + file << json_string; + file.close(); + return true; + } catch(const std::exception& e) + { + return false; + } +} + +bool +metadata_registry::load_from_file(const std::string& filepath, + std::vector>& _agents) +{ + try + { + std::ifstream file(filepath); + if(!file.is_open()) + { + return false; + } + + nlohmann::json json; + file >> json; + file.close(); + + rocprofsys::trace_cache::from_json(*this, _agents, json); + return true; + } catch(const std::exception& e) + { + return false; + } +} + } // namespace trace_cache } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp index 499f4fff28..54927872a6 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #if ROCPROFSYS_USE_ROCM > 0 # include @@ -56,6 +57,8 @@ struct process pid_t pid; // < Unique pid_t ppid; std::string command; + uint32_t start; + uint32_t end; }; template @@ -197,6 +200,11 @@ struct metadata_registry std::vector get_stream_list() const; std::vector get_string_list() const; + bool save_to_file(const std::string& filepath, + const std::vector>& _agents) const; + bool load_from_file(const std::string& filepath, + std::vector>& _agents); + #if ROCPROFSYS_USE_ROCM > 0 void add_code_object( const rocprofiler_callback_tracing_code_object_load_data_t& code_object); diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp index ababb0d974..7cedbb7135 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp @@ -22,17 +22,18 @@ #include "trace_cache/rocpd_post_processing.hpp" #include "agent_manager.hpp" -#include "common.hpp" #include "config.hpp" #include "debug.hpp" #include "library/thread_info.hpp" #include "node_info.hpp" #include "rocpd/data_processor.hpp" +#include "rocpd/data_storage/database.hpp" #include "trace_cache/metadata_registry.hpp" #include "trace_cache/sample_type.hpp" #include "trace_cache/storage_parser.hpp" #include #include +#include #include #include #include @@ -49,11 +50,6 @@ namespace trace_cache { namespace { -rocpd::data_processor& -get_data_processor() -{ - return rocpd::data_processor::get_instance(); -} #if ROCPROFSYS_USE_ROCM > 0 auto @@ -69,6 +65,12 @@ get_handle_from_code_object( #endif } // namespace +std::shared_ptr +rocpd_post_processing::get_data_processor() const +{ + return m_data_processor; +} + postprocessing_callback rocpd_post_processing::get_kernel_dispatch_callback() const { @@ -76,17 +78,16 @@ rocpd_post_processing::get_kernel_dispatch_callback() const #if ROCPROFSYS_USE_ROCM > 0 auto _kds = static_cast(parsed); - auto& data_processor = get_data_processor(); - auto& agent_manager = agent_manager::get_instance(); + auto data_processor = get_data_processor(); auto& n_info = node_info::get_instance(); auto process = m_metadata.get_process_info(); auto agent_primary_key = - agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id; + m_agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id; auto thread_primary_key = - data_processor.map_thread_id_to_primary_key(_kds.thread_id); + data_processor->map_thread_id_to_primary_key(_kds.thread_id); - auto category_id = data_processor.insert_string( + auto category_id = data_processor->insert_string( trait::name::value); auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id); @@ -97,17 +98,17 @@ rocpd_post_processing::get_kernel_dispatch_callback() const return; } - auto region_name_primary_key = data_processor.insert_string( + auto region_name_primary_key = data_processor->insert_string( tim::demangle(kernel_symbol->kernel_name).c_str()); auto stack_id = _kds.correlation_id_internal; auto parent_stack_id = _kds.correlation_id_ancestor; auto correlation_id = 0; - auto event_id = data_processor.insert_event(category_id, stack_id, - parent_stack_id, correlation_id); + auto event_id = data_processor->insert_event(category_id, stack_id, + parent_stack_id, correlation_id); - data_processor.insert_kernel_dispatch( + data_processor->insert_kernel_dispatch( n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id, _kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle, _kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size, @@ -125,36 +126,35 @@ rocpd_post_processing::get_memory_copy_callback() const #if ROCPROFSYS_USE_ROCM > 0 auto _mcs = static_cast(parsed); - auto& data_processor = get_data_processor(); - auto& agent_manager = agent_manager::get_instance(); + auto data_processor = get_data_processor(); auto& n_info = node_info::get_instance(); auto process = m_metadata.get_process_info(); auto _name = std::string{ m_metadata.get_buffer_name_info().at( static_cast(_mcs.kind), static_cast(_mcs.operation)) }; - auto name_primary_key = data_processor.insert_string(_name.c_str()); + auto name_primary_key = data_processor->insert_string(_name.c_str()); auto category_primary_key = - data_processor.insert_string(trait::name::value); + data_processor->insert_string(trait::name::value); auto thread_primary_key = - data_processor.map_thread_id_to_primary_key(_mcs.thread_id); + data_processor->map_thread_id_to_primary_key(_mcs.thread_id); auto dst_agent_primary_key = - agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id; + m_agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id; auto src_agent_primary_key = - agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id; + m_agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id; auto stack_id = _mcs.correlation_id_internal; auto parent_stack_id = _mcs.correlation_id_ancestor; auto correlation_id = 0; auto queue_id = 0; - auto event_primary_key = data_processor.insert_event( + auto event_primary_key = data_processor->insert_event( category_primary_key, stack_id, parent_stack_id, correlation_id); - data_processor.insert_memory_copy( + data_processor->insert_memory_copy( n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp, _mcs.end_timestamp, name_primary_key, dst_agent_primary_key, _mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value, @@ -213,12 +213,11 @@ rocpd_post_processing::get_memory_allocate_callback() const return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { # if ROCPROFSYS_USE_ROCM > 0 auto _mas = static_cast(parsed); - auto& data_processor = get_data_processor(); - auto& agent_manager = agent_manager::get_instance(); + auto data_processor = get_data_processor(); auto& n_info = node_info::get_instance(); auto process = m_metadata.get_process_info(); auto thread_primary_key = - data_processor.map_thread_id_to_primary_key(_mas.thread_id); + data_processor->map_thread_id_to_primary_key(_mas.thread_id); auto agent_primary_key = std::optional{}; const auto invalid_context = ROCPROFILER_CONTEXT_NONE; @@ -226,7 +225,7 @@ rocpd_post_processing::get_memory_allocate_callback() const { { agent_primary_key = - agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id; + m_agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id; } const auto* _name = m_metadata.get_buffer_name_info().at( static_cast(_mas.kind), @@ -239,13 +238,13 @@ rocpd_post_processing::get_memory_allocate_callback() const auto correlation_id = 0; auto queue_id = 0; - auto category_primary_key = data_processor.insert_string( + auto category_primary_key = data_processor->insert_string( trait::name::value); - auto event_primary_key = data_processor.insert_event( + auto event_primary_key = data_processor->insert_event( category_primary_key, stack_id, parent_stack_id, correlation_id); - data_processor.insert_memory_alloc( + data_processor->insert_memory_alloc( n_info.id, process.pid, thread_primary_key, agent_primary_key, type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp, _mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle, @@ -306,73 +305,100 @@ rocpd_post_processing::get_region_callback() const return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { #if ROCPROFSYS_USE_ROCM > 0 auto _rs = static_cast(parsed); - auto& data_processor = get_data_processor(); + auto data_processor = get_data_processor(); auto& n_info = node_info::get_instance(); auto process = m_metadata.get_process_info(); auto thread_primary_key = - data_processor.map_thread_id_to_primary_key(_rs.thread_id); + data_processor->map_thread_id_to_primary_key(_rs.thread_id); - auto name_primary_key = data_processor.insert_string(_rs.name.c_str()); - auto category_primary_key = data_processor.insert_string(_rs.category.c_str()); + auto name_primary_key = data_processor->insert_string(_rs.name.c_str()); + auto category_primary_key = data_processor->insert_string(_rs.category.c_str()); size_t stack_id = _rs.correlation_id_internal; size_t parent_stack_id = _rs.correlation_id_ancestor; size_t correlation_id = 0; auto event_primary_key = - data_processor.insert_event(category_primary_key, stack_id, parent_stack_id, - correlation_id, _rs.call_stack.c_str()); + data_processor->insert_event(category_primary_key, stack_id, parent_stack_id, + correlation_id, _rs.call_stack.c_str()); auto args = parse_args(_rs.args_str); for(const auto& arg : args) { - data_processor.insert_args(event_primary_key, arg.arg_number, - arg.arg_type.c_str(), arg.arg_name.c_str(), - arg.arg_value.c_str()); + data_processor->insert_args(event_primary_key, arg.arg_number, + arg.arg_type.c_str(), arg.arg_name.c_str(), + arg.arg_value.c_str()); } - data_processor.insert_region(n_info.id, process.pid, thread_primary_key, - _rs.start_timestamp, _rs.end_timestamp, - name_primary_key, event_primary_key); + data_processor->insert_region(n_info.id, process.pid, thread_primary_key, + _rs.start_timestamp, _rs.end_timestamp, + name_primary_key, event_primary_key); #endif }; } +postprocessing_callback +rocpd_post_processing::get_backtrace_sample_callback() const +{ + return [&](const storage_parsed_type_base& parsed) { + auto _bts = static_cast(parsed); + auto data_processor = get_data_processor(); + auto& n_info = node_info::get_instance(); + auto process = m_metadata.get_process_info(); + auto thread_primary_key = + data_processor->map_thread_id_to_primary_key(_bts.thread_id); + auto name_primary_key = data_processor->insert_string(_bts.name.c_str()); + auto category_primary_key = data_processor->insert_string(_bts.category.c_str()); + + auto event_primary_key = data_processor->insert_event( + category_primary_key, 0, 0, 0, _bts.call_stack.c_str(), + _bts.line_info.c_str(), _bts.extdata.c_str()); + + data_processor->insert_region(n_info.id, process.pid, thread_primary_key, + _bts.start_timestamp, _bts.end_timestamp, + name_primary_key, event_primary_key); + data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp, + event_primary_key); + }; +} + postprocessing_callback rocpd_post_processing::get_in_time_sample_callback() const { return [&](const storage_parsed_type_base& parsed) { - auto _its = static_cast(parsed); - auto& data_processor = get_data_processor(); - auto track_primary_key = data_processor.insert_string(_its.track_name.c_str()); + auto _its = static_cast(parsed); + auto data_processor = get_data_processor(); + auto track_primary_key = data_processor->insert_string(_its.track_name.c_str()); - auto event_id = data_processor.insert_event( + auto event_id = data_processor->insert_event( track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id, _its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str()); - data_processor.insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id, - "{}"); + data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns, + event_id, "{}"); }; } postprocessing_callback rocpd_post_processing::get_pmc_event_with_sample_callback() const { return [&](const storage_parsed_type_base& parsed) { - auto _pmc = static_cast(parsed); - auto& data_processor = get_data_processor(); - auto track_primary_key = data_processor.insert_string(_pmc.track_name.c_str()); + auto _pmc = static_cast(parsed); + auto data_processor = get_data_processor(); + auto track_primary_key = data_processor->insert_string(_pmc.track_name.c_str()); - auto& agent_manager = agent_manager::get_instance(); - auto agent_primary_key = - agent_manager.get_agent_by_handle(_pmc.agent_handle).base_id; + auto agent_primary_key = + m_agent_manager + .get_agent_by_id(_pmc.device_id, + static_cast(_pmc.device_type)) + .base_id; - auto event_id = data_processor.insert_event( + auto event_id = data_processor->insert_event( track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id, _pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str()); - data_processor.insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id, - "{}"); + data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, + event_id, "{}"); - data_processor.insert_pmc_event(event_id, agent_primary_key, - _pmc.pmc_info_name.c_str(), _pmc.value); + data_processor->insert_pmc_event(event_id, agent_primary_key, + _pmc.pmc_info_name.c_str(), _pmc.value); }; } @@ -450,22 +476,21 @@ rocpd_post_processing::get_amd_smi_sample_callback() const return [&](const storage_parsed_type_base& parsed) { auto _amd_smi = static_cast(parsed); - auto& data_processor = get_data_processor(); + auto data_processor = get_data_processor(); const auto* _name = trait::name::value; - auto name_primary_key = data_processor.insert_string(_name); - auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0); + auto name_primary_key = data_processor->insert_string(_name); + auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0); - auto& _agent_manager = agent_manager::get_instance(); - auto base_id = - _agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU) + auto base_id = + m_agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU) .base_id; auto insert_event_and_sample = [&](bool enabled, const char* pmc_name, const char* track_name, double value) { if(!enabled) return; - data_processor.insert_pmc_event(event_id, base_id, pmc_name, value); - data_processor.insert_sample(track_name, _amd_smi.timestamp, event_id); + data_processor->insert_pmc_event(event_id, base_id, pmc_name, value); + data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id); }; using pos = trace_cache::amd_smi_sample::settings_positions; @@ -591,20 +616,19 @@ rocpd_post_processing::get_cpu_freq_sample_callback() const return [&](const storage_parsed_type_base& parsed) { auto _cpu_freq_sample = static_cast(parsed); - auto& data_processor = get_data_processor(); + auto data_processor = get_data_processor(); const auto* _name = trait::name::value; - auto name_primary_key = data_processor.insert_string(_name); - auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0); + auto name_primary_key = data_processor->insert_string(_name); + auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0); auto device_id = 0; - auto& agent_mngr = agent_manager::get_instance(); - auto base_id = - agent_mngr.get_agent_by_type_index(device_id, agent_type::CPU).base_id; + auto base_id = + m_agent_manager.get_agent_by_type_index(device_id, agent_type::CPU).base_id; auto insert_event_and_sample = [&](const char* name, double value) { - data_processor.insert_pmc_event(event_id, base_id, name, value); - data_processor.insert_sample(name, _cpu_freq_sample.timestamp, event_id); + data_processor->insert_pmc_event(event_id, base_id, name, value); + data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id); }; insert_event_and_sample(trait::name::value, @@ -635,8 +659,12 @@ rocpd_post_processing::get_cpu_freq_sample_callback() const }; } -rocpd_post_processing::rocpd_post_processing(metadata_registry& md) +rocpd_post_processing::rocpd_post_processing(metadata_registry& md, + agent_manager& agent_mngr, int pid, int ppid) : m_metadata(md) +, m_agent_manager(agent_mngr) +, m_data_processor(std::make_shared( + std::make_shared(pid, ppid))) {} void @@ -663,7 +691,17 @@ rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& get_amd_smi_sample_callback()); parser.register_type_callback(entry_type::cpu_freq_sample, get_cpu_freq_sample_callback()); - ROCPROFSYS_DEBUG("Buffer parser callbacks are registered.."); + parser.register_type_callback(entry_type::backtrace_region_sample, + get_backtrace_sample_callback()); + ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..\n"); + + parser.register_on_finished_callback( + std::make_unique>([this]() { + if(m_data_processor != nullptr) + { + m_data_processor->flush(); + } + })); #endif } @@ -675,25 +713,25 @@ rocpd_post_processing::post_process_metadata() { return; } - ROCPROFSYS_DEBUG("Post processing metadata.."); - auto& data_processor = get_data_processor(); - auto& agent_mngr = agent_manager::get_instance(); - auto n_info = node_info::get_instance(); + ROCPROFSYS_DEBUG("Post processing metadata..\n"); + auto data_processor = get_data_processor(); + auto n_info = node_info::get_instance(); - data_processor.insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(), - n_info.system_name.c_str(), n_info.node_name.c_str(), - n_info.release.c_str(), n_info.version.c_str(), - n_info.machine.c_str(), n_info.domain_name.c_str()); + data_processor->insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(), + n_info.system_name.c_str(), n_info.node_name.c_str(), + n_info.release.c_str(), n_info.version.c_str(), + n_info.machine.c_str(), n_info.domain_name.c_str()); auto process_info = m_metadata.get_process_info(); - data_processor.insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0, - 0, 0, 0, process_info.command.c_str(), "{}"); + data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0, + 0, process_info.start, process_info.end, + process_info.command.c_str(), "{}"); - const auto& agents = agent_mngr.get_agents(); + const auto& agents = m_agent_manager.get_agents(); int counter = 0; for(const auto& rocpd_agent : agents) { - auto _base_id = rocpd::data_processor::get_instance().insert_agent( + auto _base_id = data_processor->insert_agent( n_info.id, process_info.pid, ((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++, rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id, @@ -705,7 +743,7 @@ rocpd_post_processing::post_process_metadata() auto _string_list = m_metadata.get_string_list(); for(auto& _string : _string_list) { - data_processor.insert_string(std::string(_string).c_str()); + data_processor->insert_string(std::string(_string).c_str()); } auto _thread_info_list = m_metadata.get_thread_info_list(); @@ -719,18 +757,18 @@ rocpd_post_processing::post_process_metadata() { auto thread_id = track.thread_id.has_value() - ? std::make_optional(data_processor.map_thread_id_to_primary_key( + ? std::make_optional(data_processor->map_thread_id_to_primary_key( track.thread_id.value())) : std::nullopt; - data_processor.insert_track(track.track_name.c_str(), n_info.id, process_info.pid, - thread_id); + data_processor->insert_track(track.track_name.c_str(), n_info.id, + process_info.pid, thread_id); } auto _code_object_list = m_metadata.get_code_object_list(); for(const auto& code_object : _code_object_list) { auto dev_id = - agent_mngr.get_agent_by_handle(get_handle_from_code_object(code_object)) + m_agent_manager.get_agent_by_handle(get_handle_from_code_object(code_object)) .base_id; const char* strg_type = "UNKNOWN"; @@ -740,17 +778,17 @@ rocpd_post_processing::post_process_metadata() case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break; default: break; } - data_processor.insert_code_object(code_object.code_object_id, n_info.id, - process_info.pid, dev_id, code_object.uri, - code_object.load_base, code_object.load_size, - code_object.load_delta, strg_type); + data_processor->insert_code_object(code_object.code_object_id, n_info.id, + process_info.pid, dev_id, code_object.uri, + code_object.load_base, code_object.load_size, + code_object.load_delta, strg_type); } auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list(); for(const auto& kernel_symbol : _kernel_symbols_list) { auto kernel_name = tim::demangle(kernel_symbol.kernel_name); - data_processor.insert_kernel_symbol( + data_processor->insert_kernel_symbol( kernel_symbol.kernel_id, n_info.id, process_info.pid, kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(), kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size, @@ -758,7 +796,7 @@ rocpd_post_processing::post_process_metadata() kernel_symbol.private_segment_size, kernel_symbol.sgpr_count, kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count); - data_processor.insert_string(kernel_name.c_str()); + data_processor->insert_string(kernel_name.c_str()); } auto _queue_list = m_metadata.get_queue_list(); @@ -766,8 +804,8 @@ rocpd_post_processing::post_process_metadata() { std::stringstream ss; ss << "Queue " << queue_handle; - data_processor.insert_queue_info(queue_handle, n_info.id, process_info.pid, - ss.str().c_str()); + data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid, + ss.str().c_str()); } auto _stream_list = m_metadata.get_stream_list(); @@ -775,8 +813,8 @@ rocpd_post_processing::post_process_metadata() { std::stringstream ss; ss << "Stream " << stream_handle; - data_processor.insert_stream_info(stream_handle, n_info.id, process_info.pid, - ss.str().c_str()); + data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid, + ss.str().c_str()); } auto buffer_info_list = m_metadata.get_buffer_name_info(); @@ -784,7 +822,7 @@ rocpd_post_processing::post_process_metadata() { for(const auto& item : buffer_info.items()) { - data_processor.insert_string(*item.second); + data_processor->insert_string(*item.second); } } @@ -793,7 +831,7 @@ rocpd_post_processing::post_process_metadata() { for(const auto& item : cb_info.items()) { - data_processor.insert_string(*item.second); + data_processor->insert_string(*item.second); } } @@ -801,10 +839,11 @@ rocpd_post_processing::post_process_metadata() for(const auto& pmc_info : pmc_info_list) { const auto agent_primary_key = - agent_mngr.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type) + m_agent_manager + .get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type) .base_id; - data_processor.insert_pmc_description( + data_processor->insert_pmc_description( n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(), pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(), pmc_info.symbol.c_str(), pmc_info.description.c_str(), @@ -820,7 +859,7 @@ rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info, const info::process& process_info) const { - const auto& extended_info = thread_info::get(t_info.thread_id, SequentTID); + const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID); if(extended_info.has_value()) { t_info.start = extended_info->get_start(); @@ -829,9 +868,9 @@ rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info, std::stringstream ss; ss << "Thread " << t_info.thread_id; - get_data_processor().insert_thread_info(n_info.id, process_info.ppid, - process_info.pid, t_info.thread_id, - ss.str().c_str(), t_info.start, t_info.end); + get_data_processor()->insert_thread_info(n_info.id, process_info.ppid, + process_info.pid, t_info.thread_id, + ss.str().c_str(), t_info.start, t_info.end); } } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp index b968697c9a..91331a0ff5 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp @@ -21,7 +21,9 @@ // SOFTWARE. #pragma once +#include "agent_manager.hpp" #include "core/node_info.hpp" +#include "core/rocpd/data_processor.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "core/trace_cache/storage_parser.hpp" @@ -33,11 +35,14 @@ namespace trace_cache class rocpd_post_processing { public: - rocpd_post_processing(metadata_registry& metadata); + rocpd_post_processing(metadata_registry& metadata, agent_manager& agent_mngr, int pid, + int ppid); void register_parser_callback(storage_parser& parser); void post_process_metadata(); + std::shared_ptr get_data_processor() const; + private: using primary_key = size_t; @@ -54,8 +59,11 @@ private: postprocessing_callback get_pmc_event_with_sample_callback() const; postprocessing_callback get_amd_smi_sample_callback() const; postprocessing_callback get_cpu_freq_sample_callback() const; + postprocessing_callback get_backtrace_sample_callback() const; - metadata_registry& m_metadata; + metadata_registry& m_metadata; + agent_manager& m_agent_manager; + std::shared_ptr m_data_processor; }; } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp index fa4fa7c582..855e0cedbd 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #if ROCPROFSYS_USE_ROCM > 0 @@ -149,15 +148,12 @@ struct region_sample : storage_parsed_type_base uint64_t thread_id; std::string name; - // Correlation fields uint64_t correlation_id_internal; uint64_t correlation_id_ancestor; - // Timing fields uint64_t start_timestamp; uint64_t end_timestamp; - // Additional fields std::string call_stack; std::string args_str; std::string category; @@ -177,9 +173,10 @@ struct in_time_sample : storage_parsed_type_base struct pmc_event_with_sample : in_time_sample { - size_t agent_handle; + uint32_t device_id; + uint8_t device_type; std::string pmc_info_name; - size_t value; + double value; }; struct amd_smi_sample : storage_parsed_type_base @@ -219,6 +216,40 @@ struct cpu_freq_sample : storage_parsed_type_base std::vector freqs; }; +struct backtrace_region_sample : storage_parsed_type_base +{ + backtrace_region_sample() = default; + backtrace_region_sample(uint32_t _type, uint64_t _thread_id, std::string _track_name, + std::string _name, uint64_t _start_timestamp, + uint64_t _end_timestamp, std::string _category, + std::string _call_stack, std::string _line_info, + std::string _extdata) + : type(_type) + , thread_id(_thread_id) + , track_name(std::move(_track_name)) + , name(std::move(_name)) + , start_timestamp(_start_timestamp) + , end_timestamp(_end_timestamp) + , category(std::move(_category)) + , call_stack(std::move(_call_stack)) + , line_info(std::move(_line_info)) + , extdata(std::move(_extdata)) + {} + + uint32_t type; + uint64_t thread_id; + std::string track_name; + std::string name; + + uint64_t start_timestamp; + uint64_t end_timestamp; + + std::string category; + std::string call_stack; + std::string line_info; + std::string extdata; +}; + enum class entry_type : uint32_t { in_time_sample = 0x0000, @@ -229,9 +260,10 @@ enum class entry_type : uint32_t #if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600) memory_alloc = 0x0005, #endif - amd_smi_sample = 0x0006, - cpu_freq_sample = 0x0007, - fragmented_space = 0xFFFF + amd_smi_sample = 0x0006, + cpu_freq_sample = 0x0007, + backtrace_region_sample = 0x0008, + fragmented_space = 0xFFFF }; } // namespace trace_cache } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp index 577d5818cc..9c388a7e22 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include @@ -34,8 +36,8 @@ namespace rocprofsys namespace trace_cache { -storage_parser::storage_parser(pid_t _pid) -: m_pid(_pid) +storage_parser::storage_parser(std::string _filename) +: m_filename(std::move(_filename)) {} void @@ -46,22 +48,24 @@ storage_parser::register_type_callback( m_callbacks[type].push_back(callback); } +void +storage_parser::register_on_finished_callback( + std::unique_ptr> callback) +{ + m_on_finished_callback = std::move(callback); +} + void storage_parser::consume_storage() { - ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s", filename.c_str()); - if(m_pid != getpid()) - { - ROCPROFSYS_DEBUG( - "Storage parser is not created in same process as shutting down.."); - return; - } + ROCPROFSYS_VERBOSE(0, "Consuming buffered storage with filename: %s\n", + m_filename.c_str()); - std::ifstream ifs(filename, std::ios::binary); + std::ifstream ifs(m_filename, std::ios::binary); if(!ifs) { std::stringstream ss; - ss << "Error opening file for reading: " << filename << "\n"; + ss << "Error opening file for reading: " << m_filename << "\n"; throw std::runtime_error(ss.str()); } @@ -90,10 +94,10 @@ storage_parser::consume_storage() if(ifs.bad()) { - ROCPROFSYS_WARNING( - 1, - "Bad read while consuming buffered storage. Filename: %s. Bytes read: %d", - filename.c_str(), static_cast(ifs.tellg())); + ROCPROFSYS_WARNING(1, + "Bad read while consuming buffered storage. Filename: %s. " + "Bytes read: %d\n", + m_filename.c_str(), static_cast(ifs.tellg())); continue; } @@ -196,7 +200,7 @@ storage_parser::consume_storage() _pmc_event_with_sample.parent_stack_id, _pmc_event_with_sample.correlation_id, _pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info, - _pmc_event_with_sample.agent_handle, + _pmc_event_with_sample.device_id, _pmc_event_with_sample.device_type, _pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value); invoke_callbacks(header.type, _pmc_event_with_sample); break; @@ -225,14 +229,33 @@ storage_parser::consume_storage() invoke_callbacks(header.type, _cpu_freq_sample); break; } + case entry_type::backtrace_region_sample: + { + backtrace_region_sample _backtrace_region_sample; + parse_data( + sample.data(), _backtrace_region_sample.type, + _backtrace_region_sample.thread_id, + _backtrace_region_sample.track_name, _backtrace_region_sample.name, + _backtrace_region_sample.start_timestamp, + _backtrace_region_sample.end_timestamp, + _backtrace_region_sample.category, + _backtrace_region_sample.call_stack, + _backtrace_region_sample.line_info, _backtrace_region_sample.extdata); + invoke_callbacks(header.type, _backtrace_region_sample); + } default: break; } } ifs.close(); - ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system", - filename.c_str()); - std::remove(filename.c_str()); + ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system\n", + m_filename.c_str()); + std::remove(m_filename.c_str()); + + if(m_on_finished_callback != nullptr) + { + (*m_on_finished_callback)(); + } } void @@ -241,7 +264,7 @@ storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base auto _callback_list = m_callbacks.find(type); if(_callback_list == m_callbacks.end()) { - ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing"); + ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing\n"); return; } diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp index 7d569709e0..afc56c1e69 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp @@ -49,10 +49,12 @@ public: const postprocessing_callback& callback); void consume_storage(); + void register_on_finished_callback(std::unique_ptr> callback); private: friend class cache_manager; - storage_parser(pid_t _pid); + storage_parser(std::string _filename); + template static void process_arg(const uint8_t*& data_pos, T& arg) { @@ -82,10 +84,11 @@ private: (process_arg(data_pos, args), ...); } -private: - pid_t m_pid; - void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed); + void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed); + + std::string m_filename; std::map> m_callbacks; + std::unique_ptr> m_on_finished_callback{ nullptr }; }; } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp index 64d5f6bdf7..395aee22a2 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp @@ -45,6 +45,8 @@ #include "core/rocpd/data_processor.hpp" #include "core/timemory.hpp" #include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/cache_utility.hpp" +#include "core/trace_cache/metadata_registry.hpp" #include "core/utility.hpp" #include "library/causal/data.hpp" #include "library/causal/experiment.hpp" @@ -117,6 +119,22 @@ namespace auto _timemory_manager = tim::manager::instance(); auto _timemory_settings = tim::settings::shared_instance(); +void +set_metadata_process_start_timestamp(int64_t _ts) +{ + auto process_info = trace_cache::get_metadata_registry().get_process_info(); + process_info.start = _ts; + trace_cache::get_metadata_registry().set_process(process_info); +} + +void +set_metadata_process_end_timestamp(int64_t _ts) +{ + auto process_info = trace_cache::get_metadata_registry().get_process_info(); + process_info.end = _ts; + trace_cache::get_metadata_registry().set_process(process_info); +} + bool ensure_initialization(bool _offset, int64_t _glob_n, int64_t _offset_n) { @@ -559,6 +577,8 @@ rocprofsys_init_tooling_hidden(void) get_main_bundle()->start(); ROCPROFSYS_DEBUG_F("State: %s -> State::Active\n", std::to_string(get_state()).c_str()); + + trace_cache::get_buffer_storage().start_flushing_thread(getpid()); set_state(State::Active); // set to active as very last operation } }; @@ -701,6 +721,8 @@ rocprofsys_init_hidden(const char* _mode, bool _is_binary_rewrite, const char* _ if(get_state() == State::Active) rocprofsys_finalize_hidden(); }); + set_metadata_process_start_timestamp(comp::wall_clock::record()); + ROCPROFSYS_CONDITIONAL_BASIC_PRINT_F( get_debug_env() || get_verbose_env() > 2, "mode: %s | is binary rewrite: %s | command: %s\n", _mode, @@ -757,8 +779,13 @@ rocprofsys_finalize_hidden(void) std::to_string(get_state()).c_str()); return; } - else if(_is_child) + + set_metadata_process_end_timestamp(comp::wall_clock::record()); + + if(_is_child) { + set_state(State::Finalized); + #if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 // Flush buffered traces in case of child process if(get_use_rocm()) @@ -767,17 +794,13 @@ rocprofsys_finalize_hidden(void) rocprofiler_sdk::shutdown(); } #endif - auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); + auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); + const auto _agents = get_agent_manager_instance().get_agents(); _manager.shutdown(); - _manager.post_process(); + const auto metadata_filepath = + trace_cache::get_metadata_filepath(get_root_process_id(), getpid()); + _manager.get_metadata_registry().save_to_file(metadata_filepath, _agents); -#if ROCPROFSYS_USE_ROCM > 0 - if(get_use_rocpd()) - { - rocpd::data_processor::get_instance().flush(); - } -#endif - set_state(State::Finalized); std::quick_exit(EXIT_SUCCESS); return; } @@ -871,12 +894,6 @@ rocprofsys_finalize_hidden(void) } #endif - { - auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); - _manager.shutdown(); - _manager.post_process(); - } - ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n"); for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) { @@ -1013,6 +1030,12 @@ rocprofsys_finalize_hidden(void) _perfetto_output_error); } + { + auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); + _manager.shutdown(); + _manager.post_process_bulk(); + } + if(_timemory_manager && _timemory_manager != nullptr) { _timemory_manager->add_metadata([](auto& ar) { @@ -1067,12 +1090,6 @@ rocprofsys_finalize_hidden(void) [](int) {}); common::destroy_static_objects(); -#if ROCPROFSYS_USE_ROCM > 0 - if(get_use_rocpd()) - { - rocpd::data_processor::get_instance().flush(); - } -#endif } //======================================================================================// diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp index 74477c42f6..90ce90198a 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp @@ -21,21 +21,21 @@ // SOFTWARE. #include "library/components/backtrace_metrics.hpp" -#include "core/agent.hpp" -#include "core/agent_manager.hpp" #include "core/common.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/debug.hpp" -#include "core/node_info.hpp" #include "core/perfetto.hpp" -#include "core/rocpd/data_processor.hpp" +#include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/cache_utility.hpp" +#include "core/trace_cache/metadata_registry.hpp" #include "library/components/ensure_storage.hpp" #include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" #include "library/tracing.hpp" +#include #include #include #include @@ -142,12 +142,6 @@ backtrace_metrics::get_hw_counter_labels(int64_t _tid) return (_v) ? *_v : std::vector{}; } -rocpd::data_processor& -get_data_processor() -{ - return rocpd::data_processor::get_instance(); -} - void backtrace_metrics::start() {} @@ -170,38 +164,31 @@ get_enabled(tim::type_list) } void -rocpd_init_categories() +metadata_init_categories() { static bool _is_initialized = false; if(_is_initialized) return; - auto& data_processor = get_data_processor(); - - data_processor.insert_string(trait::name::value); - data_processor.insert_string(trait::name::value); - data_processor.insert_string(trait::name::value); - data_processor.insert_string(trait::name::value); - data_processor.insert_string(trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); _is_initialized = true; } template void -rocpd_init_tracks(int64_t _tid) +apply_for_all_thread_names(int64_t _tid, std::function _apply) { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - const auto& t_info = thread_info::get(_tid, SequentTID); - auto _tid_name = JOIN("", '[', _tid, ']'); - - auto thread_idx = data_processor.insert_thread_info( - n_info.id, getppid(), getpid(), t_info->index_data->system_value, - JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}"); - if constexpr(std::is_same_v) { - // Initialize hw_counter_tracks and create one track for each hardware counter auto _hw_cnt_labels = *get_papi_labels(_tid); for(auto& itr : _hw_cnt_labels) { @@ -209,97 +196,98 @@ rocpd_init_tracks(int64_t _tid) if(_desc.empty()) _desc = itr; ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - data_processor.insert_track(track_name.c_str(), n_info.id, getpid(), - thread_idx, "{}"); + std::stringstream track_name_ss; + track_name_ss << "Thread " << _desc << " [" << _tid << "] (S)"; + _apply(track_name_ss.str()); } } else - data_processor.insert_track( - JOIN('_', trait::name::value, _tid_name).c_str(), n_info.id, - getpid(), thread_idx, "{}"); + { + std::stringstream track_name_ss; + track_name_ss << trait::name::value << " [" << _tid << "]"; + _apply(track_name_ss.str()); + } } template void -rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid) +metadata_init_tracks(int64_t _tid) { - auto& data_processor = get_data_processor(); - auto _tid_name = JOIN("", '[', _tid, ']'); + const auto& t_info = thread_info::get(_tid, SequentTID); + auto thread_id = static_cast(t_info->index_data->system_value); - size_t EVENT_CODE = 0; - size_t INSTANCE_ID = 0; - const char* LONG_DESCRIPTION = ""; - const char* COMPONENT = ""; - const char* BLOCK = ""; - const char* EXPRESSION = ""; - auto ni = node_info::get_instance(); - const auto TARGET_ARCH = "CPU"; + trace_cache::get_metadata_registry().add_thread_info( + { getppid(), getpid(), thread_id, static_cast(t_info->get_start()), + static_cast(t_info->get_stop()), "{}" }); - auto& agent_mngr = agent_manager::get_instance(); - auto base_id = agent_mngr.get_agent_by_id(dev_id, agent_type::CPU).base_id; + apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { + trace_cache::get_metadata_registry().add_track({ _track_name, thread_id, "{}" }); + }); +} - if constexpr(std::is_same_v) - { - auto _hw_cnt_labels = *get_papi_labels(_tid); - for(auto& itr : _hw_cnt_labels) - { - std::string _desc = tim::papi::get_event_info(itr).short_descr; - if(_desc.empty()) _desc = itr; - ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); +template +void +metadata_initialize_backtrace_metrics_pmc(size_t dev_id, const char* _units, int64_t _tid) +{ + constexpr size_t EVENT_CODE = 0; + constexpr size_t INSTANCE_ID = 0; + const char* LONG_DESCRIPTION = ""; + const char* COMPONENT = ""; + const char* BLOCK = ""; + const char* EXPRESSION = ""; + const char* TARGET_ARCH = "CPU"; - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - track_name.c_str(), trait::name::value, - trait::name::description, LONG_DESCRIPTION, COMPONENT, units, - "ABS", BLOCK, EXPRESSION, 0, 0); - } - } - else - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - JOIN("_", trait::name::value, _tid_name).c_str(), - trait::name::value, trait::name::description, - LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0); + apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, _track_name, + trait::name::value, trait::name::description, + LONG_DESCRIPTION, COMPONENT, _units, trace_cache::ABSOLUTE, BLOCK, + EXPRESSION, 0, 0, "{}" }); + }); } template void -rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp, - Value value, int64_t _tid) +cache_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp_ns, + Value value, int64_t _tid) { - auto& data_processor = get_data_processor(); - auto _tid_name = JOIN("", '[', _tid, ']'); + auto _tid_name = JOIN("", '[', _tid, ']'); - auto string_primary_key = data_processor.insert_string(trait::name::value); - auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0); - auto& agent_mngr = agent_manager::get_instance(); - auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id; + size_t stack_id = 0; + size_t parent_stack_id = 0; + size_t correlation_id = 0; + const auto* event_metadata = ""; + const auto* call_stack = ""; + const auto* line_info = ""; - auto insert_event_and_sample = [&](const char* _name, double _value) { - data_processor.insert_pmc_event(event_id, base_id, _name, _value); - data_processor.insert_sample(_name, timestamp, event_id); + auto insert_event_and_sample = [&](const char* _track_name, double _value) { + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::pmc_event_with_sample, _track_name, timestamp_ns, + event_metadata, stack_id, parent_stack_id, correlation_id, call_stack, + line_info, device_id, static_cast(agent_type::CPU), _track_name, + _value); }; if constexpr(std::is_same_v) { - auto _hw_cnt_labels = *get_papi_labels(_tid); const auto& hw_counters = static_cast(value); - for(size_t i = 0; i < _hw_cnt_labels.size() && i < hw_counters.size(); ++i) - { - std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr; - if(_desc.empty()) _desc = _hw_cnt_labels[i]; - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - insert_event_and_sample(track_name.c_str(), hw_counters.at(i)); - } + size_t idx = 0; + apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { + if(idx < hw_counters.size()) + { + insert_event_and_sample(_track_name.c_str(), hw_counters.at(idx)); + } + ++idx; + }); } else - insert_event_and_sample( - JOIN("_", trait::name::value, _tid_name).c_str(), value); + { + apply_for_all_thread_names(_tid, [&](const std::string& _track_name) { + insert_event_and_sample(_track_name.c_str(), value); + }); + } } } // namespace @@ -473,84 +461,39 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid) } void -backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid) +backtrace_metrics::init_cache(int64_t _tid, valid_array_t _valid) { - rocpd_init_categories(); + metadata_init_categories(); if(get_valid(category::thread_cpu_time{}, _valid)) { - rocpd_init_tracks(_tid); - rocpd_initialize_backtrace_metrics_pmc(0, "sec", _tid); - } - if(get_valid(category::thread_peak_memory{}, _valid)) - { - rocpd_init_tracks(_tid); - rocpd_initialize_backtrace_metrics_pmc(0, "MB", + metadata_init_tracks(_tid); + metadata_initialize_backtrace_metrics_pmc(0, "sec", _tid); } - if(get_valid(category::thread_context_switch{}, _valid)) - { - rocpd_init_tracks(_tid); - rocpd_initialize_backtrace_metrics_pmc(0, "", - _tid); - } - if(get_valid(category::thread_page_fault{}, _valid)) - { - rocpd_init_tracks(_tid); - rocpd_initialize_backtrace_metrics_pmc(0, "", _tid); - } - if(get_valid(type_list{}, _valid) && - get_valid(category::thread_hardware_counter{}, _valid)) - { - rocpd_init_tracks(_tid); - rocpd_initialize_backtrace_metrics_pmc(0, "", - _tid); - } -} - -void -backtrace_metrics::fini_rocpd(int64_t _tid, valid_array_t _valid) -{ - const auto& _thread_info = thread_info::get(_tid, SequentTID); - - ROCPROFSYS_CI_THROW(!_thread_info, "Error! missing thread info for tid=%li\n", _tid); - if(!_thread_info) return; - - uint64_t _ts = _thread_info->get_stop(); - - if(get_valid(category::thread_cpu_time{}, _valid)) - { - rocpd_process_backtrace_metrics_events( - 0, _ts, 0, _tid); - } - if(get_valid(category::thread_peak_memory{}, _valid)) { - rocpd_process_backtrace_metrics_events( - 0, _ts, 0, _tid); + metadata_init_tracks(_tid); + metadata_initialize_backtrace_metrics_pmc(0, "MB", + _tid); } - if(get_valid(category::thread_context_switch{}, _valid)) { - rocpd_process_backtrace_metrics_events( - 0, _ts, 0, _tid); + metadata_init_tracks(_tid); + metadata_initialize_backtrace_metrics_pmc(0, "", + _tid); } - if(get_valid(category::thread_page_fault{}, _valid)) { - rocpd_process_backtrace_metrics_events( - 0, _ts, 0, _tid); + metadata_init_tracks(_tid); + metadata_initialize_backtrace_metrics_pmc(0, "", + _tid); } - if(get_valid(type_list{}, _valid) && get_valid(category::thread_hardware_counter{}, _valid)) { - auto _hw_cnt_labels = *get_papi_labels(_tid); - hw_counter_data_t zero_counters{}; - zero_counters.fill(0.0); - - rocpd_process_backtrace_metrics_events(0, _ts, zero_counters, - _tid); + metadata_init_tracks(_tid); + metadata_initialize_backtrace_metrics_pmc( + 0, "", _tid); } } @@ -636,41 +579,40 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const } void -backtrace_metrics::post_process_rocpd([[maybe_unused]] int64_t _tid, - [[maybe_unused]] uint64_t _ts) const +backtrace_metrics::cache_backtrace_data([[maybe_unused]] int64_t _tid, + [[maybe_unused]] uint64_t _ts) const { #if ROCPROFSYS_USE_ROCM > 0 auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); }; if(is_category_enabled(category::thread_cpu_time{})) { - rocpd_process_backtrace_metrics_events( + cache_backtrace_metrics_events( 0, _ts, m_cpu / units::sec, _tid); } if(is_category_enabled(category::thread_peak_memory{})) { - rocpd_process_backtrace_metrics_events( + cache_backtrace_metrics_events( 0, _ts, m_mem_peak / units::megabyte, _tid); } if(is_category_enabled(category::thread_context_switch{})) { - rocpd_process_backtrace_metrics_events( + cache_backtrace_metrics_events( 0, _ts, m_ctx_swch, _tid); } if(is_category_enabled(category::thread_page_fault{})) { - rocpd_process_backtrace_metrics_events( + cache_backtrace_metrics_events( 0, _ts, m_page_flt, _tid); } if(is_category_enabled(type_list{}) && is_category_enabled(category::thread_hardware_counter{})) { - rocpd_process_backtrace_metrics_events(0, _ts, m_hw_counter, - _tid); + cache_backtrace_metrics_events(0, _ts, m_hw_counter, _tid); } #endif } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.hpp index c7ff288059..912d4d75cb 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.hpp @@ -83,8 +83,7 @@ struct backtrace_metrics : comp::empty_base static void configure(bool, int64_t _tid = threading::get_id()); static void init_perfetto(int64_t _tid, valid_array_t); static void fini_perfetto(int64_t _tid, valid_array_t); - static void init_rocpd(int64_t _tid, valid_array_t); - static void fini_rocpd(int64_t _tid, valid_array_t); + static void init_cache(int64_t _tid, valid_array_t); static std::vector get_hw_counter_labels(int64_t); template @@ -115,7 +114,7 @@ struct backtrace_metrics : comp::empty_base const auto& get_hw_counters() const { return m_hw_counter; } void post_process_perfetto(int64_t _tid, uint64_t _ts) const; - void post_process_rocpd(int64_t _tid, uint64_t _ts) const; + void cache_backtrace_data(int64_t _tid, uint64_t _ts) const; backtrace_metrics& operator-=(const backtrace_metrics&); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp index 2d5d0bf432..7a1d1fe162 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp @@ -21,7 +21,6 @@ // SOFTWARE. #include "library/components/comm_data.hpp" -#include "core/agent_manager.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/node_info.hpp" @@ -135,9 +134,6 @@ template void cache_comm_data_events(const uint32_t device_id, int bytes) { - auto& agents = agent_manager::get_instance(); - auto agent = agents.get_agent_by_type_index(device_id, agent_type::CPU); - static std::mutex _mutex{}; static uint64_t value = 0; uint64_t _now = 0; @@ -154,12 +150,13 @@ cache_comm_data_events(const uint32_t device_id, int bytes) const size_t correlation_id = 0; const std::string call_stack = "{}"; const std::string line_info = "{}"; - const size_t agent_handle = agent.handle; trace_cache::get_buffer_storage().store( trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns, event_metadata.c_str(), stack_id, parent_stack_id, correlation_id, - call_stack.c_str(), line_info.c_str(), agent_handle, track_name.c_str(), value); + call_stack.c_str(), line_info.c_str(), device_id, + static_cast(agent_type::CPU), track_name.c_str(), + static_cast(value)); } } // namespace diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp index ce242ffbca..923b38fd9d 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp @@ -244,7 +244,7 @@ config() // the agents seems to be assigned per device basis not per core. // TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU // implementation. - auto cpu_agents = agent_manager::get_instance().get_agents_by_type(agent_type::CPU); + auto cpu_agents = get_agent_manager_instance().get_agents_by_type(agent_type::CPU); for(auto& agent : cpu_agents) { metadata_initialize_cpu_freq_pmc(agent->device_id); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp index 23cb895356..5d9f2cad90 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp @@ -31,7 +31,6 @@ #include "core/defines.hpp" #include "core/node_info.hpp" #include "core/perfetto.hpp" -#include "core/rocpd/json.hpp" #include "core/trace_cache/cache_manager.hpp" #include "core/trace_cache/sample_type.hpp" #include "library/components/category_region.hpp" @@ -46,6 +45,8 @@ #include #include +#include + #include #include #include @@ -177,11 +178,11 @@ void cache_kokkos_event(const char* name, const char* event_type, const char* target, uint64_t timestamp_ns) { - auto event_metadata = rocpd::json::create(); + nlohmann::json event_metadata; - event_metadata->set("name", name); - event_metadata->set("event_type", event_type); - event_metadata->set("target", target); + event_metadata["name"] = name; + event_metadata["event_type"] = event_type; + event_metadata["target"] = target; const size_t stack_id = 0; const size_t parent_stack_id = 0; @@ -192,7 +193,7 @@ cache_kokkos_event(const char* name, const char* event_type, const char* target, rocprofsys::trace_cache::get_buffer_storage().store( rocprofsys::trace_cache::entry_type::in_time_sample, rocprofsys::trait::name::value, timestamp_ns, - event_metadata->to_string().c_str(), stack_id, parent_stack_id, correlation_id, + event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id, call_stack, line_info); } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index 47057e0417..8ba498b2cd 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -29,7 +29,6 @@ #include "core/debug.hpp" #include "core/gpu.hpp" #include "core/perfetto.hpp" -#include "core/rocpd/json.hpp" #include "core/state.hpp" #include "core/trace_cache/buffer_storage.hpp" #include "core/trace_cache/cache_manager.hpp" @@ -65,6 +64,8 @@ #include #include +#include + #include #include #include @@ -368,7 +369,8 @@ consume_args(Tp&&...) auto get_backtrace(std::optional>& _bt_data) { - auto backtrace = ::rocpd::json::create(); + auto backtrace = nlohmann::json(); + if(_bt_data && !_bt_data->empty()) { const std::string _unk = "??"; @@ -384,9 +386,9 @@ get_backtrace(std::optional>& _bt_data (_linfo && _linfo.line > 0) ? join("", _linfo.line) : ((itr.lineno == 0) ? std::string{ "?" } : join("", itr.lineno)); - auto _entry = join("", demangle(*_func), " @ ", - join(':', ::basename(_loc->c_str()), _line)); - backtrace->set(join("", "frame#", _bt_cnt++), _entry); + auto _entry = join("", demangle(*_func), " @ ", + join(':', ::basename(_loc->c_str()), _line)); + backtrace[join("", "frame#", _bt_cnt++)] = _entry; } } return backtrace; @@ -410,26 +412,6 @@ get_parent_stack_id([[maybe_unused]] const CorrelationIdType& correlation_id) #endif } -auto -get_extdata(const rocprofiler_callback_tracing_record_t& record) -{ - constexpr auto message_key = "message"; - auto args = callback_arg_array_t{}; - auto extdata = ::rocpd::json::create(); - - rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, &args); - - for(auto [key, val] : args) - { - if(key == message_key) - { - extdata->set(key, val); - } - } - - return extdata; -} - struct scope_destructor { /// \fn scope_destructor(FuncT&& _fini, InitT&& _init) @@ -867,7 +849,7 @@ tool_tracing_callback_stop( cache_category(); cache_add_thread_info(record.thread_id); std::string args_str = get_args_string(args); - cache_region(&record, _beg_ts, _end_ts, call_stack->to_string(), args_str, + cache_region(&record, _beg_ts, _end_ts, call_stack.dump(), args_str, trait::name::value); } } @@ -952,7 +934,7 @@ ompt_cache_instant_event( cache_category(); cache_add_thread_info(record.thread_id); - cache_region(&record, _instant_ts, _instant_ts, call_stack->to_string(), + cache_region(&record, _instant_ts, _instant_ts, call_stack.dump(), get_args_string(args), trait::name::value); } @@ -966,7 +948,7 @@ ompt_cache_orphan_event( cache_category(); cache_add_thread_info(stored_data.record.thread_id); cache_region(&stored_data.record, stored_data._beg_ts, stored_data._beg_ts, - call_stack->to_string(), get_args_string(stored_data.args), + call_stack.dump(), get_args_string(stored_data.args), trait::name::value); } @@ -1034,7 +1016,7 @@ ompt_pop_standard_callback( auto call_stack = get_backtrace(_bt_data); cache_category(); cache_add_thread_info(record.thread_id); - cache_region(&record, stored_data._beg_ts, _end_ts, call_stack->to_string(), + cache_region(&record, stored_data._beg_ts, _end_ts, call_stack.dump(), get_args_string(stored_data.args), trait::name::value); } @@ -1084,7 +1066,7 @@ ompt_pop_parallel_callback( cache_category(); cache_add_thread_info(record.thread_id); - cache_region(&record, stored_data._beg_ts, _end_ts, call_stack->to_string(), + cache_region(&record, stored_data._beg_ts, _end_ts, call_stack.dump(), get_args_string(stored_data.args), trait::name::value); } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp index 7bc5ac6441..83ca3b305c 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp @@ -21,6 +21,7 @@ // SOFTWARE. #include "library/rocprofiler-sdk/counters.hpp" +#include "core/agent_manager.hpp" #include "core/trace_cache/cache_manager.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "library/rocprofiler-sdk/fwd.hpp" @@ -117,10 +118,13 @@ counter_event::operator()(const client_data* tool_data, ::perfetto::CounterTrack const size_t agent_handle = record.record_counter.agent_id.handle; const size_t value = record.record_counter.counter_value; + auto agent = get_agent_manager_instance().get_agent_by_handle(agent_handle); + trace_cache::get_buffer_storage().store( trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), _timing.start, event_metadata.c_str(), stack_id, parent_stack_id, - correlation_id, call_stack.c_str(), line_info.c_str(), agent_handle, + correlation_id, call_stack.c_str(), line_info.c_str(), + static_cast(agent.device_id), static_cast(agent.type), track_name.c_str(), value); } } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp index 95368705a4..06b2756e66 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp @@ -131,7 +131,7 @@ client_data::initialize() void client_data::initialize_event_info() { - if(agent_manager::get_instance().get_agents().empty()) + if(get_agent_manager_instance().get_agents().empty()) { initialize(); } @@ -231,7 +231,7 @@ client_data::initialize_event_info() void client_data::set_agents() { - auto& agent_mngr = agent_manager::get_instance(); + auto& agent_mngr = get_agent_manager_instance(); auto fill_agents = [&](agent_type type, std::vector& out) { const auto& _agents = agent_mngr.get_agents_by_type(type); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp index 3736f7df07..3d8ea17dd7 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp @@ -198,7 +198,7 @@ client_data::get_buffers() const inline const rocprofsys_agent_t* client_data::get_agent(rocprofiler_agent_id_t _id) const { - const auto& agent = agent_manager::get_instance().get_agent_by_handle(_id.handle); + const auto& agent = get_agent_manager_instance().get_agent_by_handle(_id.handle); return &agent; } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp index 76088023af..4c08236b96 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp @@ -29,7 +29,6 @@ #include "core/node_info.hpp" #include "core/perf.hpp" #include "core/rocpd/data_processor.hpp" -#include "core/rocpd/json.hpp" #include "core/state.hpp" #include "core/trace_cache/cache_manager.hpp" #include "core/utility.hpp" @@ -73,6 +72,8 @@ #include #include +#include + #include #include #include @@ -168,21 +169,21 @@ get_category_track_name(uint64_t tid) std::string generate_call_stack_json(const tim::unwind::processed_entry& stack_entry) { - auto call_stack = ::rocpd::json::create(); + nlohmann::json call_stack; - call_stack->set("name", std::string(demangle(stack_entry.name))); - call_stack->set("pc", as_hex(stack_entry.address)); - call_stack->set("file", std::string(stack_entry.location)); + call_stack["name"] = std::string(demangle(stack_entry.name)); + call_stack["pc"] = as_hex(stack_entry.address); + call_stack["file"] = std::string(stack_entry.location); - return call_stack->to_string(); + return call_stack.dump(); } std::string generate_line_info_json(const tim::unwind::processed_entry& line_info_entry) { - auto line_info = ::rocpd::json::create(); - line_info->set("line_address", as_hex(line_info_entry.line_address)); - line_info->set("name", std::string(demangle(line_info_entry.name))); + nlohmann::json line_info; + line_info["line_address"] = as_hex(line_info_entry.line_address); + line_info["name"] = std::string(demangle(line_info_entry.name)); if(line_info_entry.lineinfo && !line_info_entry.lineinfo.lines.empty()) { @@ -190,43 +191,15 @@ generate_line_info_json(const tim::unwind::processed_entry& line_info_entry) std::reverse(_lines.begin(), _lines.end()); for(const auto& line : _lines) { - auto inlined = ::rocpd::json::create(); - inlined->set("name", std::string(demangle(line.name))); - inlined->set("location", std::string(line.location)); - inlined->set("line", std::to_string(line.line)); - line_info->set("inlined", inlined); + nlohmann::json inlined; + inlined["name"] = std::string(demangle(line.name)); + inlined["location"] = std::string(line.location); + inlined["line"] = std::to_string(line.line); + line_info["inlined"] = inlined; } } - return line_info->to_string(); -} - -std::string -generate_hw_counter_json(int64_t _tid, const backtrace_metrics& metrics) -{ - auto extdata = ::rocpd::json::create(); - - if(!metrics.get_hw_counters().empty()) - { - auto _labels = backtrace_metrics::get_hw_counter_labels(_tid); - auto _hw_cnt_vals = metrics.get_hw_counters(); - - auto hw_counters = ::rocpd::json::create(); - for(size_t i = 0; i < _labels.size(); ++i) - { - hw_counters->set(_labels.at(i), _hw_cnt_vals.at(i)); - } - - extdata->set("hw_counters", hw_counters); - } - - return extdata->to_string(); -} - -rocpd::data_processor& -get_data_processor() -{ - return rocpd::data_processor::get_instance(); + return line_info.dump(); } template @@ -237,11 +210,13 @@ get_track_name(const thread_info& _thread_info) size_t sequent_value = _thread_info.index_data->sequent_value; constexpr auto sample_type = std::is_same_v ? "Timer" : "Overflow"; - return JOIN(" ", "Thread", sequent_value, sample_type, "(S)", thread_id); + std::stringstream name_ss; + name_ss << "Thread " << sequent_value << " " << sample_type << " (S) " << thread_id; + return name_ss.str(); } void -rocpd_initialize_sampling_category() +metadata_initialize_sampling_category() { static bool _is_initialized = false; if(_is_initialized) return; @@ -257,7 +232,7 @@ rocpd_initialize_sampling_category() } void -rocpd_initialize_thread_info(size_t tid) +metadata_initialize_thread_info(size_t tid) { const auto& _thread_info = thread_info::get(tid, SequentTID); ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid); @@ -271,7 +246,7 @@ rocpd_initialize_thread_info(size_t tid) } void -rocpd_init_track(int64_t tid) +metadata_initialize_track(int64_t tid) { const auto& _thread_info = thread_info::get(tid, SequentTID); ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid); @@ -290,22 +265,98 @@ rocpd_init_track(int64_t tid) { _overflow_track_name, thread_id, "{}" }); } -template -void -rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t name_id, - const char* track, const char* call_stack = "{}", - const char* line_info = "{}", const char* extdata = "{}") +// Added + +struct timer_sampling_data { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto string_primary_key = data_processor.insert_string(trait::name::value); + int64_t m_tid = -1; + uint64_t m_beg = 0; + uint64_t m_end = 0; + std::vector m_stack = {}; + backtrace_metrics m_metrics = {}; +}; - auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0, call_stack, - line_info, extdata); +struct overflow_sampling_data +{ + int64_t m_tid = -1; + uint64_t m_beg = 0; + uint64_t m_end = 0; + std::vector m_stack = {}; +}; - data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time, - name_id, event_id); - data_processor.insert_sample(track, start_time, event_id); +std::vector +parse_timer_data(int64_t _tid, const bundle_t* _init, + const std::vector& _data); + +std::vector +parse_overflow_data(int64_t _tid, const bundle_t*, const std::vector& _data); + +// TODO: should we remove _tid? it's inside timer_data and overflow_data +void +cache_sampling_data(int64_t _tid, const std::vector& _timer_data, + const std::vector& _overflow_data) +{ + ROCPROFSYS_VERBOSE(3 || get_debug_sampling(), + "[%li] Storing sampling data to trace cache...\n", _tid); + + const auto& _thread_info = thread_info::get(_tid, SequentTID); + ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); + if(!_thread_info) return; + + // Store timer sampling data + for(const auto& itr : _timer_data) + { + if(!_thread_info->is_valid_lifetime({ itr.m_beg, itr.m_end })) continue; + + for(const auto& iitr : itr.m_stack) + { + auto _name = std::string(demangle(iitr.name)); + auto _track_name = get_track_name(*_thread_info); + auto _call_stack = generate_call_stack_json(iitr); + auto _line_info = generate_line_info_json(iitr); + + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::backtrace_region_sample, + static_cast(ROCPROFSYS_CATEGORY_TIMER_SAMPLING), + static_cast(_thread_info->index_data->system_value), + _track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end, + trait::name::value, _call_stack.c_str(), + _line_info.c_str(), "{}"); + } + } + + auto _overflow_event = + get_setting_value("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or(""); + + if(!_overflow_event.empty()) + { + const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" }; + const auto _overflow_pos = _overflow_event.find(_overflow_prefix); + if(_overflow_pos != std::string::npos) + _overflow_event = + _overflow_event.substr(_overflow_pos + _overflow_prefix.length()); + } + + for(const auto& itr : _overflow_data) + { + if(!_thread_info->is_valid_lifetime({ itr.m_beg, itr.m_end })) continue; + + for(const auto& iitr : itr.m_stack) + { + auto _name = std::string(demangle(iitr.name)); + auto _track_name = get_track_name(*_thread_info); + auto _call_stack = generate_call_stack_json(iitr); + auto _line_info = generate_line_info_json(iitr); + + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::backtrace_region_sample, + static_cast(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING), + static_cast(_thread_info->index_data->system_value), + _track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end, + trait::name::value, _call_stack.c_str(), + _line_info.c_str(), "{}"); + } + } } auto& @@ -871,9 +922,9 @@ configure(bool _setup, int64_t _tid) } } } - rocpd_initialize_sampling_category(); - rocpd_initialize_thread_info(_tid); - rocpd_init_track(_tid); + metadata_initialize_sampling_category(); + metadata_initialize_thread_info(_tid); + metadata_initialize_track(_tid); *_running = true; sampling::get_sampler_init(_tid)->sample(); @@ -937,28 +988,11 @@ configure(bool _setup, int64_t _tid) return (_signal_types) ? *_signal_types : std::set{}; } -struct timer_sampling_data -{ - int64_t m_tid = -1; - uint64_t m_beg = 0; - uint64_t m_end = 0; - std::vector m_stack = {}; - backtrace_metrics m_metrics = {}; -}; - -struct overflow_sampling_data -{ - int64_t m_tid = -1; - uint64_t m_beg = 0; - uint64_t m_end = 0; - std::vector m_stack = {}; -}; - std::vector -post_process_timer_data(int64_t, const bundle_t*, const std::vector&); +parse_timer_data(int64_t, const bundle_t*, const std::vector&); std::vector -post_process_overflow_data(int64_t, const bundle_t*, const std::vector&); +parse_overflow_data(int64_t, const bundle_t*, const std::vector&); void post_process_perfetto(int64_t, const std::vector&, @@ -969,8 +1003,9 @@ post_process_timemory(int64_t, const std::vector&, const std::vector&); void -post_process_rocpd(int64_t _tid, const std::vector& _timer_data, - const std::vector& _overflow_data); +store_sampling_data_in_cache(int64_t _tid, + const std::vector& _timer_data, + const std::vector& _overflow_data); auto static_strings = std::set{}; @@ -1145,12 +1180,12 @@ post_process() "Sampler data for thread %lu has %zu valid entries...\n", i, _data.size()); - auto _timer_data = post_process_timer_data(i, _init, _data); - auto _overflow_data = post_process_overflow_data(i, _init, _data); + auto _timer_data = parse_timer_data(i, _init, _data); + auto _overflow_data = parse_overflow_data(i, _init, _data); if(get_use_perfetto()) post_process_perfetto(i, _timer_data, _overflow_data); if(get_use_timemory()) post_process_timemory(i, _timer_data, _overflow_data); - if(get_use_rocpd()) post_process_rocpd(i, _timer_data, _overflow_data); + store_sampling_data_in_cache(i, _timer_data, _overflow_data); } else { @@ -1191,8 +1226,7 @@ post_process() namespace { std::vector -post_process_timer_data(int64_t _tid, const bundle_t* _init, - const std::vector& _data) +parse_timer_data(int64_t _tid, const bundle_t* _init, const std::vector& _data) { auto _results = std::vector{}; @@ -1238,8 +1272,7 @@ post_process_timer_data(int64_t _tid, const bundle_t* _init, } std::vector -post_process_overflow_data(int64_t _tid, const bundle_t*, - const std::vector& _data) +parse_overflow_data(int64_t _tid, const bundle_t*, const std::vector& _data) { auto _results = std::vector{}; @@ -1704,66 +1737,7 @@ post_process_timemory(int64_t _tid, const std::vector& _tim } void -rocpd_post_process_overflow_data( - int64_t _tid, const std::vector& _overflow_data) -{ - auto& data_processor = get_data_processor(); - - const auto& _thread_info = thread_info::get(_tid, SequentTID); - ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); - if(!_thread_info) return; - - auto _overflow_event = - get_setting_value("ROCPROFSYS_SAMPLING_OVERFLOW_EVENT").value_or(""); - - if(!_overflow_event.empty() && !_overflow_data.empty()) - { - auto _beg_ns = std::max(_overflow_data.front().m_beg, _thread_info->get_start()); - auto _end_ns = std::min(_overflow_data.back().m_end, _thread_info->get_stop()); - - const auto _overflow_prefix = std::string_view{ "PERF_COUNT_" }; - const auto _overflow_pos = _overflow_event.find(_overflow_prefix); - if(_overflow_pos != std::string::npos) - _overflow_event = - _overflow_event.substr(_overflow_pos + _overflow_prefix.length()); - - const auto* _main_name = - static_strings.emplace(join(" ", _overflow_event, "samples [rocprof-sys]")) - .first->c_str(); - auto main_name_id = data_processor.insert_string(_main_name); - - size_t thread_id = _thread_info->index_data->system_value; - - auto thread_primary_key = data_processor.map_thread_id_to_primary_key(thread_id); - const auto _track_name = - get_track_name(*_thread_info); - - rocpd_insert_region( - thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str()); - - for(const auto& itr : _overflow_data) - { - auto _beg = itr.m_beg; - auto _end = itr.m_end; - - if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue; - - for(const auto& iitr : itr.m_stack) - { - const auto* _name = - static_strings.emplace(demangle(iitr.name)).first->c_str(); - auto name_id = data_processor.insert_string(_name); - rocpd_insert_region( - thread_primary_key, _beg, _end, name_id, _track_name.c_str(), - generate_call_stack_json(iitr).c_str(), - generate_line_info_json(iitr).c_str()); - } - } - } -} - -void -rocpd_post_process_backtrace_metrics( +cache_backtrace_metrics( [[maybe_unused]] int64_t _tid, [[maybe_unused]] const std::vector& _timer_data) { @@ -1779,121 +1753,22 @@ rocpd_post_process_backtrace_metrics( { ROCPROFSYS_VERBOSE(3 || get_debug_sampling(), "[%li] Post-processing metrics for rocpd...\n", _tid); - backtrace_metrics::init_rocpd(_tid, _valid_metrics); // move to setup + backtrace_metrics::init_cache(_tid, _valid_metrics); // move to setup for(const auto& itr : _timer_data) - itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end)); - backtrace_metrics::fini_rocpd(_tid, _valid_metrics); + itr.m_metrics.cache_backtrace_data(_tid, 0.5 * (itr.m_beg + itr.m_end)); } #endif } void -rocpd_post_process_timer_data( - [[maybe_unused]] int64_t _tid, - [[maybe_unused]] const std::vector& _timer_data) +store_sampling_data_in_cache( + [[maybe_unused]] int64_t _tid, + [[maybe_unused]] const std::vector& _timer_data, + [[maybe_unused]] const std::vector& _overflow_data) { #if ROCPROFSYS_USE_ROCM > 0 - auto& data_processor = get_data_processor(); - - const auto& _thread_info = thread_info::get(_tid, SequentTID); - ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); - if(!_thread_info) return; - - if(!_timer_data.empty()) - { - rocpd_post_process_backtrace_metrics(_tid, _timer_data); - - auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start()); - auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop()); - - const auto _track_name = get_track_name(*_thread_info); - - auto thread_primary_key = data_processor.map_thread_id_to_primary_key( - _thread_info->index_data->system_value); - - const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]"); - rocpd_insert_region( - thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str()); - - auto _labels = backtrace_metrics::get_hw_counter_labels(_tid); - for(const auto& itr : _timer_data) - { - size_t _ncount = 0; - uint64_t _beg = itr.m_beg; - uint64_t _end = itr.m_end; - if(!_thread_info->is_valid_lifetime({ _beg, _end })) continue; - - for(const auto& iitr : itr.m_stack) - { - auto _ncur = _ncount++; - // the begin/end + HW counters will be same for entire call-stack so only - // annotate the top and the bottom functions to keep the data consumption - // low - bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size()); - - // Only annotate HW counters when first or last and HW counters are not - // empty - bool _include_hw = - _include_common && !itr.m_metrics.get_hw_counters().empty(); - - std::string hw_counter_json = "{}"; - if(_include_hw) - { - // current values when read - hw_counter_json = generate_hw_counter_json(_tid, itr.m_metrics); - } - - if(get_sampling_include_inlines() && iitr.lineinfo) - { - auto _lines = iitr.lineinfo.lines; - std::reverse(_lines.begin(), _lines.end()); - size_t _n = 0; - for(const auto& line : _lines) - { - const auto* _name = - static_strings.emplace(demangle(line.name)).first->c_str(); - auto inlined_name_id = data_processor.insert_string(_name); - - auto inlined_call_stack = ::rocpd::json::create(); - inlined_call_stack->set("name", std::string(demangle(line.name))); - inlined_call_stack->set("location", std::string(line.location)); - inlined_call_stack->set("line", std::to_string(line.line)); - inlined_call_stack->set("inlined", "true"); - - rocpd_insert_region( - thread_primary_key, _beg, _end, inlined_name_id, - _track_name.c_str(), inlined_call_stack->to_string().c_str(), - "{}", - // Only include HW counters for first inlined function - (_n == 0) ? hw_counter_json.c_str() : "{}"); - } - } - else - { - const auto* _name = static_strings.emplace(iitr.name).first->c_str(); - const auto name_id = data_processor.insert_string(_name); - rocpd_insert_region( - thread_primary_key, _beg, _end, name_id, _track_name.c_str(), - generate_call_stack_json(iitr).c_str(), - generate_line_info_json(iitr).c_str(), hw_counter_json.c_str()); - } - } - } - } -#endif -} - -void -post_process_rocpd(int64_t _tid, const std::vector& _timer_data, - const std::vector& _overflow_data) -{ -#if ROCPROFSYS_USE_ROCM > 0 - rocpd_post_process_overflow_data(_tid, _overflow_data); - rocpd_post_process_timer_data(_tid, _timer_data); -#else - (void) _tid; - (void) _timer_data; - (void) _overflow_data; + cache_sampling_data(_tid, _timer_data, _overflow_data); + cache_backtrace_metrics(_tid, _timer_data); #endif }