From fe5d0743752d84ac0bdf223c646b00b0b7a1c833 Mon Sep 17 00:00:00 2001 From: Ammar ELWazir Date: Tue, 28 Nov 2023 10:04:37 -0600 Subject: [PATCH] Misc updates for distribution (#233) * Adding tools support * cmake formatting (cmake-format) (#227) Co-authored-by: SrirakshaNag * Checking to do rebase * Adding rocprofv2 script * cmake formatting (cmake-format) (#229) Co-authored-by: bgopesh * Fixing build for the tool * Removing the requirement for rocm_version * Update rocprofiler_utilities.cmake * C++ filesystem fixes - added source/lib/common/filesystem.hpp - support older compilers which have and do not have - added samples/common/filesystem.hpp - samples now depend on "common" library which provides the correct filesystem header - renamed rocprofiler-stdcxxfs interface target to rocprofiler-cxx-filesystem - support old LLVM in addition to GNU - fix bin/rocprof/rocprof.cpp - was using VLA * Fix rocprofiler-drm include directories - OpenSUSE only has include/libdrm/drm.h (no include/drm/drm.h) * Tools fixes * Fix for the tools * Fix rocprofv2 script * Fixing Filesystem Issues * source formatting (clang-format v11) (#234) Co-authored-by: ammarwa * Vlaindic/pc sampling api update (#235) * pcs: updating PC sampling API * source formatting (clang-format v11) (#232) Co-authored-by: vlaindic --------- Co-authored-by: vlaindic Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: vlaindic * Vlaindic/pc sampling api update for ammar branch (#244) *Updating the documentation inside pc_sampling.h --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: vlaindic * pcs: use @p in front of params * pcs: documenting struct fields updated * Fixing PC Sampling Documentation issues * Fixing PC Sampling Documentation * Relocated tools directory to source/lib/rocprofiler-tool * Fixes/updates to rocprofiler-tool - updated CMake - Fixed miscellaneous issues in the code (VLAs, etc.) - Updated rocprofv2 to reflect some minor env variables changes in rocprofiler-tool - Fixed clang-tidy warnings * Update lib/rocprofiler-tool/CMakeLists.txt - link to atomic library * Add $ORIGIN/.. RUNPATH to rocprofiler-tool * Adding readme file for tools * Renaming the tools readme file * Update ReadMe.md * Update ReadMe.md * Documentation updates - overview and explanation of design and concepts * Fix lib/rocprofiler-tool/README.md - delete ReadMe.md * Hacks for build * Update Filesystem * cmake formatting (cmake-format) (#248) Co-authored-by: ammarwa * source formatting (clang-format v11) (#249) Co-authored-by: ammarwa * source formatting (clang-format v11) (#250) Co-authored-by: ammarwa * Addressing review comments on the tool readme file * Revert "Hacks for build" This reverts commit d6688cb3d1226c46fc97e37ced889a5b0d180940. * Fixes for GCC 7.5 compiler in OpenSUSE 15.4 * Update lib/rocprofiler-tool/CMakeLists.txt - link to AQL profile library * Fix lib/rocprofiler-tool/README.md - fix markdown * Fix lib/rocprofiler-tool - fix usage of hsa_ven_amd_loader_query_host_address * Fix unused variable warnings - byproduct of variables only used in assert statements * Update docs - update about.md - more "Important Changes" section here - update tool_library_overview.md - extend "Tool Library Design" section - write "Tool Initialization" section - write "Tool Finalization" section * Add ghc::filesystem submodule * Implement usage of ghc::filesystem * Add ROCPROFILER_BUILD_GHC_FS option - option to use external/filesystem (ghc) * Update samples/counter-collection - compile flags - common library - fixes for warnings * Update tests/kernel-tracing/CMakeLists.txt - change install location of kernel-tracing-test-tool and install rpath * Update samples/common/CMakeLists.txt - compile features requiring C++17 * Update lib/rocprofiler-tool/tool.cpp - remove include - comment out unused variable - remove unused functions - move some functions into anonymous namespace --------- Co-authored-by: Sriraksha Nagaraj Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: SrirakshaNag Co-authored-by: gobhardw Co-authored-by: bgopesh Co-authored-by: Jonathan R. Madsen Co-authored-by: ammarwa Co-authored-by: vlaindic Co-authored-by: vlaindic Co-authored-by: Vladimir Indic <139573562+vlaindic@users.noreply.github.com> Co-authored-by: Benjamin Welton Co-authored-by: Jonathan R. Madsen --- .gitmodules | 3 + CMakeLists.txt | 1 - cmake/rocprofiler_build_settings.cmake | 2 +- cmake/rocprofiler_config_interfaces.cmake | 42 +- cmake/rocprofiler_interfaces.cmake | 3 +- cmake/rocprofiler_options.cmake | 4 + cmake/rocprofiler_utilities.cmake | 1 + external/CMakeLists.txt | 17 + external/filesystem | 1 + samples/CMakeLists.txt | 12 +- samples/api_buffered_tracing/CMakeLists.txt | 9 +- samples/api_buffered_tracing/client.cpp | 32 +- samples/api_callback_tracing/CMakeLists.txt | 9 +- samples/api_callback_tracing/client.cpp | 28 +- samples/code_object_tracing/CMakeLists.txt | 6 +- samples/code_object_tracing/client.cpp | 27 +- samples/common/CMakeLists.txt | 37 ++ samples/common/defines.hpp | 39 ++ samples/common/filesystem.hpp | 77 +++ samples/counter_collection/CMakeLists.txt | 13 +- samples/counter_collection/client.cpp | 16 +- .../print_functional_counters.cpp | 17 +- samples/intercept_table/CMakeLists.txt | 9 +- samples/intercept_table/client.cpp | 29 +- samples/pc_sampling/CMakeLists.txt | 34 -- samples/pc_sampling/common.h | 147 ------ ...ost-trap-retries-service-instantiation.cpp | 190 ------- samples/pc_sampling/single-user-host-trap.cpp | 87 ---- .../single-user-multiple-agents.cpp | 225 --------- source/bin/CMakeLists.txt | 7 + source/bin/rocprofv2 | 83 ++++ source/docs/about.md | 27 +- source/docs/buffered_tracing.md | 14 + source/docs/callback_tracing.md | 13 + source/docs/features.md | 6 +- source/docs/index.md | 4 + source/docs/intercept_table.md | 9 + source/docs/tool_library_overview.md | 254 ++++++++++ source/include/rocprofiler/buffer_tracing.h | 11 + source/include/rocprofiler/counters.h | 16 +- source/include/rocprofiler/dispatch_profile.h | 10 +- source/include/rocprofiler/fwd.h | 32 +- .../include/rocprofiler/internal_threading.h | 1 + source/include/rocprofiler/pc_sampling.h | 245 ++++++++- source/include/rocprofiler/registration.h | 4 + source/lib/CMakeLists.txt | 1 + source/lib/common/CMakeLists.txt | 2 +- source/lib/common/config.cpp | 11 +- .../common/container/record_header_buffer.cpp | 16 +- .../common/container/record_header_buffer.hpp | 7 +- source/lib/common/container/small_vector.hpp | 2 + source/lib/common/filesystem.hpp | 77 +++ source/lib/common/xml.cpp | 1 + source/lib/rocprofiler-tool/CMakeLists.txt | 32 ++ source/lib/rocprofiler-tool/README.md | 33 ++ source/lib/rocprofiler-tool/helper.cpp | 353 +++++++++++++ source/lib/rocprofiler-tool/helper.hpp | 149 ++++++ source/lib/rocprofiler-tool/tool.cpp | 464 ++++++++++++++++++ source/lib/rocprofiler-tool/trace_buffer.hpp | 318 ++++++++++++ source/lib/rocprofiler/CMakeLists.txt | 11 +- source/lib/rocprofiler/agent.cpp | 10 +- source/lib/rocprofiler/aql/tests/aql_test.cpp | 17 +- source/lib/rocprofiler/counters/metrics.cpp | 7 +- .../counters/parser/tests/CMakeLists.txt | 4 +- source/lib/rocprofiler/hsa/agent_cache.cpp | 3 +- .../lib/rocprofiler/hsa/queue_controller.cpp | 8 +- .../lib/rocprofiler/hsa/queue_controller.hpp | 2 - source/lib/rocprofiler/internal_threading.cpp | 6 +- source/lib/rocprofiler/pc_sampling.cpp | 14 +- .../pc_sampling/parser/tests/pcs_parser.cpp | 13 +- .../lib/rocprofiler/tests/details/agent.cpp | 4 +- .../tests/external_correlation.cpp | 3 +- .../lib/rocprofiler/tests/intercept_table.cpp | 3 +- source/lib/rocprofiler/tests/registration.cpp | 5 +- .../tests/buffering/buffering-save-load.cpp | 6 +- tests/kernel-tracing/CMakeLists.txt | 7 +- tests/kernel-tracing/kernel-tracing.cpp | 2 +- 77 files changed, 2501 insertions(+), 943 deletions(-) create mode 160000 external/filesystem create mode 100644 samples/common/CMakeLists.txt create mode 100644 samples/common/defines.hpp create mode 100644 samples/common/filesystem.hpp delete mode 100644 samples/pc_sampling/CMakeLists.txt delete mode 100644 samples/pc_sampling/common.h delete mode 100644 samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp delete mode 100644 samples/pc_sampling/single-user-host-trap.cpp delete mode 100644 samples/pc_sampling/single-user-multiple-agents.cpp create mode 100755 source/bin/rocprofv2 create mode 100644 source/docs/buffered_tracing.md create mode 100644 source/docs/callback_tracing.md create mode 100644 source/docs/intercept_table.md create mode 100644 source/docs/tool_library_overview.md create mode 100644 source/lib/common/filesystem.hpp create mode 100644 source/lib/rocprofiler-tool/CMakeLists.txt create mode 100644 source/lib/rocprofiler-tool/README.md create mode 100644 source/lib/rocprofiler-tool/helper.cpp create mode 100644 source/lib/rocprofiler-tool/helper.hpp create mode 100644 source/lib/rocprofiler-tool/tool.cpp create mode 100644 source/lib/rocprofiler-tool/trace_buffer.hpp diff --git a/.gitmodules b/.gitmodules index 7b75d7ff0c..1ad4746ab1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "external/cereal"] path = external/cereal url = https://github.com/jrmadsen/cereal.git +[submodule "external/filesystem"] + path = external/filesystem + url = https://github.com/gulrak/filesystem.git diff --git a/CMakeLists.txt b/CMakeLists.txt index afd86e4281..abe4c282a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,7 +105,6 @@ if(ROCPROFILER_BUILD_SAMPLES) add_subdirectory(samples) endif() -# include(rocprofiler_config_install) include(rocprofiler_config_packaging) diff --git a/cmake/rocprofiler_build_settings.cmake b/cmake/rocprofiler_build_settings.cmake index 01c5852e2e..a781a00f3d 100644 --- a/cmake/rocprofiler_build_settings.cmake +++ b/cmake/rocprofiler_build_settings.cmake @@ -120,7 +120,7 @@ endif() rocprofiler_target_compile_options( rocprofiler-developer-flags LANGUAGES C CXX - INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra" + INTERFACE "-Werror" "-Wdouble-promotion" "-Wshadow" "-Wextra" "-Wvla" "-Wno-missing-field-initializers") if(ROCPROFILER_BUILD_DEVELOPER) diff --git a/cmake/rocprofiler_config_interfaces.cmake b/cmake/rocprofiler_config_interfaces.cmake index 80181221eb..3698e19af1 100644 --- a/cmake/rocprofiler_config_interfaces.cmake +++ b/cmake/rocprofiler_config_interfaces.cmake @@ -6,7 +6,6 @@ include_guard(DIRECTORY) # External Packages are found here # # ######################################################################################## - target_include_directories( rocprofiler-headers INTERFACE $ @@ -24,6 +23,7 @@ string(REPLACE ":" ";" CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH};${CMAKE_PREFIX set(ROCPROFILER_DEFAULT_ROCM_PATH /opt/rocm CACHE PATH "Default search path for ROCM") + if(EXISTS ${ROCPROFILER_DEFAULT_ROCM_PATH}) get_filename_component(_ROCPROFILER_DEFAULT_ROCM_PATH "${ROCPROFILER_DEFAULT_ROCM_PATH}" REALPATH) @@ -41,7 +41,6 @@ endif() # Threading # # ----------------------------------------------------------------------------------------# - set(CMAKE_THREAD_PREFER_PTHREAD ON) set(THREADS_PREFER_PTHREAD_FLAG OFF) @@ -55,6 +54,7 @@ if(pthread_LIBRARY) target_link_libraries(rocprofiler-threading INTERFACE ${pthread_LIBRARY}) else() find_package(Threads ${rocprofiler_FIND_QUIETLY} ${rocprofiler_FIND_REQUIREMENT}) + if(Threads_FOUND) target_link_libraries(rocprofiler-threading INTERFACE Threads::Threads) endif() @@ -65,10 +65,10 @@ endif() # dynamic linking (dl) and runtime (rt) libraries # # ----------------------------------------------------------------------------------------# - foreach(_LIB dl rt) find_library(${_LIB}_LIBRARY NAMES ${_LIB}) find_package_handle_standard_args(${_LIB}-library REQUIRED_VARS ${_LIB}_LIBRARY) + if(${_LIB}_LIBRARY) target_link_libraries(rocprofiler-threading INTERFACE ${${_LIB}_LIBRARY}) endif() @@ -76,17 +76,16 @@ endforeach() # ----------------------------------------------------------------------------------------# # -# stdc++fs (filesystem) library +# filesystem library # # ----------------------------------------------------------------------------------------# -find_library(stdcxxfs_LIBRARY NAMES stdc++fs) -find_package_handle_standard_args(stdcxxfs-library REQUIRED_VARS stdcxxfs_LIBRARY) - -if(stdcxxfs_LIBRARY) - target_link_libraries(rocprofiler-stdcxxfs INTERFACE ${stdcxxfs_LIBRARY}) -else() - target_link_libraries(rocprofiler-stdcxxfs INTERFACE stdc++fs) +if(NOT ROCPROFILER_BUILD_GHC_FS) + if(CMAKE_CXX_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) + target_link_libraries(rocprofiler-cxx-filesystem INTERFACE stdc++fs) + elseif(CMAKE_CXX_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) + target_link_libraries(rocprofiler-cxx-filesystem INTERFACE c++fs) + endif() endif() # ----------------------------------------------------------------------------------------# @@ -94,11 +93,14 @@ endif() # HIP # # ----------------------------------------------------------------------------------------# +find_package(rocm_version) + +if(rocm_version_FOUND) + list(APPEND CMAKE_PREFIX_PATH "${rocm_version_DIR}" "${rocm_version_DIR}/llvm") + list(APPEND CMAKE_MODULE_PATH "${rocm_version_DIR}/hip/cmake" + "${rocm_version_DIR}/lib/cmake") +endif() -find_package(rocm_version REQUIRED) -list(APPEND CMAKE_PREFIX_PATH "${rocm_version_DIR}" "${rocm_version_DIR}/llvm") -list(APPEND CMAKE_MODULE_PATH "${rocm_version_DIR}/hip/cmake" - "${rocm_version_DIR}/lib/cmake") find_package(hip REQUIRED CONFIG) target_link_libraries(rocprofiler-hip INTERFACE hip::host) @@ -107,7 +109,6 @@ target_link_libraries(rocprofiler-hip INTERFACE hip::host) # HSA runtime # # ----------------------------------------------------------------------------------------# - find_package( hsa-runtime64 REQUIRED @@ -120,6 +121,7 @@ find_package( ${ROCM_PATH}) string(REPLACE "." ";" HSA_RUNTIME_VERSION "${hsa-runtime64_VERSION}") + # the following values are encoded into version.h list(GET HSA_RUNTIME_VERSION 0 HSA_RUNTIME_VERSION_MAJOR) list(GET HSA_RUNTIME_VERSION 1 HSA_RUNTIME_VERSION_MINOR) @@ -131,7 +133,6 @@ target_link_libraries(rocprofiler-hsa-runtime INTERFACE hsa-runtime64::hsa-runti # amd comgr # # ----------------------------------------------------------------------------------------# - find_package( amd_comgr REQUIRED @@ -152,7 +153,6 @@ target_link_libraries(rocprofiler-amd-comgr INTERFACE amd_comgr) # PTL (Parallel Tasking Library) # # ----------------------------------------------------------------------------------------# - target_link_libraries(rocprofiler-ptl INTERFACE PTL::ptl-static) # ----------------------------------------------------------------------------------------# @@ -160,7 +160,6 @@ target_link_libraries(rocprofiler-ptl INTERFACE PTL::ptl-static) # amd aql # # ----------------------------------------------------------------------------------------# - find_library( hsa-amd-aqlprofile64_library NAMES hsa-amd-aqlprofile64 hsa-amd-aqlprofile @@ -174,20 +173,19 @@ target_link_libraries(rocprofiler-hsa-aql INTERFACE ${hsa-amd-aqlprofile64_libra # drm # # ----------------------------------------------------------------------------------------# - find_path( drm_INCLUDE_DIR NAMES drm.h HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu - PATH_SUFFIXES include/drm include REQUIRED) + PATH_SUFFIXES include/drm include/libdrm include REQUIRED) find_path( xf86drm_INCLUDE_DIR NAMES xf86drm.h HINTS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu PATHS ${rocm_version_DIR} ${ROCM_PATH} /opt/amdgpu - PATH_SUFFIXES include/drm include REQUIRED) + PATH_SUFFIXES include/drm include/libdrm include REQUIRED) find_library( drm_LIBRARY diff --git a/cmake/rocprofiler_interfaces.cmake b/cmake/rocprofiler_interfaces.cmake index a9fc11cdec..24c7b3a221 100644 --- a/cmake/rocprofiler_interfaces.cmake +++ b/cmake/rocprofiler_interfaces.cmake @@ -48,7 +48,8 @@ rocprofiler_add_interface_library(rocprofiler-amd-comgr "AMD comgr library" INTE rocprofiler_add_interface_library(rocprofiler-gtest "Google Test library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-glog "Google Log library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-fmt "C++ format string library" INTERNAL) -rocprofiler_add_interface_library(rocprofiler-stdcxxfs "C++ filesystem library" INTERNAL) +rocprofiler_add_interface_library(rocprofiler-cxx-filesystem "C++ filesystem library" + INTERNAL) rocprofiler_add_interface_library(rocprofiler-ptl "Parallel Tasking Library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-hsa-aql "AQL library" INTERNAL) rocprofiler_add_interface_library(rocprofiler-drm "drm (amdgpu) library" INTERNAL) diff --git a/cmake/rocprofiler_options.cmake b/cmake/rocprofiler_options.cmake index a62ba7d0fb..8bf0432426 100644 --- a/cmake/rocprofiler_options.cmake +++ b/cmake/rocprofiler_options.cmake @@ -47,6 +47,10 @@ foreach(_PLUGIN "ATT" "CTF" "PERFETTO") "Enable building the ${_PLUGIN} plugin" ON) endforeach() +rocprofiler_add_option( + ROCPROFILER_BUILD_GHC_FS + "Enable building with ghc::filesystem library (via submodule) instead of the C++ filesystem library" + ON) rocprofiler_add_option(ROCPROFILER_BUILD_FMT "Enable building fmt library internally" ON) rocprofiler_add_option(ROCPROFILER_BUILD_GLOG "Enable building glog (Google logging) library internally" ON) diff --git a/cmake/rocprofiler_utilities.cmake b/cmake/rocprofiler_utilities.cmake index da9ee7c618..fb288460dd 100644 --- a/cmake/rocprofiler_utilities.cmake +++ b/cmake/rocprofiler_utilities.cmake @@ -791,6 +791,7 @@ function(ROCPROFILER_PYTHON_CONSOLE_SCRIPT SCRIPT_NAME SCRIPT_SUBMODULE) set(Python3_ROOT_DIR "${ARG_ROOT_DIR}") find_package(Python3 ${ARG_VERSION} EXACT QUIET MODULE COMPONENTS Interpreter) set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") + execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install pandas) configure_file(${PROJECT_SOURCE_DIR}/cmake/Templates/console-script.in ${PROJECT_BINARY_DIR}/bin/${SCRIPT_NAME}-${ARG_VERSION} @ONLY) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 72906d839f..cf61f07c18 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -7,6 +7,23 @@ include(rocprofiler_utilities) set(BUILD_TESTING OFF) +# filesystem library +if(ROCPROFILER_BUILD_GHC_FS) + rocprofiler_checkout_git_submodule( + RECURSIVE + RELATIVE_PATH external/filesystem + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + REPO_URL https://github.com/gulrak/filesystem.git + REPO_BRANCH "v1.5.14") + + target_compile_definitions( + rocprofiler-cxx-filesystem + INTERFACE $) + target_include_directories( + rocprofiler-cxx-filesystem SYSTEM + INTERFACE $) +endif() + if(ROCPROFILER_BUILD_TESTS) if(ROCPROFILER_BUILD_GTEST) set(INSTALL_GTEST diff --git a/external/filesystem b/external/filesystem new file mode 160000 index 0000000000..8a2edd6d92 --- /dev/null +++ b/external/filesystem @@ -0,0 +1 @@ +Subproject commit 8a2edd6d92ed820521d42c94d179462bf06b5ed3 diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 8a4fb0fecf..100eedfcd3 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -14,18 +14,12 @@ endif() enable_testing() include(CTest) -add_library(rocprofiler-samples-build-flags INTERFACE) -add_library(rocprofiler::samples-build-flags ALIAS rocprofiler-samples-build-flags) -target_compile_options(rocprofiler-samples-build-flags INTERFACE -W -Wall -Wextra - -Wshadow) - -if(ROCPROFILER_BUILD_CI OR ROCPROFILER_BUILD_WERROR) - target_compile_options(rocprofiler-samples-build-flags INTERFACE -Werror) -endif() +# common utilities for samples +add_subdirectory(common) +# actual samples add_subdirectory(api_callback_tracing) add_subdirectory(api_buffered_tracing) add_subdirectory(code_object_tracing) add_subdirectory(counter_collection) add_subdirectory(intercept_table) -add_subdirectory(pc_sampling) diff --git a/samples/api_buffered_tracing/CMakeLists.txt b/samples/api_buffered_tracing/CMakeLists.txt index 3afc6d943f..a8bff6e349 100644 --- a/samples/api_buffered_tracing/CMakeLists.txt +++ b/samples/api_buffered_tracing/CMakeLists.txt @@ -33,8 +33,8 @@ add_library(buffered-api-tracing-client SHARED) target_sources(buffered-api-tracing-client PRIVATE client.cpp client.hpp) target_link_libraries( buffered-api-tracing-client - PRIVATE rocprofiler::rocprofiler - $) + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) find_package(Threads REQUIRED) @@ -42,9 +42,8 @@ find_package(Threads REQUIRED) add_executable(buffered-api-tracing) target_sources(buffered-api-tracing PRIVATE main.cpp) target_link_libraries( - buffered-api-tracing - PRIVATE buffered-api-tracing-client Threads::Threads - $) + buffered-api-tracing PRIVATE buffered-api-tracing-client Threads::Threads + rocprofiler::samples-build-flags) add_test(NAME buffered-api-tracing COMMAND $) diff --git a/samples/api_buffered_tracing/client.cpp b/samples/api_buffered_tracing/client.cpp index 5fcee4e2f6..4e76f72521 100644 --- a/samples/api_buffered_tracing/client.cpp +++ b/samples/api_buffered_tracing/client.cpp @@ -41,6 +41,9 @@ #include #include +#include "common/defines.hpp" +#include "common/filesystem.hpp" + #include #include #include @@ -48,9 +51,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -59,22 +62,6 @@ #include #include -#define ROCPROFILER_CALL(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ - { \ - std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ - std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ - << " failed with error code " << CHECKSTATUS << ": " << status_msg \ - << std::endl; \ - std::stringstream errmsg{}; \ - errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ - << status_msg << ")"; \ - throw std::runtime_error(errmsg.str()); \ - } \ - } - namespace client { namespace @@ -110,8 +97,6 @@ kernel_symbol_map_t client_kernels = {}; void print_call_stack(const call_stack_t& _call_stack) { - namespace fs = ::std::filesystem; - auto ofname = std::string{"api_buffered_trace.log"}; if(auto* eofname = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) ofname = eofname; @@ -141,8 +126,8 @@ print_call_stack(const call_stack_t& _call_stack) for(const auto& itr : _call_stack) { *ofs << std::left << std::setw(2) << ++n << "/" << std::setw(2) << _call_stack.size() - << " [" << fs::path{itr.file}.filename() << ":" << itr.line << "] " << std::setw(20) - << itr.function; + << " [" << common::fs::path{itr.file}.filename() << ":" << itr.line << "] " + << std::setw(20) << itr.function; if(!itr.context.empty()) *ofs << " :: " << itr.context; *ofs << "\n"; } @@ -487,8 +472,9 @@ identify(uint64_t val) { auto _tid = rocprofiler_thread_id_t{}; rocprofiler_get_thread_id(&_tid); - rocprofiler_push_external_correlation_id( - client_ctx, _tid, rocprofiler_user_data_t{.value = val}); + rocprofiler_user_data_t user_data = {}; + user_data.value = val; + rocprofiler_push_external_correlation_id(client_ctx, _tid, user_data); } void diff --git a/samples/api_callback_tracing/CMakeLists.txt b/samples/api_callback_tracing/CMakeLists.txt index 387869ccb8..ca58be9ef9 100644 --- a/samples/api_callback_tracing/CMakeLists.txt +++ b/samples/api_callback_tracing/CMakeLists.txt @@ -33,8 +33,8 @@ add_library(callback-api-tracing-client SHARED) target_sources(callback-api-tracing-client PRIVATE client.cpp client.hpp) target_link_libraries( callback-api-tracing-client - PRIVATE rocprofiler::rocprofiler - $) + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) find_package(Threads REQUIRED) @@ -42,9 +42,8 @@ find_package(Threads REQUIRED) add_executable(callback-api-tracing) target_sources(callback-api-tracing PRIVATE main.cpp) target_link_libraries( - callback-api-tracing - PRIVATE callback-api-tracing-client Threads::Threads - $) + callback-api-tracing PRIVATE callback-api-tracing-client Threads::Threads + rocprofiler::samples-build-flags) add_test(NAME callback-api-tracing COMMAND $) diff --git a/samples/api_callback_tracing/client.cpp b/samples/api_callback_tracing/client.cpp index e529dbd565..e756519080 100644 --- a/samples/api_callback_tracing/client.cpp +++ b/samples/api_callback_tracing/client.cpp @@ -36,15 +36,18 @@ #include #include +#include "common/defines.hpp" +#include "common/filesystem.hpp" + #include #include #include #include #include #include -#include #include #include +#include #include #include #include @@ -52,23 +55,6 @@ #include #include #include - -#define ROCPROFILER_CALL(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ - { \ - std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ - std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ - << " failed with error code " << CHECKSTATUS << ": " << status_msg \ - << std::endl; \ - std::stringstream errmsg{}; \ - errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ - << status_msg << ")"; \ - throw std::runtime_error(errmsg.str()); \ - } \ - } - namespace client { namespace @@ -99,8 +85,6 @@ rocprofiler_context_id_t client_ctx = {}; void print_call_stack(const call_stack_t& _call_stack) { - namespace fs = ::std::filesystem; - auto ofname = std::string{"api_callback_trace.log"}; if(auto* eofname = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) ofname = eofname; @@ -131,8 +115,8 @@ print_call_stack(const call_stack_t& _call_stack) for(const auto& itr : _call_stack) { *ofs << std::left << std::setw(2) << ++n << "/" << std::setw(2) << _call_stack.size() - << " [" << fs::path{itr.file}.filename() << ":" << itr.line << "] " << std::setw(20) - << itr.function; + << " [" << common::fs::path{itr.file}.filename() << ":" << itr.line << "] " + << std::setw(20) << itr.function; if(!itr.context.empty()) *ofs << " :: " << itr.context; *ofs << "\n"; } diff --git a/samples/code_object_tracing/CMakeLists.txt b/samples/code_object_tracing/CMakeLists.txt index 36ac56567e..a141443def 100644 --- a/samples/code_object_tracing/CMakeLists.txt +++ b/samples/code_object_tracing/CMakeLists.txt @@ -33,8 +33,8 @@ add_library(code-object-tracing-client SHARED) target_sources(code-object-tracing-client PRIVATE client.cpp) target_link_libraries( code-object-tracing-client - PRIVATE rocprofiler::rocprofiler - $) + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) find_package(Threads REQUIRED) @@ -43,7 +43,7 @@ add_executable(code-object-tracing) target_sources(code-object-tracing PRIVATE main.cpp) target_link_libraries( code-object-tracing PRIVATE code-object-tracing-client Threads::Threads - $) + rocprofiler::samples-build-flags) add_test(NAME code-object-tracing COMMAND $) diff --git a/samples/code_object_tracing/client.cpp b/samples/code_object_tracing/client.cpp index 8419cfb90a..ea997911c1 100644 --- a/samples/code_object_tracing/client.cpp +++ b/samples/code_object_tracing/client.cpp @@ -37,6 +37,9 @@ #include #include +#include "common/defines.hpp" +#include "common/filesystem.hpp" + #include #include #include @@ -45,9 +48,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -57,22 +60,6 @@ #include #include -#define ROCPROFILER_CALL(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ - { \ - std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ - std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ - << " failed with error code " << CHECKSTATUS << ": " << status_msg \ - << std::endl; \ - std::stringstream errmsg{}; \ - errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ - << status_msg << ")"; \ - throw std::runtime_error(errmsg.str()); \ - } \ - } - namespace client { namespace @@ -172,8 +159,6 @@ cxa_demangle(std::string_view _mangled_name, int* _status) void print_call_stack(const call_stack_t& _call_stack) { - namespace fs = ::std::filesystem; - auto ofname = std::string{"code_object_trace.log"}; if(auto* eofname = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) ofname = eofname; @@ -203,8 +188,8 @@ print_call_stack(const call_stack_t& _call_stack) for(const auto& itr : _call_stack) { *ofs << std::left << std::setw(2) << ++n << "/" << std::setw(2) << _call_stack.size() - << " [" << fs::path{itr.file}.filename() << ":" << itr.line << "] " << std::setw(20) - << itr.function; + << " [" << common::fs::path{itr.file}.filename() << ":" << itr.line << "] " + << std::setw(20) << itr.function; if(!itr.context.empty()) *ofs << " :: " << itr.context; *ofs << "\n"; } diff --git a/samples/common/CMakeLists.txt b/samples/common/CMakeLists.txt new file mode 100644 index 0000000000..8369b6daab --- /dev/null +++ b/samples/common/CMakeLists.txt @@ -0,0 +1,37 @@ +# +# common utilities for samples +# + +# build flags +add_library(rocprofiler-samples-build-flags INTERFACE) +add_library(rocprofiler::samples-build-flags ALIAS rocprofiler-samples-build-flags) +target_compile_options(rocprofiler-samples-build-flags INTERFACE -W -Wall -Wextra + -Wshadow) +target_compile_features(rocprofiler-samples-build-flags INTERFACE cxx_std_17) + +if(ROCPROFILER_BUILD_CI OR ROCPROFILER_BUILD_WERROR) + target_compile_options(rocprofiler-samples-build-flags INTERFACE -Werror) +endif() + +# common utilities +cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH COMMON_LIBRARY_INCLUDE_DIR) + +add_library(rocprofiler-samples-common-library INTERFACE) +add_library(rocprofiler::samples-common-library ALIAS rocprofiler-samples-common-library) +target_link_libraries(rocprofiler-samples-common-library + INTERFACE rocprofiler::samples-build-flags) +target_compile_features(rocprofiler-samples-common-library INTERFACE cxx_std_17) +target_include_directories(rocprofiler-samples-common-library + INTERFACE ${COMMON_LIBRARY_INCLUDE_DIR}) + +set(EXTERNAL_SUBMODULE_DIR "${PROJECT_SOURCE_DIR}/../external") +cmake_path(ABSOLUTE_PATH EXTERNAL_SUBMODULE_DIR NORMALIZE) + +if(EXISTS ${EXTERNAL_SUBMODULE_DIR}/filesystem/include/ghc/filesystem.hpp) + target_compile_definitions( + rocprofiler-samples-common-library + INTERFACE $) + target_include_directories( + rocprofiler-samples-common-library SYSTEM + INTERFACE $) +endif() diff --git a/samples/common/defines.hpp b/samples/common/defines.hpp new file mode 100644 index 0000000000..cb647e8238 --- /dev/null +++ b/samples/common/defines.hpp @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ + << " failed with error code " << CHECKSTATUS << ": " << status_msg \ + << std::endl; \ + std::stringstream errmsg{}; \ + errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ + << status_msg << ")"; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } diff --git a/samples/common/filesystem.hpp b/samples/common/filesystem.hpp new file mode 100644 index 0000000000..4d6048dea5 --- /dev/null +++ b/samples/common/filesystem.hpp @@ -0,0 +1,77 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#if !defined(ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM 1 +# else +# define ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM 0 +# endif +# else +# define ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM 0 +# endif +#endif + +#if ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM == 0 +# if defined __has_include +# if __has_include() +# include +# endif +# endif + +# if defined(__cpp_lib_filesystem) +# define ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM 1 +# else +# if defined __has_include +# if __has_include() +# define ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM 1 +# endif +# endif +# endif +#endif + +// include the correct filesystem header +#if defined(ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM) && \ + ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM > 0 +# include +#elif defined(ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM) && \ + ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM > 0 +# include +#else +# include +#endif + +namespace common +{ +#if defined(ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM) && \ + ROCPROFILER_SAMPLES_HAS_GHC_LIB_FILESYSTEM > 0 +namespace fs = ::ghc::filesystem; // NOLINT +#elif defined(ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM) && \ + ROCPROFILER_SAMPLES_HAS_CPP_LIB_FILESYSTEM > 0 +namespace fs = ::std::filesystem; // NOLINT +#else +namespace fs = ::std::experimental::filesystem; // NOLINT +#endif +} // namespace common diff --git a/samples/counter_collection/CMakeLists.txt b/samples/counter_collection/CMakeLists.txt index 639aafa26d..d02899422a 100644 --- a/samples/counter_collection/CMakeLists.txt +++ b/samples/counter_collection/CMakeLists.txt @@ -31,7 +31,11 @@ endif() add_library(counter-collection-buffer-client SHARED) target_sources(counter-collection-buffer-client PRIVATE client.cpp client.hpp) -target_link_libraries(counter-collection-buffer-client PRIVATE rocprofiler::rocprofiler) +target_link_libraries( + counter-collection-buffer-client + PUBLIC rocprofiler::samples-build-flags + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library) + set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) add_executable(counter-collection-buffer) target_sources(counter-collection-buffer PRIVATE main.cpp) @@ -55,8 +59,11 @@ set_tests_properties( add_library(counter-collection-functional-counter-client SHARED) target_sources(counter-collection-functional-counter-client PRIVATE print_functional_counters.cpp client.hpp) -target_link_libraries(counter-collection-functional-counter-client - PRIVATE rocprofiler::rocprofiler) +target_link_libraries( + counter-collection-functional-counter-client + PUBLIC rocprofiler::samples-build-flags + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-common-library) + add_executable(counter-collection-print-functional-counters) target_sources(counter-collection-print-functional-counters PRIVATE main.cpp) target_link_libraries( diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index 4fd71ad803..03ceaaac02 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -78,9 +78,9 @@ get_output_stream() static std::ostream* isTerm = []() -> std::ostream* { if(auto* outfile = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) { - if(outfile == "stdout") + if(std::string_view{outfile} == "stdout") return static_cast(&std::cout); - else if(outfile == "stderr") + else if(std::string_view{outfile} == "stderr") return &std::cerr; } return nullptr; @@ -125,12 +125,12 @@ buffered_callback(rocprofiler_context_id_t, } void -dispatch_callback(rocprofiler_queue_id_t queue_id, - const rocprofiler_agent_t* agent, - rocprofiler_correlation_id_t correlation_id, - const hsa_kernel_dispatch_packet_t* dispatch_packet, - void* callback_data_args, - rocprofiler_profile_config_id_t* config) +dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, + const rocprofiler_agent_t* agent, + rocprofiler_correlation_id_t /*correlation_id*/, + const hsa_kernel_dispatch_packet_t* /*dispatch_packet*/, + void* /*callback_data_args*/, + rocprofiler_profile_config_id_t* config) { /** * This simple example uses the same profile counter set for all agents. diff --git a/samples/counter_collection/print_functional_counters.cpp b/samples/counter_collection/print_functional_counters.cpp index 6ff9d243a3..9287a7fd8e 100644 --- a/samples/counter_collection/print_functional_counters.cpp +++ b/samples/counter_collection/print_functional_counters.cpp @@ -108,12 +108,12 @@ buffered_callback(rocprofiler_context_id_t, } void -dispatch_callback(rocprofiler_queue_id_t queue_id, - const rocprofiler_agent_t* agent, - rocprofiler_correlation_id_t correlation_id, - const hsa_kernel_dispatch_packet_t* dispatch_packet, - void* callback_data_args, - rocprofiler_profile_config_id_t* config) +dispatch_callback(rocprofiler_queue_id_t /*queue_id*/, + const rocprofiler_agent_t* agent, + rocprofiler_correlation_id_t /*correlation_id*/, + const hsa_kernel_dispatch_packet_t* /*dispatch_packet*/, + void* /*callback_data_args*/, + rocprofiler_profile_config_id_t* config) { auto& cap = *get_capture(); auto wlock = std::unique_lock{cap.m_mutex}; @@ -205,9 +205,8 @@ tool_fini(void*) std::clog << "In tool fini\n"; - auto cap_ptr = get_capture(); - auto& cap = *get_capture(); - auto wlock = std::unique_lock{cap.m_mutex}; + auto& cap = *get_capture(); + auto wlock = std::unique_lock{cap.m_mutex}; if(cap.captured.size() != cap.expected.size()) { diff --git a/samples/intercept_table/CMakeLists.txt b/samples/intercept_table/CMakeLists.txt index fdbdd6b259..18cda6fc41 100644 --- a/samples/intercept_table/CMakeLists.txt +++ b/samples/intercept_table/CMakeLists.txt @@ -33,17 +33,16 @@ add_library(intercept-table-client SHARED) target_sources(intercept-table-client PRIVATE client.cpp client.hpp) target_link_libraries( intercept-table-client - PRIVATE rocprofiler::rocprofiler - $) + PRIVATE rocprofiler::rocprofiler rocprofiler::samples-build-flags + rocprofiler::samples-common-library) set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) find_package(Threads REQUIRED) add_executable(intercept-table) target_sources(intercept-table PRIVATE main.cpp) -target_link_libraries( - intercept-table PRIVATE intercept-table-client Threads::Threads - $) +target_link_libraries(intercept-table PRIVATE intercept-table-client Threads::Threads + rocprofiler::samples-build-flags) add_test(NAME intercept-table COMMAND $) diff --git a/samples/intercept_table/client.cpp b/samples/intercept_table/client.cpp index 09b2bd4dcf..3e8bf6e662 100644 --- a/samples/intercept_table/client.cpp +++ b/samples/intercept_table/client.cpp @@ -36,15 +36,18 @@ #include #include +#include "common/defines.hpp" +#include "common/filesystem.hpp" + #include #include #include #include #include #include -#include #include #include +#include #include #include #include @@ -52,22 +55,8 @@ #include #include #include - -#define ROCPROFILER_CALL(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ - { \ - std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ - std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ - << " failed with error code " << CHECKSTATUS << ": " << status_msg \ - << std::endl; \ - std::stringstream errmsg{}; \ - errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ - << status_msg << ")"; \ - throw std::runtime_error(errmsg.str()); \ - } \ - } +#include "common/defines.hpp" +#include "common/filesystem.hpp" namespace client { @@ -93,8 +82,6 @@ std::map client_wrap_data = {}; void print_call_stack(const call_stack_t& _call_stack) { - namespace fs = ::std::filesystem; - auto ofname = std::string{"intercept_table.log"}; if(auto* eofname = getenv("ROCPROFILER_SAMPLE_OUTPUT_FILE")) ofname = eofname; @@ -125,8 +112,8 @@ print_call_stack(const call_stack_t& _call_stack) for(const auto& itr : _call_stack) { *ofs << std::left << std::setw(2) << ++n << "/" << std::setw(2) << _call_stack.size() - << " [" << fs::path{itr.file}.filename() << ":" << itr.line << "] " << std::setw(20) - << itr.function; + << " [" << common::fs::path{itr.file}.filename() << ":" << itr.line << "] " + << std::setw(20) << itr.function; if(!itr.context.empty()) *ofs << " :: " << itr.context; *ofs << "\n"; } diff --git a/samples/pc_sampling/CMakeLists.txt b/samples/pc_sampling/CMakeLists.txt deleted file mode 100644 index 0ce25c5a56..0000000000 --- a/samples/pc_sampling/CMakeLists.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# -# -cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR) - -project(rocprofiler-samples-pc-sampling LANGUAGES C CXX) - -if(NOT TARGET rocprofiler::rocprofiler) - find_package(rocprofiler REQUIRED) -endif() - -add_executable(pc_sampling_single-user-host-trap) -target_sources(pc_sampling_single-user-host-trap PRIVATE common.h - single-user-host-trap.cpp) -target_link_libraries( - pc_sampling_single-user-host-trap - PRIVATE rocprofiler::rocprofiler - $) - -add_executable(pc_sampling_single-user-host-trap-retry) -target_sources(pc_sampling_single-user-host-trap-retry - PRIVATE common.h single-user-host-trap-retries-service-instantiation.cpp) -target_link_libraries( - pc_sampling_single-user-host-trap-retry - PRIVATE rocprofiler::rocprofiler - $) - -add_executable(pc_sampling_single-user-multiple-agents) -target_sources(pc_sampling_single-user-multiple-agents - PRIVATE common.h single-user-multiple-agents.cpp) -target_link_libraries( - pc_sampling_single-user-multiple-agents - PRIVATE rocprofiler::rocprofiler - $) diff --git a/samples/pc_sampling/common.h b/samples/pc_sampling/common.h deleted file mode 100644 index 9074088f35..0000000000 --- a/samples/pc_sampling/common.h +++ /dev/null @@ -1,147 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -constexpr size_t BUFFER_SIZE_BYTES = 4096; -constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2); -const std::string_view MI200_NAME = "gfx90a"; - -#define ROCPROFILER_CALL(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ - { \ - puts(#result " failed"); \ - } \ - } - -// We might want to test the calls that fails -// e.g. calling `rocprofiler_configure_pc_sampling_service ` -// after previous initialization. -#define ROCPROFILER_CALL_FAILS(result, msg) \ - { \ - rocprofiler_status_t CHECKSTATUS = result; \ - if(CHECKSTATUS == ROCPROFILER_STATUS_SUCCESS) \ - { \ - puts(#result " succeeded"); \ - } \ - } - -static rocprofiler_status_t -find_first_gpu_agent_impl(const rocprofiler_agent_t** agents, size_t num_agents, void* data) -{ - // data is required - if(!data) return ROCPROFILER_STATUS_ERROR; - - auto* _out_agent = static_cast(data); - // find the first GPU agent - for(size_t i = 0; i < num_agents; i++) - { - if(agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) - { - *_out_agent = *agents[i]; - printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", - __FUNCTION__, - _out_agent->name, - _out_agent->node_id, - _out_agent->type, - _out_agent->num_pc_sampling_configs); - return ROCPROFILER_STATUS_SUCCESS; - } - else - { - printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", - __FUNCTION__, - agents[i]->name, - agents[i]->node_id, - agents[i]->type, - agents[i]->num_pc_sampling_configs); - } - } - return ROCPROFILER_STATUS_ERROR; -} - -static std::optional -find_first_gpu_agent() -{ - // This function returns the first gpu agent it encounters. - // TODO: write the better function querying information about the agent, - // and return if the agent is MI200. - rocprofiler_agent_t gpu_agent; - - auto status = rocprofiler_query_available_agents( - &find_first_gpu_agent_impl, sizeof(rocprofiler_agent_t), static_cast(&gpu_agent)); - - if(status != ROCPROFILER_STATUS_SUCCESS) return std::nullopt; - - return gpu_agent; -} - -static void -rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/, - rocprofiler_buffer_id_t /*buffer_id*/, - rocprofiler_record_header_t** headers, - size_t num_headers, - void* /*data*/, - uint64_t drop_count) -{ - // Vladimir: I am not sure if this is the right way of iterating over PC sampling records. - printf( - "The number of delivered samples is: %zu, while the number of dropped samples is: %lu.\n", - num_headers, - drop_count); - - for(size_t i = 0; i < num_headers; i++) - { - auto* cur_header = headers[i]; - if(cur_header->category == ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING) - { - auto* pc_sample = static_cast(cur_header->payload); - printf("--- pc: %lx, dispatch_id: %lx, timestamp: %lu, hardware_id: %lu\n", - pc_sample->pc, - pc_sample->dispatch_id, - pc_sample->timestamp, - pc_sample->hardware_id); - // Vladimir: How to parse the remaining part of the `rocprofiler_pc_sampling_record_t` - // struct? - } - } - // Vladimr: We might want to add somewhere in the documentation that headars actually contain PC - // samples. -} - -static void -run_HIP_app() -{ - // TODO: provide the simple HIP app -} diff --git a/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp b/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp deleted file mode 100644 index 5246c184f9..0000000000 --- a/samples/pc_sampling/single-user-host-trap-retries-service-instantiation.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// Vladimir: The example of using Host-trap PC sampling on a system with single MI200/300 by two -// users. The first user initiates Host-Trap sampling with the configuration A. The second user -// tries initiaiting stochastic sampling with configuration B and fails. Then it queries available -// configurations and observes only the configuration A. It accepts it and starts PC sampling. -// Vladimir: Currently, this example is written as a single-threaded program. -// Decide whether to move the second user to a separate thread or process - -#include - -#include "common.h" - -#include -#include -#include -#include - -#define HOST_TRAP_INTERVAL 1000 - -rocprofiler_pc_sampling_method_t host_trap_sampling_method; -rocprofiler_pc_sampling_unit_t host_trap_sampling_unit_time; -uint64_t host_trap_interval; - -void -second_user() -{ - // creating a context - rocprofiler_context_id_t context_id2; - ROCPROFILER_CALL(rocprofiler_create_context(&context_id2), - "Cannot create context for the second user\n"); - - auto gpu_agent = find_first_gpu_agent(); - if(!gpu_agent) throw std::runtime_error{"no gpu agents were found"}; - - // creating a buffer that will hold pc sampling information - rocprofiler_buffer_policy_t lossless_buffer_action = ROCPROFILER_BUFFER_POLICY_LOSSLESS; - rocprofiler_buffer_id_t buffer_id2; - ROCPROFILER_CALL(rocprofiler_create_buffer(context_id2, - BUFFER_SIZE_BYTES, - WATERMARK, - lossless_buffer_action, - rocprofiler_pc_sampling_callback, - nullptr, - &buffer_id2), - "Cannot create pc sampling buffer for the second user"); - - // The second user tries to create another pc sampling service with different configuration, - // but the rocprofiler rejects it. - rocprofiler_pc_sampling_method_t sampling_method2 = ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC; - rocprofiler_pc_sampling_unit_t sampling_unit2 = ROCPROFILER_PC_SAMPLING_UNIT_CYCLES; - uint64_t interval2 = 2048; // I assumed micro secs, so this should be 1ms - // The following function returns an error code indicating the PC sampling has already been - // configured. - ROCPROFILER_CALL_FAILS( - rocprofiler_configure_pc_sampling_service( - context_id2, *gpu_agent, sampling_method2, sampling_unit2, interval2, buffer_id2), - "Instantiation of the PC sampling service should fail"); - - // After failure, the second user queries available configuration and observes the one chosen by - // the first user. - auto config_count = gpu_agent->num_pc_sampling_configs; - auto configs = std::vector{}; - configs.reserve(config_count); - for(size_t i = 0; i < config_count; ++i) - configs.emplace_back(gpu_agent->pc_sampling_configs[i]); - - // Only one configuration should be listed, and its parameters should match the parameters set - // by the first user. Vladimir: Is it ok to use assertions? In the release mode, they might be - // ignored. - assert(config_count == 1); - rocprofiler_pc_sampling_configuration_t first_user_config = configs[0]; - assert(first_user_config.method == host_trap_sampling_method); - assert(first_user_config.unit == host_trap_sampling_unit_time); - // Vladimir: Should the min_interval and max_interval have the same value at this point (the PC - // sampling is alredy configured)?? - assert(first_user_config.min_interval == host_trap_interval && - first_user_config.min_interval == first_user_config.max_interval); - - // Reuse the same configuration set by the first user. - // The second user is satisfied with the configuration chosen by the first user, so it - // starts PC sampling. - ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id2, - *gpu_agent, - first_user_config.method, - first_user_config.unit, - first_user_config.min_interval, - buffer_id2), - "The second user cannot share already created PC sampling configuration"); - - // Starting the context that should trigger PC sampling? - ROCPROFILER_CALL(rocprofiler_start_context(context_id2), - "Cannot start PC sampling context for the second user"); - - // Running the applicaiton - run_HIP_app(); - - // Stop the context that should stop PC sampling? - ROCPROFILER_CALL(rocprofiler_stop_context(context_id2), - "Cannot start PC sampling context for the second user"); - - // Explicit buffer flush, before destroying it - ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id2), - "Cannot destroy the second user's buffer"); - // Destroying the buffer - ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id2), "Cannot destroy the second user's"); -} - -int -main(int /*argc*/, char** /*argv*/) -{ - // creating a context - rocprofiler_context_id_t context_id; - ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n"); - - auto gpu_agent = find_first_gpu_agent(); - if(!gpu_agent) - { - fprintf(stderr, "no gpu agents were found\n"); - return EXIT_FAILURE; - } - - // creating a buffer that will hold pc sampling information - rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD; - rocprofiler_buffer_id_t buffer_id; - ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, - BUFFER_SIZE_BYTES, - WATERMARK, - drop_buffer_action, - rocprofiler_pc_sampling_callback, - nullptr, - &buffer_id), - "Cannot create pc sampling buffer"); - - // PC sampling service configuration - host_trap_sampling_method = ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP; - host_trap_sampling_unit_time = ROCPROFILER_PC_SAMPLING_UNIT_TIME; - // Vladimir: What units are we using for time? ms, micro secs, ns? - host_trap_interval = HOST_TRAP_INTERVAL; - // Instantiating the first PC sampling service succeeds. - ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id, - *gpu_agent, - host_trap_sampling_method, - host_trap_sampling_unit_time, - host_trap_interval, - buffer_id), - "Cannot create PC sampling service"); - - // Trigger the second user code. - // Vladimir: Discuss whether this should be put in a separate thread/process. - second_user(); - - // Starting the context that should trigger PC sampling? - ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context"); - - // Running the applicaiton - run_HIP_app(); - - // Stop the context that should stop PC sampling? - ROCPROFILER_CALL(rocprofiler_stop_context(context_id), "Cannot start PC sampling context"); - - // Explicit buffer flush, before destroying it - ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id), "Cannot destroy buffer"); - // Destroying the buffer - ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id), "Cannot destroy buffer"); - - // Vladimir: Do we need to destroy context or a service? - - return 0; -} diff --git a/samples/pc_sampling/single-user-host-trap.cpp b/samples/pc_sampling/single-user-host-trap.cpp deleted file mode 100644 index 8aebfa248b..0000000000 --- a/samples/pc_sampling/single-user-host-trap.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// Vladimir: The example of using Host-trap PC sampling exclusively on the system with single MI200. -// If any of the rocprofiler calls returns status fail, we simply stop the application. - -#include -#include -#include "common.h" - -int -main(int /*argc*/, char** /*argv*/) -{ - // creating a context - rocprofiler_context_id_t context_id; - ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n"); - - auto gpu_agent = find_first_gpu_agent(); - if(!gpu_agent) - { - fprintf(stderr, "no gpu agents were found\n"); - return EXIT_FAILURE; - } - - // creating a buffer that will hold pc sampling information - rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD; - rocprofiler_buffer_id_t buffer_id; - ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, - BUFFER_SIZE_BYTES, - WATERMARK, - drop_buffer_action, - rocprofiler_pc_sampling_callback, - nullptr, - &buffer_id), - "Cannot create pc sampling buffer"); - - // PC sampling service configuration - rocprofiler_pc_sampling_method_t sampling_method = ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP; - rocprofiler_pc_sampling_unit_t sampling_unit = ROCPROFILER_PC_SAMPLING_UNIT_TIME; - // What units are we using for time? ms, micro secs, ns? - uint64_t interval = 1000; // I assumed micro secs, so this should be 1ms - // Instantiating the PC sampling service - ROCPROFILER_CALL( - rocprofiler_configure_pc_sampling_service( - context_id, *gpu_agent, sampling_method, sampling_unit, interval, buffer_id), - "Cannot create PC sampling service"); - - // Vladimir: Is this the place of retrying if someone already created the - // configuration and the previous call fails? - - // Starting the context that should trigger PC sampling? - ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context"); - - // Running the applicaiton - run_HIP_app(); - - // Stop the context that should stop PC sampling? - ROCPROFILER_CALL(rocprofiler_stop_context(context_id), "Cannot start PC sampling context"); - - // Explicit buffer flush, before destroying it - ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_id), "Cannot destroy buffer"); - // Destroying the buffer - ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_id), "Cannot destroy buffer"); - - // Vladimir: Do we need to destroy context or a service? - - return 0; -} diff --git a/samples/pc_sampling/single-user-multiple-agents.cpp b/samples/pc_sampling/single-user-multiple-agents.cpp deleted file mode 100644 index a64a0bf804..0000000000 --- a/samples/pc_sampling/single-user-multiple-agents.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// MIT License -// -// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -// Vladimir: The example that shows how a single user can use PC sampling -// on multiple GPU agents. - -#include -#include -#include -#include -#include -#include -#include -#include "common.h" - -namespace -{ -// GPU agents supporting some kind of PC sampling -std::vector gpu_agents; -std::vector contexts; -std::vector buffer_ids; - -rocprofiler_status_t -find_all_gpu_agents_supporting_pc_sampling_impl(const rocprofiler_agent_t** agents, - size_t num_agents, - void* data) -{ - // data is required - if(!data) return ROCPROFILER_STATUS_ERROR; - - auto* _out_agents = static_cast*>(data); - // find the first GPU agent - for(size_t i = 0; i < num_agents; i++) - { - if(agents[i]->type == ROCPROFILER_AGENT_TYPE_GPU) - { - // Skip GPU agents not supporting PC sampling - // Vladimir: The assumption is that if a GPU agent does not support PC sampling, - // the size is 0. - if(agents[i]->num_pc_sampling_configs == 0) continue; - - _out_agents->push_back(*agents[i]); - - printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", - __FUNCTION__, - agents[i]->name, - agents[i]->node_id, - agents[i]->type, - agents[i]->num_pc_sampling_configs); - return ROCPROFILER_STATUS_SUCCESS; - } - else - { - printf("[%s] %s :: id=%u, type=%i, num pc sample configs=%zu\n", - __FUNCTION__, - agents[i]->name, - agents[i]->node_id, - agents[i]->type, - agents[i]->num_pc_sampling_configs); - } - } - - return !_out_agents->empty() ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR; -} - -void -find_all_gpu_agents_supporting_pc_sampling() -{ - // This function returns the all gpu agents supporting some kind of PC sampling - ROCPROFILER_CALL( - rocprofiler_query_available_agents(&find_all_gpu_agents_supporting_pc_sampling_impl, - sizeof(rocprofiler_agent_t), - static_cast(&gpu_agents)), - "Failed to find GPU agents"); -} -} // namespace - -void -configure_host_trap_sampling(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_agent_t gpu_agent) -{ - // Vladimir: Does MI200 have only one configuration? - assert(gpu_agent.num_pc_sampling_configs == 1); - - // Extract the configuration - auto host_trap_config = gpu_agent.pc_sampling_configs[0]; - - // The mean of min_interval and max_interval - auto interval = (host_trap_config.min_interval + host_trap_config.max_interval) / 2; - - ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id, - gpu_agent, - host_trap_config.method, - host_trap_config.unit, - interval, - buffer_id), - "Cannot create host-trap PC sampling service"); -} - -rocprofiler_pc_sampling_configuration_t -extract_stochastic_config(const rocprofiler_pc_sampling_configuration_t* configs, - size_t num_configs) -{ - // Iterate over an array of configurations and return the first one - // with stochasting method. - for(size_t i = 0; i < num_configs; i++) - { - if(configs[i].method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) - { - return configs[i]; - } - } - printf("Improper use of the `extract_stochastic_config` function."); - exit(-1); -} - -void -configure_stochastic_sampling(rocprofiler_context_id_t context_id, - rocprofiler_buffer_id_t buffer_id, - rocprofiler_agent_t gpu_agent) -{ - // Find the configuration matching stochastic sampling in cycles - rocprofiler_pc_sampling_configuration_t stochastic_config = - extract_stochastic_config(gpu_agent.pc_sampling_configs, gpu_agent.num_pc_sampling_configs); - - // The mean of min_interval and max_interval - auto interval = (stochastic_config.min_interval + stochastic_config.max_interval) / 2; - - ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(context_id, - gpu_agent, - stochastic_config.method, - stochastic_config.unit, - interval, - buffer_id), - "Cannot create stochastic PC sampling service"); -} - -int -main(int /*argc*/, char** /*argv*/) -{ - if(!find_first_gpu_agent()) - { - fprintf(stderr, "no gpu agents were found\n"); - return EXIT_FAILURE; - } - - find_all_gpu_agents_supporting_pc_sampling(); - - if(gpu_agents.empty()) - { - printf("No availabe gpu agents\n"); - exit(-1); - } - - // Vladimir: The relations I assumed: - // - a context per gpu agent - // - a buffer per context - // - a pc sampling service per buffer - // How about the following: Single context with mulitple buffers and PC sampling services? - // When starting the context, does it start all PC sampling services at once? - - for(auto gpu_agent : gpu_agents) - { - // creating a context - rocprofiler_context_id_t context_id; - ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "Cannot create context\n"); - contexts.push_back(context_id); - - // creating a buffer that will hold pc sampling information - rocprofiler_buffer_policy_t drop_buffer_action = ROCPROFILER_BUFFER_POLICY_DISCARD; - rocprofiler_buffer_id_t buffer_id; - ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, - BUFFER_SIZE_BYTES, - WATERMARK, - drop_buffer_action, - rocprofiler_pc_sampling_callback, - nullptr, - &buffer_id), - "Cannot create pc sampling buffer"); - buffer_ids.push_back(buffer_id); - - if(gpu_agent.name == MI200_NAME) - configure_host_trap_sampling(context_id, buffer_id, gpu_agent); - else - configure_stochastic_sampling(context_id, buffer_id, gpu_agent); - - // Starting the context that should trigger PC sampling - ROCPROFILER_CALL(rocprofiler_start_context(context_id), "Cannot start PC sampling context"); - } - - // Running the applicaiton - run_HIP_app(); - - for(size_t i = 0; i < gpu_agents.size(); i++) - { - // Stop the context that should stop PC sampling? - ROCPROFILER_CALL(rocprofiler_stop_context(contexts[i]), "Cannot start PC sampling context"); - // Explicit buffer flush, before destroying it - ROCPROFILER_CALL(rocprofiler_flush_buffer(buffer_ids[i]), "Cannot destroy buffer"); - // Destroying the buffer - ROCPROFILER_CALL(rocprofiler_destroy_buffer(buffer_ids[i]), "Cannot destroy buffer"); - } - - return 0; -} diff --git a/source/bin/CMakeLists.txt b/source/bin/CMakeLists.txt index f3875c7ce0..07213e0ec8 100644 --- a/source/bin/CMakeLists.txt +++ b/source/bin/CMakeLists.txt @@ -3,3 +3,10 @@ # rocprofiler_activate_clang_tidy() + +configure_file(rocprofv2 ${PROJECT_BINARY_DIR} COPYONLY) +install( + FILES rocprofv2 + DESTINATION ${CMAKE_INSTALL_BINDIR} + PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + COMPONENT runtime) diff --git a/source/bin/rocprofv2 b/source/bin/rocprofv2 new file mode 100755 index 0000000000..73419d1a91 --- /dev/null +++ b/source/bin/rocprofv2 @@ -0,0 +1,83 @@ +#!/bin/bash -e + +set -eo pipefail + +CURRENT_DIR="$( dirname -- "$0"; )"; +ROCPROFV2_DIR=$(dirname -- $(realpath ${BASH_SOURCE[0]})); +ROCM_DIR=$( dirname -- "${ROCPROFV2_DIR}"; ) + +export HSA_TOOLS_LIB=${ROCM_DIR}/lib/librocprofiler64.so.2 + +# Define color code +GREEN='\033[0;32m' +GREY='\033[0;90m' +RESET='\033[0m' + +usage() { + echo -e "${RESET}ROCProfilerV2 Run Script Usage:" + echo -e "${GREEN}-h | --help ${RESET} For showing this message" + echo -e "${GREEN}--hsa-api ${RESET} For Collecting HSA API Traces" + echo -e "${GREEN}--hsa-activity | --hsa-trace ${RESET} For Collecting HSA API Activities Traces" + echo -e "${GREEN}--kernel-trace ${RESET} For Collecting Kernel Dispatch Traces" + echo -e "${GREEN}-o | --output-file ${RESET} For the output file name" + echo -e "\t#${GREY} usage e.g:(with current dir): rocprofv2 --hip-trace -o " + echo -e "\t#${GREY} usage e.g:(with custom dir): rocprofv2 --hip-trace -d -o ${RESET}\n" + echo -e "${GREEN}-d | --output-directory ${RESET} For adding output path where the output files will be saved" + echo -e "\t#${GREY} usage e.g:(with custom dir): rocprofv2 --hip-trace -d ${RESET}\n" + exit 1 +} + +if [ -z "$1" ]; then + usage + exit 1 +fi + +: ${ROCPROFILER_OUTPUT_PATH:="."} + +while [ 1 ]; do + if [[ "$1" == "-h" || "$1" == "--help" ]]; then + usage + exit 1 + elif [[ "$1" == "-o" || "$1" == "--output-file-name" ]]; then + if [ $2 ]; then + export ROCPROFILER_OUTPUT_FILE_NAME=$2 + else + usage + exit 1 + fi + shift + shift + elif [[ "$1" == "-d" || "$1" == "--output-directory" ]]; then + if [ $2 ]; then + ROCPROFILER_OUTPUT_PATH=$2 + else + usage + exit 1 + fi + shift + shift + elif [ "$1" == "--hsa-api" ]; then + export ROCPROFILER_HSA_API_TRACE=1 + shift + elif [[ "$1" == "--hsa-activity" || "$1" == "--hsa-trace" ]]; then + export ROCPROFILER_HSA_API_TRACE=1 + export ROCPROFILER_HSA_ACTIVITY_TRACE=1 + shift + elif [ "$1" == "--kernel-trace" ]; then + export ROCPROFILER_KERNEL_TRACE=1 + shift + elif [ "$1" == "--" ]; then + shift + break + elif [[ "$1" == "-"* || "$1" == "--"* ]]; then + echo -e "Wrong option \"$1\", Please use the following options:\n" + usage + exit 1 + else + break + fi +done + +export ROCPROFILER_OUTPUT_PATH + +ROCP_TOOL_LIBRARIES=${ROCM_DIR}/lib/rocprofiler/librocprofiler-tool.so $* diff --git a/source/docs/about.md b/source/docs/about.md index 6e617a783b..4fb8d2dc06 100644 --- a/source/docs/about.md +++ b/source/docs/about.md @@ -6,6 +6,29 @@ :maxdepth: 4 ``` -## Overview +## Important Changes -TODO: Overview of rocprofiler v2 +[Roctracer](https://github.com/ROCm-Developer-Tools/roctracer) and [rocprofiler (v1)](https://github.com/ROCm-Developer-Tools/rocprofiler) +have been combined into a single rocprofiler SDK and re-designed from scratch. The new rocprofiler API has been designed with some +new restrictions to avoid problems that plagued the former implementations. These restrictions enable more efficient implementations +and much better thread-safety. The most important restriction is the window for tools to inform rocprofiler about which services +the tool wishes to use (where "services" refers to the capabilities for API tracing, kernel tracing, etc.). + +In the former implementations, when one of the ROCm runtimes were initially loaded, a tool only had +to inform roctracer/rocprofiler that it wished to use its services at some point (e.g. calling `roctracer_init()`) +and were not required to specify which services it would eventually or potentially use. Thus, these libraries had to effectively prepare for +any service to be enable at any point in time -- which introduced unnecessary overhead when tools had no desire to use certain features and +made thread-safe data management difficult. For example, roctracer was required to _always_ install wrappers around _every_ runtime API function +and _always_ added extra overhead of indirection through the roctracer library and checks for the current service configuration (in a thread-safe manner). + +In the re-designed implementation, rocprofiler introduces the concept of a "context". Contexts are effectively +bundles of service configurations. Rocprofiler gives each tool _one_ opportunity to create as many contexts as necessary -- +for example, a tool can group all of the services into one context, create individual contexts for each service, or somewhere in between. +Due to this design choice change, rocprofiler now knows _exactly_ which services might be requested by the tool clients at any point in time. +This has several important implications: + +- rocprofiler does not have to unnecessarily prepare for services that are never used -- if no registered contexts requested tracing the HSA API, no wrappers need to be generated +- rocprofiler can perform more extensive checks during service specification and inform tools about potential issues very early on +- rocprofiler can allow multiple tools to use certain services simulatenously +- rocprofiler was able to improve thread-safety without introducing parallel bottlenecks +- rocprofiler can manage internal data and allocations more efficiently diff --git a/source/docs/buffered_tracing.md b/source/docs/buffered_tracing.md new file mode 100644 index 0000000000..576f55664a --- /dev/null +++ b/source/docs/buffered_tracing.md @@ -0,0 +1,14 @@ +# Buffered Tracing Services + + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Overview + +## HSA API Tracing + +## Kernel Tracing diff --git a/source/docs/callback_tracing.md b/source/docs/callback_tracing.md new file mode 100644 index 0000000000..dbdfe14453 --- /dev/null +++ b/source/docs/callback_tracing.md @@ -0,0 +1,13 @@ +# Callback Tracing Services + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Overview + +## Code Object Tracing + +## HSA API Tracing diff --git a/source/docs/features.md b/source/docs/features.md index 91547d5269..c4d6820399 100644 --- a/source/docs/features.md +++ b/source/docs/features.md @@ -8,4 +8,8 @@ ## Overview -TODO: Overview of rocprofiler v2 features +- Improved tool initialization +- Support for multiple tools using the same services simulatenously +- Simplified management of enabling/disabling one or more data collection services +- Improved error checking and logging +- Backwards ABI compatibility (goal) diff --git a/source/docs/index.md b/source/docs/index.md index 0961e13205..bd850c841b 100644 --- a/source/docs/index.md +++ b/source/docs/index.md @@ -9,5 +9,9 @@ about features installation + tool_library_overview + callback_tracing + buffered_tracing + intercept_table developer_api ``` diff --git a/source/docs/intercept_table.md b/source/docs/intercept_table.md new file mode 100644 index 0000000000..5cd904eaf0 --- /dev/null +++ b/source/docs/intercept_table.md @@ -0,0 +1,9 @@ +# Runtime Intercept Tables + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +Discussion on how access the raw runtime intercept tables of HSA and HIP (i.e. ExaTracer requirements by LTTng). diff --git a/source/docs/tool_library_overview.md b/source/docs/tool_library_overview.md new file mode 100644 index 0000000000..7079c05fd3 --- /dev/null +++ b/source/docs/tool_library_overview.md @@ -0,0 +1,254 @@ +# Building Tool Library + +```eval_rst +.. toctree:: + :glob: + :maxdepth: 4 +``` + +## Rocprofiler and ROCm Runtimes Design + +The ROCm runtimes are now designed to directly communicate with a new library called rocprofiler-register during their initialization. This library does cursory checks +for whether any tools have indicated they want rocprofiler support via detection of one or more instances of a symbol named `rocprofiler_configure` (which is provided by +the tool libraries) and/or the `ROCP_TOOL_LIBRARIES` environment variable. This design dramatically improves upon previous designs which relied solely on +a tool racing to set runtime-specific environment variables (e.g. `HSA_TOOLS_LIB`) before the runtime initialization. + +## Tool Library Design + +When a tool has `rocprofiler_configure` visible in its symbol table, rocprofiler will invoke this function and provide information regarding +the version of rocprofiler which invoking the function, how many tools have already been invoked, and a unique idenitifier for the tool. The tool +returns a pointer to a `rocprofiler_tool_configure_result_t` struct, which, if non-null, can provide rocprofiler with the function it should +call for tool initialization (i.e. the opportunity for context creation), a function is should call when rocprofiler is finalized, and a pointer +to any data that rocprofiler should provide back to the tool when it calls the initialization and finalization functions. + +Rocprofiler provides a `rocprofiler/registration.h` header file which forward declares the `rocprofiler_configure` function with the necessary +compiler function attributes to ensure that the symbol is publicly visible. + +```cpp +#include + +namespace +{ +// saves the data provided to rocprofiler_configure +struct ToolData +{ + uint32_t version; + const char* runtime_version; + uint32_t priority; + rocprofiler_client_id_t client_id; +}; + +// tool initialization function +int +tool_init(rocprofiler_client_finalize_t fini_func, + void* tool_data_v); + +// tool finalization function +void +tool_fini(void* tool_data_v); +} + +extern "C" +{ +rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* client_id) +{ + // if not first tool to register, indicate tool doesn't want to do anything + if(priority > 0) return nullptr; + + // (optional) provide a name for this tool to rocprofiler + client_id->name = "ExampleTool"; + + // (optional) create configure data + static auto data = ToolData{ version, + runtime_version, + priority, + client_id }; + + // construct configure result + static auto cfg = + rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), + &tool_init, + &tool_fini, + static_cast(&data) }; + + return &cfg; +} +``` + +## Tool Initialization + +> ***NOTE: rocprofiler does NOT support calls to any of the runtime functions (HSA, HIP, etc.) during tool initialization.*** +> ***Invoking any functions from the runtimes will result in a deadlock.*** + +For each tool which contains a `rocprofiler_configure` function and returns a non-null pointer to a `rocprofiler_tool_configure_result_t` struct, +rocprofiler will invoke the `initialize` callback after completing the scan for all `rocprofiler_configure` symbols. In other words, rocprofiler +collects all of the `rocprofiler_tool_configure_result_t` instances before invoking the `initialize` member of any of these instances. +When rocprofiler invokes this function in a tool, this is the opportunity to create contexts: + +```cpp +#include + +namespace +{ +int +tool_init(rocprofiler_client_finalize_t fini_func, + void* data_v) +{ + // create a context + auto ctx = rocprofiler_context_id_t{}; + rocprofiler_create_context(&ctx); + + // ... associate services with context ... + + // start the context (optional) + rocprofiler_start_context(ctx); + + return 0; +} +} +``` + +Although not strictly necessary, it is recommended that tools store the context handle(s) to control the data collection of the services associated with the context. + +## Tool Finalization + +In the invocation of the user-provided `initialize` callback, rocprofiler will provide a function pointer of type `rocprofiler_client_finalize_t`. +This function pointer can be invoked by the tool to explicitly invoke the `finalize` callback from the `rocprofiler_tool_configure_result_t` instance: + +```cpp +#include + +namespace +{ +int +tool_init(rocprofiler_client_finalize_t fini_func, + void* data_v) +{ + // ... see initialization section ... + + // function which finalizes tool after 10 seconds + auto explicit_finalize = [](rocprofiler_client_finalize_t finalizer, + rocprofiler_client_id_t* client_id) + { + std::this_thread::sleep_for(std::chrono::seconds{ 10 }); + finalizer(client_id); + }; + + // start the context + rocprofiler_start_context(ctx); + + // dispatch a background thread to explicitly finalize after 10 seconds + std::thread{ explicit_finalize, fini_func, static_cast(data_v)->client_id }.detach(); + + return 0; +} +} +``` + +Otherwise, rocprofiler will invoke the `finalize` callback via an `atexit` handler. + +## Agent Information + +## Contexts + +## Configuring Services + +## Synchronous Callbacks + +## Asychronous Callbacks for Buffers + +## Recommendations + +## Full `rocprofiler_configure` Sample + +All of the snippets from the previous sections have been combined here for convenience. + +```cpp +#include + +namespace +{ +struct rocp_tool_data +{ + uint32_t version; + const char* runtime_version; + uint32_t priority; + rocprofiler_client_id_t client_id; + rocprofiler_client_finalize_t finalizer; + std::vector contexts; +}; + +void +tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* callback_data); + +int +tool_init(rocprofiler_client_finalize_t fini_func, + void* tool_data_v) +{ + rocp_tool_data* tool_data = static_cast(tool_data_v); + + // save the finalizer function + tool_data->finalizer = fini_func; + + // create a context + auto ctx = rocprofiler_context_id_t{}; + rocprofiler_create_context(&ctx); + + // save your contexts + tool_data->contexts.emplace_back(ctx); + + // associate code object tracing with this context + rocprofiler_configure_callback_tracing_service( + ctx, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + nullptr, + 0, + tool_tracing_callback, + tool_data); + + // ... associate services with contexts ... + + return 0; +} + +void +tool_fini(void* tool_data); +} + +extern "C" +{ +rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t version, + const char* runtime_version, + uint32_t priority, + rocprofiler_client_id_t* client_id) +{ + // if not first tool to register, indicate tool doesn't want to do anything + if(priority > 0) return nullptr; + + // (optional) provide a name for this tool to rocprofiler + client_id->name = "ExampleTool"; + + // info provided back to tool_init and tool_fini + auto* my_tool_data = new rocp_tool_data{ version, + runtime_version, + priority, + client_id, + nullptr }; + + // create configure data + static auto cfg = + rocprofiler_tool_configure_result_t{ sizeof(rocprofiler_tool_configure_result_t), + &tool_init, + &tool_fini, + my_tool_data }; + + return &cfg; +} +``` diff --git a/source/include/rocprofiler/buffer_tracing.h b/source/include/rocprofiler/buffer_tracing.h index 0d67aefc42..afeb3218be 100644 --- a/source/include/rocprofiler/buffer_tracing.h +++ b/source/include/rocprofiler/buffer_tracing.h @@ -37,6 +37,7 @@ ROCPROFILER_EXTERN_C_INIT /** * @brief ROCProfiler Buffer HSA API Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -51,6 +52,7 @@ typedef struct /** * @brief ROCProfiler Buffer HIP API Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -65,6 +67,7 @@ typedef struct /** * @brief ROCProfiler Buffer Marker Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -80,6 +83,7 @@ typedef struct /** * @brief ROCProfiler Buffer Memory Copy Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -95,6 +99,7 @@ typedef struct /** * @brief ROCProfiler Buffer Kernel Dispatch Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -114,6 +119,7 @@ typedef struct /** * @brief ROCProfiler Buffer Page Migration Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -128,6 +134,7 @@ typedef struct /** * @brief ROCProfiler Buffer Scratch Memory Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -142,6 +149,7 @@ typedef struct /** * @brief ROCProfiler Buffer Queue Scheduling Tracer Record. + * TODO(aelwazir): Add comments against every variable */ typedef struct { @@ -154,6 +162,7 @@ typedef struct // Not Sure What is the info needed here? } rocprofiler_buffer_tracing_queue_scheduling_record_t; +// TODO(aelwazir): Review with Jonathan if Code Object is in the buffer tracing /** * @brief ROCProfiler Code Object Tracer Buffer Record. * @@ -303,6 +312,8 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ * entire duration of the program. It is recommended to call this function once and cache this data * in the client instead of making multiple on-demand calls. * + * TODO(aelwazir): to be modified + * * @param [in] callback Callback function invoked for each enumeration value in @ref * rocprofiler_buffer_tracing_kind_t with the exception of the `NONE` and `LAST` values. * @param [in] data User data passed back into the callback diff --git a/source/include/rocprofiler/counters.h b/source/include/rocprofiler/counters.h index bd7481cffa..653aeccf58 100644 --- a/source/include/rocprofiler/counters.h +++ b/source/include/rocprofiler/counters.h @@ -52,6 +52,8 @@ rocprofiler_query_record_counter_id(rocprofiler_counter_instance_id_t id, * @param [in] dim dimension for which positional info is requested * @param [out] pos value of the dimension in id. * @return ::rocprofiler_status_t + * + * TODO(aelwazir): rocprofiler_status_t types that can be returned */ rocprofiler_status_t ROCPROFILER_API rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t id, @@ -65,6 +67,9 @@ rocprofiler_query_record_dimension_position(rocprofiler_counter_instance_id_t i * @param [in] dim dimension * @param [out] info info on the dimension (name, instance_size) * @return ::rocprofiler_status_t + * + * TODO(aelwazir): rocprofiler_status_t types that can be returned + * */ rocprofiler_status_t ROCPROFILER_API rocprofiler_query_record_dimension_info(rocprofiler_counter_id_t id, @@ -73,12 +78,16 @@ rocprofiler_query_record_dimension_info(rocprofiler_counter_id_t id, ROCPROFILER_NONNULL(3); /** - * @brief Query Counter name. + * @brief Query Counter name as a literal string. * - * @param [in] counter_id + * @param [in] counter_id @see rocprofiler_iterate_agent_supported_counters to get the available + * counter IDs * @param [out] name returns a pointer to the name of the counter * @param [out] size returns the size of the name returned * @return ::rocprofiler_status_t + * + * TODO(aelwazir): rocprofiler_status_t types that can be returned + * */ rocprofiler_status_t ROCPROFILER_API rocprofiler_query_counter_name(rocprofiler_counter_id_t counter_id, const char** name, size_t* size) @@ -102,6 +111,7 @@ rocprofiler_query_counter_instance_count(rocprofiler_agent_t agent, rocprofiler_counter_id_t counter_id, size_t* instance_count) ROCPROFILER_NONNULL(3); +// TODO(aelwazir): Ben to add a brief typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)( rocprofiler_counter_id_t* counters, size_t num_counters, @@ -110,7 +120,7 @@ typedef rocprofiler_status_t (*rocprofiler_available_counters_cb_t)( /** * @brief Query Agent Counters Availability. * - * @param [in] agent + * @param [in] agent GPU agent * @param [in] cb callback to caller to get counters * @param [in] user_data data to pass into the callback * @return ::rocprofiler_status_t diff --git a/source/include/rocprofiler/dispatch_profile.h b/source/include/rocprofiler/dispatch_profile.h index 200c22ad03..0f1f37c163 100644 --- a/source/include/rocprofiler/dispatch_profile.h +++ b/source/include/rocprofiler/dispatch_profile.h @@ -41,10 +41,10 @@ ROCPROFILER_EXTERN_C_INIT * @brief Kernel Dispatch Callback. This is a callback that is invoked before the kernel * is enqueued into the HSA queue. What counters to collect for a kernel are set * via passing back a profile config (config) in this callback. These counters - * will be collected and emplaced in the buffer rocprofiler_buffer_id_t used when + * will be collected and emplaced in the buffer with @ref rocprofiler_buffer_id_t used when * setting up this callback. * - * @param [in] queue_id Queue the kernel dispatach packet is being enqueued onto + * @param [in] queue_id Queue the kernel dispatch packet is being enqueued onto * @param [in] agent Agent of this queue * @param [in] correlation_id Correlation ID for this dispatch * @param [in] dispatch_packet Kernel dispatch packet about to be enqueued into HSA @@ -62,9 +62,11 @@ typedef void (*rocprofiler_profile_counting_dispatch_callback_t)( /** * @brief Configure buffered dispatch profile Counting Service. * Collects the counters in dispatch packets and stores them - * in buffer_id. The buffer may contain packets from more than + * in a buffer with @p buffer_id. The buffer may contain packets from more than * one dispatch (denoted by correlation id). Will trigger the - * callback based on the parameters setup in buffer_id_t. + * callback based on the parameters setup in @p buffer_id. + * + * // TODO(aelwazir): Should this be per agent? * * @param [in] context_id context id * @param [in] buffer_id id of the buffer to use for the counting service diff --git a/source/include/rocprofiler/fwd.h b/source/include/rocprofiler/fwd.h index 87c6c8d406..50e9aa8f6b 100644 --- a/source/include/rocprofiler/fwd.h +++ b/source/include/rocprofiler/fwd.h @@ -42,6 +42,10 @@ ROCPROFILER_EXTERN_C_INIT * @{ */ +// TODO(aelwazir): Do we need to add a null (way) for every handle? +// TODO(aelwazir): Remove API Data args from the doxygen? +// TODO(aelwazir): Not everything in bin needs to be installed bin, use libexec or share? + /** * @brief Status codes. */ @@ -258,6 +262,7 @@ typedef uint64_t rocprofiler_kernel_id_t; // forward declaration of struct typedef struct rocprofiler_pc_sampling_configuration_s rocprofiler_pc_sampling_configuration_t; +typedef struct rocprofiler_pc_sampling_record_s rocprofiler_pc_sampling_record_t; /** * @brief Unique record id encoding both the counter @@ -347,10 +352,11 @@ typedef struct /** * @brief Profile Configurations + * @see rocprofiler_create_profile_config for how to create. */ typedef struct { - uint64_t handle; + uint64_t handle; // Opaque handle } rocprofiler_profile_config_id_t; /** @@ -440,7 +446,7 @@ rocprofiler_record_header_compute_hash(uint32_t category, uint32_t kind) } /** - * @brief Details for the dimension, including its size, for a counter. + * @brief Details for the dimension, including its size, for a counter record. */ typedef struct { @@ -449,7 +455,7 @@ typedef struct } rocprofiler_record_dimension_info_t; /** - * @brief ROCProfiler Profile Counting Counter per instance. + * @brief ROCProfiler Profile Counting Counter Record per instance. */ typedef struct { @@ -458,26 +464,6 @@ typedef struct rocprofiler_correlation_id_t corr_id; } rocprofiler_record_counter_t; -/** - * @brief ROCProfiler PC Sampling Record. - * - */ -typedef struct -{ - uint64_t pc; - uint64_t dispatch_id; - uint64_t timestamp; - uint64_t hardware_id; - union - { - uint8_t arb_value; - }; - union - { - void* data; - }; -} rocprofiler_pc_sampling_record_t; - /** * @brief ROCProfiler SPM Record. * diff --git a/source/include/rocprofiler/internal_threading.h b/source/include/rocprofiler/internal_threading.h index 8c5e0837a1..03c643e832 100644 --- a/source/include/rocprofiler/internal_threading.h +++ b/source/include/rocprofiler/internal_threading.h @@ -82,6 +82,7 @@ rocprofiler_at_internal_thread_create(rocprofiler_internal_thread_library_cb_t p /** * @brief opaque handle to an internal thread identifier which delivers callbacks for buffers + * @see rocprofiler_create_callback_thread */ typedef struct { diff --git a/source/include/rocprofiler/pc_sampling.h b/source/include/rocprofiler/pc_sampling.h index 18d0e02bd0..3b26b4f6ab 100644 --- a/source/include/rocprofiler/pc_sampling.h +++ b/source/include/rocprofiler/pc_sampling.h @@ -35,25 +35,95 @@ ROCPROFILER_EXTERN_C_INIT */ /** - * @brief Create PC Sampling Service. + * @brief Function used to configure the PC sampling service on the GPU agent with @p agent_id. + * @brief Function used to configure the PC sampling service on the GPU agent with @p agent_id. + * Prerequisites are the following: + * - The user must create a context and supply its @p context_id. By using this context, + * - The user must create a context and supply its @p context_id. By using this context, + * the user can start/stop PC sampling on the agent. For more information, + * please @see `rocprofiler_start_context`/`rocprofiler_stop_context`. + * - The user must create a buffer and supply its @p buffer_id. Rocprofiler uses the buffer + * - The user must create a buffer and supply its @p buffer_id. Rocprofiler uses the buffer + * to deliver the PC samples to the user. For more information about the data delivery, + * please @see `rocprofiler_create_buffer` and `rocprofiler_buffer_tracing_cb_t`. * - * @param [in] context_id - * @param [in] agent - * @param [in] method - * @param [in] unit - * @param [in] interval - * @param [in] buffer_id + * Before calling this function, we recommend querying PC sampling configurations + * supported by the GPU agent via the `rocprofiler_query_pc_sampling_agent_configurations`. + * The user then chooses the @p method, @p unit, and @p interval to match one of the + * available configurations. Note that the @p interval must belong to the range of values + * The user then chooses the @p method, @p unit, and @p interval to match one of the + * available configurations. Note that the @p interval must belong to the range of values + * [available_config.min_interval, available_config.max_interval], + * where available_config is the instance of the `rocprofiler_pc_sampling_configuration_s` + * supported at the moment. + * + * Rocprofiler checks whether the requsted configuration is actually supported + * at the moment of calling this function. If the answer is yes, it returns + * the ROCPROFILER_STATUS_SUCCESS. Otherwise, notifies the caller about the + * rejection reason via the returned status code. For more information + * about the status codes, please @see rocprofiler_status_t. + * + * Constraint1: A GPU agent can be configured to support at most one running PC sampling + * configuration at any time, which implies some of the consequences described below. + * After the tool configures the PC sampling with one of the available configurations, + * rocprofiler guarantees that this configuration will be valid for the tool's + * lifetime. The tool can start and stop the configured PC sampling service whenever convenient. + * + * Constraint2: Since the same GPU agent can be used by multiple processes concurrently, + * Rocprofiler cannot guarantee the exclusive access to the PC sampling capability. + * The consequence is the following scenario. The tool TA that belongs to the process PA, + * calls the `rocprofiler_query_pc_sampling_agent_configurations` that returns the + * two supported configurations CA and CB by the agent. Then the toolb TB of the process PB, + * configures the PC sampling on the same agent by using the configuration CB. + * Subsequently, the TA tries configuring the CA on the agent, and it fails. + * To point out that this case happened, we introduce a special status code (TODO: ARE WE)? + * When this status code is observed by the tool TA, it queties all available configurations again + * by calling `rocprofiler_query_pc_sampling_agent_configurations`, + * that returns only CB this time. The tool TA can choose CB, so that both + * TA and TB use the PC sampling capability in the separate processes. + * + * Constraints3: We allow only one context to contain the configured PC sampling service + * within the process, that implies that at most one of the loaded tools can use PC sampling. + * One context can contains multiple PC sampling services configured for different GPU agents. + * + * @param [in] context_id - id of the context used for starting/stopping PC sampling service + * @param [in] agent_id - id of the agent on which caller tries using PC sampling capability + * @param [in] method - the type of PC sampling the caller tries to use on the agent. + * @param [in] unit - The unit appropriate to the PC sampling type/method. + * @param [in] interval - frequency at which PC samples are generated + * @param [in] buffer_id - id of the buffer used for delivering PC samples * @return ::rocprofiler_status_t * */ rocprofiler_status_t ROCPROFILER_API rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id, - rocprofiler_agent_t agent, + rocprofiler_agent_id_t agent_id, rocprofiler_pc_sampling_method_t method, rocprofiler_pc_sampling_unit_t unit, uint64_t interval, rocprofiler_buffer_id_t buffer_id); +/** + * @brief PC sampling configuration supported by a GPU agent. + * @var rocprofiler_pc_sampling_configuration_s::method + * Sampling method supported by the GPU + * agent. Currenlty, it can take one of the following two values: + * - ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP: a background host thread + * periodically interrupts waves execution on the GPU to generate PC samples + * - ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC: performance monitoring hardware + * on the GPU periodically interrupts waves to generate PC samples. + * @var rocprofiler_pc_sampling_configuration_s::unit + * A unit used to specify the period of the + * @ref method for samples generation. + * @var rocprofiler_pc_sampling_configuration_s::min_interval + * the highest possible frequencey for + * generating samples using @ref method. + * @var rocprofiler_pc_sampling_configuration_s::max_interval + * the lowest possible frequency for + * generating samples using @ref method + * @var rocprofiler_pc_sampling_configuration_s::flags + * TODO: ??? + */ struct rocprofiler_pc_sampling_configuration_s { rocprofiler_pc_sampling_method_t method; @@ -63,6 +133,163 @@ struct rocprofiler_pc_sampling_configuration_s uint64_t flags; }; +/** + * @brief The rocprofiler calls the tool's callback to deliver the list + * of available configurations upon the calls to the @ref + * rocprofiler_query_pc_sampling_agent_configurations. + * + * @param[out] configs - The list of PC sampling configurations supported by the agent of the + * moment of invoking @ref rocprofiler_query_pc_sampling_agent_configurations. + * @param[out] num_config - The number of configuration contained in the underlying + * In case the GPU agent does not support PC sampling, the value is 0. + * @param[in] user_data - A pointer passed as the last argument of the + * @ref rocprofiler_query_pc_sampling_agent_configurations + */ +typedef rocprofiler_status_t (*rocprofiler_available_pc_sampling_configurations_cb_t)( + const rocprofiler_pc_sampling_configuration_t* configs, + size_t num_config, + void* user_data); + +/** + * @brief Query PC Sampling Configuration. + * + * @param [in] agent_id - id of the agent for which available configuration will be listed + * @param [in] cb - User callback that delivers the available PC sampling configurations + * @param [in] user_data - passed to the @p cb + * @return ::rocprofiler_status_t + */ +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_pc_sampling_agent_configurations( + rocprofiler_agent_id_t agent_id, + rocprofiler_available_pc_sampling_configurations_cb_t cb, + void* user_data) ROCPROFILER_NONNULL(2, 3); + +/** + * @brief The header of the @ref rocprofiler_pc_sampling_record_s, indicating + * what fields of the @ref rocprofiler_pc_sampling_record_s instance are meaningful + * @brief The header of the @ref rocprofiler_pc_sampling_record_s, indicating + * what fields of the @ref rocprofiler_pc_sampling_record_s instance are meaningful + * for the sample. + * @var rocprofiler_pc_sampling_header_v1_t::valid + * the sample is valid + * @var rocprofiler_pc_sampling_header_v1_t::type + * The following values are possible: + * - 0 - reserved + * - 1 - host trap pc sample + * - 2 - stochastic pc sample + * - 3 - perfcounter (unsupported at the moment) + * - other values does not mean anything at the moment + * @var rocprofiler_pc_sampling_header_v1_t::has_stall_reason + * whether the sample contains + * information about the stall reason. If so, please @see rocprofiler_pc_sampling_snapshot_v1_t. + * @var rocprofiler_pc_sampling_header_v1_t::has_wave_cnt + * whether the @ref rocprofiler_pc_sampling_record_s::wave_count contains + * meaningful value + * @var rocprofiler_pc_sampling_header_v1_t::has_memory_counter + * whether the content of the @ref + * rocprofiler_pc_sampling_memorycounters_v1_t is meaningful + */ +typedef struct +{ + uint8_t valid : 1; + uint8_t type : 4; // 0=reserved, 1=hosttrap, 2=stochastic, 3=perfcounter, >=4 possible v2? + uint8_t has_stall_reason : 1; + uint8_t has_wave_cnt : 1; + uint8_t has_memory_counter : 1; +} rocprofiler_pc_sampling_header_v1_t; + +/** + * @brief TODO: provide the description + */ +typedef struct +{ + uint32_t dual_issue_valu : 1; + uint32_t inst_type : 4; + uint32_t reason_not_issued : 7; + uint32_t arb_state_issue : 10; + uint32_t arb_state_stall : 10; +} rocprofiler_pc_sampling_snapshot_v1_t; + +/** + * @brief TODO: provide the description + */ +typedef union +{ + struct + { + uint32_t load_cnt : 6; + uint32_t store_cnt : 6; + uint32_t bvh_cnt : 3; + uint32_t sample_cnt : 6; + uint32_t ds_cnt : 6; + uint32_t km_cnt : 5; + }; + uint32_t raw; +} rocprofiler_pc_sampling_memorycounters_v1_t; + +// TODO: The definition of this structure might change over time +// to reduce the space needed to represent a single sample. +/** + * @brief ROCProfiler PC Sampling Record corresponding to the interrupted wave. + * @var rocprofiler_pc_sampling_record_s::flags + * header that indicates what fields are meaningful + * for the PC sample. The values depend on what the underlying GPU agent architecture supports. + * @var rocprofiler_pc_sampling_record_s::chiplet + * chiplet index + * @var rocprofiler_pc_sampling_record_s::wave_id + * wave identifier within the workgroup + * @var rocprofiler_pc_sampling_record_s::wave_issued + * a flags indicated whether the wave is + * issueing the instruction' represented by the @ref pc at the moment of interruption. + * @var rocprofiler_pc_sampling_record_s::reserved + * FIXME: reserved 7 bits, must be zero. + * @var rocprofiler_pc_sampling_record_s::hw_id + * compute unit identifier + * @var rocprofiler_pc_sampling_record_s::pc + * The current program counter of the wave at the moment + * of interruption + * @var rocprofiler_pc_sampling_record_s::exec_mask + * shows how many SIMD lanes of the wave were + * executing the instruction represented by the @ref pc. Useful to understand thread-divergance + * within the wave + * @var rocprofiler_pc_sampling_record_s::workgroup_id_x + * the x coordinate of the wave within the workgroup + * @var rocprofiler_pc_sampling_record_s::workgroup_id_y + * the y coordinate of the wave within the workgroup + * @var rocprofiler_pc_sampling_record_s::workgroup_id_z + * the y coordinate of the wave within the workgroup + * @var rocprofiler_pc_sampling_record_s::wave_count + * FIXME: number of waves active at the CU at the moment of sample generation??? + * @var rocprofiler_pc_sampling_record_s::timestamp + * represents the GPU timestamp when the sample is generated + * @var rocprofiler_pc_sampling_record_s::correlation_id + * correlation id of the API call that + * initiated kernel laucnh. The interrupted wave is executed as part of the kernel. + * @var rocprofiler_pc_sampling_record_s::snapshot + * TODO: + * @var rocprofiler_pc_sampling_record_s::memory_counters + * TODO: + */ +struct rocprofiler_pc_sampling_record_s +{ + rocprofiler_pc_sampling_header_v1_t flags; + uint8_t chiplet; + uint8_t wave_id; + uint8_t wave_issued : 1; + uint8_t reserved : 7; + uint32_t hw_id; + uint64_t pc; + uint64_t exec_mask; + uint32_t workgroup_id_x; + uint32_t workgroup_id_y; + uint32_t workgroup_id_z; + uint32_t wave_count; + uint64_t timestamp; + rocprofiler_correlation_id_t correlation_id; + rocprofiler_pc_sampling_snapshot_v1_t snapshot; + rocprofiler_pc_sampling_memorycounters_v1_t memory_counters; +}; + /** @} */ -ROCPROFILER_EXTERN_C_FINI +ROCPROFILER_EXTERN_C_FINI \ No newline at end of file diff --git a/source/include/rocprofiler/registration.h b/source/include/rocprofiler/registration.h index d9d6887638..50681df294 100644 --- a/source/include/rocprofiler/registration.h +++ b/source/include/rocprofiler/registration.h @@ -48,15 +48,19 @@ typedef struct const uint32_t handle; ///< internal handle } rocprofiler_client_id_t; +// TODO(aelwazir): Add Docs typedef void (*rocprofiler_client_finalize_t)(rocprofiler_client_id_t); +// TODO(aelwazir): Add Docs typedef int (*rocprofiler_tool_initialize_t)(rocprofiler_client_finalize_t finalize_func, void* tool_data); +// TODO(aelwazir): Add Docs typedef void (*rocprofiler_tool_finalize_t)(void* tool_data); /** * @brief Data structure containing a initialization, finalization, and data + * TODO(aelwazir): Add correlation with rocprofiler_configure. * */ typedef struct diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt index 55657945f0..0d644a4656 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -3,6 +3,7 @@ # add_subdirectory(common) add_subdirectory(rocprofiler) +add_subdirectory(rocprofiler-tool) add_subdirectory(plugins) if(ROCPROFILER_BUILD_TESTS) diff --git a/source/lib/common/CMakeLists.txt b/source/lib/common/CMakeLists.txt index 89963ab8f1..e85cc81061 100644 --- a/source/lib/common/CMakeLists.txt +++ b/source/lib/common/CMakeLists.txt @@ -31,7 +31,7 @@ target_link_libraries( $ $ $ - $ + $ $ $ $ diff --git a/source/lib/common/config.cpp b/source/lib/common/config.cpp index 712b1b98d8..1665e2ce87 100644 --- a/source/lib/common/config.cpp +++ b/source/lib/common/config.cpp @@ -22,8 +22,10 @@ // #include "lib/common/config.hpp" +#include "lib/common/defines.hpp" #include "lib/common/demangle.hpp" #include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/utility.hpp" #include @@ -32,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -339,18 +340,18 @@ compose_filename(const config& _cfg) } // join / and replace any keys with values - auto _prefix = format(std::filesystem::path{_output_path} / _output_file); + auto _prefix = format(common::filesystem::path{_output_path} / _output_file); // return on empty if(_prefix.empty()) return std::string{}; // get the absolute path - auto _fname = std::filesystem::absolute(std::filesystem::path{_prefix}); + auto _fname = common::filesystem::absolute(common::filesystem::path{_prefix}); // create the directory if necessary auto _fname_path = _fname.parent_path(); - if(!std::filesystem::exists(_fname_path)) - std::filesystem::create_directories(_fname.parent_path()); + if(!common::filesystem::exists(_fname_path)) + common::filesystem::create_directories(_fname.parent_path()); return _fname.string(); } diff --git a/source/lib/common/container/record_header_buffer.cpp b/source/lib/common/container/record_header_buffer.cpp index ab8b6a6033..e7e898329e 100644 --- a/source/lib/common/container/record_header_buffer.cpp +++ b/source/lib/common/container/record_header_buffer.cpp @@ -74,8 +74,10 @@ record_header_buffer::allocate(size_t num_bytes) auto _lk = rhb_raii_lock{*this}; m_buffer.init(num_bytes); - m_headers.resize(m_buffer.capacity(), - rocprofiler_record_header_t{.hash = 0, .payload = nullptr}); + rocprofiler_record_header_t record = {}; + record.hash = 0; + record.payload = nullptr; + m_headers.resize(m_buffer.capacity(), record); return true; } @@ -106,9 +108,15 @@ record_header_buffer::clear() auto _sz = m_buffer.capacity(); if(!m_buffer.clear(std::nothrow_t{})) return 0; std::for_each(m_headers.begin(), m_headers.end(), [](auto& itr) { - itr = rocprofiler_record_header_t{.hash = 0, .payload = nullptr}; + rocprofiler_record_header_t record = {}; + record.hash = 0; + record.payload = nullptr; + itr = record; }); - m_headers.resize(_sz, rocprofiler_record_header_t{.hash = 0, .payload = nullptr}); + rocprofiler_record_header_t record = {}; + record.hash = 0; + record.payload = nullptr; + m_headers.resize(_sz, record); m_index.store(0, std::memory_order_release); } diff --git a/source/lib/common/container/record_header_buffer.hpp b/source/lib/common/container/record_header_buffer.hpp index 42784ea640..26e03724d2 100644 --- a/source/lib/common/container/record_header_buffer.hpp +++ b/source/lib/common/container/record_header_buffer.hpp @@ -231,8 +231,11 @@ record_header_buffer::emplace(uint64_t _hash, Tp& _v) // for where the header record should be placed. // NOTE: m_headers was resized to be large enough to accomodate // sizeof(Tp) == 1 for every entry in buffer - auto idx = m_index.fetch_add(1, std::memory_order_release); - m_headers.at(idx) = rocprofiler_record_header_t{.hash = _hash, .payload = _addr}; + auto idx = m_index.fetch_add(1, std::memory_order_release); + rocprofiler_record_header_t record = {}; + record.hash = _hash; + record.payload = _addr; + m_headers.at(idx) = record; } read_unlock(); return (_addr != nullptr); diff --git a/source/lib/common/container/small_vector.hpp b/source/lib/common/container/small_vector.hpp index 5f33d61bf3..1f88a86359 100644 --- a/source/lib/common/container/small_vector.hpp +++ b/source/lib/common/container/small_vector.hpp @@ -233,6 +233,8 @@ protected: assert(is_safe_to_reference_after_resize(elt, new_size) && "Attempting to reference an element of the vector in an operation " "that invalidates it"); + (void) elt; + (void) new_size; } /// check whether elt will be invalidated by increasing the size of the diff --git a/source/lib/common/filesystem.hpp b/source/lib/common/filesystem.hpp new file mode 100644 index 0000000000..d33d3da261 --- /dev/null +++ b/source/lib/common/filesystem.hpp @@ -0,0 +1,77 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#if !defined(ROCPROFILER_HAS_GHC_LIB_FILESYSTEM) +# if defined __has_include +# if __has_include() +# define ROCPROFILER_HAS_GHC_LIB_FILESYSTEM 1 +# else +# define ROCPROFILER_HAS_GHC_LIB_FILESYSTEM 0 +# endif +# else +# define ROCPROFILER_HAS_GHC_LIB_FILESYSTEM 0 +# endif +#endif + +#if ROCPROFILER_HAS_GHC_LIB_FILESYSTEM == 0 +# if defined __has_include +# if __has_include() +# include +# endif +# endif + +# if defined(__cpp_lib_filesystem) +# define ROCPROFILER_HAS_CPP_LIB_FILESYSTEM 1 +# else +# if defined __has_include +# if __has_include() +# define ROCPROFILER_HAS_CPP_LIB_FILESYSTEM 1 +# endif +# endif +# endif +#endif + +// include the correct filesystem header +#if defined(ROCPROFILER_HAS_GHC_LIB_FILESYSTEM) && ROCPROFILER_HAS_GHC_LIB_FILESYSTEM > 0 +# include +#elif defined(ROCPROFILER_HAS_CPP_LIB_FILESYSTEM) && ROCPROFILER_HAS_CPP_LIB_FILESYSTEM > 0 +# include +#else +# include +#endif + +// create a namespace alias +namespace rocprofiler +{ +namespace common +{ +#if defined(ROCPROFILER_HAS_GHC_LIB_FILESYSTEM) && ROCPROFILER_HAS_GHC_LIB_FILESYSTEM > 0 +namespace filesystem = ::ghc::filesystem; // NOLINT +#elif defined(ROCPROFILER_HAS_CPP_LIB_FILESYSTEM) && ROCPROFILER_HAS_CPP_LIB_FILESYSTEM > 0 +namespace filesystem = ::std::filesystem; // NOLINT +#else +namespace filesystem = ::std::experimental::filesystem; // NOLINT +#endif +} // namespace common +} // namespace rocprofiler diff --git a/source/lib/common/xml.cpp b/source/lib/common/xml.cpp index cfda098bb8..387031b0d7 100644 --- a/source/lib/common/xml.cpp +++ b/source/lib/common/xml.cpp @@ -50,6 +50,7 @@ Xml::~Xml() if(!map_) return; for(auto& [_, nodes] : *map_) { + (void) _; for(auto& node : nodes) { node->nodes.clear(); diff --git a/source/lib/rocprofiler-tool/CMakeLists.txt b/source/lib/rocprofiler-tool/CMakeLists.txt new file mode 100644 index 0000000000..50927a6244 --- /dev/null +++ b/source/lib/rocprofiler-tool/CMakeLists.txt @@ -0,0 +1,32 @@ +# +# Tool library used by rocprofiler +# + +add_library(rocprofiler-tool SHARED) +target_sources(rocprofiler-tool PRIVATE helper.hpp helper.cpp tool.cpp trace_buffer.hpp) +target_link_libraries( + rocprofiler-tool + PRIVATE rocprofiler::rocprofiler-shared-library + rocprofiler::rocprofiler-hsa-runtime + rocprofiler::rocprofiler-headers + rocprofiler::rocprofiler-build-flags + rocprofiler::rocprofiler-memcheck + rocprofiler::rocprofiler-common-library + rocprofiler::rocprofiler-cxx-filesystem + atomic) + +set_target_properties( + rocprofiler-tool + PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/rocprofiler + SOVERSION ${PROJECT_VERSION_MAJOR} + VERSION ${PROJECT_VERSION} + SKIP_BUILD_RPATH OFF + BUILD_RPATH "\$ORIGIN:\$ORIGIN/.." + INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..") + +install( + TARGETS rocprofiler-tool + DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} + COMPONENT tools + EXPORT ${PROJECT_NAME}-library-targets) diff --git a/source/lib/rocprofiler-tool/README.md b/source/lib/rocprofiler-tool/README.md new file mode 100644 index 0000000000..3d1f5e7669 --- /dev/null +++ b/source/lib/rocprofiler-tool/README.md @@ -0,0 +1,33 @@ +# ROCProfiler Tool Library + +This is a tool that gets registered with the +rocprofiler to obtain its services. +The tool is built as a shared library and is named as +rocprofiler-tool. +The library can be preloaded using LD_PRELOAD +to facilitate its registration as a tool +with the rocprofiler. + +The user through rocprofv2 script can select the +options to obtain tracing and counter collection +services from the rocprofiler. + +Currently, this tool supports kernel trace and the +hsa-api trace. +The tool uses the following environment variables +to read the user choices. + +- `ROCPROFILER_KERNEL_TRACE=1` to obtain kernel trace +- `ROCPROFILER_HSA_API_TRACE=1` to obtain hsa api trace + +The user can also specify the output filename and output file path +to which the traces are written to. + +- `ROCPROFILER_OUTPUT_PATH=` to set the output directory path +- `ROCPROFILER_OUTPUT_FILE_NAME=` to set the output file name + +## CHANGELOG + +The tool design is similar to its earlier versions. +However, not all features that the earlier versions supported are supported by +this tool. diff --git a/source/lib/rocprofiler-tool/helper.cpp b/source/lib/rocprofiler-tool/helper.cpp new file mode 100644 index 0000000000..0749c9a125 --- /dev/null +++ b/source/lib/rocprofiler-tool/helper.cpp @@ -0,0 +1,353 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "helper.hpp" + +#include + +#include +#include +#include +#include + +namespace +{ +using amd_compute_pgm_rsrc_three32_t = uint32_t; + +// AMD Compute Program Resource Register Three. +enum amd_compute_gfx9_pgm_rsrc_three_t +{ + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_ACCUM_OFFSET, 0, 5), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_TG_SPLIT, 16, 1) +}; + +enum amd_compute_gfx10_gfx11_pgm_rsrc_three_t +{ + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_SHARED_VGPR_COUNT, 0, 4), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_INST_PREF_SIZE, 4, 6), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_TRAP_ON_START, 10, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_TRAP_ON_END, 11, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_COMPUTE_PGM_RSRC_THREE_IMAGE_OP, 31, 1) +}; + +// Kernel code properties. +enum amd_kernel_code_property_t +{ + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, + 0, + 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, 1, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, 2, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, + 3, + 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, 4, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, + 6, + 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_RESERVED0, 7, 3), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + 10, + 1), // GFX10+ + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, 11, 1), + AMD_HSA_BITS_CREATE_ENUM_ENTRIES(AMD_KERNEL_CODE_PROPERTY_RESERVED1, 12, 4), +}; + +std::mutex kernel_descriptor_name_map_mutex; +std::unordered_map kernel_descriptor_name_map; + +std::mutex kernel_properties_correlation_mutex; +std::unordered_map + kernel_properties_correlation_map; + +uint32_t +arch_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) +{ + std::string info_name(name.data(), name.size()); + if(strcmp(name.data(), "gfx90a") == 0 || strncmp(name.data(), "gfx94", 5) == 0) + return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc3, + AMD_COMPUTE_PGM_RSRC_THREE_ACCUM_OFFSET) + + 1) * + 4; + + return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1, + AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) + + 1) * + (AMD_HSA_BITS_GET(kernel_code.kernel_code_properties, + AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) + ? 8 + : 4); +} + +uint32_t +accum_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) +{ + std::string info_name(name.data(), name.size()); + if(strcmp(info_name.c_str(), "gfx908") == 0) return arch_vgpr_count(name, kernel_code); + if(strcmp(info_name.c_str(), "gfx90a") == 0 || strncmp(info_name.c_str(), "gfx94", 5) == 0) + return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1, + AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WORKITEM_VGPR_COUNT) + + 1) * + 8 - + arch_vgpr_count(name, kernel_code); + + return 0; +} + +uint32_t +sgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) +{ + // GFX10 and later always allocate 128 sgprs. + + // TODO(srnagara): Recheck the extraction of gfxip from gpu name + const char* name_data = name.data(); + const size_t gfxip_label_len = std::min(name.size() - 2, size_t{63}); + if(gfxip_label_len > 0 && strnlen(name_data, gfxip_label_len + 1) >= gfxip_label_len) + { + auto gfxip = std::vector{}; + gfxip.resize(gfxip_label_len + 1, '\0'); + memcpy(gfxip.data(), name_data, gfxip_label_len); + // TODO(srnagara): Check if it is hardcoded + if(std::stoi(&gfxip.at(3)) >= 10) return 128; + return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc1, + AMD_COMPUTE_PGM_RSRC_ONE_GRANULATED_WAVEFRONT_SGPR_COUNT) / + 2 + + 1) * + 16; + } + return 0; +} + +const auto& +GetLoaderTable() +{ + static const auto _v = []() { + using hsa_loader_table_t = hsa_ven_amd_loader_1_01_pfn_t; + auto _tbl = hsa_loader_table_t{}; + memset(&_tbl, 0, sizeof(hsa_loader_table_t)); + hsa_system_get_major_extension_table( + HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_loader_table_t), &_tbl); + return _tbl; + }(); + return _v; +} + +const kernel_descriptor_t* +GetKernelCode(uint64_t kernel_object) +{ + const kernel_descriptor_t* kernel_code = nullptr; + if(GetLoaderTable().hsa_ven_amd_loader_query_host_address == nullptr) return kernel_code; + hsa_status_t status = GetLoaderTable().hsa_ven_amd_loader_query_host_address( + reinterpret_cast(kernel_object), // NOLINT(performance-no-int-to-ptr) + reinterpret_cast(&kernel_code)); + if(HSA_STATUS_SUCCESS != status) + { + kernel_code = reinterpret_cast( // NOLINT(performance-no-int-to-ptr) + kernel_object); + } + return kernel_code; +} +} // namespace + +void +SetKernelDescriptorName(rocprofiler_address_t kernel_descriptor, const char* kernel_name) +{ + std::lock_guard kernel_descriptor_name_map_lock(kernel_descriptor_name_map_mutex); + kernel_descriptor_name_map[kernel_descriptor] = kernel_name; +} + +void +SetKernelProperties(uint64_t correlation_id, rocprofiler_tool_kernel_properties_t kernel_properties) +{ + std::lock_guard kernel_properties_correlation_map_lock( + kernel_properties_correlation_mutex); + kernel_properties_correlation_map[correlation_id] = kernel_properties; +} + +rocprofiler_tool_kernel_properties_t +GetKernelProperties(uint64_t correlation_id) +{ + std::lock_guard kernel_properties_correlation_map_lock( + kernel_properties_correlation_mutex); + auto it = kernel_properties_correlation_map.find(correlation_id); + if(it == kernel_properties_correlation_map.end()) + { + std::cout << "kernel properties not found" << std::endl; + abort(); + } + return it->second; +} +const char* +GetKernelDescriptorName(rocprofiler_address_t kernel_descriptor) +{ + std::lock_guard kernel_descriptor_name_map_lock(kernel_descriptor_name_map_mutex); + auto it = kernel_descriptor_name_map.find(kernel_descriptor); + if(it == kernel_descriptor_name_map.end()) + { + std::cout << "kernel name not found" << std::endl; + abort(); + } + return it->second; +} + +std::vector +GetCounterNames() +{ + std::vector counters; + const char* line_c_str = getenv("ROCPROFILER_COUNTERS"); + if(line_c_str) + { + std::string line = line_c_str; + // skip commented lines + auto found = line.find_first_not_of(" \t"); + if(found != std::string::npos) + { + if(line[found] == '#') return {}; + } + if(line.find("pmc") == std::string::npos) return counters; + char seperator = ' '; + std::string::size_type prev_pos = 0, pos = line.find(seperator, prev_pos); + prev_pos = ++pos; + if(pos != std::string::npos) + { + while((pos = line.find(seperator, pos)) != std::string::npos) + { + std::string substring(line.substr(prev_pos, pos - prev_pos)); + if(substring.length() > 0 && substring != ":") + { + counters.push_back(substring); + } + prev_pos = ++pos; + } + if(!line.substr(prev_pos, pos - prev_pos).empty()) + { + counters.push_back(line.substr(prev_pos, pos - prev_pos)); + } + } + } + return counters; +} + +void +populate_kernel_properties_data(rocprofiler_tool_kernel_properties_t* kernel_properties, + const hsa_kernel_dispatch_packet_t dispatch_packet) +{ + const uint64_t kernel_object = dispatch_packet.kernel_object; + + const kernel_descriptor_t* kernel_code = GetKernelCode(kernel_object); + uint64_t grid_size = + dispatch_packet.grid_size_x * dispatch_packet.grid_size_y * dispatch_packet.grid_size_z; + if(grid_size > UINT32_MAX) abort(); + kernel_properties->grid_size = grid_size; + uint64_t workgroup_size = dispatch_packet.workgroup_size_x * dispatch_packet.workgroup_size_y * + dispatch_packet.workgroup_size_z; + if(workgroup_size > UINT32_MAX) abort(); + kernel_properties->workgroup_size = (uint32_t) workgroup_size; + kernel_properties->lds_size = dispatch_packet.group_segment_size; + kernel_properties->scratch_size = dispatch_packet.private_segment_size; + kernel_properties->arch_vgpr_count = + arch_vgpr_count(kernel_properties->gpu_agent.name, *kernel_code); + kernel_properties->accum_vgpr_count = + accum_vgpr_count(kernel_properties->gpu_agent.name, *kernel_code); + kernel_properties->sgpr_count = sgpr_count(kernel_properties->gpu_agent.name, *kernel_code); + kernel_properties->wave_size = + AMD_HSA_BITS_GET(kernel_code->kernel_code_properties, + AMD_KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) + ? 32 + : 64; + kernel_properties->signal_handle = dispatch_packet.completion_signal.handle; +} + +std::string +cxa_demangle(std::string_view _mangled_name, int* _status) +{ + constexpr size_t buffer_len = 4096; + // return the mangled since there is no buffer + if(_mangled_name.empty()) + { + *_status = -2; + return std::string{}; + } + + auto _demangled_name = std::string{_mangled_name}; + + // PARAMETERS to __cxa_demangle + // mangled_name: + // A nullptr-terminated character string containing the name to be demangled. + // buffer: + // A region of memory, allocated with malloc, of *length bytes, into which the + // demangled name is stored. If output_buffer is not long enough, it is expanded + // using realloc. output_buffer may instead be nullptr; in that case, the demangled + // name is placed in a region of memory allocated with malloc. + // _buflen: + // If length is non-nullptr, the length of the buffer containing the demangled name + // is placed in *length. + // status: + // *status is set to one of the following values + size_t _demang_len = 0; + char* _demang = abi::__cxa_demangle(_demangled_name.c_str(), nullptr, &_demang_len, _status); + switch(*_status) + { + // 0 : The demangling operation succeeded. + // -1 : A memory allocation failure occurred. + // -2 : mangled_name is not a valid name under the C++ ABI mangling rules. + // -3 : One of the arguments is invalid. + case 0: + { + if(_demang) _demangled_name = std::string{_demang}; + break; + } + case -1: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "memory allocation failure occurred demangling %s", + _demangled_name.c_str()); + ::perror(_msg); + break; + } + case -2: break; + case -3: + { + char _msg[buffer_len]; + ::memset(_msg, '\0', buffer_len * sizeof(char)); + ::snprintf(_msg, + buffer_len, + "Invalid argument in: (\"%s\", nullptr, nullptr, %p)", + _demangled_name.c_str(), + (void*) _status); + ::perror(_msg); + break; + } + default: break; + }; + + // if it "demangled" but the length is zero, set the status to -2 + if(_demang_len == 0 && *_status == 0) *_status = -2; + + // free allocated buffer + ::free(_demang); + return _demangled_name; +} diff --git a/source/lib/rocprofiler-tool/helper.hpp b/source/lib/rocprofiler-tool/helper.hpp new file mode 100644 index 0000000000..357666a70b --- /dev/null +++ b/source/lib/rocprofiler-tool/helper.hpp @@ -0,0 +1,149 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lib/common/filesystem.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ROCPROFILER_CALL(result, msg) \ + { \ + rocprofiler_status_t CHECKSTATUS = result; \ + if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \ + { \ + std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \ + std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \ + << " failed with error code " << CHECKSTATUS << ": " << status_msg \ + << std::endl; \ + std::stringstream errmsg{}; \ + errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \ + << status_msg << ")"; \ + throw std::runtime_error(errmsg.str()); \ + } \ + } + +constexpr size_t BUFFER_SIZE_BYTES = 4096; +constexpr size_t WATERMARK = (BUFFER_SIZE_BYTES / 2); + +// This can be different for different architecture +// Lets follow the v1 rocprof +// I will have a kernel id from the rocprofiler +// address the kernel descriptor and access the information +// This works for gfx9 but may not for Navi arch +// Interecept the kernel symbol load build a table for kernel id +// when kenel dispatch callback. Here is the kernel id +// Use the kernel id +typedef struct +{ + uint64_t grid_size; + uint64_t workgroup_size; + uint64_t lds_size; + uint64_t scratch_size; + uint64_t arch_vgpr_count; + uint64_t accum_vgpr_count; + uint64_t sgpr_count; + uint64_t wave_size; + uint64_t signal_handle; + uint64_t kernel_object; + rocprofiler_queue_id_t queue_id; + rocprofiler_agent_t gpu_agent; + +} rocprofiler_tool_kernel_properties_t; + +typedef struct +{ + std::vector gpu_agents_lists; + +} rocprofiler_tool_agent_callback_t; + +struct kernel_descriptor_t +{ + uint8_t reserved0[16]; + int64_t kernel_code_entry_byte_offset; + uint8_t reserved1[20]; + uint32_t compute_pgm_rsrc3; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint16_t kernel_code_properties; + uint8_t reserved2[6]; +}; + +using rocprofiler_tool_callback_kind_names_t = + std::map; +using rocprofiler_tool_callback_kind_operation_names_t = + std::map>; + +struct rocprofiler_tool_callback_name_info_t +{ + rocprofiler_tool_callback_kind_names_t kind_names = {}; + rocprofiler_tool_callback_kind_operation_names_t operation_names = {}; +}; + +std::vector +GetCounterNames(); + +void +SetKernelDescriptorName(rocprofiler_address_t kernel_descriptor, const char* name); + +void +SetKernelProperties(uint64_t correlation_id, + rocprofiler_tool_kernel_properties_t kernel_properties); +void +SetKernelProperties(uint64_t correlation_id, + rocprofiler_tool_kernel_properties_t kernel_properties); + +rocprofiler_tool_kernel_properties_t +GetKernelProperties(uint64_t correlation_id); + +const char* +GetKernelDescriptorName(rocprofiler_address_t kernel_descriptor); + +void +populate_kernel_properties_data(rocprofiler_tool_kernel_properties_t* kernel_properties, + const hsa_kernel_dispatch_packet_t dispatch_packet); + +void +TracerFlushRecord(void* data, rocprofiler_callback_tracing_kind_t kind); + +std::string +cxa_demangle(std::string_view _mangled_name, int* _status); diff --git a/source/lib/rocprofiler-tool/tool.cpp b/source/lib/rocprofiler-tool/tool.cpp new file mode 100644 index 0000000000..66aad11de0 --- /dev/null +++ b/source/lib/rocprofiler-tool/tool.cpp @@ -0,0 +1,464 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "helper.hpp" +#include "trace_buffer.hpp" + +#include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace common = ::rocprofiler::common; +namespace fs = common::filesystem; + +TRACE_BUFFER_INSTANTIATE(); + +// static const uint32_t lds_block_size = 128 * 4; + +namespace +{ +auto tool_buffer = rocprofiler_buffer_id_t{}; +auto output_path = + fs::path{common::get_env("ROCPROFILER_OUTPUT_PATH", fs::current_path().string())}; +auto output_file_name = + common::get_env("ROCPROFILER_OUTPUT_FILE_NAME", std::to_string(getpid()) + "-"); + +std::pair +get_output_stream(const std::string& fname, const std::string& ext = ".ext") +{ + if(output_path.string().empty()) return {&std::clog, [](auto*&) {}}; + + if(fs::exists(output_path) && !fs::is_directory(fs::status(output_path))) + throw std::runtime_error{ + fmt::format("ROCPROFILER_OUTPUT_PATH ({}) already exists and is not a directory", + output_path.string())}; + if(!fs::exists(output_path)) fs::create_directory(output_path); + + auto output_file = output_path / (output_file_name + fname + ext); + auto* _ofs = new std::ofstream{output_file}; + if(!_ofs && !*_ofs) + throw std::runtime_error{ + fmt::format("Failed to open {} for output", (output_path / output_file_name).string())}; + std::cout << "Results File: " << output_file << std::endl; + return {_ofs, [](std::ostream*& v) { + if(v) dynamic_cast(v)->close(); + delete v; + v = nullptr; + }}; +} + +template +std::string +as_hex(Tp _v, size_t _width = 16) +{ + auto _ss = std::stringstream{}; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _v; + return _ss.str(); +} +} // namespace + +struct output_file +{ + output_file(std::string name) + : m_name(std::move(name)) + { + std::tie(m_stream, m_dtor) = get_output_stream(name); + } + + ~output_file() { m_dtor(m_stream); } + + output_file(const output_file&) = delete; + output_file& operator=(const output_file&) = delete; + + std::string name() const { return m_name; } + + template + std::ostream& operator<<(T&& value) + { + return (*m_stream) << std::forward(value); + } + + std::ostream& operator<<(std::ostream& (*func)(std::ostream&) ) { return (*m_stream) << func; } + +private: + using stream_dtor_t = void (*)(std::ostream*&); + + const std::string m_name = {}; + std::ostream* m_stream = nullptr; + stream_dtor_t m_dtor = [](std::ostream*&) {}; +}; + +auto& +get_hsa_api_file() +{ + static auto _v = output_file{"hsa_api_trace"}; + return _v; +} + +auto& +get_kernel_trace_file() +{ + static auto _v = output_file{"kernel_trace"}; + return _v; +} + +std::shared_mutex kernel_data_mutex; +std::unordered_map kernel_data; + +struct hsa_api_trace_entry_t +{ + std::atomic valid; + rocprofiler_callback_tracing_record_t record; + rocprofiler_timestamp_t begin_timestamp; + rocprofiler_timestamp_t end_timestamp; + std::string_view api_name; + + hsa_api_trace_entry_t(rocprofiler_timestamp_t begin, + rocprofiler_timestamp_t end, + rocprofiler_callback_tracing_record_t tracer_record, + std::string_view name + + ) + : valid(TRACE_ENTRY_INIT) + , record(tracer_record) + , begin_timestamp(begin) + , end_timestamp(end) + , api_name(name) + {} +}; + +TraceBuffer hsa_api_buffer("HSA API", + 0x200000, + [](hsa_api_trace_entry_t* entry) { + TracerFlushRecord( + entry, + ROCPROFILER_CALLBACK_TRACING_HSA_API); + }); + +rocprofiler_tool_callback_name_info_t name_info; + +void +tool_fini(void* tool_data) +{ + (void) (tool_data); +} + +void +TracerFlushRecord(void* data, rocprofiler_callback_tracing_kind_t kind) +{ + if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API) + { + auto* entry = reinterpret_cast(data); + get_hsa_api_file() << entry->api_name << " " << entry->begin_timestamp << ":" + << entry->end_timestamp << " " << entry->record.correlation_id.internal + << '\n'; + } +} +void +rocprofiler_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* data) +{ + if(record.kind == ROCPROFILER_CALLBACK_TRACING_HIP_API) + { + // To be implemented + throw std::runtime_error{"not implemented"}; + } + + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_HSA_API) + { + auto timestamp = rocprofiler_timestamp_t{}; + ROCPROFILER_CALL(rocprofiler_get_timestamp(×tamp), "timestamp failed"); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + { + user_data->value = timestamp; + } + + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + const auto* info_name_str = name_info.operation_names[record.kind][record.operation]; + hsa_api_trace_entry_t& entry = + hsa_api_buffer.Emplace(user_data->value, timestamp, record, info_name_str); + entry.valid.store(TRACE_ENTRY_COMPLETE, std::memory_order_release); + } + } + + else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_API) + { + // To be implemented + throw std::runtime_error{"not implemented"}; + } + (void) (data); +} + +void +code_object_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* data) +{ + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_LOAD) + { + if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + { + // flush the buffer to ensure that any lookups for the client kernel names for the code + // object are completed + auto flush_status = rocprofiler_flush_buffer(tool_buffer); + if(flush_status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY) + ROCPROFILER_CALL(flush_status, "buffer flush"); + } + } + + if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT && + record.operation == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) + { + auto* sym_data = + static_cast( + record.payload); + + if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD) + { + auto kernel_name = + std::regex_replace(sym_data->kernel_name, std::regex{"(\\.kd)$"}, ""); + int demangle_status = 0; + kernel_name = cxa_demangle(kernel_name, &demangle_status); + std::unique_lock lock(kernel_data_mutex); + kernel_data.emplace(sym_data->kernel_id, kernel_name); + } + // The map entry cannot be erased here + // since we are tracing the kernel symbols here not the kernel dispatch + // else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD) + //{ + + // kernel_data.erase(data->kernel_id); + + //} + } + + (void) user_data; + (void) data; +} + +void +kernel_tracing_callback(rocprofiler_context_id_t context, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* user_data, + uint64_t /*drop_count*/) +{ + if(num_headers == 0) + throw std::runtime_error{ + "rocprofiler invoked a buffer callback with no headers. this should never happen"}; + else if(headers == nullptr) + throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the " + "array of headers. this should never happen"}; + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + if(header == nullptr) + { + throw std::runtime_error{ + "rocprofiler provided a null pointer to header. this should never happen"}; + } + else if(header->hash != + rocprofiler_record_header_compute_hash(header->category, header->kind)) + { + throw std::runtime_error{"rocprofiler_record_header_t (category | kind) != hash"}; + } + else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH) + { + auto* record = + static_cast(header->payload); + std::string kernel_name; + { + std::shared_lock lock(kernel_data_mutex); + kernel_name = kernel_data.at(record->kernel_id); + } + + get_kernel_trace_file() + << "agent_id=" << record->agent_id.handle + << ", queue_id=" << record->queue_id.handle << ", kernel_id=" << record->kernel_id + << ", kernel=" << kernel_name << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal + << ", extern_cid=" << record->correlation_id.external.value + << ", kind=" << record->kind << ", start=" << record->start_timestamp + << ", stop=" << record->end_timestamp + << ", private_segment_size=" << record->private_segment_size + << ", group_segment_size=" << record->group_segment_size << ", workgroup_size=(" + << record->workgroup_size.x << "," << record->workgroup_size.y << "," + << record->workgroup_size.z << "), grid_size=(" << record->grid_size.x << "," + << record->grid_size.y << "," << record->grid_size.z << ")" << '\n'; + } + } + (void) (user_data); +} + +rocprofiler_tool_callback_name_info_t +get_callback_id_names() +{ + auto cb_name_info = rocprofiler_tool_callback_name_info_t{}; + // + // callback for each kind operation + // + static auto tracing_kind_operation_cb = + [](rocprofiler_callback_tracing_kind_t kindv, uint32_t operation, void* data_v) { + auto* name_info_v = static_cast(data_v); + + if(kindv == ROCPROFILER_CALLBACK_TRACING_HSA_API) + { + const char* name = nullptr; + ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( + kindv, operation, &name, nullptr), + "query callback failed"); + if(name) name_info_v->operation_names[kindv][operation] = name; + } + + if(kindv == ROCPROFILER_CALLBACK_TRACING_HIP_API) + { + const char* name = nullptr; + ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_operation_name( + kindv, operation, &name, nullptr), + "query callback failed"); + if(name) name_info_v->operation_names[kindv][operation] = name; + } + return 0; + }; + + // + // callback for each callback kind (i.e. domain) + // + static auto tracing_kind_cb = [](rocprofiler_callback_tracing_kind_t kind, void* data) { + // store the callback kind name + auto* name_info_v = static_cast(data); + const char* name = nullptr; + ROCPROFILER_CALL(rocprofiler_query_callback_tracing_kind_name(kind, &name, nullptr), + "query callback failed"); + + if(name) name_info_v->kind_names[kind] = name; + + if(kind == ROCPROFILER_CALLBACK_TRACING_HSA_API) + { + ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kind_operations( + kind, tracing_kind_operation_cb, static_cast(data)), + "query callback failed"); + } + return 0; + }; + + ROCPROFILER_CALL(rocprofiler_iterate_callback_tracing_kinds(tracing_kind_cb, + static_cast(&cb_name_info)), + "iterate_callback failed"); + + return cb_name_info; +} + +int +tool_init(rocprofiler_client_finalize_t /*fini_func*/, void* tool_data) +{ + // Add the rocporfiler_call macro + rocprofiler_context_id_t context_id; + + name_info = get_callback_id_names(); + + ROCPROFILER_CALL(rocprofiler_create_context(&context_id), "create context failed"); + + if(common::get_env("ROCPROFILER_KERNEL_TRACE", false)) + { + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(context_id, + ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, + nullptr, + 0, + code_object_tracing_callback, + nullptr), + "tracing configure failed"); + + ROCPROFILER_CALL(rocprofiler_create_buffer(context_id, + 4096, + 2048, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + kernel_tracing_callback, + tool_data, + &tool_buffer), + "buffer creation"); + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service( + context_id, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, tool_buffer), + "buffer tracing service for kernel dispatch configure"); + } + + if(common::get_env("ROCPROFILER_HSA_API_TRACE", false)) + { + // Requesting all operations + ROCPROFILER_CALL( + rocprofiler_configure_callback_tracing_service(context_id, + ROCPROFILER_CALLBACK_TRACING_HSA_API, + nullptr, + 0, + rocprofiler_tracing_callback, + nullptr), + "tracing configure failed"); + } + + ROCPROFILER_CALL(rocprofiler_start_context(context_id), "start context failed"); + + return 0; +} + +extern "C" rocprofiler_tool_configure_result_t* +rocprofiler_configure(uint32_t /*version*/, + const char* /*runtime_version*/, + uint32_t priority, + rocprofiler_client_id_t* id) +{ + // only activate if main tool + if(priority > 0) return nullptr; + + // set the client name + id->name = "rocporfiler-tool"; + + // store client info + // client::client_id = id; + + // create configure data + static auto cfg = rocprofiler_tool_configure_result_t{ + sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr}; + + // return pointer to configure data + return &cfg; + // data passed around all the callbacks +} diff --git a/source/lib/rocprofiler-tool/trace_buffer.hpp b/source/lib/rocprofiler-tool/trace_buffer.hpp new file mode 100644 index 0000000000..dfe9d9843f --- /dev/null +++ b/source/lib/rocprofiler-tool/trace_buffer.hpp @@ -0,0 +1,318 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class TraceBufferBase +{ +public: + static void FlushAll() + { + std::lock_guard lock(mutex_); + + for(auto* trace_buffer = head_; trace_buffer != nullptr; trace_buffer = trace_buffer->next_) + trace_buffer->Flush(); + } + + static void Register(TraceBufferBase* elem) + { + std::lock_guard lock(mutex_); + + auto** prev_ptr = &head_; + while(*prev_ptr != nullptr && elem->priority_ > (*prev_ptr)->priority_) + prev_ptr = &(*prev_ptr)->next_; + + elem->next_ = *prev_ptr; + *prev_ptr = elem; + } + + static void Unregister(TraceBufferBase* elem) + { + std::lock_guard lock(mutex_); + + auto** prev_ptr = &head_; + while(*prev_ptr != nullptr && *prev_ptr != elem) + prev_ptr = &(*prev_ptr)->next_; + + assert(*prev_ptr != nullptr && "elem is not in the list"); + *prev_ptr = elem->next_; + } + + TraceBufferBase(std::string name, int priority) + : name_(std::move(name)) + , priority_(priority) + , next_(nullptr) + {} + + TraceBufferBase(const TraceBufferBase&) = delete; + TraceBufferBase& operator=(const TraceBufferBase&) = delete; + + virtual ~TraceBufferBase() { Unregister(this); } + + virtual void Flush() = 0; + + std::string name() && { return std::move(name_); } + const std::string& name() const& { return name_; } + +private: + const std::string name_; + const int priority_; + TraceBufferBase* next_; + + static TraceBufferBase* head_; + static std::mutex mutex_; +}; + +enum TraceEntryState +{ + TRACE_ENTRY_INVALID = 0, + TRACE_ENTRY_INIT = 1, + TRACE_ENTRY_COMPLETE = 2 +}; + +template > +class TraceBuffer : protected TraceBufferBase +{ +public: + using callback_t = std::function; + + TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0) + : TraceBufferBase(std::move(name), priority) + , flush_callback_(std::move(flush_callback)) + , size_(size) + { + assert(size_ != 0 && "cannot create an empty trace buffer"); + + Entry* write_buffer = allocator_.allocate(size_); + assert(write_buffer != nullptr); + buffer_list_.push_back(write_buffer); + + read_index_ = 0; + write_index_ = {0, write_buffer}; + + AllocateFreeBuffer(); + + // Add this instance to the link list of all trace buffers in the process. + Register(this); + } + + ~TraceBuffer() override + { + // Flush the remaining records. After flushing, there should not be any records left in the + // trace buffer. + Flush(); + assert(read_index_ == write_index_.load().index); + + // Acquire both the writer and worker lock as we are accessing shared variables they + // protect. + std::unique_lock writer_lock(write_mutex_, std::defer_lock); + std::unique_lock worker_lock(worker_mutex_, std::defer_lock); + std::lock(writer_lock, worker_lock); + + // Deallocate the buffers. + allocator_.deallocate(write_index_.load().buffer, size_); + allocator_.deallocate(free_buffer_, size_); + + // Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional + // after waking up, and exits if it does not have a value. + if(worker_thread_) + { + std::thread worker_thread = std::move(worker_thread_.value()); + { + // Tell the worker thread loop to exit. + worker_thread_.reset(); + free_buffer_ = nullptr; + worker_cond_.notify_one(); + } + // Release the worker lock to allow the worker thread to exit. + worker_lock.unlock(); + worker_thread.join(); + } + } + + // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are + // monotonically increasing indices, with read_pointer % size always indexing inside the first + // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with + // the next invocation after changing its state to 'complete'. + void Flush() override + { + std::lock_guard lock(write_mutex_); + auto write_index = write_index_.load(std::memory_order_relaxed); + + for(auto it = buffer_list_.begin(); it != buffer_list_.end();) + { + auto end_of_buffer = read_index_ - read_index_ % size_ + size_; + + while(read_index_ < std::min(write_index.index, end_of_buffer)) + { + Entry* entry = &(*it)[read_index_ % size_]; + + // The entry is not yet complete, stop flushing here. + if(entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return; + + flush_callback_(entry); + entry->~Entry(); + + ++read_index_; + } + + // The buffer is still in use or the read pointer did not reach the end of the buffer. + if(*it == write_index.buffer || read_index_ != end_of_buffer) return; + + // All entries in the current buffer are now processed. Destroy the buffer and move onto + // the next buffer in the list. + allocator_.deallocate(*it, size_); + it = buffer_list_.erase(it); + } + } + + template + Entry& Emplace(Args... args) + { + return *new(GetEntry()) Entry(std::forward(args)...); + } + +private: + Entry* GetEntry() + { + auto current = write_index_.load(std::memory_order_relaxed); + + while(true) + { + // If the pointer is at the end of the current buffer, switch to the available free + // buffer and notify the worker thread to allocate a new buffer. + if(current.index != 0 && current.index % size_ == 0) + { + std::lock_guard lock(write_mutex_); + + // If the worker thread wasn't already started, start it now. This avoids starting a + // new thread when the trace buffer is created. + if(!worker_thread_) + { + std::promise ready; + auto future = ready.get_future(); + { + std::lock_guard worker_lock(worker_mutex_); + worker_thread_.emplace( + &TraceBuffer::WorkerThreadLoop, this, std::move(ready)); + } + future.wait(); + } + + // Re-check the pointer overflow under the writer lock, another thread could have + // beaten us to it and already bumped the write_index_. + current = write_index_.load(std::memory_order_relaxed); + if(current.index % size_ == 0) + { + std::unique_lock worker_lock(worker_mutex_); + + // Wait for the free buffer to become available. + worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; }); + + current.buffer = free_buffer_; + buffer_list_.push_back(current.buffer); + write_index_.store({current.index + 1, current.buffer}, + std::memory_order_relaxed); + + // Tell the worker thread to allocate a new free buffer. + free_buffer_ = nullptr; + worker_cond_.notify_one(); + + // We successfully allocated a new buffer, return the first element. + return ¤t.buffer[0]; + } + } + + if(write_index_.compare_exchange_weak( + current, {current.index + 1, current.buffer}, std::memory_order_relaxed)) + return ¤t.buffer[current.index % size_]; + } + } + + void AllocateFreeBuffer() + { + assert(free_buffer_ == nullptr); + + free_buffer_ = allocator_.allocate(size_); + assert(free_buffer_ != nullptr); + + for(size_t i = 0; i < size_; ++i) + free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed); + } + + void WorkerThreadLoop(std::promise ready) + { + std::unique_lock lock(worker_mutex_); + + // This worker thread is now ready to accept work. + ready.set_value(); + + while(true) + { + worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; }); + if(!worker_thread_) break; + AllocateFreeBuffer(); + worker_cond_.notify_one(); + } + } + + // The WriteIndex is used to store both the index and the buffer associated with that index (the + // buffer contains the trace buffer records at [index - index % size, index - index % size_t + + // size_ - 1]) in a single atomic variable. + struct WriteIndex + { + uint64_t index; + Entry* buffer; + }; + + const callback_t flush_callback_; + const uint64_t size_; + + uint64_t read_index_; // The index of the next record to flush. + std::atomic write_index_; // The index of the next record that could be written. + Entry* free_buffer_{nullptr}; // The next available free buffer. + + std::optional worker_thread_; + std::mutex worker_mutex_; + std::condition_variable worker_cond_; + + std::mutex write_mutex_; + std::list buffer_list_; + Allocator allocator_; +}; + +#define TRACE_BUFFER_INSTANTIATE() \ + TraceBufferBase* TraceBufferBase::head_ = nullptr; \ + std::mutex TraceBufferBase::mutex_; diff --git a/source/lib/rocprofiler/CMakeLists.txt b/source/lib/rocprofiler/CMakeLists.txt index c75c89aa73..37ee103c8d 100644 --- a/source/lib/rocprofiler/CMakeLists.txt +++ b/source/lib/rocprofiler/CMakeLists.txt @@ -45,9 +45,12 @@ target_link_libraries( rocprofiler-object-library PUBLIC rocprofiler::rocprofiler-headers rocprofiler::rocprofiler-hsa-runtime rocprofiler::rocprofiler-hip - PRIVATE rocprofiler::rocprofiler-build-flags rocprofiler::rocprofiler-memcheck - rocprofiler::rocprofiler-common-library rocprofiler::rocprofiler-stdcxxfs - rocprofiler::rocprofiler-dl rocprofiler::rocprofiler-amd-comgr) + PRIVATE rocprofiler::rocprofiler-build-flags + rocprofiler::rocprofiler-memcheck + rocprofiler::rocprofiler-common-library + rocprofiler::rocprofiler-cxx-filesystem + rocprofiler::rocprofiler-dl + rocprofiler::rocprofiler-amd-comgr) target_compile_definitions(rocprofiler-object-library PRIVATE rocprofiler_EXPORTS=1) @@ -71,7 +74,7 @@ target_link_libraries( PRIVATE rocprofiler::rocprofiler-build-flags rocprofiler::rocprofiler-memcheck rocprofiler::rocprofiler-common-library - rocprofiler::rocprofiler-stdcxxfs + rocprofiler::rocprofiler-cxx-filesystem rocprofiler::rocprofiler-dl rocprofiler::rocprofiler-amd-comgr rocprofiler::rocprofiler-object-library) diff --git a/source/lib/rocprofiler/agent.cpp b/source/lib/rocprofiler/agent.cpp index d15ba6ecd3..40265c3edf 100644 --- a/source/lib/rocprofiler/agent.cpp +++ b/source/lib/rocprofiler/agent.cpp @@ -24,6 +24,7 @@ #include #include +#include "lib/common/filesystem.hpp" #include "lib/rocprofiler/agent.hpp" #include "lib/rocprofiler/hsa/agent_cache.hpp" @@ -33,7 +34,6 @@ #include #include -#include #include #include #include @@ -43,14 +43,14 @@ #include #include +namespace fs = rocprofiler::common::filesystem; + namespace rocprofiler { namespace agent { namespace { -namespace fs = ::std::filesystem; - struct cpu_info { long processor = -1; @@ -737,9 +737,9 @@ construct_agent_cache(::HsaApiTable* table) }; auto is_duplicate = [](const auto* agent_v) { - for(const auto& itr : get_agent_caches()) + for(const auto& aitr : get_agent_caches()) { - if(itr == agent_v) return true; + if(aitr == agent_v) return true; } return false; }; diff --git a/source/lib/rocprofiler/aql/tests/aql_test.cpp b/source/lib/rocprofiler/aql/tests/aql_test.cpp index ed914ec370..f39a58e6c5 100644 --- a/source/lib/rocprofiler/aql/tests/aql_test.cpp +++ b/source/lib/rocprofiler/aql/tests/aql_test.cpp @@ -43,12 +43,17 @@ namespace rocprofiler AmdExtTable get_ext_table() { - return {.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info, - .hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools, - .hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate, - .hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free, - .hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info, - .hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access}; + static auto _v = []() { + auto val = AmdExtTable{}; + val.hsa_amd_memory_pool_get_info_fn = hsa_amd_memory_pool_get_info; + val.hsa_amd_agent_iterate_memory_pools_fn = hsa_amd_agent_iterate_memory_pools; + val.hsa_amd_memory_pool_allocate_fn = hsa_amd_memory_pool_allocate; + val.hsa_amd_memory_pool_free_fn = hsa_amd_memory_pool_free; + val.hsa_amd_agent_memory_pool_get_info_fn = hsa_amd_agent_memory_pool_get_info; + val.hsa_amd_agents_allow_access_fn = hsa_amd_agents_allow_access; + return val; + }(); + return _v; } auto diff --git a/source/lib/rocprofiler/counters/metrics.cpp b/source/lib/rocprofiler/counters/metrics.cpp index aa4412b5f6..7ae04f56fe 100644 --- a/source/lib/rocprofiler/counters/metrics.cpp +++ b/source/lib/rocprofiler/counters/metrics.cpp @@ -24,6 +24,8 @@ #include +#include "lib/common/defines.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/common/xml.hpp" @@ -36,7 +38,6 @@ #include #include #include -#include #include namespace rocprofiler @@ -143,7 +144,7 @@ findViaInstallPath(const std::string& filename) DLOG(INFO) << filename << " is being looked up via install path"; if(dladdr(reinterpret_cast(rocprofiler_query_available_agents), &dl_info) != 0) { - return std::filesystem::path{dl_info.dli_fname}.parent_path().parent_path() / + return common::filesystem::path{dl_info.dli_fname}.parent_path().parent_path() / fmt::format("share/rocprofiler/{}", filename); } return filename; @@ -155,7 +156,7 @@ findViaEnvironment(const std::string& filename) if(const char* metrics_path = nullptr; (metrics_path = getenv("ROCPROFILER_METRICS_PATH"))) { DLOG(INFO) << filename << " is being looked up via env variable ROCPROFILER_METRICS_PATH"; - return std::filesystem::path{std::string{metrics_path}} / filename; + return common::filesystem::path{std::string{metrics_path}} / filename; } // No environment variable, lookup via install path return findViaInstallPath(filename); diff --git a/source/lib/rocprofiler/counters/parser/tests/CMakeLists.txt b/source/lib/rocprofiler/counters/parser/tests/CMakeLists.txt index a1c256772b..8abf59cf53 100644 --- a/source/lib/rocprofiler/counters/parser/tests/CMakeLists.txt +++ b/source/lib/rocprofiler/counters/parser/tests/CMakeLists.txt @@ -6,7 +6,9 @@ set(ROCPROFILER_LIB_PARSER_TEST_SOURCES "parser_test.cpp") add_executable(parser-test) -target_sources(parser-test PRIVATE ${ROCPROFILER_LIB_PARSER_TEST_SOURCES}) +target_sources( + parser-test PRIVATE ${ROCPROFILER_LIB_PARSER_TEST_SOURCES} + $) target_link_libraries( parser-test diff --git a/source/lib/rocprofiler/hsa/agent_cache.cpp b/source/lib/rocprofiler/hsa/agent_cache.cpp index e721b431ae..67d6f052b7 100644 --- a/source/lib/rocprofiler/hsa/agent_cache.cpp +++ b/source/lib/rocprofiler/hsa/agent_cache.cpp @@ -23,12 +23,13 @@ #include "agent_cache.hpp" #include -#include #include #include #include #include +#include "lib/common/defines.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" diff --git a/source/lib/rocprofiler/hsa/queue_controller.cpp b/source/lib/rocprofiler/hsa/queue_controller.cpp index dea8d859ba..16722f7c9f 100644 --- a/source/lib/rocprofiler/hsa/queue_controller.cpp +++ b/source/lib/rocprofiler/hsa/queue_controller.cpp @@ -74,6 +74,10 @@ destroy_queue(hsa_queue_t* hsa_queue) get_queue_controller().destory_queue(hsa_queue); return HSA_STATUS_SUCCESS; } + +constexpr rocprofiler_agent_t default_agent = + rocprofiler_agent_t{sizeof(rocprofiler_agent_t), + rocprofiler_agent_id_t{std::numeric_limits::max()}}; } // namespace void @@ -87,7 +91,7 @@ QueueController::add_queue(hsa_queue_t* id, std::unique_ptr queue) for(const auto& [cbid, cb_tuple] : callbacks) { auto& [agent, qcb, ccb] = cb_tuple; - if(agent.id.handle == ALL_AGENTS.id.handle || agent.id.handle == agent_id) + if(agent.id.handle == default_agent.id.handle || agent.id.handle == agent_id) { map[id]->register_callback(cbid, qcb, ccb); } @@ -117,7 +121,7 @@ QueueController::add_callback(std::optional agent, } else { - cb_cache[client_id] = std::tuple(ALL_AGENTS, qcb, ccb); + cb_cache[client_id] = std::tuple(default_agent, qcb, ccb); } client_id++; diff --git a/source/lib/rocprofiler/hsa/queue_controller.hpp b/source/lib/rocprofiler/hsa/queue_controller.hpp index 7167f1bb5f..81ca9da6c4 100644 --- a/source/lib/rocprofiler/hsa/queue_controller.hpp +++ b/source/lib/rocprofiler/hsa/queue_controller.hpp @@ -66,8 +66,6 @@ public: const Queue* get_queue(const hsa_queue_t&) const; private: - static constexpr rocprofiler_agent_t ALL_AGENTS{ - .id = {.handle = std::numeric_limits::max()}}; using agent_callback_tuple_t = std::tuple; using queue_map_t = std::unordered_map>; diff --git a/source/lib/rocprofiler/internal_threading.cpp b/source/lib/rocprofiler/internal_threading.cpp index fbd96ce6bb..7c37ae584e 100644 --- a/source/lib/rocprofiler/internal_threading.cpp +++ b/source/lib/rocprofiler/internal_threading.cpp @@ -227,8 +227,10 @@ create_callback_thread() // this will be index after emplace_back auto idx = get_thread_pools().size(); - auto& thr_pool = get_thread_pools().emplace_back( - std::make_shared(thread_pool_config_t{.pool_size = 1})); + thread_pool_config_t pool_config = {}; + pool_config.pool_size = 1; + + auto& thr_pool = get_thread_pools().emplace_back(std::make_shared(pool_config)); if(!get_task_groups()) get_task_groups() = new task_group_vec_t{}; diff --git a/source/lib/rocprofiler/pc_sampling.cpp b/source/lib/rocprofiler/pc_sampling.cpp index 3a35fdd20c..75d3c7d7a1 100644 --- a/source/lib/rocprofiler/pc_sampling.cpp +++ b/source/lib/rocprofiler/pc_sampling.cpp @@ -36,7 +36,7 @@ consume_args(Tp&&...) extern "C" { rocprofiler_status_t rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id, - rocprofiler_agent_t agent, + rocprofiler_agent_id_t agent_id, rocprofiler_pc_sampling_method_t method, rocprofiler_pc_sampling_unit_t unit, uint64_t interval, @@ -45,7 +45,17 @@ rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t conte if(rocprofiler::registration::get_init_status() > 0) return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED; - consume_args(context_id, agent, method, unit, interval, buffer_id); + consume_args(context_id, agent_id, method, unit, interval, buffer_id); + return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; +} + +rocprofiler_status_t ROCPROFILER_API +rocprofiler_query_pc_sampling_agent_configurations( + rocprofiler_agent_id_t agent_id, + rocprofiler_available_pc_sampling_configurations_cb_t cb, + void* user_data) +{ + consume_args(agent_id, cb, user_data); return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED; } } diff --git a/source/lib/rocprofiler/pc_sampling/parser/tests/pcs_parser.cpp b/source/lib/rocprofiler/pc_sampling/parser/tests/pcs_parser.cpp index c9c43bac39..5f5ec1e28c 100644 --- a/source/lib/rocprofiler/pc_sampling/parser/tests/pcs_parser.cpp +++ b/source/lib/rocprofiler/pc_sampling/parser/tests/pcs_parser.cpp @@ -20,7 +20,12 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#ifdef NDEBUG +# undef NDEBUG +#endif + #include +#include #include #include "lib/rocprofiler/pc_sampling/parser/pc_record_interface.hpp" @@ -364,15 +369,16 @@ TEST(pcs_parser_correlation_id, queue_hammer) assert(all_allocations.size() == NUM_ACTIONS && "QueueHammer test: Incorrect number of callbacks"); - for(auto sb = 0ul; sb < all_allocations.size(); sb++) + for(auto& all_allocation : all_allocations) { - pcsample_v1_t* samples = all_allocations[sb].first; - size_t num_samples = all_allocations[sb].second; + pcsample_v1_t* samples = all_allocation.first; + size_t num_samples = all_allocation.second; assert(num_samples == NUM_QUEUES && "QueueHammer: Incorrect number of samples"); assert(check_samples(samples, num_samples) && "QueueHammer: parsed ID does not match correct ID"); delete[] samples; + (void) num_samples; } } @@ -414,6 +420,7 @@ TEST(pcs_parser_correlation_id, multi_buffer) delete[] all_allocations[0].first; delete[] all_allocations[1].first; + (void) sample; }; /** diff --git a/source/lib/rocprofiler/tests/details/agent.cpp b/source/lib/rocprofiler/tests/details/agent.cpp index 6291234444..07ead0db9e 100644 --- a/source/lib/rocprofiler/tests/details/agent.cpp +++ b/source/lib/rocprofiler/tests/details/agent.cpp @@ -21,10 +21,10 @@ // THE SOFTWARE. #include "lib/rocprofiler/tests/details/agent.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/utility.hpp" #include -#include #include #include @@ -64,8 +64,6 @@ } \ } -// namespace fs = std::filesystem; - namespace rocprofiler { namespace test diff --git a/source/lib/rocprofiler/tests/external_correlation.cpp b/source/lib/rocprofiler/tests/external_correlation.cpp index d3387dc686..4bf0242ade 100644 --- a/source/lib/rocprofiler/tests/external_correlation.cpp +++ b/source/lib/rocprofiler/tests/external_correlation.cpp @@ -23,7 +23,9 @@ #include #include +#include "lib/common/defines.hpp" #include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/units.hpp" #include "lib/common/utility.hpp" #include "rocprofiler/external_correlation.h" @@ -35,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/source/lib/rocprofiler/tests/intercept_table.cpp b/source/lib/rocprofiler/tests/intercept_table.cpp index 13cc106aba..63681af7f7 100644 --- a/source/lib/rocprofiler/tests/intercept_table.cpp +++ b/source/lib/rocprofiler/tests/intercept_table.cpp @@ -25,7 +25,9 @@ #include #include +#include "lib/common/defines.hpp" #include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/units.hpp" #include "lib/common/utility.hpp" @@ -37,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/source/lib/rocprofiler/tests/registration.cpp b/source/lib/rocprofiler/tests/registration.cpp index 84e349bfd2..de99cffbfc 100644 --- a/source/lib/rocprofiler/tests/registration.cpp +++ b/source/lib/rocprofiler/tests/registration.cpp @@ -23,7 +23,9 @@ #include #include +#include "lib/common/defines.hpp" #include "lib/common/environment.hpp" +#include "lib/common/filesystem.hpp" #include "lib/common/units.hpp" #include "lib/common/utility.hpp" @@ -35,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -233,7 +234,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, struct info_data { uint64_t num_args = 0; - std::stringstream arg_ss = {}; + std::stringstream arg_ss; } info_data_v; auto info_data_cb = [](rocprofiler_callback_tracing_kind_t, diff --git a/source/lib/tests/buffering/buffering-save-load.cpp b/source/lib/tests/buffering/buffering-save-load.cpp index 5abdbf837d..3aef8f7ef9 100644 --- a/source/lib/tests/buffering/buffering-save-load.cpp +++ b/source/lib/tests/buffering/buffering-save-load.cpp @@ -111,11 +111,7 @@ launch_threads(record_header_buffer_t& _buf, mpl::type_list, std::index_sequence _seq) { - ((std::thread{[_seq](auto* _buf_v, auto* _barrier_v) { launch(_buf_v, _barrier_v, _seq); }, - &_buf, - &_done_barrier} - .detach()), - ...); + ((std::thread{launch, &_buf, &_done_barrier, _seq}.detach()), ...); } // computes the size of every raw_array size for a given type diff --git a/tests/kernel-tracing/CMakeLists.txt b/tests/kernel-tracing/CMakeLists.txt index 7066470cd3..2ae41e7bfb 100644 --- a/tests/kernel-tracing/CMakeLists.txt +++ b/tests/kernel-tracing/CMakeLists.txt @@ -15,12 +15,13 @@ target_link_libraries( kernel-tracing-test-tool PRIVATE rocprofiler::rocprofiler rocprofiler::cereal $) -set_target_properties(kernel-tracing-test-tool PROPERTIES INSTALL_RPATH "\$ORIGIN" - INSTALL_RPATH_USE_LINK_PATH ON) +set_target_properties( + kernel-tracing-test-tool PROPERTIES INSTALL_RPATH "\$ORIGIN:\$ORIGIN/.." + INSTALL_RPATH_USE_LINK_PATH ON) install( TARGETS kernel-tracing-test-tool - DESTINATION lib + DESTINATION lib/rocprofiler COMPONENT rocprofiler-test-libs) if(ROCPROFILER_MEMCHECK_PRELOAD_ENV) diff --git a/tests/kernel-tracing/kernel-tracing.cpp b/tests/kernel-tracing/kernel-tracing.cpp index 789b0d7c5c..ef3f6dcc8d 100644 --- a/tests/kernel-tracing/kernel-tracing.cpp +++ b/tests/kernel-tracing/kernel-tracing.cpp @@ -31,6 +31,7 @@ * @brief Test rocprofiler tool */ +#include "lib/common/filesystem.hpp" #include "serialization.hpp" #include @@ -50,7 +51,6 @@ #include #include #include -#include #include #include #include