Adding rocprofilerv2

Change-Id: Ic0cc280ba207d2b8f6ccae1cd4ac3184152fc1ad


[ROCm/rocprofiler commit: 8032adb64f]
This commit is contained in:
Ammar ELWazir
2023-02-03 12:31:39 -06:00
والد b23a2f3029
کامیت de4abd0d0f
263فایلهای تغییر یافته به همراه607729 افزوده شده و 307 حذف شده
@@ -0,0 +1,60 @@
---
# clang-format settings for C++ sources: start from the Google preset and
# override individual options below (e.g. 100-column limit, 2-space indent,
# left-aligned pointers).
Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -1
ConstructorInitializerIndentWidth: 4
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: false
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
ColumnLimit: 100
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
DerivePointerAlignment: false
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard: Auto
IndentWidth: 2
TabWidth: 8
UseTab: Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas: '^ IWYU pragma:'
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
SortIncludes: false
...
@@ -0,0 +1,4 @@
build
compile_commands.json
.cache
.DS_Store
@@ -20,18 +20,32 @@
# THE SOFTWARE.
################################################################################
cmake_minimum_required ( VERSION 2.8.12 )
## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
cmake_minimum_required ( VERSION 3.18.0 )
## Set module name and project name.
set ( ROCPROFILER_NAME "rocprofiler" )
set ( ROCPROFILER_TARGET "${ROCPROFILER_NAME}64" )
set ( ROCPROFILER_LIBRARY "lib${ROCPROFILER_TARGET}" )
project ( ${ROCPROFILER_NAME} )
project(${ROCPROFILER_NAME} VERSION 2.0.0)
include(GNUInstallDirs)
# set default ROCM_PATH
if(NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory")
endif()
## Build is not supported on the Windows platform
if(WIN32)
message(FATAL_ERROR "Windows build is not supported.")
endif()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_compile_options(-Wall)
set(THREADS_PREFER_PTHREAD_FLAG ON)
## Adding default path cmake modules
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
## Include common cmake modules
@@ -40,7 +54,7 @@ include ( utils )
include ( env )
## Setup the package version.
get_version ( "1.0.0" )
get_version ( "2.0.0" )
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
@@ -68,11 +82,39 @@ set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
set ( LIB_DIR "${ROOT_DIR}/src" )
set ( TEST_DIR "${ROOT_DIR}/test" )
find_package(amd_comgr REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH} PATH_SUFFIXES lib/cmake/amd_comgr)
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
link_libraries(amd_comgr)
find_package(Threads REQUIRED)
find_package(hsa-runtime64 REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH})
find_package(HIP REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH})
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
PATH_SUFFIXES hsa
NO_DEFAULT_PATH
REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
include_directories(${HSA_RUNTIME_INC_PATH})
if(NOT DEFINED LIBRARY_TYPE)
set(LIBRARY_TYPE SHARED)
endif()
## Enable tracing API
if (NOT USE_PROF_API)
set(USE_PROF_API 1)
endif()
configure_file(rocprofv2 ${PROJECT_BINARY_DIR} COPYONLY)
install(FILES
${PROJECT_SOURCE_DIR}/rocprofv2
DESTINATION ${CMAKE_INSTALL_BINDIR}
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
COMPONENT runtime)
# Protocol header lookup
set(PROF_API_HEADER_NAME prof_protocol.h)
if(USE_PROF_API EQUAL 1)
@@ -87,26 +129,27 @@ if(USE_PROF_API EQUAL 1)
if(NOT PROF_API_HEADER_DIR)
MESSAGE(FATAL_ERROR "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to ${PROF_API_HEADER_NAME} header>")
else()
add_definitions(-DUSE_PROF_API=1)
include_directories(${PROF_API_HEADER_DIR})
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}/${PROF_API_HEADER_NAME}")
endif()
endif()
## Build library
include ( ${LIB_DIR}/CMakeLists.txt )
## Build libraries
add_subdirectory(src)
## Set the VERSION and SOVERSION values
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
if(${LIBRARY_TYPE} STREQUAL SHARED)
## Build samples
add_subdirectory(samples)
## If the library is a release, strip the target library
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
add_custom_command ( TARGET ${ROCPROFILER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
endif ()
## Build tests
add_subdirectory(tests)
endif()
## Build Plugins
add_subdirectory(plugin)
## Build tests
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
add_subdirectory(${TEST_DIR} ${PROJECT_BINARY_DIR}/test)
## Installation and packaging
set ( DEST_NAME ${ROCPROFILER_NAME} )
@@ -130,18 +173,8 @@ message ( "CMake-install-prefix: ${CMAKE_INSTALL_PREFIX}" )
message ( "CPack-install-prefix: ${CPACK_PACKAGING_INSTALL_PREFIX}" )
message ( "-----------Dest-name: ${DEST_NAME}" )
## set components
set ( CPACK_COMPONENTS_ALL runtime dev )
## Enable Component Install
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_DEB_COMPONENT_INSTALL ON)
## Install libraries: Non versioned lib file in dev package
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY )
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP )
## Install headers
install ( FILES
${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h
${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${DEST_NAME}
COMPONENT dev )
@@ -178,77 +211,114 @@ install ( FILES ${PROJECT_BINARY_DIR}/test/rocprof-ctrl DESTINATION ${CMAKE_INST
COMPONENT runtime )
# File reorg Backward compatibility
option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
if(FILE_REORG_BACKWARD_COMPATIBILITY)
include (rocprofiler-backward-compat.cmake)
endif()
# option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
# if(FILE_REORG_BACKWARD_COMPATIBILITY)
# include (rocprofiler-backward-compat.cmake)
# endif()
## Packaging directives
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR e.g. DEB;RPM" )
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}" )
set ( CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>" )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCPROFILER library for AMD HSA runtime API extension support" )
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
if(${LIBRARY_TYPE} STREQUAL SHARED)
# # Installation and packaging
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
get_filename_component(DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME)
get_filename_component(DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY)
set(CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR})
endif()
# Install license file
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime)
message("-----------Dest-name: ${DEST_NAME}")
message("------Install-prefix: ${CMAKE_INSTALL_PREFIX}")
message("-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}")
if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
message ( "Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}" )
endif()
## Packaging directives
set(CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR DEB;RPM")
set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
set(CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCMTOOLS library")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
## Debian package specific variables
if ( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
else()
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
endif()
message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" )
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}")
endif()
# # Install license file
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
DESTINATION ${CMAKE_INSTALL_DOCDIR}
COMPONENT runtime)
# # Debian package specific variables
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()
message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core")
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev" )
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core" )
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
set(CPACK_DEBIAN_SAMPLES_PACKAGE_NAME "${PROJECT_NAME}-samples")
set(CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
set(CPACK_DEBIAN_DOCS_PACKAGE_NAME "${PROJECT_NAME}-docs")
set(CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
set(CPACK_DEBIAN_PLUGINS_PACKAGE_NAME "${PROJECT_NAME}-plugins")
set(CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${ROCPROFILER_NAME}" )
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core" )
set ( CPACK_DEBIAN_DEV_PACKAGE_NAME "${ROCPROFILER_NAME}-dev" )
set ( CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${ROCPROFILER_NAME}, hsa-rocr-dev, rocm-core" )
## Process the Debian install/remove scripts to update the CPACK variables
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_CONTROL_EXTRA "DEBIAN/postinst;DEBIAN/prerm" )
## RPM package specific variables
if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
else()
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
endif()
message ( "Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}" )
# # RPM package specific variables
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()
set( CPACK_RPM_PACKAGE_LICENSE "MIT" )
message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
## 'dist' breaks manual builds on debian systems due to empty Provides
execute_process( COMMAND rpm --eval %{?dist}
RESULT_VARIABLE PROC_RESULT
OUTPUT_VARIABLE EVAL_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE )
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
endif()
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
# # 'dist' breaks manual builds on debian systems due to empty Provides
execute_process(COMMAND rpm --eval %{?dist}
RESULT_VARIABLE PROC_RESULT
OUTPUT_VARIABLE EVAL_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE)
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
endif()
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "hsa-rocr-dev, rocm-core")
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-dev")
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-dev")
set(CPACK_RPM_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
set(CPACK_RPM_SAMPLES_PACKAGE_NAME "${PROJECT_NAME}-samples")
set(CPACK_RPM_SAMPLES_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
set(CPACK_RPM_DOCS_PACKAGE_NAME "${PROJECT_NAME}-docs")
set(CPACK_RPM_DOCS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
set(CPACK_RPM_PLUGINS_PACKAGE_NAME "${PROJECT_NAME}-plugins")
set(CPACK_RPM_PLUGINS_PACKAGE_REQUIRES "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}")
set ( CPACK_RPM_RUNTIME_PACKAGE_NAME "${ROCPROFILER_NAME}" )
set ( CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "hsa-rocr-devel, rocm-core" )
set ( CPACK_RPM_DEV_PACKAGE_NAME "${ROCPROFILER_NAME}-devel" )
set ( CPACK_RPM_DEV_PACKAGE_REQUIRES "${ROCPROFILER_NAME}, hsa-rocr-devel, rocm-core" )
set ( CPACK_RPM_DEV_PACKAGE_PROVIDES "${ROCPROFILER_NAME}-dev" )
set ( CPACK_RPM_DEV_PACKAGE_OBSOLETES "${ROCPROFILER_NAME}-dev" )
## Process the Rpm install/remove scripts to update the CPACK variables
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
@@ -256,11 +326,85 @@ configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
set ( CPACK_RPM_RUNTIME_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
set ( CPACK_RPM_RUNTIME_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_TESTS_PACKAGE_REQUIRES ${CPACK_RPM_TESTS_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_SAMPLES_PACKAGE_REQUIRES ${CPACK_RPM_SAMPLES_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DOCS_PACKAGE_REQUIRES ${CPACK_RPM_DOCS_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PLUGINS_PACKAGE_REQUIRES ${CPACK_RPM_PLUGINS_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS ${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS ${CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS ${CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS ${CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS})
endif()
set(CPACK_COMPONENTS_ALL runtime dev tests docs samples plugins)
include(CPack)
cpack_add_component(runtime
DISPLAY_NAME "Runtime"
DESCRIPTION "Dynamic libraries for the ROCProfiler")
cpack_add_component(dev
DISPLAY_NAME "Development"
DESCRIPTION "Development needed header files for ROCProfiler"
DEPENDS runtime)
cpack_add_component(plugins
DISPLAY_NAME "ROCProfile Plugins"
DESCRIPTION "Plugins for handling ROCProfiler data output"
DEPENDS runtime)
cpack_add_component(tests
DISPLAY_NAME "Tests"
DESCRIPTION "Tests for the ROCProfiler"
DEPENDS dev)
cpack_add_component(samples
DISPLAY_NAME "Samples"
DESCRIPTION "Samples for the ROCProfiler"
DEPENDS dev)
cpack_add_component(docs
DISPLAY_NAME "Documentation"
DESCRIPTION "Documentation for the ROCProfiler API"
DEPENDS dev)
endif()
include ( CPack )
# Optional API documentation. When Doxygen is found, define a `doc` target that
# runs Doxygen on the configured Doxyfile and then builds the LaTeX output into
# a PDF, and install whatever was generated under the `docs` component.
find_package(Doxygen)
if(DOXYGEN_FOUND)
  # Doxyfile template in the source tree and its configured copy in the build tree.
  set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in)
  set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)

  # @ONLY: substitute @VAR@ references only; ${...} text in the template is kept as-is.
  configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)

  # MAIN_DEPENDENCY accepts a single file, so only the configured Doxyfile is
  # named there; the template and the documented headers are plain DEPENDS.
  # VERBATIM keeps argument escaping identical across generators/platforms.
  add_custom_command(
    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html
           ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf
    COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
    COMMAND make -C ${CMAKE_CURRENT_BINARY_DIR}/doc/latex pdf
    MAIN_DEPENDENCY ${DOXYGEN_OUT}
    DEPENDS ${DOXYGEN_IN}
            ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h
            ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_plugin.h
    COMMENT "Generating documentation"
    VERBATIM)

  # Not part of the default build; run explicitly by building the `doc` target.
  add_custom_target(doc DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html
                                ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf)

  # OPTIONAL: installation does not fail when the docs were never generated.
  install(FILES
    "${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf"
    DESTINATION ${CMAKE_INSTALL_DOCDIR}
    RENAME "${PROJECT_NAME}.pdf"
    OPTIONAL
    COMPONENT docs)

  install(DIRECTORY
    "${CMAKE_CURRENT_BINARY_DIR}/doc/html/"
    DESTINATION ${CMAKE_INSTALL_DATADIR}/html/${PROJECT_NAME}
    OPTIONAL
    COMPONENT docs)
endif()
+47 -13
مشاهده پرونده
@@ -22,19 +22,54 @@
# IN THE SOFTWARE.
################################################################################
SRC_DIR=`dirname $0`
TO_CLEAN=yes
SRC_DIR=$(dirname "$0")
COMPONENT="rocprofiler"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
# Print the supported command-line options, then terminate the script.
# Always exits with status 1.
usage() {
  echo -e "ROCProfiler Build Script Usage:"
  echo -e "\nTo run ./run.sh PARAMs, PARAMs can be the following:"
  echo -e "-h | --help For showing this message"
  echo -e "-b | --build For compiling"
  echo -e "-cb | --clean-build For full clean build"
  # Short flag corrected to -acb so the help text matches the option parser.
  echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
  exit 1
}
# Consume leading option flags one at a time; stop at the first argument that
# does not start with a dash (or when no arguments remain).
while true; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      # Incremental build: keep the existing build directory.
      TO_CLEAN=no
      shift
      ;;
    -acb|--asan-clean-build)
      ASAN=True
      TO_CLEAN=yes
      shift
      ;;
    -cb|--clean-build)
      TO_CLEAN=yes
      shift
      ;;
    -*)
      # Any other dash-prefixed argument is rejected.
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
umask 022
if [ -z "$ROCPROFILER_ROOT" ]; then ROCPROFILER_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$SRC_DIR/build; fi
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=build; fi
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="RelWithDebInfo"; fi
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="${ROCM_PATH}/${COMPONENT}"; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH="${ROCM_PATH}/include/hsa:${ROCM_PATH}"; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
if [ -z "$ASAN" ] ; then ASAN=False; fi
ROCPROFILER_ROOT=$(cd $ROCPROFILER_ROOT && echo $PWD)
@@ -43,15 +78,14 @@ mkdir -p $BUILD_DIR
pushd $BUILD_DIR
cmake \
-DCMAKE_MODULE_PATH=$ROCPROFILER_ROOT/cmake_modules \
-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DCMAKE_PREFIX_PATH="${PREFIX_PATH}" \
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
-DCPACK_GENERATOR="${CPACKGEN:-"DEB;RPM"}" \
-DCMAKE_MODULE_PATH=$ROCM_PATH/hip/cmake \
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
-DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
$ROCPROFILER_ROOT
make -j
make mytest
make package
exit 0
@@ -0,0 +1,60 @@
---
# clang-format settings for C++ sources: start from the Google preset and
# override individual options below (e.g. 100-column limit, 2-space indent,
# left-aligned pointers).
Language: Cpp
BasedOnStyle: Google
AccessModifierOffset: -1
ConstructorInitializerIndentWidth: 4
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: false
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
ColumnLimit: 100
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
DerivePointerAlignment: false
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard: Auto
IndentWidth: 2
TabWidth: 8
UseTab: Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas: '^ IWYU pragma:'
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
SortIncludes: false
...
+91
مشاهده پرونده
@@ -0,0 +1,91 @@
#!/bin/bash -e
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
SRC_DIR=$(dirname "$0")
COMPONENT="rocmtools"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
# Print the supported command-line options, then terminate the script.
# Always exits with status 1.
usage() {
  echo -e "ROCMTools Build Script Usage:"
  echo -e "\nTo run ./run.sh PARAMs, PARAMs can be the following:"
  echo -e "-h | --help For showing this message"
  echo -e "-b | --build For compiling"
  echo -e "-cb | --clean-build For full clean build"
  # Short flag corrected to -acb so the help text matches the option parser.
  echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
  exit 1
}
# Consume leading option flags one at a time; stop at the first argument that
# does not start with a dash (or when no arguments remain).
while true; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      # Incremental build: keep the existing build directory.
      TO_CLEAN=no
      shift
      ;;
    -acb|--asan-clean-build)
      ASAN=True
      TO_CLEAN=yes
      shift
      ;;
    -cb|--clean-build)
      TO_CLEAN=yes
      shift
      ;;
    -*)
      # Any other dash-prefixed argument is rejected.
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
umask 022
# Fill in defaults for every setting the caller left unset or empty.
# Order matters: PREFIX_PATH falls back to PACKAGE_ROOT, which falls back to ROCM_PATH.
: "${ROCPROFILER_ROOT:=$SRC_DIR}"
: "${BUILD_DIR:=build}"
: "${BUILD_TYPE:=RelWithDebInfo}"
: "${PACKAGE_ROOT:=$ROCM_PATH}"
: "${PREFIX_PATH:=$PACKAGE_ROOT}"
: "${HIP_VDI:=0}"
# An explicit ROCM_RPATH replaces the default run-path linker flags.
if [ -n "$ROCM_RPATH" ]; then
  LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"
fi
: "${TO_CLEAN:=yes}"
: "${ASAN:=False}"
# Resolve the source root to an absolute path before changing directories.
ROCPROFILER_ROOT="$(cd "$ROCPROFILER_ROOT" && pwd)"

# A clean build wipes the build directory first. All path expansions are quoted
# so that directories containing spaces or glob characters are handled as a
# single argument (an unquoted `rm -rf` could otherwise remove unrelated paths).
if [ "$TO_CLEAN" = "yes" ]; then
  rm -rf -- "$BUILD_DIR"
fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

# Configure in the build directory, then build with make.
cmake \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
  -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
  -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
  -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
  -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
  "$ROCPROFILER_ROOT"

# NOTE: `-j` without a number places no limit on the number of parallel jobs.
make -j

exit 0
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,92 @@
# rocmtools
## Getting started
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
## Add your files
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
```
cd existing_repo
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
git branch -M main
git push -uf origin main
```
## Integrate with your tools
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
## Collaborate with your team
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
## Test and Deploy
Use the built-in continuous integration in GitLab.
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
***
# Editing this README
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
## Suggestions for a good README
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
## Name
Choose a self-explaining name for your project.
## Description
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
## Badges
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
## Visuals
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
## Installation
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
## Usage
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
## Support
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
## Roadmap
If you have ideas for releases in the future, it is a good idea to list them in the README.
## Contributing
State if you are open to contributions and what your requirements are for accepting them.
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
## Authors and acknowledgment
Show your appreciation to those who have contributed to the project.
## License
For open source projects, say how it is licensed.
## Project status
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
فایل باینری نشان داده نشده است.
@@ -0,0 +1,28 @@
# Try to find libdw (header `elfutils/libdw.h`, library `dw`).
#
# Once found, this will define:
# - LIBDW_FOUND        - system has libdw
# - LIBDW_INCLUDE_DIRS - the libdw include directory
# - LIBDW_INCLUDES     - same value as LIBDW_INCLUDE_DIRS (kept for
#                        callers that already use this name)
# - LIBDW_LIBRARIES    - link these to use libdw

# Directory containing `elfutils/libdw.h`. The listed directories are
# extra hints searched in addition to CMake's default locations.
find_path(FIND_LIBDW_INCLUDES
  NAMES
    elfutils/libdw.h
  PATHS
    /usr/include
    /usr/local/include)

# The `dw` library itself. The keyword must be `PATHS`: with the previous
# `PATH` spelling, `PATH` and the two directories were parsed as extra
# entries of the NAMES list instead of search locations.
find_library(FIND_LIBDW_LIBRARIES
  NAMES
    dw
  PATHS
    /usr/lib
    /usr/local/lib)

# Sets the *_FOUND result and reports a standard message depending on
# whether both cache entries above were resolved.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibDw DEFAULT_MSG
  FIND_LIBDW_INCLUDES FIND_LIBDW_LIBRARIES)

mark_as_advanced(FIND_LIBDW_INCLUDES FIND_LIBDW_LIBRARIES)

# Publish the results under the documented names.
set(LIBDW_INCLUDES ${FIND_LIBDW_INCLUDES})
set(LIBDW_INCLUDE_DIRS ${FIND_LIBDW_INCLUDES})
set(LIBDW_LIBRARIES ${FIND_LIBDW_LIBRARIES})
@@ -0,0 +1,30 @@
# Try to find libelf (header `libelf.h`, library `elf`).
#
# Once found, this will define:
# - LIBELF_FOUND        - system has libelf
# - LIBELF_INCLUDE_DIRS - the libelf include directory
# - LIBELF_INCLUDES     - same value as LIBELF_INCLUDE_DIRS (kept for
#                         callers that already use this name)
# - LIBELF_LIBRARIES    - link these to use libelf

# Directory containing `libelf.h`. The listed directories are extra
# hints searched in addition to CMake's default locations.
find_path(FIND_LIBELF_INCLUDES
  NAMES
    libelf.h
  PATHS
    /usr/include
    /usr/include/libelf
    /usr/local/include
    /usr/local/include/libelf)

# The `elf` library itself. The keyword must be `PATHS`: with the
# previous `PATH` spelling, `PATH` and the two directories were parsed as
# extra entries of the NAMES list instead of search locations.
find_library(FIND_LIBELF_LIBRARIES
  NAMES
    elf
  PATHS
    /usr/lib
    /usr/local/lib)

# Sets the *_FOUND result and reports a standard message depending on
# whether both cache entries above were resolved.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibElf DEFAULT_MSG
  FIND_LIBELF_INCLUDES FIND_LIBELF_LIBRARIES)

mark_as_advanced(FIND_LIBELF_INCLUDES FIND_LIBELF_LIBRARIES)

# Publish the results under the documented names.
set(LIBELF_INCLUDES ${FIND_LIBELF_INCLUDES})
set(LIBELF_INCLUDE_DIRS ${FIND_LIBELF_INCLUDES})
set(LIBELF_LIBRARIES ${FIND_LIBELF_LIBRARIES})
@@ -0,0 +1,25 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Descend into each plugin directory, in this order.
foreach(plugin_dir IN ITEMS file perfetto ctf)
  add_subdirectory(${plugin_dir})
endforeach()
@@ -0,0 +1 @@
README.html
@@ -0,0 +1,161 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# The CTF plugin is built as a shared object. Besides the hand-written
# sources, it compiles files generated by the custom commands of this
# directory (`barectf.c`/`barectf.h` and the `*.cpp.i` HSA/HIP files).
add_library(ctf_plugin SHARED
  ctf.cpp
  plugin.cpp
  barectf.c
  "${CMAKE_CURRENT_BINARY_DIR}/barectf.h"
  ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp
  hsa_begin.cpp.i
  hsa_end.cpp.i
  hip_begin.cpp.i
  hip_end.cpp.i)

# Hide C++ symbols by default, relink whenever the export map changes,
# and place the library at the top of the project build tree.
set_property(TARGET ctf_plugin PROPERTY
  CXX_VISIBILITY_PRESET hidden)
set_property(TARGET ctf_plugin PROPERTY
  LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap")
set_property(TARGET ctf_plugin PROPERTY
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

# Install location (relative to the install prefix) of the CTF metadata
# stream file; the plugin gets the absolute path as a compile definition.
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")

target_compile_definitions(ctf_plugin
  PRIVATE
    HIP_PROF_HIP_API_STRING=1
    __HIP_PLATFORM_HCC__=1
    CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")

# The current binary directory is on the include path so that generated
# headers can be found.
target_include_directories(ctf_plugin
  PRIVATE
    "${PROJECT_SOURCE_DIR}/inc"
    "${PROJECT_SOURCE_DIR}"
    "${CMAKE_BINARY_DIR}/src/api"
    "${CMAKE_CURRENT_BINARY_DIR}")

# Export only what the shared export map lists, and fail the link on
# any unresolved symbol.
target_link_options(ctf_plugin
  PRIVATE
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
    -Wl,--no-undefined)

target_link_libraries(ctf_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    hsa-runtime64::hsa-runtime64
    systemd
    stdc++fs
    dl)

install(
  TARGETS ctf_plugin
  LIBRARY
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
    COMPONENT runtime)
# `gen_api_files.py` and `gen_env_yaml.py` require Python 3,
# CppHeaderParser, PyYAML, and barectf.
find_package(Python3 COMPONENTS Interpreter REQUIRED)
message(STATUS "Python: ${Python3_EXECUTABLE}")

# check_py3_pkg(<pkg_name>)
#
# Stops the configuration with a fatal error unless the Python 3
# interpreter found above can import the module `<pkg_name>`.
#
# Example: check_py3_pkg(yaml)
function(check_py3_pkg pkg_name)
  execute_process(COMMAND "${Python3_EXECUTABLE}" -c "import ${pkg_name}"
                  RESULT_VARIABLE PY3_IMPORT_RES
                  OUTPUT_QUIET)

  # Pass the variable name rather than its expansion: the result may be
  # an error string containing spaces when the process cannot be
  # started, which would otherwise split into several `if()` arguments.
  if(NOT PY3_IMPORT_RES EQUAL 0)
    message(FATAL_ERROR "Cannot find Python 3 package `${pkg_name}`")
  endif()

  message(STATUS "Found Python 3 package `${pkg_name}`")
endfunction()

check_py3_pkg(CppHeaderParser)
check_py3_pkg(yaml)

# Path of the `barectf` command-line tool (configuration fails if it's
# missing).
find_program(BARECTF_RES barectf REQUIRED)
# HSA API: generate the barectf YAML and C++ files.
#
# The HSA headers are located through the include directories which the
# imported `hsa-runtime64::hsa-runtime64` target exports.
get_property(HSA_RUNTIME_INCLUDE_DIRS
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRS}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)

# Steps: preprocess both HSA headers, concatenate the results with the
# generated `hsa_prof_str.h` into `hsa_input.h`, then run
# `gen_api_files.py` on it (expected to produce the OUTPUT files).
add_custom_command(
  OUTPUT
    hsa_erts.yaml
    hsa_begin.cpp.i
    hsa_end.cpp.i
  BYPRODUCTS
    hsa.h.i
    hsa_ext_amd.h.i
    hsa_input.h
  DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
    "${HSA_RUNTIME_INC_PATH}/hsa.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
    "${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
  COMMAND ${CMAKE_COMMAND} -E cat hsa.h.i hsa_ext_amd.h.i "${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h" > hsa_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py" hsa hsa_input.h
  COMMENT "Generating HSA API files for the `ctf` plugin...")
# Generate barectf YAML and C++ files for HIP API.
#
# The HIP headers are located through the include directories which the
# imported `hip::amdhip64` target exports.
get_property(HIP_INCLUDE_DIRS TARGET hip::amdhip64
             PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
          PATHS ${HIP_INCLUDE_DIRS}
          PATH_SUFFIXES hip
          NO_DEFAULT_PATH
          REQUIRED)
find_file(HIP_PROF_STR_H hip_prof_str.h
          PATHS ${HIP_INCLUDE_DIRS}
          PATH_SUFFIXES hip hip/amd_detail
          NO_DEFAULT_PATH
          REQUIRED)

# From here on, HIP_INCLUDE_DIRS holds compiler flags (`-I<dir>`), not
# plain directories.
list(TRANSFORM HIP_INCLUDE_DIRS PREPEND -I)

# Steps: preprocess `hip_runtime_api.h`, concatenate the result with
# `hip_prof_str.h` into `hip_input.h`, then run `gen_api_files.py` on it
# (expected to produce the OUTPUT files).
#
# The concatenation uses `cmake -E cat`, like the HSA rule of this
# file, instead of relying on a `cat` program being in the PATH.
add_custom_command(
  OUTPUT hip_erts.yaml hip_begin.cpp.i hip_end.cpp.i
  COMMAND ${CMAKE_C_COMPILER} ${HIP_INCLUDE_DIRS}
          -E "${HIP_RUNTIME_API_H}"
          -D__HIP_PLATFORM_HCC__=1
          -D__HIP_ROCclr__=1
          -o hip_runtime_api.h.i
  COMMAND ${CMAKE_COMMAND} -E cat hip_runtime_api.h.i "${HIP_PROF_STR_H}" > hip_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          hip hip_input.h
  BYPRODUCTS hip_runtime_api.h.i hip_input.h
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          "${HIP_RUNTIME_API_H}"
          "${HIP_PROF_STR_H}"
  COMMENT "Generating HIP API files for the `ctf` plugin...")
# Trace environment for barectf: `gen_env_yaml.py` receives the project
# version and is expected to produce `env.yaml`.
add_custom_command(
  OUTPUT
    env.yaml
  DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py" ${PROJECT_VERSION}
  COMMENT "Generating `env.yaml`...")
# Raw CTF tracer: run `barectf gen` on `config.yaml`, with both the
# current binary directory (generated YAML files) and the current source
# directory as inclusion directories.
add_custom_command(
  OUTPUT
    barectf.c
    barectf.h
    barectf-bitfield.h
    metadata
  DEPENDS
    hsa_erts.yaml
    hip_erts.yaml
    env.yaml
    "${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
    "${CMAKE_CURRENT_SOURCE_DIR}/dst_base.yaml"
  COMMAND "${BARECTF_RES}" gen "-I${CMAKE_CURRENT_BINARY_DIR}" "-I${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
  COMMENT "Generating raw CTF tracer with barectf...")

# Install the generated CTF metadata stream file.
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/metadata"
  DESTINATION "${METADATA_STREAM_FILE_DIR}")
@@ -0,0 +1,260 @@
= CTF plugin for ROCMTools
13 December 2022
Philippe Proulx
This plugin writes the received ROCMTools tracer and profiler records to
a https://diamon.org/ctf/[CTF] trace.
== Build requirements
* Python ≥ 3.10
* barectf ≥ 3.1.1 (`pip3 install barectf`)
* PyYAML (`apt-get install python3-yaml`)
* CppHeaderParser (`pip3 install CppHeaderParser`)
== Usage
Once installed, you may load this plugin with `rocprofv2` using
the `--plugin ctf` command-line arguments.
This plugin honours the `OUTPUT_PATH` environment variable which
`rocprofv2` sets with the `-d` option. If you pass `-d my-dir` to
`rocprofv2`, then the plugin will write the CTF trace to the
`my-dir/trace` directory.
IMPORTANT: This plugin performs important cleanup tasks at finalization
time, so the resulting CTF trace could be corrupted if the plugin is
never finalized.
Once the plugin is finalized, open the resulting trace directory with
either https://babeltrace.org/[Babeltrace{nbsp}2] or
https://www.eclipse.org/tracecompass/[Trace Compass] to view or analyze
it.
=== Event record types
This plugin writes to different CTF data streams having different types.
On the file system, the prefix of a data stream file name indicates the
data stream type, that is:
`roctx_`::
rocTX messages.
+
Each CTF event record is named `roctx` and corresponds to a rocTX
tracer record.
+
The fields are:
+
--
[horizontal]
`thread_id`::
Thread ID.
`id`::
rocTX ID.
`msg`::
rocTX message.
--
`hsa_api_`::
HSA API beginning and end function calls.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
--
+
For each ROCMTools HSA API tracer record for the HSA function named
`__name__`, this plugin writes two event records:
+
`__name___begin`:::
Beginning of the function call.
+
The event record contains fields which correspond to most of the
parameters of the HSA function.
`__name___end`:::
End of the function call.
`hip_api_`::
HIP API beginning and end function calls.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
`kernel_name`::
Kernel name (empty string if not available).
--
+
For each ROCMTools HIP API tracer record for the HIP function named
`__name__`, this plugin writes two event records:
+
`__name__Begin`:::
Beginning of the function call.
+
The event record contains fields which correspond to most of the
parameters of the HIP function.
`__name__End`:::
End of the function call.
`api_ops_`::
HSA/HIP API beginning and end operations.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
--
+
The possible CTF event records are:
+
`hsa_op_begin`:::
HSA API operation beginning.
`hsa_op_end`:::
HSA API operation end.
`hip_op_begin`:::
HIP API operation beginning.
+
Such an event record also has the field `kernel_name` which is the
kernel name (empty string if not available).
`hip_op_end`:::
HIP API operation end.
`profiler_`::
Profiler records.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`dispatch`::
Dispatch ID.
`gpu_id`::
GPU ID.
`queue_id`::
Queue ID.
`queue_index`::
Queue index.
`process_id`::
Process ID.
`thread_id`::
Thread ID.
`kernel_id`::
Kernel ID.
`kernel_name`::
Kernel name (empty string if not available).
`counter_names`::
Array of counter names, each one having a corresponding integral
value in the `counter_values` field.
`counter_values`::
Array of integers, each one being the value of a counter of which
the name is a corresponding string in the `counter_names` field.
--
+
The possible CTF event records are:
+
`profiler_record`:::
Profiler record.
`profiler_record_with_kernel_properties`:::
Profiler record with kernel properties.
+
Such an event record also has the following fields:
+
--
`grid_size`::
Grid size.
`workgroup_size`::
Workgroup size.
`lds_size`::
Local memory size.
`scratch_size`::
Scratch size.
`arch_vgpr_count`::
Architecture vector general purpose register count.
`accum_vgpr_count`::
Accumulation vector general purpose register count.
`sgpr_count`::
Scalar general purpose register count.
`wave_size`::
Wavefront size.
`signal_handle`::
Signal handle.
--
`hsa_handles_`::
HSA handle type mappings.
+
Each CTF event record is named `hsa_handle_type` and maps an HSA handle
to a processor unit type (CPU or GPU).
+
The clock value of those event records is irrelevant (always{nbsp}0).
+
The fields are:
+
--
[horizontal]
`handle`::
HSA handle.
`type`::
Processor unit type (`CPU` or `GPU` enumeration label).
--
@@ -0,0 +1,67 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_EVENT_RECORD_H
#define PLUGIN_CTF_BARECTF_EVENT_RECORD_H
#include <memory>
#include <cstdint>
struct barectf_default_ctx;
namespace rocm_ctf {
// Base class (abstract) of every barectf event record.
//
// A derived class provides Write(), which is expected to call the
// matching barectf tracing function.
//
// `CtxT` is the barectf context type which Write() receives.
template <typename CtxT>
class BarectfEventRecord {
 public:
  // Shared pointer to an immutable barectf event record.
  using SP = std::shared_ptr<const BarectfEventRecord>;

  // Copying is disabled to keep this class simple.
  BarectfEventRecord(const BarectfEventRecord&) = delete;
  BarectfEventRecord& operator=(const BarectfEventRecord&) = delete;

  virtual ~BarectfEventRecord() = default;

  // Returns the clock value given at construction time.
  std::uint64_t GetClockVal() const noexcept { return clock_val_; }

  // Emits this event record through the barectf context `barectf_ctx`.
  virtual void Write(CtxT& barectf_ctx) const = 0;

 protected:
  // Creates an event record stamped with the clock value `clock_val`.
  //
  // Only derived classes may build an event record.
  explicit BarectfEventRecord(const std::uint64_t clock_val) noexcept : clock_val_(clock_val) {}

 private:
  // Clock value of this event record.
  std::uint64_t clock_val_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_EVENT_RECORD_H
@@ -0,0 +1,192 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_PLATFORM_H
#define PLUGIN_CTF_BARECTF_PLATFORM_H
#include <cstdlib>
#include <cstdint>
#include <fstream>
#include <vector>
#include <functional>
#include <experimental/filesystem>
#include "barectf.h"
namespace rocm_ctf {
template <typename> class BarectfWriter;
// A barectf platform for any barectf writer.
//
// The user doesn't deal directly with such an object: it's closely
// coupled with a barectf writer.
//
// Each platform takes care of a single CTF data stream file.
//
// After building such a platform, get the raw barectf context with
// GetCtx() to call tracing functions. The platform must still exist
// when calling a tracing function.
//
// Such a platform opens the data stream file on construction and closes
// it on destruction.
//
// `DescrT` is the specific barectf platform descriptor. It must be a
// structure having:
//
// `Ctx`:
//     Specific barectf context type.
//
// `static void OpenPacket(Ctx&)`:
//     Packet opening function.
//
// `static void ClosePacket(Ctx&)`:
//     Packet closing function.
template <typename DescrT> class BarectfPlatform final {
  friend class BarectfWriter<DescrT>;

 private:
  // Builds a barectf platform.
  //
  // The platform writes CTF packets of size `packet_size` bytes to the
  // CTF data stream file `data_stream_file_path`.
  //
  // For each event record to write, the platform reads `clock_val` to
  // know the current timestamp. Only the address of `clock_val` is
  // kept: the referenced object must outlive this platform.
  //
  // Throws if the data stream file cannot be opened (stream exceptions
  // are enabled below).
  explicit BarectfPlatform(const std::size_t packet_size,
                           const std::experimental::filesystem::path& data_stream_file_path,
                           const std::uint64_t& clock_val)
      : clock_val_{&clock_val}, buffer_(packet_size) {
    // Initialize barectf callbacks (the static overloads are selected).
    barectf_platform_callbacks callbacks;

    callbacks.default_clock_get_value = GetClockCb;
    callbacks.is_backend_full = IsBackendFullCb;
    callbacks.open_packet = OpenPacketCb;
    callbacks.close_packet = ClosePacketCb;

    // Configure exceptions so that stream operations throw instead of
    // just setting flags on error.
    output_.exceptions(std::ofstream::failbit | std::ofstream::badbit);

    // Open CTF data stream output file in binary mode.
    output_.open(data_stream_file_path, std::ios_base::out | std::ios_base::binary);

    // Initialize the raw barectf context; `this` is what the static
    // callbacks receive as `data`.
    barectf_init(&ctx_, buffer_.data(), buffer_.size(), callbacks, this);

    // Open the initial packet.
    OpenPacketCb();
  }

 public:
  // Disabled copy operations to make this class simpler.
  BarectfPlatform(const BarectfPlatform&) = delete;
  BarectfPlatform& operator=(const BarectfPlatform&) = delete;

  // Closes/writes any last CTF packet and closes the data stream file.
  //
  // Never throws: a destructor is implicitly `noexcept` and `output_`
  // is configured to throw on failure, so an I/O error escaping from
  // here would call std::terminate(). A failure to write the last
  // packet or to close the file is therefore ignored.
  ~BarectfPlatform() {
    try {
      if (barectf_packet_is_open(&ctx_) && !barectf_packet_is_empty(&ctx_)) {
        // Close and write last CTF packet (not empty).
        ClosePacketCb();
      }

      // Close data stream output file.
      output_.close();
    } catch (...) {
      // Nothing can be reported from a destructor; the destructor of
      // `output_` still releases the file.
    }
  }

  // Returns the raw barectf context of this platform.
  const typename DescrT::Ctx& GetCtx() const noexcept { return ctx_; }
  typename DescrT::Ctx& GetCtx() noexcept { return ctx_; }

 private:
  // Converts the user data `data` of a barectf callback back to the
  // platform which registered itself with barectf_init().
  static BarectfPlatform& AsPlatform(void* const data) noexcept {
    return *static_cast<BarectfPlatform*>(data);
  }

  // Four callbacks for barectf.
  //
  // Those four functions receive an instance of this class as `data`.
  static std::uint64_t GetClockCb(void* const data) noexcept {
    // Forward to instance method.
    return AsPlatform(data).GetClockCb();
  }

  static int IsBackendFullCb(void* const data) noexcept {
    // Forward to instance method.
    return AsPlatform(data).IsBackendFullCb();
  }

  static void OpenPacketCb(void* const data) {
    // Forward to instance method.
    AsPlatform(data).OpenPacketCb();
  }

  static void ClosePacketCb(void* const data) {
    // Forward to instance method.
    AsPlatform(data).ClosePacketCb();
  }

  // Instance version of the "get clock value" callback.
  std::uint64_t GetClockCb() noexcept { return *clock_val_; }

  // Instance version of the "is the back end full?" callback.
  int IsBackendFullCb() noexcept {
    // Never full.
    return 0;
  }

  // Instance version of the "open packet" callback.
  void OpenPacketCb() {
    // Forward to user (descriptor) function.
    DescrT::OpenPacket(ctx_);
  }

  // Instance version of the "close packet" callback.
  void ClosePacketCb() {
    // Forward to user (descriptor) function to finalize the packet.
    DescrT::ClosePacket(ctx_);

    // Write to the data stream file.
    WriteCurrentPacket();
  }

  // Writes the current CTF packet (`buffer_`) to the data stream file.
  //
  // The whole buffer is always written, whatever the packet contains.
  void WriteCurrentPacket() {
    output_.write(reinterpret_cast<const char*>(buffer_.data()), buffer_.size());
  }

  // Clock value pointer.
  const std::uint64_t* clock_val_;

  // CTF data stream output file stream.
  std::ofstream output_;

  // Raw barectf context.
  typename DescrT::Ctx ctx_;

  // CTF packet buffer.
  std::vector<std::uint8_t> buffer_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_PLATFORM_H
@@ -0,0 +1,124 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_TRACER_H
#define PLUGIN_CTF_BARECTF_TRACER_H
#include <cstdlib>
#include <memory>
#include <vector>
#include <string>
#include <experimental/filesystem>
#include "barectf_event_record.h"
#include "barectf_writer.h"
namespace rocm_ctf {
// A barectf tracer offers the AddEventRecord() method to add an event
// record which it will ultimately write to some CTF data stream file
// within some specified CTF trace directory.
//
// One important feature of such a tracer is that you don't need to add
// event records in order of time. A barectf tracer manages one or more
// barectf writers, each one managing a single barectf platform/context
// (CTF data stream file).
//
// All the CTF data stream files which a barectf tracer indirectly
// manages share a common specified prefix. You must not use the same
// prefix for two barectf tracers writing to the same CTF trace
// directory.
//
// `PlatformDescrT` is the specific barectf platform descriptor (see the
// documentation of the `BarectfPlatform` class template).
template <typename PlatformDescrT> class BarectfTracer final {
public:
// Specific barectf event record type.
using EventRecord = typename BarectfWriter<PlatformDescrT>::EventRecord;
// Builds a barectf tracer to write CTF packets of size `packet_size`
// bytes to CTF data stream files having the prefix
// `data_stream_file_name_prefix` within the CTF trace directory
// `trace_dir`.
//
// The internal barectf writers manage event record queues having a
// maximum size of `max_writer_queue_size`. Increasing
// `max_writer_queue_size` increases the memory footprint of the
// tracer, but may reduce the number of required CTF data stream files
// to ensure time-ordered event records.
explicit BarectfTracer(const std::size_t packet_size,
std::experimental::filesystem::path trace_dir,
const char* const data_stream_file_name_prefix,
const std::size_t max_writer_queue_size = 200)
: packet_size_{packet_size},
trace_dir_{std::move(trace_dir)},
data_stream_file_name_prefix_{data_stream_file_name_prefix},
max_writer_queue_size_{max_writer_queue_size} {}
// Disabled copy operations to make this class simpler.
BarectfTracer(const BarectfTracer&) = delete;
BarectfTracer& operator=(const BarectfTracer&) = delete;
// Adds the event record `event_record` to this tracer.
//
// The clock value of `event_record` may be less than the clock value
// of previously added event records.
void AddEventRecord(typename EventRecord::SP event_record) {
// Try to find a barectf writer to accept `event_record`.
for (auto& writer : writers_) {
if (writer->MayAddEventRecord(*event_record)) {
// Found: add the event record to this writer and return.
writer->AddEventRecord(std::move(event_record));
return;
}
}
// No barectf writer found: create a new one.
std::ostringstream ss;
ss << data_stream_file_name_prefix_ << writers_.size();
writers_.emplace_back(new BarectfWriter<PlatformDescrT>{packet_size_, trace_dir_ / ss.str(),
max_writer_queue_size_});
// Add the event record to this new barectf writer.
assert(writers_.back()->MayAddEventRecord(*event_record));
writers_.back()->AddEventRecord(std::move(event_record));
}
private:
// CTF packet size.
std::size_t packet_size_;
// CTF trace directory.
std::experimental::filesystem::path trace_dir_;
// CTF data stream file name prefix.
std::string data_stream_file_name_prefix_;
// Maximum event record queue size of a barectf writer.
std::size_t max_writer_queue_size_;
// barectf writers.
std::vector<std::unique_ptr<BarectfWriter<PlatformDescrT>>> writers_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_TRACER_H
@@ -0,0 +1,178 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_WRITER_H
#define PLUGIN_CTF_BARECTF_WRITER_H
#include <cassert>
#include <cstdlib>
#include <cstdint>
#include <cassert>
#include <queue>
#include <utility>
#include <experimental/filesystem>
#include "barectf_platform.h"
#include "barectf_event_record.h"
namespace rocm_ctf {
template <typename> class BarectfTracer;
// A barectf writer manages a queue of event records, writing them
// through barectf when needed.
//
// Such an object makes it possible to add some event record with a
// clock value V and then some other event record of which the clock
// value is less than V. The barectf writer ensures that actual barectf
// tracing functions are called chronologically, a requirement of CTF.
//
// A barectf writer keeps event records in memory until its queue is
// full (you provide the maximum queue size at construction time), in
// which case it writes the oldest event record to some current CTF
// packet through a barectf tracing function.
//
// Call MayAddEventRecord() to check whether or not you may add an event
// record to the barectf writer, and then AddEventRecord() if you may.
//
// A barectf writer writes all its remaining event records on
// destruction.
//
// `PlatformDescrT` is the specific barectf platform descriptor (see the
// documentation of the `BarectfPlatform` class template).
template <typename PlatformDescrT> class BarectfWriter final {
friend class BarectfTracer<PlatformDescrT>;
public:
// Specific barectf event record type.
using EventRecord = BarectfEventRecord<typename PlatformDescrT::Ctx>;
private:
// Builds a barectf writer to write CTF packets of size `packet_size`
// bytes to the CTF data stream file `data_stream_file_path`.
//
// The built barectf writer manages an event record queue having a
// maximum size of `max_queue_size`.
explicit BarectfWriter(const std::size_t packet_size,
const std::experimental::filesystem::path& data_stream_file_path,
const std::size_t max_queue_size)
: platform_{packet_size, data_stream_file_path, clock_val_},
max_queue_size_{max_queue_size} {}
public:
// Writes all its remaining event records.
~BarectfWriter() {
// Write all the remaining event records from the oldest to the
// newest.
while (!queue_.empty()) {
WriteOldestEventRecord();
}
}
// Disabled copy operations to make this class simpler.
BarectfWriter(const BarectfWriter&) = delete;
BarectfWriter& operator=(const BarectfWriter&) = delete;
// Whether or not you may add the event record `event_record` to this
// writer with AddEventRecord().
bool MayAddEventRecord(const EventRecord& event_record) const noexcept {
if (queue_.empty()) {
return true;
}
// One may only add an event record if its clock value is greater
// than or equal to the clock value of the most recently written
// event record.
return event_record.GetClockVal() >= clock_val_;
}
// Adds the event record `event_record` to this writer.
//
// `MayAddEventRecord(*event_record)` must return `true`.
void AddEventRecord(typename EventRecord::SP event_record) {
assert(MayAddEventRecord(*event_record) && "May add event record");
// Add event record to queue.
queue_.emplace(std::move(event_record));
if (queue_.size() > max_queue_size_) {
// Queue is too large: write the oldest event record now to
// satisfy the requirement.
WriteOldestEventRecord();
}
}
private:
// Comparison type for `queue_`.
struct EventRecordQueueCompare final {
bool operator()(const typename EventRecord::SP& left,
const typename EventRecord::SP& right) const noexcept {
// "Greater than" so that the top element of the queue is the
// oldest event record.
return left->GetClockVal() > right->GetClockVal();
}
};
// Oldest event record within `queue_`.
//
// `queue_` must not be empty.
const EventRecord& GetOldestEventRecord() const noexcept {
assert(!queue_.empty() && "Queue isn't empty");
return *queue_.top();
}
// Writes the oldest event record through a barectf tracing function
// and removes it from the event record queue.
void WriteOldestEventRecord() {
auto& oldest_event_record = GetOldestEventRecord();
// When calling a barectf tracing function, it calls the clock value
// accessor callback of the platform, which itself reads from
// `clock_val_`.
clock_val_ = oldest_event_record.GetClockVal();
// Forward to a barectf tracing function.
oldest_event_record.Write(platform_.GetCtx());
// Remove from queue.
queue_.pop();
}
// barectf platform (manages file I/O).
BarectfPlatform<PlatformDescrT> platform_;
// Current clock value for `platform_`.
//
// This is also the clock value of the most recently written event
// record, which is why MayAddEventRecord() can rely on it.
std::uint64_t clock_val_ = 0;
// Maximum size of `queue_` below.
std::size_t max_queue_size_;
// Event record queue.
std::priority_queue<typename EventRecord::SP, std::vector<typename EventRecord::SP>,
EventRecordQueueCompare>
queue_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_WRITER_H
@@ -0,0 +1,165 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
%YAML 1.2
--- !<tag:barectf.org,2020/3/config>
trace:
$include:
# Environment (generated file).
- env.yaml
type:
$include:
- stdint.yaml
- stdmisc.yaml
native-byte-order: little-endian
clock-types:
default:
origin-is-unix-epoch: true
$c-type: uint64_t
data-stream-types:
hsa_api:
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
$include:
# Base.
- dst_base.yaml
# HSA API event record types (generated file).
- hsa_erts.yaml
hip_api:
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
- _kernel_name: str
$include:
# Base.
- dst_base.yaml
# HIP API event record types (generated file).
- hip_erts.yaml
roctx:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
event-record-types:
roctx:
payload-field-type:
class: struct
members:
- _id: sint64
- _msg: str
hsa_handles:
$include:
# Base.
- dst_base.yaml
event-record-types:
hsa_handle_type:
payload-field-type:
class: struct
members:
- _handle: uint64
- _type:
field-type:
class: uenum
size: 8
mappings:
CPU: [0]
GPU: [1]
api_ops:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
event-record-types:
hsa_op_begin:
payload-field-type:
class: struct
hsa_op_end:
payload-field-type:
class: struct
hip_op_begin:
payload-field-type:
class: struct
members:
- _kernel_name: str
hip_op_end:
payload-field-type:
class: struct
profiler:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _dispatch: uint64
- _gpu_id: uint64
- _queue_id: uint64
- _queue_index: uint64
- _process_id: uint32
- _thread_id: uint32
- _kernel_id: uint64
- _kernel_name: str
- _counter_names:
field-type:
class: dynamic-array
element-field-type: str
- _counter_values:
field-type:
class: dynamic-array
element-field-type: uint64
event-record-types:
profiler_record:
payload-field-type:
class: struct
profiler_record_with_kernel_properties:
payload-field-type:
class: struct
members:
- _grid_size: uint64
- _workgroup_size: uint64
- _lds_size: uint64
- _scratch_size: uint64
- _arch_vgpr_count: uint64
- _accum_vgpr_count: uint64
- _sgpr_count: uint64
- _wave_size: uint64
- _signal_handle: uint64
@@ -0,0 +1,107 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cassert>
#include <stdexcept>
#include <iostream>
#include <experimental/filesystem>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "plugin.h"
namespace fs = std::experimental::filesystem;
namespace {
// Global plugin instance
rocm_ctf::Plugin* the_plugin = nullptr;
} // namespace
// Plugin entry point: creates the global plugin instance.
//
// Returns 0 on success, or -1 when:
//
// * The major version differs from `ROCPROFILER_VERSION_MAJOR`, or the
//   minor version is less than `ROCPROFILER_VERSION_MINOR`.
// * The global plugin instance already exists.
// * The `OUTPUT_PATH` environment variable isn't set.
// * Creating the plugin instance throws an `std::exception`.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(const uint32_t rocprofiler_major_version,
                                                     const uint32_t rocprofiler_minor_version) {
  const bool is_version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
      rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;

  if (!is_version_ok) {
    return -1;
  }

  if (the_plugin != nullptr) {
    // Already initialized.
    return -1;
  }

  // The trace is written to the `trace` directory under this path.
  const char* const output_dir = getenv("OUTPUT_PATH");

  if (output_dir == nullptr) {
    std::cerr << "rocprofiler_plugin_initialize(): "
              << "`OUTPUT_PATH` environment variable isn't set" << std::endl;
    return -1;
  }

  try {
    the_plugin = new rocm_ctf::Plugin{256 * 1024, fs::path{output_dir} / "trace",
                                      CTF_PLUGIN_METADATA_FILE_PATH};
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_initialize(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
// Plugin exit point: destroys the global plugin instance, if any, and
// resets the global pointer so that rocprofiler_plugin_initialize() may
// create a new instance afterwards.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  // Deleting a null pointer has no effect, so this is safe even when
  // the plugin was never initialized.
  delete the_plugin;
  the_plugin = nullptr;
}
// Forwards the buffer records from `begin` to `end` to the global
// plugin instance.
//
// Returns 0 on success, or -1 if the plugin isn't initialized or if
// handling the records throws an `std::exception`.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* const begin, const rocprofiler_record_header_t* const end,
    const rocprofiler_session_id_t session_id, const rocprofiler_buffer_id_t buffer_id) {
  // Runtime check instead of assert(): an assertion is compiled out
  // with `NDEBUG`, which would leave a null pointer dereference below
  // if this is called before a successful initialization or after
  // finalization.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  try {
    the_plugin->HandleBufferRecords(begin, end, session_id, buffer_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
// Forwards the tracer record `record` to the global plugin instance.
//
// A record of which the header ID handle is 0 is ignored (returns 0).
//
// Returns 0 on success, or -1 if the plugin isn't initialized or if
// handling the record throws an `std::exception`.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
                                                       const rocprofiler_session_id_t session_id) {
  // Runtime check instead of assert(): an assertion is compiled out
  // with `NDEBUG`, which would leave a null pointer dereference below
  // if this is called before a successful initialization or after
  // finalization.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_record(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  if (record.header.id.handle == 0) {
    // Nothing to handle.
    return 0;
  }

  try {
    the_plugin->HandleTracerRecord(record, session_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
@@ -0,0 +1,28 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
$default-clock-type-name: default
$features:
packet:
beginning-timestamp-field-type: false
discarded-event-records-counter-snapshot-field-type: false
end-timestamp-field-type: false
@@ -0,0 +1,645 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import os
import os.path
import sys
import re
import yaml
import CppHeaderParser
# Numeric field type (abstract).
class _NumericFt:
# Returns the C++ expression to cast the expression `expr` to the C
# type of this field type.
def cast(self, expr):
return f'static_cast<{self.c_type}>({expr})'
# Abstract integer field type: a size (bits) and a preferred display
# base.
class _IntFt(_NumericFt):
    def __init__(self, size, pref_disp_base='dec'):
        self._size, self._pref_disp_base = size, pref_disp_base

    # Size of the integer, in bits.
    @property
    def size(self):
        return self._size

    # Preferred display base: `dec` or `hex`.
    @property
    def pref_disp_base(self):
        return self._pref_disp_base

    # barectf field type (YAML mapping as a dictionary) without its
    # `class` property.
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['size'] = self._size
        yaml_ft['preferred-display-base'] = self._pref_disp_base
        return yaml_ft
# Signed integer field type (barectf class `sint`).
class _SIntFt(_IntFt):
    # barectf field type: what the parent provides, with the `sint`
    # class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'sint'}

    # Name of the fixed-width signed C++ type having this size.
    @property
    def c_type(self):
        return 'std::int{}_t'.format(self._size)
# Unsigned integer field type (barectf class `uint`).
class _UIntFt(_IntFt):
    # barectf field type: what the parent provides, with the `uint`
    # class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'uint'}

    # Name of the fixed-width unsigned C++ type having this size.
    @property
    def c_type(self):
        return 'std::uint{}_t'.format(self._size)
# Pointer field type: a 64-bit unsigned integer displayed in
# hexadecimal.
class _PointerFt(_UIntFt):
    def __init__(self):
        super().__init__(64, 'hex')

    # Builds the C++ expression which converts the pointer expression
    # `expr` to the C type of this field type: `expr` is first
    # reinterpreted as an `std::uintptr_t` value, which is then cast.
    def cast(self, expr):
        as_uintptr = 'reinterpret_cast<std::uintptr_t>({})'.format(expr)
        return 'static_cast<{}>({})'.format(self.c_type, as_uintptr)
# Abstract enumeration field type: an integer field type with mappings.
class _EnumFt(_IntFt):
    def __init__(self, size, mappings):
        super().__init__(size)

        # Keep a private copy so that later changes to the dictionary
        # of the caller don't affect this object.
        self._mappings = mappings.copy()

    # Mappings (enumerator names to integer values).
    @property
    def mappings(self):
        return self._mappings

    # barectf field type: what the parent provides, plus a `mappings`
    # property where each enumerator maps to a single-value list.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['mappings'] = {
            name: [val] for name, val in self._mappings.items()
        }
        return yaml_ft
# Unsigned enumeration field type (barectf class `uenum`).
class _UEnumFt(_EnumFt, _UIntFt):
    # barectf field type: what the parents provide, with the `uenum`
    # class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'uenum'}
# Signed enumeration field type (barectf class `senum`).
#
# This derives from `_SIntFt` (not `_UIntFt`) so that `c_type`, and
# therefore cast(), use the signed C++ type (`std::intN_t`) which
# matches the signed enumeration class.
class _SEnumFt(_EnumFt, _SIntFt):
    # Equivalent barectf field type in YAML: what the parents provide,
    # with the `senum` class.
    @property
    def barectf_yaml(self):
        ret = super().barectf_yaml
        ret['class'] = 'senum'
        return ret
# Optional string field type.
class _OptStrFt:
# Equivalent barectf field type in YAML.
@property
def barectf_yaml(self):
return {
'class': 'str',
}
# String field type.
#
# Same barectf field type as `_OptStrFt`; the distinct type only lets
# the C++ code generator tell the two apart (it checks the exact type
# of a field type to decide whether to guard against a null C string).
class _StrFt(_OptStrFt):
    pass
# Floating-point number field type (barectf class `real`).
class _FloatFt(_NumericFt):
    def __init__(self, size):
        self._size = size

    # Size in bits: 32 or 64.
    @property
    def size(self):
        return self._size

    # barectf field type (YAML mapping as a dictionary).
    @property
    def barectf_yaml(self):
        return {'class': 'real', 'size': self._size}

    # C type: `float` for 32 bits, `double` for 64 bits.
    @property
    def c_type(self):
        if self._size != 32:
            assert self._size == 64
            return 'double'

        return 'float'
# Event record type.
class _Ert:
def __init__(self, api_func_name, members):
self._api_func_name = api_func_name
self._members = members
# API function name
@property
def api_func_name(self):
return self._api_func_name
# Parameters of function (list of `_ErtMember`).
@property
def members(self):
return self._members
# Beginning event record type.
class _BeginErt(_Ert):
    # Event record type name: the API function name followed with
    # `_begin` for the `hsa` API prefix, or with `Begin` otherwise.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            return '{}_begin'.format(self._api_func_name)

        return '{}Begin'.format(self._api_func_name)
# End event record type.
class _EndErt(_Ert):
    # Event record type name: the API function name followed with
    # `_end` for the `hsa` API prefix, or with `End` otherwise.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            return '{}_end'.format(self._api_func_name)

        return '{}End'.format(self._api_func_name)
# Event record type member.
class _ErtMember:
def __init__(self, access, member_names, ft):
self._access = access
self._member_names = member_names.copy()
self._ft = ft
# C++ access expression.
@property
def access(self):
return self._access
# List of member names.
@property
def member_names(self):
return self._member_names
# Equivalent field type.
@property
def ft(self):
return self._ft
# Makes sure some condition is satisfied, or prints the error message
# `error_msg` and quits with exit status 1 otherwise.
#
# This is an unconditional assertion.
def _make_sure(cond, error_msg):
if not cond:
print(f'Error: {error_msg}', file=sys.stderr)
sys.exit(1)
def _enumerator_effective_val(enum_val):
# Try the value, but this value may be a string (an
# enumerator/definition).
val = enum_val.get('value')
if type(val) is int:
return val
# Try the raw value.
val = enum_val.get('raw_value')
if val is not None:
if type(val) is int:
# Raw value is already an integer.
return val
else:
# Try to parse the raw value string as an integer.
try:
return int(val, 0)
except:
pass
_make_sure(False,
f'Cannot get the integral value of enumerator `{enum_val["name"]}`')
# Returns the equivalent field type of the C type `c_type`.
#
# If `c_type` names an enumeration known to `cpp_header`, then this
# function returns:
#
# * A 64-bit signed integer field type if the enumeration is empty.
# * Otherwise, a signed enumeration field type (64-bit if any
#   enumerator value doesn't fit a 32-bit signed integer, 32-bit
#   otherwise).
#
# Otherwise, the field type is guessed from the keywords found in
# `c_type` (`unsigned`, `long`, `short`, `char`, `float`, `double`),
# defaulting to a 32-bit integer.
def _number_ft_from_c_type(cpp_header, c_type):
    # Check for known enumeration.
    m = re.match(r'(?:enum\s+)?(\w+)', c_type)

    if m:
        for enum_info in cpp_header.enums:
            if m.group(1) != enum_info.get('name'):
                continue

            # Fill enumeration field type mappings.
            mappings = {
                str(v['name']): _enumerator_effective_val(v)
                for v in enum_info['values']
            }

            if len(mappings) == 0:
                # Empty enumeration: fall back to a plain integer.
                return _SIntFt(64)

            # Use 64 bits only when some value needs it.
            vals = mappings.values()
            size = 64 if max(vals) >= 2**31 or min(vals) < -2**31 else 32

            # Create corresponding enumeration field type.
            return _SEnumFt(size, mappings)

    # Find corresponding basic field type.
    is_unsigned = 'unsigned' in c_type

    if 'long' in c_type:
        if is_unsigned:
            return _UIntFt(64)
        else:
            return _SIntFt(64)
    elif 'short' in c_type:
        if is_unsigned:
            return _UIntFt(16)
        else:
            return _SIntFt(16)
    elif 'char' in c_type:
        if is_unsigned:
            return _UIntFt(8)
        else:
            return _SIntFt(8)
    elif 'float' in c_type:
        return _FloatFt(32)
    elif 'double' in c_type:
        return _FloatFt(64)
    else:
        # Assume `int` (often an unresolved C enumeration).
        if is_unsigned:
            return _UIntFt(32)
        else:
            return _SIntFt(32)
# Returns whether or not a property has a pointer type.
def _prop_is_pointer(prop, c_type):
if prop['pointer'] or prop['function_pointer']:
return True
if prop['array'] and 'array_size' in prop:
return True
if prop['unresolved']:
# HSA API function pointers.
if prop['name'] in ('callback', 'handler'):
return True
# HIP API function pointers.
if c_type.endswith('Fn_t'):
return True
# Check the C type itself.
if '*' in c_type or '*' in prop.get('raw_type', ''):
return True
return False
# Returns a list of event record type member objects for the structure
# `struct` considering the initial C++ access expression `access` and
# member names `member_names`.
def _get_ert_members_for_struct(cpp_header, struct, access, member_names):
members = []
member_names = member_names.copy()
member_names.append(None)
props = struct['properties']['public']
for index, prop in enumerate(props):
# Property name.
name = prop['name']
# Member names, access, and C type.
member_names[-1] = str(name)
this_access = f'{access}.{name}'
c_type = prop['type']
aliases = prop['aliases']
# Skip no type.
if c_type == '':
continue
# Skip unnamed or union.
if name == '' or 'union' in name or re.match(r'\bunion\b', c_type):
continue
# Check for known C type alias.
while True:
c_type_alias = cpp_header.typedefs.get(c_type)
if c_type_alias is None:
break
c_type = c_type_alias
# Check for C string.
if re.match(r'^((const\s+char)|(char\s+const)|char)\s*\*$',
c_type.strip()):
members.append(_ErtMember(this_access, member_names, _OptStrFt()))
continue
# Check for pointer.
if _prop_is_pointer(prop, c_type):
# Pointer: use numeric value.
members.append(_ErtMember(this_access, member_names, _PointerFt()))
continue
# Check for substructure.
sub_struct = cpp_header.classes.get(c_type)
if sub_struct is None and len(aliases) == 1:
sub_struct = cpp_header.classes.get(aliases[0])
if sub_struct is not None:
members += _get_ert_members_for_struct(cpp_header, sub_struct,
this_access, member_names)
continue
# Use a basic field type.
members.append(_ErtMember(this_access, member_names,
_number_ft_from_c_type(cpp_header, c_type)))
return members
# Builds the event record type objects for the callback data structure
# `struct`.
#
# Returns a tuple of two lists having one element per API function:
# the beginning event record types (function parameters as members) and
# the end event record types (return value as the single member, when
# `retval_info` indicates one; no member otherwise).
#
# When `retval_info` isn't `None`, the first nested class of `struct`
# is expected to be the return value union and the second one the
# `args` union; otherwise the `args` union is the first nested class.
def _erts_from_cb_data_struct(api_prefix, cpp_header, retval_info, struct):
    has_retval_info = retval_info is not None

    # The location of the `args` union within the nested structures of
    # `struct`.
    args_nested_cls_index = 1 if has_retval_info else 0

    if has_retval_info:
        # Create return value members (to be used later).
        retval_members = {}
        nested_classes = struct['nested_classes']
        _make_sure(len(nested_classes) >= 1,
                   f"Return value union doesn't exist in `{struct['name']}`")

        for prop in nested_classes[0]['properties']['public']:
            name = str(prop['name'])
            retval_members[prop['name']] = _ErtMember(
                f'GetApiData().{name}', ['retval'],
                _number_ft_from_c_type(cpp_header, prop['type']))

        # Make sure we have everything we need.
        for api_func_name, retval_name in retval_info.items():
            if retval_name is None:
                continue

            _make_sure(retval_name in retval_members,
                       f"Return value union member `{retval_name}` doesn't exist (function {api_func_name}())")

    # One property of the `args` union and one nested structure per API
    # function.
    args_union = struct['nested_classes'][args_nested_cls_index]
    nested_classes = args_union['nested_classes']
    props = args_union['properties']['public']
    _make_sure(len(nested_classes) == len(props),
               f'Mismatch between nested structure and member count in `{struct["name"]}`')

    # Create beginning/end event record type objects.
    begin_erts, end_erts = [], []

    for prop, args_struct in zip(props, nested_classes):
        # API function name is the name of the member.
        api_func_name = str(prop['name'])

        # Beginning: the parameters of the function.
        begin_erts.append(_BeginErt(
            api_func_name,
            _get_ert_members_for_struct(cpp_header, args_struct,
                                        f'GetApiData().args.{api_func_name}',
                                        [])))

        # End: the return value, if any.
        ret_members = []

        if has_retval_info:
            retval_type = retval_info.get(api_func_name)

            if retval_type is not None:
                ret_members.append(retval_members[retval_type])

        end_erts.append(_EndErt(api_func_name, ret_members))

    return begin_erts, end_erts
# Creates and returns the return value information dictionary.
#
# This dictionary maps API function names to the member to get within
# the callback data structure.
#
# This only applies to the HSA API: for other APIs, this function
# returns `None`.
def _get_retval_info(path):
if 'hsa' not in os.path.basename(path):
return
retval_info = {}
cur_api_func_name = None
with open(path) as f:
for line in f:
if 'out << ")' in line and cur_api_func_name is not None:
m = re.search(r'api_data.(\w+_retval)', line)
retval_info[cur_api_func_name] = m.group(1) if m else None
else:
m = re.search(r'out << "(hsa_\w+)\(";', line)
if m:
cur_api_func_name = m.group(1)
return retval_info
# Returns, as a YAML string, a partial barectf data stream type which
# only has the `event-record-types` property, with one event record
# type per element of `erts`.
#
# The payload of each event record type is a structure field type
# having one member per event record type member.
def _yaml_dst_from_erts(api_prefix, erts):
    yaml_erts = {}

    for ert in erts:
        # barectf doesn't support nested CTF structures, so join
        # individual member names with `__` to flatten.
        yaml_members = [
            {
                '_' + '__'.join(member.member_names): {
                    'field-type': member.ft.barectf_yaml,
                },
            }
            for member in ert.members
        ]

        yaml_erts[ert.name(api_prefix)] = {
            'payload-field-type': {
                'class': 'struct',
                'members': yaml_members,
            },
        }

    # Convert to YAML.
    return yaml.dump({'event-record-types': yaml_erts})
# Returns the C++ switch statement which calls the correct barectf
# tracing function depending on the API function operation ID.
def _cpp_switch_statement_from_erts(api_prefix, erts):
lines = []
lines.append('switch (GetOp()) {')
for ert in erts:
lines.append(f' case {api_prefix.upper()}_API_ID_{ert.api_func_name}:')
lines.append(f' barectf_{api_prefix}_api_trace_{ert.name(api_prefix)}(')
lines.append(f' &barectf_ctx,')
lines.append(f' GetThreadId(),')
lines.append(f' GetQueueId(),')
lines.append(f' GetAgentId(),')
lines.append(f' GetCorrelationId(),')
if api_prefix == 'hip':
lines.append(f' GetKernelName().c_str(),')
if len(ert.members) == 0:
# Remove last comma.
lines[-1] = lines[-1].replace(',', '')
for index, member in enumerate(ert.members):
if type(member.ft) is _OptStrFt:
# Only dereference C string if not null, otherwise use
# an empty string.
lines.append(f' {member.access} ? {member.access} : ""')
elif type(member.ft) is _StrFt:
lines.append(f' {member.access}')
else:
lines.append(f' {member.ft.cast(member.access)}')
if index + 1 < len(ert.members):
lines[-1] += ','
lines.append(' );')
lines.append(' break;')
lines.append('}')
return lines
# Processes the complete API header file `path` for the API prefix
# `api_prefix`.
#
# For each structure of which the name matches
# `<api_prefix>_api_data...`, this function writes, in the current
# working directory:
#
# `<api_prefix>_erts.yaml`:
#     barectf event record types (beginning and end).
#
# `<api_prefix>_begin.cpp.i`:
#     C++ `switch` statement for beginning event records.
#
# `<api_prefix>_end.cpp.i`:
#     C++ `switch` statement for end event records.
#
# Prints the error and quits with exit status 1 if parsing fails.
def _process_file(api_prefix, path):
    # Create `CppHeader` object.
    try:
        cpp_header = CppHeaderParser.CppHeader(path)
    except CppHeaderParser.CppParseError as exc:
        print(exc, file=sys.stderr)
        sys.exit(1)

    # Get return value information dictionary.
    retval_info = _get_retval_info(path)

    # Find callback data structure.
    cb_data_struct_name_pattern = r'^' + api_prefix + r'_api_data\w+$'

    for struct_name, struct in cpp_header.classes.items():
        if not re.match(cb_data_struct_name_pattern, struct_name):
            continue

        # Process callback data structure.
        begin_erts, end_erts = _erts_from_cb_data_struct(api_prefix,
                                                         cpp_header,
                                                         retval_info,
                                                         struct)

        # Output file names with the functions creating their contents
        # (called once the corresponding file is open).
        outputs = (
            (f'{api_prefix}_erts.yaml',
             lambda: _yaml_dst_from_erts(api_prefix, begin_erts + end_erts)),
            (f'{api_prefix}_begin.cpp.i',
             lambda: '\n'.join(_cpp_switch_statement_from_erts(api_prefix,
                                                               begin_erts))),
            (f'{api_prefix}_end.cpp.i',
             lambda: '\n'.join(_cpp_switch_statement_from_erts(api_prefix,
                                                               end_erts))),
        )

        for file_name, create_contents in outputs:
            with open(file_name, 'w') as f:
                f.write(create_contents())
# Script entry point.
#
# Command-line arguments: the API prefix and the path of the API header
# file to process.
if __name__ == '__main__':
    # Disable `CppHeaderParser` printing to standard output.
    parser_module = CppHeaderParser.CppHeaderParser

    for flag_name in ('print_warnings', 'print_errors', 'debug', 'debug_trace'):
        setattr(parser_module, flag_name, 0)

    # Process the complete API header file.
    api_prefix, header_path = sys.argv[1], sys.argv[2]
    _process_file(api_prefix, header_path)
@@ -0,0 +1,33 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import sys
import yaml
# Script entry point: writes `env.yaml` to the current working
# directory, a YAML file having an `environment` mapping of which the
# `rocprofiler_version` entry is the first command-line argument.
if __name__ == '__main__':
    with open('env.yaml', 'w') as f:
        env = {'rocprofiler_version': sys.argv[1]}
        f.write(yaml.dump({'environment': env}))
@@ -0,0 +1,869 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cassert>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <iostream>
#include <utility>
#include <string>
#include <memory>
#include <limits>
#include <fstream>
#include <experimental/filesystem>
#include <time.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_prof_str.h"
#include <hip/hip_runtime.h>
#include <hip/amd_detail/hip_prof_str.h>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
#include "barectf.h"
#include "barectf_event_record.h"
#include "barectf_tracer.h"
#include "plugin.h"
namespace fs = std::experimental::filesystem;
namespace rocm_ctf {
namespace {
// Abstract tracer event record using the barectf context type `CtxT`.
//
// Copies, at construction time, the properties which are common to all
// tracer records (operation ID, thread ID, queue ID, agent ID, and
// correlation ID) and offers them to derived classes through protected
// accessors.
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
 protected:
  // Builds a tracer event record from the tracer record `record`, using
  // `clock_val` as the clock value of the event record.
  explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
      : BarectfEventRecord<CtxT>{clock_val},
        op_{record.operation_id.id},
        thread_id_{record.thread_id.value},
        queue_id_{record.queue_id.handle},
        agent_id_{record.agent_id.handle},
        correlation_id_{record.correlation_id.value} {}
  // Operation ID (`record.operation_id.id`).
  std::uint32_t GetOp() const noexcept { return op_; }
  // Thread ID (`record.thread_id.value`).
  std::uint32_t GetThreadId() const noexcept { return thread_id_; }
  // Queue ID (`record.queue_id.handle`).
  std::uint64_t GetQueueId() const noexcept { return queue_id_; }
  // Agent ID (`record.agent_id.handle`).
  std::uint64_t GetAgentId() const noexcept { return agent_id_; }
  // Correlation ID (`record.correlation_id.value`).
  std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
 private:
  // Values copied from the tracer record at construction time.
  std::uint32_t op_;
  std::uint32_t thread_id_;
  std::uint64_t queue_id_;
  std::uint64_t agent_id_;
  std::uint64_t correlation_id_;
};
// Returns the beginning clock value (`timestamps.begin.value`) of the
// tracer or profiler record `record`.
template <typename RecordT> std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  const auto& begin_ts = record.timestamps.begin;

  return begin_ts.value;
}
// Returns the end clock value (`timestamps.end.value`) of the tracer
// or profiler record `record`.
template <typename RecordT> std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  const auto& end_ts = record.timestamps.end;

  return end_ts.value;
}
// Queries allocated string data using the size query function
// `query_size_func` and the data query function `query_data_func`,
// returning the corresponding string and freeing temporary allocated
// memory.
//
// `query_size_func` is called with the address of an `std::size_t`
// value to set; `query_data_func` is called with the address of a
// `char *` value to set to a null-terminated string which this function
// releases with std::free(). Both must return
// `ROCPROFILER_STATUS_SUCCESS` on success.
//
// Returns an empty string if anything goes wrong: a query which fails,
// a size of zero, or no data.
template <typename QuerySizeFuncT, typename QueryDataFuncT>
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
  // Query size first.
  //
  // The status is checked at run time (not only with an assertion,
  // which is compiled out with `NDEBUG`) to honour the "empty string if
  // anything goes wrong" contract in all build types.
  std::size_t size = 0;

  if (query_size_func(&size) != ROCPROFILER_STATUS_SUCCESS || size == 0) {
    // Failure or no size: return empty string.
    return {};
  }

  // Query data (allocated by query_data_func()).
  char* alloc_str = nullptr;
  const auto data_status = query_data_func(&alloc_str);

  // Take ownership immediately so that the allocated data is freed on
  // every path, including when creating the returned string throws.
  // The deleter isn't called for a null pointer.
  const std::unique_ptr<char, decltype(&std::free)> alloc_str_owner{alloc_str, &std::free};

  if (data_status != ROCPROFILER_STATUS_SUCCESS || !alloc_str_owner) {
    // Failure or no data: return empty string.
    return {};
  }

  // Copy to the returned string object.
  return std::string{alloc_str_owner.get()};
}
// rocTX event record.
//
// Its clock value is the beginning clock value of the tracer record.
// The rocTX ID and message are queried once, at construction time.
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
 public:
  explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
                            const rocprofiler_session_id_t session_id)
      : TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
        id_{QueryId(record, session_id)},
        msg_{QueryMsg(record, session_id)} {}

  // Writes this event record (thread ID, rocTX ID, and message) with
  // the `roctx` barectf tracing function.
  void Write(barectf_roctx_ctx& barectf_ctx) const override {
    const char* const msg = msg_.c_str();

    barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg);
  }

 private:
  // Queries the rocTX message of the record `record` and session ID
  // `session_id`, and returns what rocmtools::cxx_demangle() makes of
  // it.
  //
  // Returns an empty string if the message has no size or no data.
  static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
                              const rocprofiler_session_id_t session_id) {
    // Size first.
    std::size_t msg_size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
        &msg_size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");

    if (msg_size != 0) {
      // Then data (borrowed from the record: no need to free).
      char* msg = nullptr;

      ret = rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
      assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");

      if (msg) {
        return rocmtools::cxx_demangle(msg);
      }
    }

    // No size or no data.
    return {};
  }

  // Queries the rocTX ID of the record `record` and the session ID
  // `session_id` as a string (with QueryAllocStr()), and returns it
  // converted to an integer.
  //
  // Returns 0 if anything goes wrong (any exception, including a
  // failing string-to-integer conversion).
  static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
                               const rocprofiler_session_id_t session_id) {
    const auto query_size = [&record, session_id](const auto size) {
      return rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
    };

    const auto query_data = [&record, session_id](const auto str) {
      return rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
    };

    try {
      const std::string id_str = QueryAllocStr(query_size, query_data);

      return std::stoull(id_str);
    } catch (...) {
      return 0;
    }
  }

  // rocTX ID.
  std::uint64_t id_;

  // rocTX message.
  std::string msg_;
};
// Common base of the HSA API event records.
//
// Copies the HSA API data of the tracer record at construction time so
// that derived classes can read it from Write() through GetApiData().
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
 protected:
  // `clock_val` is the event time chosen by the derived class.
  explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)} {}

  // HSA API data copied from the record.
  const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }

 private:
  // Returns a reference to the HSA API data of `record` for the session
  // `session_id`.
  //
  // The referenced data is borrowed from the record; the constructor
  // copies it into `api_data_`. Failures are only caught by assertions.
  static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // The size itself isn't needed: it's queried to validate it in
    // builds with assertions enabled.
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
    assert(size > 0);

    // Raw pointer to the data.
    char* data = nullptr;
    ret = rocprofiler_query_hsa_tracer_api_data_info(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
    assert(data);

    // View the raw bytes as HSA API data.
    const auto* const api_data = reinterpret_cast<const hsa_api_data_t*>(data);

    return *api_data;
  }

  hsa_api_data_t api_data_;
};
// HSA API event record (beginning).
//
// Uses the beginning clock value of the tracer record as its event time.
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
public:
// Builds the event record from `record` and the session ID
// `session_id`.
explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
// Writes the event through `barectf_ctx`.
//
// The whole body is the generated file below, textually included: it
// is compiled in the scope of this method, so the parameter name
// `barectf_ctx` and the inherited accessors must stay as they are.
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_begin.cpp.i"
}
};
// HSA API event record (end).
//
// Uses the end clock value of the tracer record as its event time.
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
public:
// Builds the event record from `record` and the session ID
// `session_id`.
explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
// Writes the event through `barectf_ctx`.
//
// The whole body is the generated file below, textually included: it
// is compiled in the scope of this method, so the parameter name
// `barectf_ctx` and the inherited accessors must stay as they are.
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_end.cpp.i"
}
};
// Common base of the HIP API event records.
//
// Copies the HIP API data and the kernel name of the tracer record at
// construction time; derived classes read them back through
// GetApiData() and GetKernelName().
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
 protected:
  // `clock_val` is the event time chosen by the derived class.
  explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)},
        kernel_name_{QueryKernelName(record, session_id)} {}

  // HIP API data copied from the record.
  const hip_api_data_t& GetApiData() const noexcept { return api_data_; }

  // Kernel name (possibly empty).
  const std::string& GetKernelName() const noexcept { return kernel_name_; }

 private:
  // Returns a reference to the HIP API data of `record` for the session
  // `session_id`.
  //
  // The referenced data is borrowed from the record; the constructor
  // copies it into `api_data_`. Failures are only caught by assertions.
  static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // The size itself isn't needed: it's queried to validate it in
    // builds with assertions enabled.
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
    assert(size > 0);

    // Raw pointer to the data.
    char* data = nullptr;
    ret = rocprofiler_query_hip_tracer_api_data_info(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
    assert(data);

    // View the raw bytes as HIP API data.
    const auto* const api_data = reinterpret_cast<const hip_api_data_t*>(data);

    return *api_data;
  }

  // Returns the kernel name of `record` for the session `session_id`.
  //
  // A name longer than one character is passed through
  // rocmtools::cxx_demangle(); a shorter one (including the empty
  // string) is returned as is.
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
                                     const rocprofiler_session_id_t session_id) {
    const auto query_size = [&record, session_id](const auto size) {
      return rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
          size);
    };
    const auto query_data = [&record, session_id](const auto str) {
      return rocprofiler_query_hip_tracer_api_data_info(session_id, ROCPROFILER_HIP_KERNEL_NAME,
                                                        record.api_data_handle,
                                                        record.operation_id, str);
    };
    const auto kernel_name = QueryAllocStr(query_size, query_data);

    if (kernel_name.size() <= 1) {
      // Nothing worth demangling.
      return kernel_name;
    }

    return rocmtools::cxx_demangle(kernel_name);
  }

  hip_api_data_t api_data_;
  std::string kernel_name_;
};
// HIP API event record (beginning).
//
// Uses the beginning clock value of the tracer record as its event time.
class HipApiEventRecordBegin final : public HipApiEventRecord {
public:
// Builds the event record from `record` and the session ID
// `session_id`.
explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
// Writes the event through `barectf_ctx`.
//
// The whole body is the generated file below, textually included: it
// is compiled in the scope of this method, so the parameter name
// `barectf_ctx` and the inherited accessors must stay as they are.
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_begin.cpp.i"
}
};
// HIP API event record (end).
//
// Uses the end clock value of the tracer record as its event time.
class HipApiEventRecordEnd final : public HipApiEventRecord {
public:
// Builds the event record from `record` and the session ID
// `session_id`.
explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
// Writes the event through `barectf_ctx`.
//
// The whole body is the generated file below, textually included: it
// is compiled in the scope of this method, so the parameter name
// `barectf_ctx` and the inherited accessors must stay as they are.
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_end.cpp.i"
}
};
// Event record which associates an HSA handle with a device type.
//
// Such a record is always built with a clock value of 0.
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
 public:
  // Device type; the numeric values are what Write() emits.
  enum class Type {
    CPU = 0,
    GPU = 1,
  };

  // Builds an event record mapping `handle` to `type`.
  explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
      : BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}

  // Emits one `hsa_handle_type` event with the handle and the type as
  // an 8-bit unsigned integer.
  void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
    const auto type_code = static_cast<std::uint8_t>(type_);

    barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_, type_code);
  }

 private:
  std::uint64_t handle_;
  Type type_;
};
// Abstract API operation event record.
//
// Common base of the HSA and HIP operation event records below, which
// all write to the same barectf context type (`barectf_api_ops_ctx`).
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
protected:
// Forwards `record` and the event time `clock_val` to the base class.
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
};
// Event record marking the beginning of an HSA operation.
//
// The event time is the beginning clock value of the tracer record.
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
 public:
  explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
      : ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}

  // Emits one `hsa_op_begin` event (thread, queue, agent, and
  // correlation IDs).
  void Write(barectf_api_ops_ctx& barectf_ctx) const override {
    auto* const ctx = &barectf_ctx;

    barectf_api_ops_trace_hsa_op_begin(ctx, GetThreadId(), GetQueueId(), GetAgentId(),
                                       GetCorrelationId());
  }
};
// Event record marking the end of an HSA operation.
//
// The event time is the end clock value of the tracer record.
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
 public:
  explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
      : ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}

  // Emits one `hsa_op_end` event (thread, queue, agent, and correlation
  // IDs).
  void Write(barectf_api_ops_ctx& barectf_ctx) const override {
    auto* const ctx = &barectf_ctx;

    barectf_api_ops_trace_hsa_op_end(ctx, GetThreadId(), GetQueueId(), GetAgentId(),
                                     GetCorrelationId());
  }
};
// Event record marking the beginning of a HIP operation.
//
// The event time is the beginning clock value of the tracer record. The
// kernel name, when there's one, is extracted at construction time.
class HipOpEventRecordBegin final : public ApiOpEventRecord {
 public:
  explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
      : ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
        kernel_name_{QueryKernelName(record)} {}

  // Emits one `hip_op_begin` event (thread, queue, agent, and
  // correlation IDs, followed by the kernel name).
  void Write(barectf_api_ops_ctx& barectf_ctx) const override {
    auto* const ctx = &barectf_ctx;

    barectf_api_ops_trace_hip_op_begin(ctx, GetThreadId(), GetQueueId(), GetAgentId(),
                                       GetCorrelationId(), kernel_name_.c_str());
  }

 private:
  // Returns the demangled kernel name of `record`.
  //
  // A name is only looked for when the operation ID is 0 and the API
  // data handle is set; the handle is then read as a C string. Returns
  // an empty string in any other case, or when that string is at most
  // one character long.
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
    if (record.operation_id.id != 0) {
      return {};
    }

    const auto api_handle = record.api_data_handle.handle;

    if (!api_handle) {
      return {};
    }

    const auto name = reinterpret_cast<const char*>(api_handle);

    if (std::strlen(name) <= 1) {
      return {};
    }

    return rocmtools::cxx_demangle(name);
  }

  std::string kernel_name_;
};
// Event record marking the end of a HIP operation.
//
// The event time is the end clock value of the tracer record.
class HipOpEventRecordEnd final : public ApiOpEventRecord {
 public:
  explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
      : ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}

  // Emits one `hip_op_end` event (thread, queue, agent, and correlation
  // IDs).
  void Write(barectf_api_ops_ctx& barectf_ctx) const override {
    auto* const ctx = &barectf_ctx;

    barectf_api_ops_trace_hip_op_end(ctx, GetThreadId(), GetQueueId(), GetAgentId(),
                                     GetCorrelationId());
  }
};
// Profiler record base.
//
// Copies everything Write() needs out of a profiler record at
// construction time: dispatch/GPU/queue/kernel identifiers, process and
// thread IDs, the kernel name, and the counter names and values.
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
 public:
  // Builds the event record from `record` and the session ID
  // `session_id`, using the beginning clock value of `record` as the
  // event time.
  explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
                               const rocprofiler_session_id_t session_id)
      : BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
        dispatch_{record.header.id.handle},
        gpu_id_{record.gpu_id.handle},
        queue_id_{record.queue_id.handle},
        queue_index_{record.queue_idx.value},
        process_id_{GetPid()},
        thread_id_{record.thread_id.value},
        kernel_id_{record.kernel_id.handle},
        kernel_name_{QueryKernelName(record)},
        counter_infos_{QueryCounterInfos(record, session_id)} {}

  // Emits one `profiler_record` event.
  void Write(barectf_profiler_ctx& barectf_ctx) const override {
    barectf_profiler_trace_profiler_record(
        &barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
        kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
        counter_infos_.values.size(), counter_infos_.values.data());
  }

 protected:
  // Counter infos.
  //
  // `names[i]` names the counter value `values[i]`.
  struct CounterInfos final {
    // `names_storage` owns the strings while the elements of `names`
    // point to the internal C strings of `names_storage`.
    //
    // This is needed because barectf expects an array of contiguous
    // C string pointers.
    //
    // Because of those internal pointers, `names` is only valid as long
    // as `names_storage` is neither modified nor copied elsewhere:
    // a copy of this structure would still point into the original.
    std::vector<std::string> names_storage;
    std::vector<const char*> names;

    // Counter values.
    std::vector<std::uint64_t> values;
  };

  std::uint64_t GetDispatch() const noexcept { return dispatch_; }
  std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
  std::uint64_t GetQueueId() const noexcept { return queue_id_; }
  std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
  std::uint32_t GetProcessId() const noexcept { return process_id_; }
  std::uint32_t GetThreadId() const noexcept { return thread_id_; }
  std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
  const std::string& GetKernelName() const noexcept { return kernel_name_; }
  const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }

 private:
  // Queries and returns the kernel name of the record `record`.
  //
  // Returns an empty string if the queried name is at most one
  // character long; otherwise returns the demangled, then truncated
  // name.
  static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
    const auto kernel_name = QueryAllocStr(
        [&record](const auto size) {
          return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
        },
        [&record](const auto str) {
          return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
                                               const_cast<const char**>(str));
        });

    if (kernel_name.size() <= 1) {
      return {};
    }

    // Return truncated and demangled version.
    return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
  }

  // Queries and returns the counter infos of the record `record` and
  // session ID `session_id`.
  //
  // Counters without a handle, with a name size of zero, or without a
  // name are skipped. The returned `names` and `values` always have the
  // same size.
  static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
                                        const rocprofiler_session_id_t session_id) {
    if (!record.counters) {
      // No counters.
      return {};
    }

    CounterInfos infos;

    for (std::size_t i = 0; i < record.counters_count.value; ++i) {
      auto& counter = record.counters[i];

      if (counter.counter_handler.handle == 0) {
        // Not available: continue.
        continue;
      }

      // Query counter name size first
      std::size_t counter_name_size = 0;
      [[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
          session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
      assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");

      if (counter_name_size == 0) {
        // No size: continue.
        continue;
      }

      // Query counter name (borrowed from `record`: no need to free).
      const char* counter_name = nullptr;
      ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                           counter.counter_handler, &counter_name);
      assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");

      if (!counter_name) {
        // Not available: continue.
        continue;
      }

      // Own a copy of the name and keep the value. The C string
      // pointers are NOT taken here: see below.
      infos.names_storage.emplace_back(counter_name);
      infos.values.push_back(counter.value.value);
    }

    // Take the C string pointers only now that `names_storage` is
    // complete.
    //
    // Taking them while still appending is unsafe: when the vector
    // reallocates, its strings are moved to new objects, and the
    // characters of a short string live inside the string object
    // itself, so pointers obtained before the reallocation would
    // dangle.
    infos.names.reserve(infos.names_storage.size());

    for (const auto& name : infos.names_storage) {
      infos.names.push_back(name.c_str());
    }

    return infos;
  }

  std::uint64_t dispatch_;
  std::uint64_t gpu_id_;
  std::uint64_t queue_id_;
  std::uint64_t queue_index_;
  std::uint32_t process_id_;
  std::uint32_t thread_id_;
  std::uint64_t kernel_id_;
  std::string kernel_name_;
  CounterInfos counter_infos_;
};
// Profiler event record with kernel properties.
//
// Extends the base profiler event record with the kernel properties of
// the profiler record (grid/workgroup sizes, LDS and scratch sizes,
// register counts, wave size, and signal handle).
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
 private:
  // According to `plugin/file/file.cpp`:
  //
  // > Taken from rocprofiler: The size hasn't changed in recent past
  //
  // This is a 64-bit value on purpose: the rounding mask below is
  // `~(lds_block_size_ - 1)`, and with a 32-bit constant that mask
  // would be zero-extended when combined with a 64-bit LDS size,
  // clearing its upper 32 bits.
  static constexpr std::uint64_t lds_block_size_ = 128 * 4;

 public:
  // Builds the event record from `record` and the session ID
  // `session_id`.
  //
  // The LDS size is rounded up to the next multiple of
  // `lds_block_size_`; the other properties are copied as is.
  explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
                                              const rocprofiler_session_id_t session_id)
      : ProfilerEventRecord{record, session_id},
        grid_size_{record.kernel_properties.grid_size},
        workgroup_size_{record.kernel_properties.workgroup_size},
        lds_size_{
            ((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
        scratch_size_{record.kernel_properties.scratch_size},
        arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
        accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
        sgpr_count_{record.kernel_properties.sgpr_count},
        wave_size_{record.kernel_properties.wave_size},
        signal_handle_{record.kernel_properties.signal_handle} {}

  // Emits one `profiler_record_with_kernel_properties` event: the
  // fields of the base record followed by the kernel properties.
  void Write(barectf_profiler_ctx& barectf_ctx) const override {
    barectf_profiler_trace_profiler_record_with_kernel_properties(
        &barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
        GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
        GetCounterInfos().names.data(), GetCounterInfos().values.size(),
        GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
        arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
  }

 private:
  std::uint64_t grid_size_;
  std::uint64_t workgroup_size_;
  std::uint64_t lds_size_;
  std::uint64_t scratch_size_;
  std::uint64_t arch_vgpr_count_;
  std::uint64_t accum_vgpr_count_;
  std::uint64_t sgpr_count_;
  std::uint64_t wave_size_;
  std::uint64_t signal_handle_;
};
} // namespace
// Builds a plugin writing a CTF trace to the `trace_dir` directory.
//
// After the tracers are built, this constructor, in order:
//
// 1. Checks that `trace_dir` doesn't exist and that
//    `metadata_stream_path` exists.
// 2. Creates `trace_dir`.
// 3. Copies the adjusted metadata stream file to `trace_dir`.
// 4. Writes the HSA handle type event records.
//
// Throws `std::runtime_error` if any of steps 1 to 3 fails.
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
               const fs::path& metadata_stream_path)
    : roctx_tracer_{packet_size, trace_dir, "roctx_"},
      hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
      hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
      api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
      hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
      profiler_tracer_{packet_size, trace_dir, "profiler_"} {
  // Printable versions of both paths for the error messages.
  const auto trace_dir_str = trace_dir.string();
  const auto metadata_path_str = metadata_stream_path.string();

  // The trace directory must not exist yet.
  if (fs::exists(trace_dir)) {
    throw std::runtime_error{"CTF trace directory `" + trace_dir_str + "` already exists"};
  }

  // The metadata stream file to adjust must exist.
  if (!fs::exists(metadata_stream_path)) {
    throw std::runtime_error{"CTF metadata stream file `" + metadata_path_str +
                             "` doesn't exist"};
  }

  // Create the trace directory.
  if (!fs::create_directory(trace_dir)) {
    throw std::runtime_error{"Cannot create the CTF trace directory `" + trace_dir_str + "`"};
  }

  // Adjust the metadata stream file and copy it to the trace directory,
  // adding context to any reported error.
  try {
    CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
  } catch (const std::exception& exc) {
    throw std::runtime_error{"Cannot adjust and copy metadata stream file `" + metadata_path_str +
                             "` to the CTF trace directory `" + trace_dir_str +
                             "`: " + exc.what()};
  }

  // Write HSA handle type event records.
  WriteHsaHandleTypes();
}
// Handles the tracer record `record` of the session `session_id`.
//
// Holds `lock_` for the whole call. Depending on the domain of the
// record, builds the matching event record(s) and adds them to the
// dedicated tracer:
//
// * rocTX: a single event record.
// * HSA API, HIP API, HSA operations, HIP operations: a beginning event
//   record followed by an end event record.
//
// A record of any other domain is ignored after printing a warning to
// the standard error.
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock{lock_};
  const auto domain = record.domain;

  if (domain == ACTIVITY_DOMAIN_ROCTX) {
    roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
  } else if (domain == ACTIVITY_DOMAIN_HSA_API) {
    hsa_api_tracer_.AddEventRecord(
        std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
    hsa_api_tracer_.AddEventRecord(
        std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
  } else if (domain == ACTIVITY_DOMAIN_HIP_API) {
    hip_api_tracer_.AddEventRecord(
        std::make_shared<const HipApiEventRecordBegin>(record, session_id));
    hip_api_tracer_.AddEventRecord(
        std::make_shared<const HipApiEventRecordEnd>(record, session_id));
  } else if (domain == ACTIVITY_DOMAIN_HSA_OPS) {
    api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
    api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
  } else if (domain == ACTIVITY_DOMAIN_HIP_OPS) {
    api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
    api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
  } else {
    // Unknown domain: warn and ignore.
    std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
              << "ignoring record for unknown domain #" << domain << std::endl;
  }
}
// Handles the profiler record `record` of the session `session_id`.
//
// Holds `lock_` for the whole call, builds a profiler event record with
// kernel properties, and adds it to the profiler tracer.
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
                                  const rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock{lock_};
  auto event_record =
      std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id);

  profiler_tracer_.AddEventRecord(std::move(event_record));
}
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* const end,
const rocprofiler_session_id_t session_id,
const rocprofiler_buffer_id_t buffer_id) {
while (begin && begin < end) {
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
} else {
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
session_id);
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
}
// Adds one HSA handle type event record per HSA agent to the HSA
// handles tracer.
//
// An agent of which the device type is `HSA_DEVICE_TYPE_CPU` is
// recorded as a CPU; any other agent is recorded as a GPU. Iteration
// stops with an error status as soon as the device type of an agent
// cannot be queried; the final status is only checked by an assertion.
void Plugin::WriteHsaHandleTypes() {
  // Called for each agent; `user_data` is the address of the HSA
  // handles tracer.
  const auto visit_agent = [](const auto agent, const auto user_data) {
    hsa_device_type_t type;

    if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
      return HSA_STATUS_ERROR;
    }

    using Type = HsaHandleTypeEventRecord::Type;

    const auto handle_type = (type == HSA_DEVICE_TYPE_CPU) ? Type::CPU : Type::GPU;
    auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
    auto event_record = std::make_shared<HsaHandleTypeEventRecord>(agent.handle, handle_type);

    tracer.AddEventRecord(std::move(event_record));
    return HSA_STATUS_SUCCESS;
  };

  [[maybe_unused]] const auto status = hsa_iterate_agents(visit_agent, &hsa_handles_tracer_);

  assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
}
namespace {
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
// Samples the ROCMTools clock and returns the value.
//
// A failure to get the timestamp is only caught by an assertion. The
// timestamp is value-initialized so that, without assertions, a failed
// query which leaves it untouched makes this function return a
// well-defined value instead of reading an uninitialized object.
std::uint64_t GetClkVal() {
  rocprofiler_timestamp_t ts{};
  [[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);

  assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
  return ts.value;
}
// Performs one measurement of the clock class offset and, if it's at
// least as tight as the best one so far, stores it.
//
// One measurement is: ROCMTools clock sample, real-time clock sample,
// ROCMTools clock sample. The difference between both ROCMTools samples
// is the measurement's delta. When that delta is greater than `delta`,
// nothing changes. Otherwise `offset` becomes the real-time value (in
// nanoseconds) minus the average of both ROCMTools samples, and `delta`
// becomes the measurement's delta.
//
// This strategy is based on the measure_single_clock_offset() function
// of the LTTng-tools project <https://lttng.org/>.
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
  // ROCMTools sample, real-time sample, ROCMTools sample.
  const auto rocm_before = GetClkVal();
  timespec realtime_spec = {0, 0};
  [[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
  assert(ret == 0);
  const auto rocm_after = GetClkVal();

  // How long the whole measurement took on the ROCMTools clock.
  const auto this_delta = rocm_after - rocm_before;

  if (this_delta <= delta) {
    // Good enough measurement: keep it.
    const auto rocm_clk_val_avg = (rocm_before + rocm_after) / 2;
    const auto realtime_ns =
        (static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;

    assert(rocm_clk_val_avg < realtime_ns);
    offset = realtime_ns - rocm_clk_val_avg;
    delta = this_delta;
  }
}
// Returns the clock class offset to write to the metadata stream.
//
// Performs a fixed number of measurements and returns the offset of the
// one which had the smallest ROCMTools clock delta (best effort).
std::uint64_t GetMetadataClkClsOffset() {
  constexpr auto attempt_count = 50U;
  std::uint64_t best_offset = 0;
  std::uint64_t smallest_delta = std::numeric_limits<std::uint64_t>::max();

  for (auto remaining = attempt_count; remaining > 0U; --remaining) {
    UpdateClkClsOffsetAndDelta(best_offset, smallest_delta);
  }

  return best_offset;
}
} // namespace
// Loads the metadata stream file `metadata_stream_path`, replaces its
// first `offset = 0;` with the measured clock class offset, and writes
// the result to the `metadata` file within the `trace_dir` directory.
//
// Throws `std::runtime_error` if the input file cannot be opened, if it
// doesn't contain `offset = 0;`, or if the output file cannot be
// written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents.
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"cannot open the metadata stream file for reading"};
    }

    // The file is text: reading up to a null character reads it all.
    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    static constexpr auto offset_term = "offset = 0;";
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() below would throw an opaque
      // `std::out_of_range`.
      throw std::runtime_error{"cannot find `offset = 0;` in the metadata stream file"};
    }

    std::ostringstream ss;

    ss << "offset = " << GetMetadataClkClsOffset() << ';';
    metadata.replace(offset_pos, std::strlen(offset_term), ss.str());
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    output.write(metadata.data(), metadata.size());

    // Flush before checking so that a failure to actually write the
    // data is reported instead of being lost when `output` is
    // destroyed.
    output.flush();

    if (!output) {
      throw std::runtime_error{"cannot write the adjusted metadata stream file"};
    }
  }
}
} // namespace rocm_ctf
@@ -0,0 +1,146 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_PLUGIN_H
#define PLUGIN_CTF_PLUGIN_H
#include <mutex>
#include <cstdlib>
#include <experimental/filesystem>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "barectf.h"
#include "barectf_tracer.h"
namespace rocm_ctf {
// CTF plugin.
//
// Build a plugin instance, and then call HandleTracerRecord(),
// HandleProfilerRecord(), and HandleBufferRecords() to add event
// records.
//
// A plugin instance performs important tasks at destruction time.
class Plugin final {
public:
// Builds a plugin instance to write a CTF trace in the `trace_dir`
// directory with packets of size `packet_size` bytes.
//
// `trace_dir` must not exist.
//
// This constructor immediately adjusts and copies the metadata stream
// file `metadata_stream_path` to the trace directory (`trace_dir`).
explicit Plugin(std::size_t packet_size, const std::experimental::filesystem::path& trace_dir,
const std::experimental::filesystem::path& metadata_stream_path);
// Handles a tracer record.
//
// `record` is the tracer record to handle and `session_id` is the ID
// of the session used to query the data attached to it.
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
rocprofiler_session_id_t session_id);
// Handles a profiler record.
//
// `record` is the profiler record to handle and `session_id` is the
// ID of the session used to query the data attached to it.
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
rocprofiler_session_id_t session_id);
// Handles tracer or profiler records from `begin` to `end`
// (excluded).
//
// `session_id` and `buffer_id` identify the session and the buffer
// which the records belong to.
void HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id);
private:
// The platform descriptors below all have the same shape: `Ctx` is
// the barectf context type of one data stream type, while
// OpenPacket() and ClosePacket() forward to the barectf functions
// which open and close a packet for that context type.
//
// rocTX barectf platform descriptor.
struct RocTxPlatformDescr final {
using Ctx = barectf_roctx_ctx;
static void OpenPacket(Ctx& ctx) { barectf_roctx_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_roctx_close_packet(&ctx); }
};
// HSA API barectf platform descriptor.
struct HsaApiPlatformDescr final {
using Ctx = barectf_hsa_api_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hsa_api_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hsa_api_close_packet(&ctx); }
};
// HIP API barectf platform descriptor.
struct HipApiPlatformDescr final {
using Ctx = barectf_hip_api_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hip_api_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hip_api_close_packet(&ctx); }
};
// HSA handles barectf platform descriptor.
struct HsaHandlesPlatformDescr final {
using Ctx = barectf_hsa_handles_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hsa_handles_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hsa_handles_close_packet(&ctx); }
};
// API operations barectf platform descriptor.
struct ApiOpsPlatformDescr final {
using Ctx = barectf_api_ops_ctx;
static void OpenPacket(Ctx& ctx) { barectf_api_ops_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_api_ops_close_packet(&ctx); }
};
// Profiler barectf platform descriptor.
struct ProfilerPlatformDescr final {
using Ctx = barectf_profiler_ctx;
static void OpenPacket(Ctx& ctx) { barectf_profiler_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_profiler_close_packet(&ctx); }
};
// barectf tracer for HSA handle mappings.
using HsaHandlesTracer = BarectfTracer<HsaHandlesPlatformDescr>;
// Writes the HSA handle type mappings to a dedicated data stream
// file.
void WriteHsaHandleTypes();
// Loads the existing metadata stream file `metadata_stream_path`,
// adjusts the `offset` property of its single clock class, and writes
// the result to the `metadata` file within the `trace_dir` directory.
void CopyAdjustedMetadataStreamFile(
const std::experimental::filesystem::path& metadata_stream_path,
const std::experimental::filesystem::path& trace_dir);
// Dedicated tracers.
//
// Keep this order in sync with the member initializer list of the
// constructor: members are initialized in declaration order.
BarectfTracer<RocTxPlatformDescr> roctx_tracer_;
BarectfTracer<HsaApiPlatformDescr> hsa_api_tracer_;
BarectfTracer<HipApiPlatformDescr> hip_api_tracer_;
BarectfTracer<ApiOpsPlatformDescr> api_ops_tracer_;
HsaHandlesTracer hsa_handles_tracer_;
BarectfTracer<ProfilerPlatformDescr> profiler_tracer_;
// Locks any operation performed on the data of this object
// (HandleTracerRecord() and HandleProfilerRecord() hold it while
// adding event records).
std::mutex lock_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_PLUGIN_H
@@ -0,0 +1,7 @@
{
global: rocprofiler_plugin_initialize;
rocprofiler_plugin_finalize;
rocprofiler_plugin_write_buffer_records;
rocprofiler_plugin_write_record;
local: *;
};
@@ -0,0 +1,44 @@
# ###############################################################################
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
# #
# # Permission is hereby granted, free of charge, to any person obtaining a copy
# # of this software and associated documentation files (the "Software"), to
# # deal in the Software without restriction, including without limitation the
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# # sell copies of the Software, and to permit persons to whom the Software is
# # furnished to do so, subject to the following conditions:
# #
# # The above copyright notice and this permission notice shall be included in
# # all copies or substantial portions of the Software.
# #
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# # IN THE SOFTWARE.
# ###############################################################################
# Builds and installs the `file_plugin` shared library.
#
# The library is made of every `*.cpp` file of this directory plus the shared
# `helper.cpp` utility source. Only the symbols listed in `../exportmap` are
# exported.

# `helper.cpp` is a single, known file: name it directly so that CMake reports
# an error at configure time if it's missing (a glob would silently expand to
# nothing and only fail later, at link time).
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")

# CONFIGURE_DEPENDS makes the build system re-run the glob so that added or
# removed source files are noticed without a manual reconfiguration.
file(GLOB FILE_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})

# Hide symbols by default and relink whenever the export map changes.
set_target_properties(file_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

target_compile_definitions(file_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)

target_include_directories(file_plugin
  PRIVATE "${PROJECT_SOURCE_DIR}/inc" "${PROJECT_SOURCE_DIR}")

# `LINKER:` lets CMake add the prefix which the compiler driver expects to pass
# an option to the linker (`-Wl,` for GCC and Clang).
target_link_options(file_plugin PRIVATE
  "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
  "LINKER:--no-undefined")

# ${CMAKE_DL_LIBS} is the portable name of the library providing dlopen().
target_link_libraries(file_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          hsa-runtime64::hsa-runtime64
          systemd
          stdc++fs
          amd_comgr
          ${CMAKE_DL_LIBS})

install(TARGETS file_plugin LIBRARY
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
  COMPONENT runtime)
@@ -0,0 +1,472 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cxxabi.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <hsa/hsa.h>
#include <mutex>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
namespace fs = std::experimental::filesystem;
namespace {
static std::string output_file_name;
class file_plugin_t {
private:
// Category of record being written. get_output_file() uses it to pick the
// destination: COUNTER selects the results file, TRACER selects a per-domain
// trace file, and PC_SAMPLING selects the PC sampling trace file.
enum class output_type_t {
COUNTER,
TRACER,
PC_SAMPLING
};
class output_file_t {
public:
output_file_t(std::string name) : name_(std::move(name)) {}
std::string name() const { return name_; }
template <typename T> std::ostream& operator<<(T&& value) {
if (!is_open()) open();
return stream_ << std::forward<T>(value);
}
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
if (!is_open()) open();
return stream_ << func;
}
void open() {
// If the stream is already in the failed state, there's no need to try
// to open the file.
if (fail()) return;
const char* output_dir = getenv("OUTPUT_PATH");
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) + "_" : "";
if (output_dir == nullptr) {
stream_.copyfmt(std::cout);
stream_.clear(std::cout.rdstate());
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
return;
}
fs::path output_prefix(output_dir);
if (!fs::is_directory(fs::status(output_prefix))) {
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
stream_.setstate(std::ios_base::failbit);
return;
}
std::stringstream ss;
ss << output_file_name << GetPid() << "_" << name_;
stream_.open(output_prefix / ss.str());
}
bool is_open() const { return stream_.is_open(); }
bool fail() const { return stream_.fail(); }
private:
const std::string name_;
std::ofstream stream_;
};
// Maps a record category (and, for tracer records, an activity domain) to the
// output file it is written to. Returns nullptr when no file matches; an
// unsupported tracer domain additionally trips an assert in debug builds.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  // Tracer records are split into one file per activity domain.
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      break;
  }
  assert(!"domain/op not supported!");
  return nullptr;
}
public:
file_plugin_t() {
output_file_t hsa_handles("hsa_handles.txt");
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
[](hsa_agent_t agent, void* user_data) {
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
hsa_device_type_t type;
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
return HSA_STATUS_ERROR;
*file << std::hex << std::showbase << agent.handle << " agent "
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
return HSA_STATUS_SUCCESS;
},
&hsa_handles);
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
if (hsa_handles.fail()) {
rocmtools::warning("Cannot write to '%s'", hsa_handles.name().c_str());
return;
}
// App begin timestamp begin_ts_file.txt
output_file_t begin_ts("begin_ts_file.txt");
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
if (begin_ts.fail()) {
rocmtools::warning("Cannot write to '%s'", begin_ts.name().c_str());
return;
}
valid_ = true;
}
std::mutex writing_lock;
// Returns the printable label for a tracer activity domain, or an empty
// string for a domain that has no label here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
// Writes one tracer record as a single text line to the trace file of its
// activity domain.
//
// Depending on the domain, extra details are queried from rocprofiler first:
//   - HIP_OPS (operation id 0): kernel name taken from api_data_handle.
//   - HSA_API: function name.
//   - HIP_API: function name and kernel name.
//   - ROCTX:   message and id.
// Records whose domain has no output file are dropped.
//
// tracer_record  the record to write (passed by value).
// session_id     session used for the rocprofiler_query_* calls.
// buffer_id      currently unused.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  // Only set for ROCTX records that carry an id. The previous plain uint64_t
  // was left uninitialized and then tested with ">= 0", so an indeterminate
  // "ROCTX ID" was printed for every record.
  std::optional<uint64_t> roctx_id;

  // NOTE(review): the buffers obtained from malloc() below are handed to the
  // query functions by address and (except for the ROCTX id) never freed.
  // Whether the API fills the buffer or replaces the pointer is not visible
  // here, so the allocation/ownership pattern is left as it was - confirm
  // against the rocprofiler API contract.

  if ((tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS)) {
    if (tracer_record.api_data_handle.handle &&
        strlen(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)) > 1)
      kernel_name = rocmtools::cxx_demangle(
          reinterpret_cast<const char*>(tracer_record.api_data_handle.handle));
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
    size_t kernel_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the characters itself; the former strdup() only
      // produced a duplicate that was never freed.
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }
    size_t roctx_id_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_str));
      if (roctx_id_str) {
        roctx_id = static_cast<uint64_t>(std::stoll(std::string(roctx_id_str)));
        free(roctx_id_str);
      }
    }
  }

  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  // get_output_file() yields nullptr for domains without a trace file;
  // dereferencing it would crash in builds where the assert is compiled out.
  if (output_file == nullptr) return;

  *output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
               << GetDomainName(tracer_record.domain) << "), Begin("
               << tracer_record.timestamps.begin.value << "), End("
               << tracer_record.timestamps.end.value << "), Correlation ID( "
               << tracer_record.correlation_id.value << ")";
  if (roctx_id.has_value()) *output_file << ", ROCTX ID(" << *roctx_id << ")";
  if (roctx_message.size() > 1) *output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) *output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) *output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  *output_file << std::endl;
}
// Writes one kernel-dispatch record to the counter results file: a header
// line with the dispatch properties, followed by one line per collected
// counter.
//
// profiler_record  record to write; must not be null.
// session_id       session used to resolve counter names.
// buffer_id        currently unused.
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
                         rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  output_file_t* output_file = get_output_file(output_type_t::COUNTER);

  size_t name_length = 0;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
                                                    profiler_record->kernel_id, &name_length));
  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;

  // NOTE(review): buffers obtained from malloc() in this function are passed
  // to the query API by address and never freed; ownership rules are not
  // visible here, so the pattern is left unchanged - confirm against the API.
  const char* kernel_name_c = nullptr;
  if (name_length > 1) {
    kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
                                                  &kernel_name_c));
  }

  *output_file << "dispatch[" << std::to_string(profiler_record->header.id.handle) << "], "
               << "gpu_id(" << std::to_string(profiler_record->gpu_id.handle) << "), "
               << "queue_id(" << std::to_string(profiler_record->queue_id.handle) << "), "
               << "queue_index(" << std::to_string(profiler_record->queue_idx.value) << "), "
               << "pid(" << std::to_string(GetPid()) << "), "
               << "tid(" << std::to_string(profiler_record->thread_id.value) << ")";

  // The LDS size is rounded up to a multiple of lds_block_size.
  *output_file << ", "
               << "grd(" << std::to_string(profiler_record->kernel_properties.grid_size) << "), "
               << "wgr(" << std::to_string(profiler_record->kernel_properties.workgroup_size)
               << "), "
               << "lds("
               << std::to_string(
                      ((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
                       ~(lds_block_size - 1)))
               << "), "
               << "scr(" << std::to_string(profiler_record->kernel_properties.scratch_size)
               << "), "
               << "arch_vgpr(" << std::to_string(profiler_record->kernel_properties.arch_vgpr_count)
               << "), "
               << "accum_vgpr("
               << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
               << "sgpr(" << std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
               << "wave_size(" << std::to_string(profiler_record->kernel_properties.wave_size)
               << "), "
               << "sig(" << std::to_string(profiler_record->kernel_properties.signal_handle);

  std::string kernel_name = "";
  // Guard against a null pointer coming back from the query: constructing a
  // std::string from nullptr is undefined behaviour.
  if (name_length > 1 && kernel_name_c != nullptr) {
    kernel_name = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
  }

  *output_file << "), "
               << "obj(" << std::to_string(profiler_record->kernel_id.handle) << "), "
               << "kernel-name(\"" << kernel_name << "\")"
               << ", start_time(" << std::to_string(profiler_record->timestamps.begin.value) << ")"
               << ", end_time(" << std::to_string(profiler_record->timestamps.end.value) << ")";
  *output_file << std::endl;

  // For Counters
  if (!profiler_record->counters) return;
  for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
    if (!(profiler_record->counters[i].counter_handler.handle > 0)) continue;

    size_t counter_name_length = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
        session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
        &counter_name_length));
    if (counter_name_length <= 1) continue;

    // Size the buffer with the counter name length. The kernel name length
    // was used here before, which is unrelated and may be too small (or 0).
    const char* name_c = static_cast<const char*>(malloc(counter_name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
        session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
        &name_c));
    // Streaming a null const char* is undefined behaviour; skip such counters.
    if (name_c == nullptr) continue;

    *output_file << ", " << name_c << " ("
                 << std::to_string(profiler_record->counters[i].value.value) << ")"
                 << std::endl;
  }
}
void FlushPCSamplingRecord(
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::PC_SAMPLING);
const auto &sample = pc_sampling_record->pc_sample;
*output_file << "dispatch[" << sample.dispatch_id.value << "], "
<< "timestamp(" << sample.timestamp.value << "), "
<< "gpu_id(" << sample.gpu_id.handle << "), "
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
<< "se(" << sample.se << ')'
<< std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
const rocprofiler_record_profiler_t* profiler_record =
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
FlushProfilerRecord(profiler_record, session_id, buffer_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
break;
}
case ROCPROFILER_PC_SAMPLING_RECORD: {
const rocprofiler_record_pc_sample_t *pc_sampling_record =
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default:
break;
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
// True once the constructor has written its start-up files successfully.
bool is_valid() const { return valid_; }
private:
// Set to true only at the very end of the constructor.
bool valid_{false};
// One lazily opened sink per tracer domain, plus the PC sampling trace and
// the counter results file. get_output_file() hands out pointers to these.
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
output_file_{"results.txt"};
};
file_plugin_t* file_plugin = nullptr;
} // namespace
// Plugin entry point: creates the single file_plugin_t instance.
//
// Returns 0 on success and -1 when the major version differs from
// ROCPROFILER_VERSION_MAJOR, the minor version is lower than
// ROCPROFILER_VERSION_MINOR, the plugin is already initialized, or the new
// instance reports itself as invalid.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
                          !(rocprofiler_minor_version < ROCPROFILER_VERSION_MINOR);
  if (!version_ok) return -1;

  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize; destroy it and report an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
// Plugin exit point: destroys the plugin instance if one exists and clears
// the global pointer. Calling it without an instance does nothing.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (file_plugin != nullptr) {
    delete file_plugin;
    file_plugin = nullptr;
  }
}
// Writes every record in [begin, end) through the plugin instance.
// Returns -1 when the plugin is missing or invalid, otherwise the result of
// file_plugin_t::WriteBufferRecords().
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
                                                               const rocprofiler_record_header_t* end,
                                                               rocprofiler_session_id_t session_id,
                                                               rocprofiler_buffer_id_t buffer_id) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  return usable ? file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id) : -1;
}
// Writes a single tracer record through the plugin instance.
// Returns -1 when the plugin is missing or invalid. A record whose header id
// handle is 0 is ignored; that case and a successful write both return 0.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  if (!usable) return -1;

  if (record.header.id.handle != 0) {
    file_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
@@ -0,0 +1,27 @@
# Shared helper source that is compiled directly into the plugin.
file(GLOB ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

# The plugin is built from its own source, the helper above and the Perfetto
# SDK source file.
add_library(
  perfetto_plugin ${LIBRARY_TYPE}
  ${ROCPROFILER_UTIL_SRC_FILES}
  perfetto.cpp
  perfetto_sdk/sdk/perfetto.cc)

# Symbols are hidden by default; a change to the export map triggers a relink.
set_target_properties(
  perfetto_plugin
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
             LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(
  perfetto_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_HCC__=1)

target_include_directories(
  perfetto_plugin
  PRIVATE ${PROJECT_SOURCE_DIR}/inc
          ${PROJECT_SOURCE_DIR}
          ${PROJECT_SOURCE_DIR}/plugin/perfetto/perfetto_sdk/sdk)

# Restrict exported symbols to those named in the shared export map and make
# unresolved symbols a link-time error.
target_link_options(
  perfetto_plugin
  PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
          -Wl,--no-undefined)

target_link_libraries(
  perfetto_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          Threads::Threads
          systemd
          stdc++fs
          amd_comgr)

# The plugin is installed into a project-named subdirectory of the library dir.
install(
  TARGETS perfetto_plugin
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
          COMPONENT runtime)
@@ -0,0 +1,804 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "rocprofiler.h"
#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <cstdlib>
#include <experimental/filesystem>
#include <fstream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <functional>
#include <iostream>
#include <cxxabi.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <systemd/sd-id128.h>
#include "perfetto_sdk/sdk/perfetto.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
#define STREAM_CONSTANT 98736677
#define QUEUE_CONSTANT 18746479
namespace fs = std::experimental::filesystem;
// Track-event categories used by this plugin. The same category names are
// enabled one by one in the perfetto_plugin_t constructor and passed to the
// TRACE_EVENT_* / TRACE_COUNTER macros below.
PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("GENERIC").SetDescription("GENERAL_CATEGORY"),
perfetto::Category("ROCTX_API").SetDescription("ACTIVITY_DOMAIN_ROCTX_API"),
perfetto::Category("HSA_API").SetDescription("ACTIVITY_DOMAIN_HSA_API"),
perfetto::Category("HIP_API").SetDescription("ACTIVITY_DOMAIN_HIP_API"),
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
// Static storage required by the category definitions above.
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
namespace {
std::string process_name;
static std::string output_file_name;
// Returns the demangled name of the kernel referenced by `profiler_record`,
// or an empty string when rocprofiler reports no usable name for it.
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
  std::string kernel_name = "";
  size_t name_length = 1;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                    &name_length));
  // The buffer below is read without having been written to in this function,
  // which trips these GCC diagnostics.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wstringop-overread"
  if (name_length > 1) {
    // NOTE(review): this buffer is handed to the query by address and never
    // freed; whether the API fills it or replaces the pointer is not visible
    // here, so the allocation is left as it was - confirm against the API.
    const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                  &kernel_name_c));
    // Pass the C string directly: the previous strdup() created a copy that
    // was never released, leaking one kernel name per dispatch.
    if (kernel_name_c && strlen(kernel_name_c) > 1)
      kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  }
#pragma GCC diagnostic pop
  return kernel_name;
}
class perfetto_plugin_t {
public:
// Sets up an in-process Perfetto tracing session that writes to
// "<OUTPUT_PATH>/<OUT_FILE_NAME>_<pid>_output.pftrace" (the prefix is empty
// when OUT_FILE_NAME is unset) and registers the process-level track.
// is_valid_ becomes true only when the whole sequence below has run; the
// early returns (OUTPUT_PATH unset or not a directory) leave it untouched.
perfetto_plugin_t() {
const char* output_dir = getenv("OUTPUT_PATH");
const char* temp_file_name = getenv("OUT_FILE_NAME");
output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";
if (output_dir == nullptr) {
// No output directory: point stream_ at std::cout's buffer and stop here,
// without starting a tracing session.
stream_.copyfmt(std::cout);
stream_.clear(std::cout.rdstate());
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
return;
}
output_prefix_ = output_dir;
if (!fs::is_directory(fs::status(output_prefix_))) {
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
stream_.setstate(std::ios_base::failbit);
return;
}
// Initialize the Perfetto SDK with the in-process backend and register the
// track-event data source.
perfetto::TracingInitArgs args;
args.backends |= perfetto::kInProcessBackend;
perfetto::Tracing::Initialize(args);
perfetto::TrackEvent::Register();
// Disable everything, then enable exactly the categories this plugin defines.
perfetto::protos::gen::TrackEventConfig track_event_cfg;
track_event_cfg.add_disabled_categories("*");
track_event_cfg.add_enabled_categories("GENERIC");
track_event_cfg.add_enabled_categories("ROCTX_API");
track_event_cfg.add_enabled_categories("HSA_API");
track_event_cfg.add_enabled_categories("HIP_API");
track_event_cfg.add_enabled_categories("External_API");
track_event_cfg.add_enabled_categories("HIP_OPS");
track_event_cfg.add_enabled_categories("HSA_OPS");
track_event_cfg.add_enabled_categories("KERNELS");
track_event_cfg.add_enabled_categories("COUNTERS");
perfetto::TraceConfig trace_cfg;
auto buffer_cfg = trace_cfg.add_buffers();
// The value is passed to set_size_kb() below, i.e. it is a size in KB.
uint32_t max_buffer_size = 10 * 1024 * 1024; // Default max buffer size is 10 GB
// A positive value in this environment variable overrides the default.
// NOTE(review): the mixed-case variable name looks unintentional - confirm.
const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
if (max_buffer_size_str && std::atol(max_buffer_size_str) > 0)
max_buffer_size = std::atol(max_buffer_size_str);
// Record up to max buffer size determined by user or the 10 GB (default value)
buffer_cfg->set_size_kb(max_buffer_size);
auto* data_source_cfg = trace_cfg.add_data_sources()->mutable_config();
data_source_cfg->set_name("track_event");
data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
// NOTE(review): a failed open() only warns; the session below is still set up
// with file_descriptor_ == -1 and the plugin is still marked valid - confirm
// this is intended.
if (file_descriptor_ == -1) rocmtools::warning("Can't open output file\n");
tracing_session_ = perfetto::Tracing::NewTrace();
tracing_session_->Setup(trace_cfg, file_descriptor_);
tracing_session_->StartBlocking();
// Terminate the buffer explicitly in case gethostname() truncates the name.
hostname_[1023] = '\0';
gethostname(hostname_, 1023);
// machine_id_ is a hash of the systemd machine id string; it is mixed into
// the track ids generated below.
sd_id128_t ret;
char machine_id[SD_ID128_STRING_MAX];
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
assert(status == 0 && "Error: Couldn't get machine id!");
if (sd_id128_to_string(ret, machine_id)) machine_id_ = std::hash<std::string>{}(machine_id);
{
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
process_name =
perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
// Draw ids from track_counter_ until one differs from the entry of
// track_ids_used_ currently being compared.
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
}
}
std::string thread_track_str =
rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
process_track_desc.mutable_process()->set_process_name(thread_track_str);
perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
process_track_desc);
// NOTE(review): this sets the uuid on a freshly serialized temporary
// descriptor that is not used afterwards - presumably it has no effect;
// verify.
perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
// The process track is stored in thread_tracks_ under the process id.
thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
}
is_valid_ = true;
}
// Stops the tracing session and closes the trace file, but only when the
// constructor completed (is_valid_ is set).
~perfetto_plugin_t() {
  if (!is_valid_) return;
  tracing_session_->StopBlocking();
  close(file_descriptor_);
}
// Returns the printable label for a tracer activity domain, or an empty
// string for a domain that has no label here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
std::mutex writing_lock;
// Emits one kernel dispatch into the Perfetto trace.
//
// The dispatch becomes a begin/end slice in the "KERNELS" category on a queue
// track nested under the track of its device; both tracks are created on
// first use. Every collected counter is emitted in the "COUNTERS" category on
// a per-counter track under the same device track.
//
// profiler_record  the dispatch record (passed by value).
// session_id       session used to resolve counter names.
// Always returns 0.
int FlushProfilerRecord(rocprofiler_record_profiler_t profiler_record,
                        rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  int device_id = profiler_record.gpu_id.handle;

  // Look up the device track, creating it on first use.
  std::unordered_map<int, perfetto::Track>::iterator device_track_it;
  {
    std::lock_guard<std::mutex> device_lock(device_tracks_lock_);
    device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      track_ids_used_.emplace_back(device_id + 1 + machine_id_);
    }
  }
  auto& gpu_track = device_track_it->second;

  // Look up the queue track, creating it on first use.
  // NOTE(review): the map is keyed by the device id only, so all queues of a
  // device share the track created for the first queue seen - confirm this is
  // intended.
  std::pair<int, uint64_t> gpu_queue_id =
      std::make_pair(device_id, profiler_record.queue_id.handle);
  decltype(queue_tracks_)::iterator queue_track_it;
  {
    std::lock_guard<std::mutex> queue_lock(queue_tracks_lock_);
    queue_track_it = queue_tracks_.find(gpu_queue_id.first);
    if (queue_track_it == queue_tracks_.end()) {
      /* Create a new perfetto::Track */
      queue_track_it = queue_tracks_
                           .emplace(gpu_queue_id.first,
                                    perfetto::Track((profiler_record.queue_id.handle + 1 +
                                                     profiler_record.gpu_id.handle) *
                                                        QUEUE_CONSTANT * machine_id_ * GetPid(),
                                                    gpu_track))
                           .first;
      auto queue_desc = queue_track_it->second.Serialize();
      std::string queue_str =
          rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(), gpu_queue_id.second);
      queue_desc.set_name(queue_str);
      perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
      // Record the id once, when the track is created. Doing this for every
      // dispatch (as before) made track_ids_used_ grow without bound.
      track_ids_used_.emplace_back(profiler_record.queue_id.handle + machine_id_ + 1 +
                                   profiler_record.gpu_id.handle);
    }
  }
  auto& queue_track = queue_track_it->second;

  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;
  std::string full_kernel_name = get_kernel_name(profiler_record);

  // The kernel name lives in a temporary std::string, so it must be passed as
  // a DynamicString: StaticString names are expected to have static lifetime
  // and a reused address can make different kernels show up under one name.
  TRACE_EVENT_BEGIN("KERNELS", perfetto::DynamicString(full_kernel_name), queue_track,
                    profiler_record.timestamps.begin.value, "Full Kernel Name",
                    full_kernel_name.c_str(), "Agent ID", device_id, "Queue ID",
                    profiler_record.queue_id.handle, "GRD",
                    profiler_record.kernel_properties.grid_size, "WGR",
                    profiler_record.kernel_properties.workgroup_size, "LDS",
                    (((profiler_record.kernel_properties.lds_size + (lds_block_size - 1)) &
                      ~(lds_block_size - 1))),
                    "SCR", profiler_record.kernel_properties.scratch_size, "Arch. VGPR",
                    profiler_record.kernel_properties.arch_vgpr_count, "Accumilative Vgpr",
                    profiler_record.kernel_properties.accum_vgpr_count, "SGPR",
                    profiler_record.kernel_properties.sgpr_count, "Wave Size",
                    profiler_record.kernel_properties.wave_size, "Signal",
                    profiler_record.kernel_properties.signal_handle);
  TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);

  // Returns the counter track for `counter_name`, creating it under the device
  // track on first use.
  auto get_counter_track_fn = [&](std::string counter_name) {
    std::string counter_track_id =
        std::to_string(machine_id_) + std::to_string(GetPid()) + counter_name;
    std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
    {
      std::lock_guard<std::mutex> counter_lock(counter_tracks_lock_);
      counters_track_it = counter_tracks_.find(counter_name);
      if (counters_track_it == counter_tracks_.end()) {
        /* Create a new perfetto::Track */
        counters_track_it =
            counter_tracks_
                .emplace(counter_name,
                         perfetto::CounterTrack(counter_track_id.c_str(), gpu_track))
                .first;
        auto counter_track_desc = counters_track_it->second.Serialize();
        std::string counter_track_str =
            "Process ID " + std::to_string(GetPid()) + " - Counter " + counter_name;
        counter_track_desc.set_name(counter_track_str);
        perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
      }
    }
    return counters_track_it->second;
  };

  // For Counters
  if (profiler_record.counters) {
    for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
      if (!(profiler_record.counters[i].counter_handler.handle > 0)) continue;

      size_t name_length = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
          session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
          &name_length));
      if (name_length <= 1) continue;

      // NOTE(review): this buffer is handed to the query by address and never
      // freed; ownership is not visible here - confirm against the API.
      const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
      CHECK_ROCMTOOLS(
          rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                         profiler_record.counters[i].counter_handler, &name_c));
      // Constructing a std::string from nullptr is undefined behaviour.
      if (name_c == nullptr) continue;

      perfetto::CounterTrack counters_track = get_counter_track_fn(std::string(name_c));
      TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.begin.value,
                    profiler_record.counters[i].value.value);
      // Added an extra zero event for maintaining start-end of the counter
      TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0.001);
    }
  }
  return 0;
}
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock(writing_lock);
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
std::string kernel_name;
char* function_name;
char* activity_name;
std::string roctx_message;
uint64_t roctx_id = 0;
uint64_t thread_id = tracer_record.thread_id.value;
std::unordered_map<uint64_t, perfetto::Track>::iterator thread_track_it;
std::unordered_map<int, perfetto::Track>::iterator device_track_it;
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
int device_id = tracer_record.agent_id.handle;
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;
{
std::lock_guard<std::mutex> lock(device_tracks_lock_);
device_track_it = device_tracks_.find(device_id);
if (device_track_it == device_tracks_.end()) {
/* Create a new perfetto::Track (Sub-Track) */
device_track_it =
device_tracks_
.emplace(device_id,
perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
.first;
auto gpu_desc = device_track_it->second.Serialize();
gpu_desc.mutable_process()->set_pid(device_id);
std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
gpu_desc.mutable_process()->set_process_name(gpu_str);
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
track_ids_used_.emplace_back(1 + machine_id_ + device_id);
}
}
} else {
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
thread_track_it = thread_tracks_.find(thread_id);
if (thread_track_it == thread_tracks_.end()) {
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
}
}
thread_track_it =
thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
auto thread_track_desc = thread_track_it->second.Serialize();
std::string thread_track_str =
rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
thread_track_desc.mutable_process()->set_pid(thread_id);
thread_track_desc.mutable_process()->set_process_name(thread_track_str);
perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
}
}
auto& thread_track = thread_track_it->second;
auto& gpu_track = device_track_it->second;
switch (tracer_record.domain) {
case ACTIVITY_DOMAIN_ROCTX: {
std::unordered_map<uint64_t, perfetto::Track>::iterator roctx_track_it;
{
std::lock_guard<std::mutex> lock(roctx_tracks_lock_);
roctx_track_it = roctx_tracks_.find(thread_id);
if (roctx_track_it == roctx_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
roctx_track_it =
roctx_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto roctx_track_desc = roctx_track_it->second.Serialize();
std::string roctx_track_str = rocmtools::string_printf("ROCTX Markers");
roctx_track_desc.set_name(roctx_track_str);
perfetto::TrackEvent::SetTrackDescriptor(roctx_track_it->second, roctx_track_desc);
}
}
auto& roctx_track = roctx_track_it->second;
size_t roctx_message_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_size));
if (roctx_message_size > 1) {
char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_str));
if (roctx_message_str)
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
}
size_t roctx_id_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_size));
if (roctx_id_size > 1) {
char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_str));
if (roctx_id_str) {
roctx_id = std::stoll(std::string(strdup(roctx_id_str)));
free(roctx_id_str);
}
}
if (tracer_record.operation_id.id == 1) {
perfetto::StaticString roctx_message_pft(
(!roctx_message.empty() ? roctx_message.c_str() : ""));
TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
tracer_record.timestamps.begin.value, "Timestamp(ns)",
tracer_record.timestamps.begin.value, "RocTx ID", roctx_id);
roctx_track_entries_++;
} else {
TRACE_EVENT_END("ROCTX_API", roctx_track, tracer_record.timestamps.begin.value);
roctx_track_entries_--;
}
break;
}
case ACTIVITY_DOMAIN_HSA_API: {
std::unordered_map<uint64_t, perfetto::Track>::iterator hsa_track_it;
{
std::lock_guard<std::mutex> lock(hsa_tracks_lock_);
hsa_track_it = hsa_tracks_.find(thread_id);
if (hsa_track_it == hsa_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
hsa_track_it =
hsa_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto hsa_track_desc = hsa_track_it->second.Serialize();
std::string hsa_track_str = rocmtools::string_printf("HSA API");
hsa_track_desc.set_name(hsa_track_str);
perfetto::TrackEvent::SetTrackDescriptor(hsa_track_it->second, hsa_track_desc);
}
}
auto& hsa_track = hsa_track_it->second;
size_t function_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(function_name), hsa_track,
tracer_record.timestamps.begin.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
TRACE_EVENT_END("HSA_API", hsa_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
std::unordered_map<uint64_t, perfetto::Track>::iterator hip_track_it;
{
std::lock_guard<std::mutex> lock(hip_tracks_lock_);
hip_track_it = hip_tracks_.find(thread_id);
if (hip_track_it == hip_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
hip_track_it =
hip_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto hip_track_desc = hip_track_it->second.Serialize();
std::string hip_track_str = rocmtools::string_printf("HIP API");
hip_track_desc.set_name(hip_track_str);
perfetto::TrackEvent::SetTrackDescriptor(hip_track_it->second, hip_track_desc);
}
}
auto& hip_track = hip_track_it->second;
size_t function_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
size_t kernel_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_size));
char* kernel_name_str;
if (kernel_name_size > 1) {
kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_str));
if (kernel_name_str) {
kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
free(kernel_name_str);
}
}
if (kernel_name.size() > 0) {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
} else {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
tracer_record.timestamps.begin.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
TRACE_EVENT_END("HIP_API", hip_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_EXT_API: {
printf("Warning: External API is not supported!\n");
break;
}
case ACTIVITY_DOMAIN_HIP_OPS: {
uint64_t stream_id = 0;
size_t stream_id_str_size = 0;
char* stream_id_str;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
&stream_id_str_size));
if (stream_id_str_size > 1) {
stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
&stream_id_str));
if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
}
std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
{
std::lock_guard<std::mutex> lock(stream_tracks_lock_);
stream_track_it = stream_tracks_.find(stream_id);
if (stream_track_it == stream_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id_ *
STREAM_CONSTANT * GetPid());
stream_track_it =
stream_tracks_.emplace(stream_id, perfetto::Track(track_id, gpu_track)).first;
auto stream_desc = stream_track_it->second.Serialize();
std::string stream_str =
rocmtools::string_printf("Process ID: %lu Stream %d", GetPid(), stream_id);
stream_desc.set_name(stream_str);
perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
track_ids_used_.emplace_back(1 + machine_id_ + tracer_record.agent_id.handle);
}
}
auto& stream_track = stream_track_it->second;
if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
kernel_name = rocmtools::cxx_demangle(
strdup(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
TRACE_EVENT_BEGIN(
"HIP_OPS",
perfetto::StaticString(strdup(rocmtools::truncate_name(kernel_name).c_str())),
stream_track, tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
} else {
size_t activity_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name_size));
if (activity_name_size > 1) {
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name));
} else {
activity_name = const_cast<char*>(std::string("N/A").c_str());
}
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_HSA_OPS: {
std::pair<int, uint64_t> gpu_queue_id =
std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
{
std::lock_guard<std::mutex> lock(queue_tracks_lock_);
queue_track_it = queue_tracks_.find(gpu_queue_id.first);
if (queue_track_it == queue_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id_ *
QUEUE_CONSTANT * GetPid());
queue_track_it =
queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, gpu_track))
.first;
auto queue_desc = queue_track_it->second.Serialize();
std::string queue_str = rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(),
gpu_queue_id.second);
queue_desc.set_name(queue_str);
perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
}
track_ids_used_.emplace_back(tracer_record.queue_id.handle + machine_id_ + 1 +
tracer_record.agent_id.handle);
}
auto& queue_track = queue_track_it->second;
size_t activity_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name_size));
if (activity_name_size > 1) {
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name));
}
TRACE_EVENT_BEGIN("HSA_OPS", perfetto::StaticString(activity_name), queue_track,
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
"Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
break;
}
default: {
rocmtools::warning("ignored record for domain %d", tracer_record.domain);
break;
}
}
return 0;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id) {
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
rocprofiler_record_profiler_t* profiler_record = const_cast<rocprofiler_record_profiler_t*>(
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin));
FlushProfilerRecord(*profiler_record, session_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id);
break;
}
default:
break;
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
// Accessor for the is_valid_ flag. The plugin entry points consult it before
// using the plugin instance.
bool IsValid() const {
  return is_valid_;
}
private:
fs::path output_prefix_;
// WriteBufferRecords() emits a warning when this pointer is null.
std::unique_ptr<perfetto::TracingSession> tracing_session_;
int file_descriptor_;
// Value reported by IsValid(); defaults to false.
bool is_valid_{false};
// Incremented when a ROCTX record with operation id 1 opens a slice and
// decremented for every other ROCTX record (which closes a slice).
size_t roctx_track_entries_{0};
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
std::unordered_map<uint64_t, uint64_t> stream_ids_;
// Callback Tracks
// thread_tracks_ is looked up by thread id; the per-API maps below are also
// looked up by thread id and their tracks are created as children of the
// thread's track.
std::unordered_map<uint64_t, perfetto::Track> thread_tracks_;
std::unordered_map<uint64_t, perfetto::Track> roctx_tracks_, hsa_tracks_, hip_tracks_,
hip_ext_tracks_;
// Activity Tracks
std::unordered_map<int, perfetto::Track> device_tracks_;
// queue_tracks_ is looked up by the record's agent handle and stream_tracks_
// by stream id; both hold tracks created as children of a device track.
std::unordered_map<int, perfetto::Track> queue_tracks_, stream_tracks_;
std::unordered_map<std::string, perfetto::CounterTrack> counter_tracks_;
// Source of ids for thread/ROCTX/HSA/HIP tracks: seeded with GetPid() and
// advanced by (1 + machine_id_) * GetPid() on every draw.
std::atomic<uint64_t> track_counter_{GetPid()};
// Ids that a freshly drawn track id is compared against; a match triggers
// another draw from track_counter_.
std::vector<uint64_t> track_ids_used_;
// Each mutex is taken around find/emplace on the map whose name it shares
// (e.g. thread_tracks_lock_ for thread_tracks_).
std::mutex stream_ids_lock_, thread_tracks_lock_, roctx_tracks_lock_, hsa_tracks_lock_,
hip_tracks_lock_, hip_ext_tracks_lock_, device_tracks_lock_, queue_tracks_lock_,
stream_tracks_lock_, counter_tracks_lock_;
// Inserted after "Node:" in the names of device and thread process tracks.
char hostname_[1024];
// Participates in every track id computation in FlushTracerRecord.
uint64_t machine_id_;
std::ofstream stream_;
};
// The single plugin instance for this library. It is allocated by
// rocprofiler_plugin_initialize() and released (and reset to nullptr) by
// rocprofiler_plugin_finalize(); nullptr means "not initialized".
perfetto_plugin_t* perfetto_plugin = nullptr;
} // namespace
// Plugin entry point: creates the global perfetto_plugin instance.
//
// Returns 0 when the instance was created and reports itself valid.
// Returns -1 when the caller's major version differs from
// ROCPROFILER_VERSION_MAJOR, when its minor version is newer than
// ROCPROFILER_VERSION_MINOR, when the plugin was already initialized, or when
// the freshly constructed instance is not valid (it is destroyed in that case).
int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                  uint32_t rocprofiler_minor_version) {
  const bool major_matches = !(rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR);
  const bool minor_supported = !(rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR);
  if (!(major_matches && minor_supported)) {
    return -1;
  }

  // Refuse a second initialization while an instance is alive.
  if (perfetto_plugin) {
    return -1;
  }

  perfetto_plugin = new perfetto_plugin_t();
  if (!perfetto_plugin->IsValid()) {
    // Construction did not yield a usable plugin: roll back to "not initialized".
    delete perfetto_plugin;
    perfetto_plugin = nullptr;
    return -1;
  }
  return 0;
}
void rocprofiler_plugin_finalize() {
if (!perfetto_plugin) return;
delete perfetto_plugin;
perfetto_plugin = nullptr;
}
// Exported entry point that hands a range of buffered records to the plugin.
// Returns -1 when the plugin has not been initialized or is not valid;
// otherwise returns whatever perfetto_plugin_t::WriteBufferRecords returns.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool plugin_usable = perfetto_plugin != nullptr && perfetto_plugin->IsValid();
  if (!plugin_usable) {
    return -1;
  }
  return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
// Exported entry point that hands a single tracer record to the plugin.
// Returns -1 when the plugin has not been initialized or is not valid.
// Records whose header id handle is 0 are dropped without being flushed.
// Returns 0 otherwise; the result of FlushTracerRecord is not propagated.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  if (perfetto_plugin == nullptr) return -1;
  if (!perfetto_plugin->IsValid()) return -1;

  if (record.header.id.handle != 0) {
    perfetto_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
@@ -0,0 +1,189 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright (c) 2017, The Android Open Source Project
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,35 @@
# Global OWNERS that can approve Perfetto changes.
# Please look first at OWNERS in the various subdirectories before falling back
# on this, as the former tend to be more brain-cache-hot.
# Perfetto tracing internals and API/ABI boundaries.
primiano@google.com
skyostil@google.com
# UI, Ftrace interop, traced_probes, protozero, Android internals.
hjd@google.com
# Trace Processor, metrics, infra.
lalitm@google.com
# Callstack / memory profilers, traced_probes & Linux internals.
ddiproietto@google.com
rsavitski@google.com
# Chromium-related things and tracing SDK.
eseckler@google.com
nuskos@google.com
oysteine@google.com
# Most Android-related metrics.
ilkos@google.com
# fmayer@ left the team. Please try first rsavitski@, ddiproietto@ or primiano@
# and leave fmayer@ as an emergency-only escalation on profilers.
fmayer@google.com
# chromium.org aliases for adding DEPS entries from chromium subprojects to
# third_party/perfetto.
eseckler@chromium.org
nuskos@chromium.org
skyostil@chromium.org
@@ -0,0 +1,394 @@
# Tracing SDK
The Perfetto Tracing SDK is a C++11 library that allows userspace applications
to emit trace events and add more app-specific context to a Perfetto trace.
When using the Tracing SDK there are two main aspects to consider:
1. Whether you are interested only in tracing events coming from your own app
or want to collect full-stack traces that overlay app trace events with
system trace events like scheduler traces, syscalls or any other Perfetto
data source.
2. For app-specific tracing, whether you need to trace simple types of timeline
events (e.g., slices, counters) or need to define complex data sources with a
custom strongly-typed schema (e.g., for dumping the state of a subsystem of
your app into the trace).
For Android-only instrumentation, the advice is to keep using the existing
[android.os.Trace (SDK)][atrace-sdk] / [ATrace_* (NDK)][atrace-ndk] if they
are sufficient for your use cases. Atrace-based instrumentation is fully
supported in Perfetto.
See the [Data Sources -> Android System -> Atrace Instrumentation][atrace-ds]
for details.
## Getting started
TIP: The code from these examples is also available [in the
repository](/examples/sdk/README.md).
To start using the Client API, first check out the latest SDK release:
```bash
git clone https://android.googlesource.com/platform/external/perfetto -b v23.0
```
The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
an amalgamation of the Client API designed to be easy to integrate into existing
build systems. The sources are self-contained and require only a C++11 compliant
standard library.
For example, to add the SDK to a CMake project, edit your CMakeLists.txt:
```cmake
cmake_minimum_required(VERSION 3.13)
project(PerfettoExample)
find_package(Threads)
# Define a static library for Perfetto.
include_directories(perfetto/sdk)
add_library(perfetto STATIC perfetto/sdk/perfetto.cc)
# Link the library to your main executable.
add_executable(example example.cc)
target_link_libraries(example perfetto ${CMAKE_THREAD_LIBS_INIT})
```
Next, initialize Perfetto in your program:
```C++
#include <perfetto.h>
int main(int argc, char** argv) {
perfetto::TracingInitArgs args;
// The backends determine where trace events are recorded. You may select one
// or more of:
// 1) The in-process backend only records within the app itself.
args.backends |= perfetto::kInProcessBackend;
// 2) The system backend writes events into a system Perfetto daemon,
// allowing merging app and system events (e.g., ftrace) on the same
// timeline. Requires the Perfetto `traced` daemon to be running (e.g.,
// on Android Pie and newer).
args.backends |= perfetto::kSystemBackend;
perfetto::Tracing::Initialize(args);
}
```
You are now ready to instrument your app with trace events.
## Custom data sources vs Track events
The SDK offers two abstraction layers to inject tracing data, built on top of
each other, which trade off code complexity vs expressive power:
[track events](#track-events) and [custom data sources](#custom-data-sources).
### Track events
Track events are the suggested option when dealing with app-specific tracing as
they take care of a number of subtleties (e.g., thread safety, flushing, string
interning).
Track events are time-bounded events (e.g., slices, counters) based on simple
`TRACE_EVENT` annotation tags in the codebase, like this:
```c++
#include <perfetto.h>
PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("rendering")
.SetDescription("Events from the graphics subsystem"),
perfetto::Category("network")
.SetDescription("Network upload and download statistics"));
...
int main(int argc, char** argv) {
...
perfetto::Tracing::Initialize(args);
perfetto::TrackEvent::Register();
}
...
void LayerTreeHost::DoUpdateLayers() {
TRACE_EVENT("rendering", "LayerTreeHost::DoUpdateLayers");
...
for (PictureLayer& pl : layers) {
TRACE_EVENT("rendering", "PictureLayer::Update");
pl.Update();
}
}
```
Which are rendered in the UI as follows:
![Track event example](/docs/images/track-events.png)
Track events are the best default option and serve most tracing use cases with
very little complexity.
To include your new track events in the trace, ensure that the `track_event`
data source is included in the trace config. If you do not specify any
categories then all non-debug categories will be included by default. However,
you can also add just the categories you are interested in like so:
```protobuf
data_sources {
config {
name: "track_event"
track_event_config {
enabled_categories: "rendering"
}
}
}
```
See the [Track events page](track-events.md) for full instructions.
### Custom data sources
For most uses, track events are the most straightforward way of instrumenting
apps for tracing. However, in some rare circumstances they are not
flexible enough, e.g., when the data doesn't fit the notion of a track or is
high volume enough that it needs a strongly typed schema to minimize the size of
each event. In this case, you can implement a *custom data source* for
Perfetto.
Unlike track events, when working with custom data sources, you will also need
corresponding changes in [trace processor](/docs/analysis/trace-processor.md)
to enable importing your data format.
A custom data source is a subclass of `perfetto::DataSource`. Perfetto will
automatically create one instance of the class for each tracing session it is
active in (usually just one).
```C++
class CustomDataSource : public perfetto::DataSource<CustomDataSource> {
public:
void OnSetup(const SetupArgs&) override {
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
}
void OnStart(const StartArgs&) override {
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contain the DataSourceDescriptor,
// which can be extended.
}
void OnStop(const StopArgs&) override {
// Undo any initialization done in OnStart.
}
// Data sources can also have per-instance state.
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
```
The data source's static data should be defined in one source file like this:
```C++
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
```
Custom data sources need to be registered with Perfetto:
```C++
int main(int argc, char** argv) {
...
perfetto::Tracing::Initialize(args);
// Add the following:
perfetto::DataSourceDescriptor dsd;
dsd.set_name("com.example.custom_data_source");
CustomDataSource::Register(dsd);
}
```
As with all data sources, the custom data source needs to be specified in the
trace config to enable tracing:
```C++
perfetto::TraceConfig cfg;
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("com.example.custom_data_source");
```
Finally, call the `Trace()` method to record an event with your custom data
source. The lambda function passed to that method will only be called if tracing
is enabled. It is always called synchronously and possibly multiple times if
multiple concurrent tracing sessions are active.
```C++
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
auto packet = ctx.NewTracePacket();
packet->set_timestamp(perfetto::TrackEvent::GetTraceTimeNs());
packet->set_for_testing()->set_str("Hello world!");
});
```
If necessary, the `Trace()` method can access the custom data source state
(`my_custom_state` in the example above). Doing so will take a mutex to
ensure the data source isn't destroyed (e.g., because of stopping tracing) while
the `Trace()` method is called on another thread. For example:
```C++
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
auto safe_handle = ctx.GetDataSourceLocked(); // Holds a RAII lock.
DoSomethingWith(safe_handle->my_custom_state);
});
```
## In-process vs System mode
The two modes are not mutually exclusive. An app can be configured to work
in both modes and respond both to in-process tracing requests and system
tracing requests. Both modes generate the same trace file format.
### In-process mode
In this mode both the perfetto service and the app-defined data sources are
hosted fully in-process, in the same process of the profiled app. No connection
to the system `traced` daemon will be attempted.
In-process mode can be enabled by setting
`TracingInitArgs.backends = perfetto::kInProcessBackend` when initializing the
SDK, see examples below.
This mode is used to generate traces that contain only events emitted by
the app, but not other types of events (e.g. scheduler traces).
The main advantage is that by running fully in-process, it doesn't require any
special OS privileges and the profiled process can control the lifecycle of
tracing sessions.
This mode is supported on Android, Linux, MacOS and Windows.
### System mode
In this mode the app-defined data sources will connect to the external `traced`
service using the [IPC over UNIX socket][ipc].
System mode can be enabled by setting
`TracingInitArgs.backends = perfetto::kSystemBackend` when initializing the SDK,
see examples below.
The main advantage of this mode is that it is possible to create fused traces where
app events are overlaid on the same timeline of OS events. This enables
full-stack performance investigations, looking all the way through syscalls and
kernel scheduling events.
The main limitation of this mode is that it requires the external `traced` daemon
to be up and running and reachable through the UNIX socket connection.
This is suggested for local debugging or lab testing scenarios where the user
(or the test harness) can control the OS deployment (e.g., sideload binaries on
Android).
When using system mode, the tracing session must be controlled from the outside,
using the `perfetto` command-line client
(See [reference](/docs/reference/perfetto-cli)). This is because when collecting
system traces, tracing data producers are not allowed to read back the trace
data as it might disclose information about other processes and allow
side-channel attacks.
* On Android 9 (Pie) and beyond, traced is shipped as part of the platform.
* On older versions of Android, traced can be built from sources using the
[standalone NDK-based workflow](/docs/contributing/build-instructions.md)
and sideloaded via adb shell.
* On Linux and MacOS `traced` must be built and run separately. See the
[Linux quickstart](/docs/quickstart/linux-tracing.md) for instructions.
_System mode is not yet supported on Windows, due to the lack of an IPC
implementation_.
## {#recording} Recording traces through the API
_Tracing through the API is currently only supported with the in-process mode.
When using system mode, use the `perfetto` cmdline client (see quickstart
guides)._
First initialize a [TraceConfig](/docs/reference/trace-config-proto.autogen)
message which specifies what type of data to record.
If your app includes [track events](track-events.md) (i.e., `TRACE_EVENT`), you
typically want to choose the categories which are enabled for tracing.
By default, all non-debug categories are enabled, but you can enable a specific
one like this:
```C++
perfetto::protos::gen::TrackEventConfig track_event_cfg;
track_event_cfg.add_disabled_categories("*");
track_event_cfg.add_enabled_categories("rendering");
```
Next, build the main trace config together with the track event part:
```C++
perfetto::TraceConfig cfg;
cfg.add_buffers()->set_size_kb(1024); // Record up to 1 MiB.
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("track_event");
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
```
If your app includes a custom data source, you can also enable it here:
```C++
ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("my_data_source");
```
After building the trace config, you can begin tracing:
```C++
std::unique_ptr<perfetto::TracingSession> tracing_session(
perfetto::Tracing::NewTrace());
tracing_session->Setup(cfg);
tracing_session->StartBlocking();
```
TIP: API methods with `Blocking` in their name will suspend the calling thread
until the respective operation is complete. There are also asynchronous
variants that don't have this limitation.
Now that tracing is active, instruct your app to perform the operation you
want to record. After that, stop tracing and collect the
protobuf-formatted trace data:
```C++
tracing_session->StopBlocking();
std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
// Write the trace into a file.
std::ofstream output;
output.open("example.perfetto-trace", std::ios::out | std::ios::binary);
output.write(&trace_data[0], trace_data.size());
output.close();
```
To save memory with longer traces, you can also tell Perfetto to write
directly into a file by passing a file descriptor into Setup(), remembering
to close the file after tracing is done:
```C++
int fd = open("example.perfetto-trace", O_RDWR | O_CREAT | O_TRUNC, 0600);
tracing_session->Setup(cfg, fd);
tracing_session->StartBlocking();
// ...
tracing_session->StopBlocking();
close(fd);
```
The resulting trace file can be directly opened in the [Perfetto
UI](https://ui.perfetto.dev) or the [Trace Processor](/docs/analysis/trace-processor.md).
[ipc]: /docs/design-docs/api-and-abi.md#socket-protocol
[atrace-ds]: /docs/data-sources/atrace.md
[atrace-ndk]: https://developer.android.com/ndk/reference/group/tracing
[atrace-sdk]: https://developer.android.com/reference/android/os/Trace
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,63 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <cxxabi.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <systemd/sd-id128.h>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <string>
#include "src/utils/helper.h"
// Status guard for ROCMTools API calls: evaluates `call` once and hands a
// fixed error message to rocmtools::fatal() when the result differs from
// ROCPROFILER_STATUS_SUCCESS. The do/while wrapper makes the macro behave as
// a single statement, so it is safe inside unbraced if/else bodies.
#define CHECK_ROCMTOOLS(call)                                   \
  do {                                                          \
    if ((call) != ROCPROFILER_STATUS_SUCCESS) {                 \
      rocmtools::fatal("Error: ROCMTools API Call Error!");     \
    }                                                           \
  } while (0)
namespace {
[[maybe_unused]] uint32_t GetPid() {
static uint32_t pid = syscall(__NR_getpid);
return pid;
}
[[maybe_unused]] uint64_t GetMachineID() {
char hostname[1023] = "\0";
gethostname(hostname, 1023);
sd_id128_t ret;
char machine_id[SD_ID128_STRING_MAX];
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
assert(status == 0 && "Error: Couldn't get machine id!");
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
return std::rand();
}
} // namespace
+247
مشاهده پرونده
@@ -0,0 +1,247 @@
#!/bin/bash
# Directory that contains this script (realpath resolves symlinks), and its
# parent, which is treated as the ROCm root unless overridden below.
# Everything is quoted so installation paths containing spaces work.
ROCPROFV2_DIR="$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")"
ROCM_DIR="$(dirname -- "${ROCPROFV2_DIR}")"

# RUN_FROM_BUILD=1 means the script is being run from a build/source tree;
# it unlocks the developer options (build, test, install) further down.
RUN_FROM_BUILD=0
if [[ $ROCPROFV2_DIR == *"/build"* ]]; then
  RUN_FROM_BUILD=1
elif [[ $ROCPROFV2_DIR == *"/rocmtools"* ]]; then
  RUN_FROM_BUILD=1
  # Inside the rocmtools checkout the script directory itself is the root.
  ROCM_DIR=$ROCPROFV2_DIR
fi
# Prints the command-line help and terminates the script with status 1.
# The developer options (build/test/install) are listed only when the script
# runs from a build tree (RUN_FROM_BUILD == 1).
usage() {
  echo -e "ROCMTools Run Script Usage:"
  echo -e "-h | --help For showing this message"
  echo -e "--list-counters For showing all available counters for the current GPUs"
  if [ "$RUN_FROM_BUILD" == 1 ]; then
    echo -e "-b | --build For compiling"
    echo -e "-cb | --clean-build For full clean build"
    echo -e "-t | --test For Running the tests"
    echo -e "-ct | --clean-build-test For Running the tests after a clean build"
    echo -e "-mt | --mem-test For Running the Memory Leak tests. This run requires building using -acb | --asan-clean-build option"
    echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
    echo -e "--install For installing rocmtools without clean build in the default installation folder (review build.sh to know more about the default paths)"
    echo -e "--clean-install For installing rocmtools with new clean build in the default installation folder (review build.sh to know more about the default paths)"
  fi
  echo -e "--hip-api For Collecting HIP API Traces"
  # The HIP/HSA wording of the next two entries used to be swapped.
  echo -e "--hip-activity For Collecting HIP API Activities Traces"
  echo -e "--hsa-api For Collecting HSA API Traces"
  echo -e "--hsa-activity For Collecting HSA API Activities Traces"
  echo -e "--roctx-trace For Collecting ROCTx Traces"
  echo -e "--kernel-trace For Collecting Kernel dispatch Traces"
  echo -e "--sys-trace For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces"
  echo -e "--plugin PLUGIN_NAME For enabling a plugin (file/perfetto)"
  echo -e "-i | --input For adding counters file path (every line in the text file represents a counter)"
  # Spelled the way the option parser accepts it.
  echo -e "-o | --output-file-name For the output file name"
  echo -e "-d | --output-directory For adding output path where the output files will be saved"
  echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
  exit 1
}
# A missing (or empty) first argument means there is nothing to run:
# show the help text and stop.
[ -n "$1" ] || {
  usage
  exit 1
}
# Rejects a developer-only option when the script is not running from a build
# tree. These options used to be silently ignored in that case without being
# consumed, which made the parsing loop below spin forever.
#   $1 - the option as typed by the user (used in the error message)
require_build_tree() {
  if [ "$RUN_FROM_BUILD" != 1 ]; then
    echo -e "Wrong option \"$1\", Please use the following options:\n"
    usage
    exit 1
  fi
}

# Consume leading options one at a time. Parsing stops at the first argument
# that does not start with "-"; that argument and everything after it is the
# command to profile. Action options (help, build, test, install,
# list-counters) run immediately and end the script with status 1, as before.
while true; do
  case "$1" in
    -h | --help)
      usage
      exit 1
      ;;
    -b | --build)
      require_build_tree "$1"
      TO_CLEAN=no ./build.sh
      exit 1
      ;;
    -acb | --asan-clean-build)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -cb | --clean-build)
      require_build_tree "$1"
      TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -t | --test)
      require_build_tree "$1"
      export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      # Do not run the tests from the wrong directory if "build" is missing.
      pushd build || exit 1
      ./run_tests.sh
      exit 1
      ;;
    -mt | --mem-test)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      ./tests/memorytests/run_asan_tests.sh "$ROCM_DIR/build/tests/featuretests/profiler/gtests/apps/hip_vectoradd" "$ROCM_DIR/build/memleaks.log"
      exit 1
      ;;
    -ct | --clean-build-test)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build || exit 1
      ./run_tests.sh
      exit 1
      ;;
    --install)
      require_build_tree "$1"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build || exit 1
      make install
      exit 1
      ;;
    --clean-install)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build || exit 1
      make install
      exit 1
      ;;
    --list-counters)
      # The helper binary is invoked directly (no eval) so the path is never
      # re-parsed by the shell.
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
        "$ROCM_DIR/build/src/tools/ctrl"
      else
        export ROCPROFILER_METRICS_PATH=$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml
        export LD_LIBRARY_PATH=$ROCPROFV2_DIR/../lib:$LD_LIBRARY_PATH
        export LD_PRELOAD=$ROCPROFV2_DIR/../lib/librocprofiler_tool.so
        "$ROCPROFV2_DIR/../libexec/rocmtools/ctrl"
      fi
      exit 1
      ;;
    -i | --input)
      # The counters file must be given and readable.
      if [ -n "$2" ] && [ -r "$2" ]; then
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
        else
          export ROCPROFILER_METRICS_PATH=$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml
        fi
        export COUNTERS_PATH=$2
      else
        echo -e "Error: \"$2\" doesn't exist!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    # --output-file is accepted as well because the help text has advertised
    # that spelling.
    -o | --output-file | --output-file-name)
      if [ -n "$2" ]; then
        export OUT_FILE_NAME=$2
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -d | --output-directory)
      if [ -n "$2" ]; then
        mkdir -p "$2"
        export OUTPUT_PATH=$2
        # Remembered separately: OUTPUT_PATH is re-exported per counter line
        # when the profiled command is launched.
        OUTPUT_PATH_INTERNAL=$2
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -fi | --flush-interval)
      # Validate the shape first so a non-numeric value yields the message
      # below rather than a raw "integer expression expected" from test.
      if [[ "$2" =~ ^\+?[0-9]+$ ]] && [ "$2" -gt 0 ]; then
        export ROCPROFILER_FLUSH_INTERVAL=$2
      else
        echo -e "Wrong input \"$2\" for flush interval, it needs to be integer greater than zero!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    --hip-api)
      export ROCPROFILER_HIP_API_TRACE=1
      shift
      ;;
    --hip-activity)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      shift
      ;;
    --hsa-api)
      export ROCPROFILER_HSA_API_TRACE=1
      shift
      ;;
    --hsa-activity)
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      shift
      ;;
    --roctx-trace)
      export ROCPROFILER_ROCTX_TRACE=1
      shift
      ;;
    --kernel-trace)
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --sys-trace)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      export ROCPROFILER_ROCTX_TRACE=1
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --amd-sys)
      export ROCPROFILER_ENABLE_AMDSYS=$2
      # Two separate shifts on purpose: "shift 2" would shift nothing when the
      # value is missing and the loop would never advance.
      shift
      shift
      ;;
    --plugin)
      # Quoted test: the unquoted "[ -n $2 ]" was true even for a missing
      # plugin name, so the error branch could never be reached.
      if [ -n "$2" ]; then
        PLUGIN=$2
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so
        else
          export ROCPROFILER_PLUGIN_LIB=rocmtools/lib${PLUGIN}_plugin.so
        fi
      else
        echo -e "Wrong input \"$2\" for plugin!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -*)
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      # First non-option argument: the rest is the command to profile.
      break
      ;;
  esac
done
# Load the counters file given with -i/--input, one array entry per line.
# The "|| [[ -n ... ]]" keeps a final line that has no trailing newline.
PMC_LINES=()
if [ -n "$COUNTERS_PATH" ]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    PMC_LINES+=("$line")
  done < "$COUNTERS_PATH"
fi

# Tool library appended to LD_PRELOAD for the profiled command.
if [ "$RUN_FROM_BUILD" == 1 ]; then
  TOOL_LIB=$ROCM_DIR/build/librocprofiler_tool.so
else
  TOOL_LIB=$ROCM_DIR/lib/librocprofiler_tool.so
fi

# The remaining positional parameters are the command to profile. "$@" passes
# them through unchanged; the previous unquoted $* re-split any argument that
# contained whitespace.
# NOTE: "$PMC_LINES" expands to the first array element only, so a counters
# file whose first line is empty is treated as "no counters" (kept as is).
if [ -n "$PMC_LINES" ]; then
  COUNTER=1
  # One run of the command per line of the counters file.
  for pmc_line in "${PMC_LINES[@]}"; do
    export ROCPROFILER_COUNTERS="$pmc_line"
    if [ -n "$OUTPUT_PATH" ]; then
      # Each run gets its own pmc_<N> directory under the directory given
      # with -d, plus a pmc.txt recording the counters used for that run.
      FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
      echo -e "\nThe output path for the following counters: $FINAL_PATH"
      mkdir -p "$FINAL_PATH"
      echo "$ROCPROFILER_COUNTERS" > "$FINAL_PATH/pmc.txt"
      export OUTPUT_PATH=$FINAL_PATH
      COUNTER=$((COUNTER + 1))
    fi
    LD_PRELOAD=$LD_PRELOAD:$TOOL_LIB "$@"
  done
else
  LD_PRELOAD=$LD_PRELOAD:$TOOL_LIB "$@"
fi
@@ -20,43 +20,14 @@
# THE SOFTWARE.
################################################################################
## Build is not supported on Windows platform
if ( WIN32 )
message ( FATAL_ERROR "Windows build is not supported." )
endif ()
## Compiler Preprocessor definitions.
add_definitions ( -D__linux__ )
add_definitions ( -DUNIX_OS )
add_definitions ( -DLINUX )
add_definitions ( -D__AMD64__ )
add_definitions ( -D__x86_64__ )
add_definitions ( -DLITTLEENDIAN_CPU=1 )
add_definitions ( -DHSA_LARGE_MODEL= )
add_definitions ( -DHSA_DEPRECATED= )
## Linux Compiler options
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=unused-result" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" )
add_link_options ("-Bdynamic -z,neexecstack")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
add_definitions ( -DNEW_TRACE_API=1 )
## CLANG options
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
if("$ENV{CXX}" STREQUAL "/usr/bin/clang++")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000")
endif()
## Enable debug trace
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,92 @@
# rocmtools
## Getting started
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
## Add your files
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
```
cd existing_repo
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
git branch -M main
git push -uf origin main
```
## Integrate with your tools
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
## Collaborate with your team
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
## Test and Deploy
Use the built-in continuous integration in GitLab.
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
***
# Editing this README
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
## Suggestions for a good README
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
## Name
Choose a self-explaining name for your project.
## Description
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
## Badges
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
## Visuals
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
## Installation
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
## Usage
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
## Support
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
## Roadmap
If you have ideas for releases in the future, it is a good idea to list them in the README.
## Contributing
State if you are open to contributions and what your requirements are for accepting them.
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
## Authors and acknowledgment
Show your appreciation to those who have contributed to the project.
## License
For open source projects, say how it is licensed.
## Project status
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
فایل باینری نشان داده نشده است.
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,135 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/** \section rocprofiler_plugin_api ROCMTools Plugin API
*
* The ROCMTools Plugin API is used by the ROCMTools Tool to output all
* profiling information. Different implementations of the ROCMTools Plugin
* API can be developed that output the data in different formats. The
* ROCMTools Tool can be configured to load a specific library that supports
* the user desired format.
*
* The API is not thread safe. It is the responsibility of the ROCMTools Tool
* to ensure the operations are synchronized and not called concurrently. There
* is no requirement for the ROCMTools Tool to report trace data in any
* specific order. If the format supported by plugin requires specific
* ordering, it is the responsibility of the plugin implementation to perform
* any necessary sorting.
*/
/**
* \file
* ROCMTools Tool Plugin API interface.
*/
#ifndef ROCPROFILER_PLUGIN_H_
#define ROCPROFILER_PLUGIN_H_
#include <stdint.h>
#include "rocprofiler.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** \defgroup rocprofiler_plugins ROCMTools Plugin API Specification
* @{
*/
/** \defgroup initialization_group Initialization and Finalization
* \ingroup rocprofiler_plugins
*
* The ROCMTools Plugin API must be initialized before using any of the
* operations to report trace data, and finalized after the last trace data has
* been reported.
*
* @{
*/
/**
* Initialize plugin.
* Must be called before any other operation.
*
* @param[in] rocprofiler_major_version The major version of the ROCMTools API
* being used by the ROCMTools Tool. An error is reported if this does not
* match the major version of the ROCMTools API used to build the plugin
* library. This ensures compatibility of the trace data format.
* @param[in] rocprofiler_minor_version The minor version of the ROCMTools API
* being used by the ROCMTools Tool. An error is reported if the
* \p rocprofiler_major_version matches and this is greater than the minor
* version of the ROCMTools API used to build the plugin library. This ensures
* compatibility of the trace data format.
* @return Returns 0 on success and -1 on error.
*/
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
uint32_t rocprofiler_minor_version);
/**
* Finalize plugin.
* This must be called after ::rocprofiler_plugin_initialize and after all
* profiling data has been reported by
* ::rocprofiler_plugin_write_buffer_records and
* ::rocprofiler_plugin_write_record.
*
* Takes no arguments; declared with an explicit (void) so that C translation
* units including this header see a real prototype.
*/
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize(void);
/** @} */
/** \defgroup profiling_record_write_functions Profiling data reporting
* \ingroup rocprofiler_plugins
* Operations to output profiling data.
* @{
*/
// TODO(aelwazir): Recheck wording of the description
/**
* Report Buffer Records.
*
* The records to report are delimited by the half-open range
* [\p begin, \p end): \p end points one past the last record.
*
* @param[in] begin Pointer to the first record.
* @param[in] end Pointer to one past the last record.
* @param[in] session_id Session ID
* (NOTE(review): presumably the session the records belong to - confirm).
* @param[in] buffer_id Buffer ID
* (NOTE(review): presumably the buffer the records come from - confirm).
* @return Returns 0 on success and -1 on error.
*/
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id);
/**
* Report Synchronous Record.
*
* @param[in] record The Synchronous Tracer record, passed by value.
* @param[in] session_id Session ID
* @return Returns 0 on success and -1 on error.
*/
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
rocprofiler_session_id_t session_id);
/** @} */
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* ROCPROFILER_PLUGIN_H_ */
@@ -0,0 +1,25 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Output plugins, one subdirectory each; processed in the order listed.
foreach(_rocprofiler_plugin_dir IN ITEMS file perfetto ctf)
  add_subdirectory(${_rocprofiler_plugin_dir})
endforeach()
@@ -0,0 +1 @@
README.html
@@ -0,0 +1,161 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Shared object of the `ctf` plugin.
#
# `barectf.c`, `barectf.h`, and the `*.cpp.i` files are not checked in:
# they are declared as outputs of custom commands in this file.
add_library(ctf_plugin SHARED
  ctf.cpp
  plugin.cpp
  barectf.c
  "${CMAKE_CURRENT_BINARY_DIR}/barectf.h"
  ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp
  hsa_begin.cpp.i
  hsa_end.cpp.i
  hip_begin.cpp.i
  hip_end.cpp.i)

# Hide C++ symbols by default, relink when the export map changes, and
# put the resulting library at the root of the project build tree.
set_property(TARGET ctf_plugin PROPERTY CXX_VISIBILITY_PRESET hidden)
set_property(TARGET ctf_plugin PROPERTY
  LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap")
set_property(TARGET ctf_plugin PROPERTY
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

# Installation directory (relative to the install prefix) of the CTF
# metadata stream file; the plugin gets the absolute path to this file
# through the `CTF_PLUGIN_METADATA_FILE_PATH` definition.
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")

target_include_directories(ctf_plugin
  PRIVATE
    "${PROJECT_SOURCE_DIR}/inc"
    "${PROJECT_SOURCE_DIR}"
    "${CMAKE_BINARY_DIR}/src/api"
    "${CMAKE_CURRENT_BINARY_DIR}")

target_compile_definitions(ctf_plugin
  PRIVATE
    HIP_PROF_HIP_API_STRING=1
    __HIP_PLATFORM_HCC__=1
    CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")

target_link_libraries(ctf_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    hsa-runtime64::hsa-runtime64
    systemd
    stdc++fs
    dl)

# Only export what the shared export map lists, and fail the link on
# any unresolved symbol.
target_link_options(ctf_plugin
  PRIVATE
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
    -Wl,--no-undefined)

install(
  TARGETS ctf_plugin
  LIBRARY
    DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
    COMPONENT plugins)
# `gen_api_files.py` and `gen_env_yaml.py` require Python 3,
# CppHeaderParser, PyYAML, and barectf.
#
# Only the interpreter is needed: the scripts run at build time through
# `Python3_EXECUTABLE`.
find_package(Python3 COMPONENTS Interpreter REQUIRED)
message(STATUS "Python: ${Python3_EXECUTABLE}")
# check_py3_pkg(<pkg_name>)
#
# Stops the configuration with a fatal error if the Python 3 interpreter
# `Python3_EXECUTABLE` cannot import the package `pkg_name`; otherwise
# reports that the package was found.
#
# Example: check_py3_pkg(yaml)
function(check_py3_pkg pkg_name)
  execute_process(
    COMMAND "${Python3_EXECUTABLE}" -c "import ${pkg_name}"
    RESULT_VARIABLE PY3_IMPORT_RES
    OUTPUT_QUIET)

  # The result is either the exit status of the interpreter or an error
  # string if the process could not be started: compare it as a quoted
  # string so that a multi-word error message cannot break the
  # condition.
  if(NOT "${PY3_IMPORT_RES}" STREQUAL "0")
    message(FATAL_ERROR "Cannot find Python 3 package `${pkg_name}`")
  endif()

  message(STATUS "Found Python 3 package `${pkg_name}`")
endfunction()
# Make sure that the Python 3 packages which the generator scripts need
# are importable, and locate the `barectf` command-line tool.
foreach(py3_pkg IN ITEMS CppHeaderParser yaml)
  check_py3_pkg(${py3_pkg})
endforeach()

find_program(BARECTF_RES NAMES barectf REQUIRED)
# HSA API: generate the barectf YAML file and the C++ files.
#
# Find the directory containing `hsa.h` among the include directories
# which the HSA runtime target exports.
get_property(HSA_RUNTIME_INCLUDE_DIRS
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)

find_file(HSA_H
  NAMES hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRS}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)

get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)

# Steps of the command:
#
# 1. Preprocess `hsa.h` and `hsa_ext_amd.h`.
# 2. Concatenate both preprocessed headers and `hsa_prof_str.h` into
#    `hsa_input.h`.
# 3. Run `gen_api_files.py` on `hsa_input.h`.
add_custom_command(
  OUTPUT
    hsa_erts.yaml
    hsa_begin.cpp.i
    hsa_end.cpp.i
  COMMAND
    ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
  COMMAND
    ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
  COMMAND
    ${CMAKE_COMMAND} -E cat
      hsa.h.i
      hsa_ext_amd.h.i
      "${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
      > hsa_input.h
  COMMAND
    "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
      hsa hsa_input.h
  BYPRODUCTS
    hsa.h.i
    hsa_ext_amd.h.i
    hsa_input.h
  DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
    "${HSA_RUNTIME_INC_PATH}/hsa.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
    "${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
  COMMENT "Generating HSA API files for the `ctf` plugin...")
# Generate barectf YAML and C++ files for HIP API.
#
# Locate `hip_runtime_api.h` and `hip_prof_str.h` among the include
# directories which the HIP runtime target exports.
get_property(HIP_INCLUDE_DIRS
  TARGET hip::amdhip64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)

find_file(HIP_RUNTIME_API_H hip_runtime_api.h
  PATHS ${HIP_INCLUDE_DIRS}
  PATH_SUFFIXES hip
  NO_DEFAULT_PATH
  REQUIRED)

find_file(HIP_PROF_STR_H hip_prof_str.h
  PATHS ${HIP_INCLUDE_DIRS}
  PATH_SUFFIXES hip hip/amd_detail
  NO_DEFAULT_PATH
  REQUIRED)

# From this point, `HIP_INCLUDE_DIRS` holds `-I` compiler options
# instead of plain directories.
list(TRANSFORM HIP_INCLUDE_DIRS PREPEND -I)

# Steps of the command:
#
# 1. Preprocess `hip_runtime_api.h`.
# 2. Concatenate the preprocessed header and `hip_prof_str.h` into
#    `hip_input.h` (with `cmake -E cat`, like the HSA command, instead
#    of relying on a `cat` program being in `PATH`).
# 3. Run `gen_api_files.py` on `hip_input.h`.
add_custom_command(
  OUTPUT hip_erts.yaml hip_begin.cpp.i hip_end.cpp.i
  COMMAND ${CMAKE_C_COMPILER} ${HIP_INCLUDE_DIRS}
          -E "${HIP_RUNTIME_API_H}"
          -D__HIP_PLATFORM_HCC__=1
          -D__HIP_ROCclr__=1
          -o hip_runtime_api.h.i
  COMMAND ${CMAKE_COMMAND} -E cat hip_runtime_api.h.i
                                  "${HIP_PROF_STR_H}"
                                  > hip_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          hip hip_input.h
  BYPRODUCTS hip_runtime_api.h.i hip_input.h
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          "${HIP_RUNTIME_API_H}"
          "${HIP_PROF_STR_H}"
  COMMENT "Generating HIP API files for the `ctf` plugin...")
# Trace environment for barectf: `gen_env_yaml.py` receives the project
# version and the command declares `env.yaml` as its output.
add_custom_command(
  OUTPUT
    env.yaml
  COMMAND
    "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
      ${PROJECT_VERSION}
  DEPENDS
    "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
  COMMENT "Generating `env.yaml`...")
# Raw CTF tracer: run `barectf gen` on `config.yaml`, searching both the
# current binary directory (generated YAML files) and the current source
# directory for the files to include.
add_custom_command(
  OUTPUT
    barectf.c
    barectf.h
    barectf-bitfield.h
    metadata
  COMMAND
    "${BARECTF_RES}" gen
      "-I${CMAKE_CURRENT_BINARY_DIR}"
      "-I${CMAKE_CURRENT_SOURCE_DIR}"
      "${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
  DEPENDS
    hsa_erts.yaml
    hip_erts.yaml
    env.yaml
    "${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
    "${CMAKE_CURRENT_SOURCE_DIR}/dst_base.yaml"
  COMMENT "Generating raw CTF tracer with barectf...")

# Install the generated CTF metadata stream file with the plugins.
install(
  FILES "${CMAKE_CURRENT_BINARY_DIR}/metadata"
  DESTINATION "${METADATA_STREAM_FILE_DIR}"
  COMPONENT plugins)
@@ -0,0 +1,260 @@
= CTF plugin for ROCMTools
13 December 2022
Philippe Proulx
This plugin writes the received ROCMTools tracer and profiler records to
a https://diamon.org/ctf/[CTF] trace.
== Build requirements
* Python ≥ 3.10
* barectf ≥ 3.1.1 (`pip3 install barectf`)
* PyYAML (`apt-get install python3-yaml`)
* CppHeaderParser (`pip3 install CppHeaderParser`)
== Usage
Once installed, you may load this plugin with `rocprofv2` using
the `--plugin ctf` command-line arguments.
This plugin honours the `OUTPUT_PATH` environment variable which
`rocprofv2` sets with the `-d` option. If you pass `-d my-dir` to
`rocprofv2`, then the plugin will write the CTF trace to the
`my-dir/trace` directory.
IMPORTANT: This plugin performs important cleanup tasks at finalization
time, so the resulting CTF trace could be corrupted if the plugin is
never finalized.
Once the plugin is finalized, open the resulting trace directory with
either https://babeltrace.org/[Babeltrace{nbsp}2] or
https://www.eclipse.org/tracecompass/[Trace Compass] to view or analyze
it.
=== Event record types
This plugin writes to different CTF data streams having different types.
On the file system, the prefix of a data stream file name indicates the
data stream type, that is:
`roctx_`::
rocTX messages.
+
Each CTF event record is named `roctx` and corresponds to a rocTX
tracer record.
+
The fields are:
+
--
[horizontal]
`thread_id`::
Thread ID.
`id`::
rocTX ID.
`msg`::
rocTX message.
--
`hsa_api_`::
HSA API beginning and end function calls.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
--
+
For each ROCMTools HSA API tracer record for the HSA function named
`__name__`, this plugin writes two event records:
+
`__name___begin`:::
Beginning of the function call.
+
The event record contains fields which correspond to most of the
parameters of the HSA function.
`__name___end`:::
End of the function call.
`hip_api_`::
HIP API beginning and end function calls.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
`kernel_name`::
Kernel name (empty string if not available).
--
+
For each ROCMTools HIP API tracer record for the HIP function named
`__name__`, this plugin writes two event records:
+
`__name__Begin`:::
Beginning of the function call.
+
The event record contains fields which correspond to most of the
parameters of the HIP function.
`__name__End`:::
End of the function call.
`api_ops_`::
HSA/HIP API beginning and end operations.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`thread_id`::
Thread ID.
`queue_id`::
Queue ID.
`agent_id`::
Agent ID.
`correlation_id`::
Correlation ID.
--
+
The possible CTF event records are:
+
`hsa_op_begin`:::
HSA API operation beginning.
`hsa_op_end`:::
HSA API operation end.
`hip_op_begin`:::
HIP API operation beginning.
+
Such an event record also has the field `kernel_name` which is the
kernel name (empty string if not available).
`hip_op_end`:::
HIP API operation end.
`profiler_`::
Profiler records.
+
All CTF event records have the following common fields:
+
--
[horizontal]
`dispatch`::
Dispatch ID.
`gpu_id`::
GPU ID.
`queue_id`::
Queue ID.
`queue_index`::
Queue index.
`process_id`::
Process ID.
`thread_id`::
Thread ID.
`kernel_id`::
Kernel ID.
`kernel_name`::
Kernel name (empty string if not available).
`counter_names`::
Array of counter names, each one having a corresponding integral
value in the `counter_values` field.
`counter_values`::
Array of integers, each one being the value of the counter named by
the corresponding string in the `counter_names` field.
--
+
The possible CTF event records are:
+
`profiler_record`:::
Profiler record.
`profiler_record_with_kernel_properties`:::
Profiler record with kernel properties.
+
Such an event record also has the following fields:
+
--
`grid_size`::
Grid size.
`workgroup_size`::
Workgroup size.
`lds_size`::
Local memory size.
`scratch_size`::
Scratch size.
`arch_vgpr_count`::
Architecture vector general purpose register count.
`accum_vgpr_count`::
Accumulation vector general purpose register count.
`sgpr_count`::
Scalar general purpose register count.
`wave_size`::
Wavefront size.
`signal_handle`::
Signal handle.
--
`hsa_handles_`::
HSA handle type mappings.
+
Each CTF event record is named `hsa_handle_type` and maps an HSA handle
to a processor unit type (CPU or GPU).
+
The clock value of those event records is irrelevant (always{nbsp}0).
+
The fields are:
+
--
[horizontal]
`handle`::
HSA handle.
`type`::
Processor unit type (`CPU` or `GPU` enumeration label).
--
@@ -0,0 +1,67 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_EVENT_RECORD_H
#define PLUGIN_CTF_BARECTF_EVENT_RECORD_H
#include <memory>
#include <cstdint>
struct barectf_default_ctx;
namespace rocm_ctf {

// Base class template of every barectf event record.
//
// A derived class provides Write(), which is expected to call the
// matching barectf tracing function.
//
// `CtxT` is the barectf context type which Write() receives.
template <typename CtxT> class BarectfEventRecord {
 public:
  // Shared pointer to an immutable barectf event record.
  using SP = std::shared_ptr<const BarectfEventRecord>;

  virtual ~BarectfEventRecord() = default;

  // An event record can't be copied (keeps this class simple).
  BarectfEventRecord(const BarectfEventRecord&) = delete;
  BarectfEventRecord& operator=(const BarectfEventRecord&) = delete;

  // Writes this event record using the barectf context `barectf_ctx`.
  virtual void Write(CtxT& barectf_ctx) const = 0;

  // Returns the clock value of this event record.
  std::uint64_t GetClockVal() const noexcept { return clock_val_; }

 protected:
  // Only a derived class may build an event record; `clock_val` becomes
  // its clock value.
  explicit BarectfEventRecord(const std::uint64_t clock_val) noexcept : clock_val_{clock_val} {}

 private:
  // Clock value given at construction time.
  std::uint64_t clock_val_;
};

}  // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_EVENT_RECORD_H
@@ -0,0 +1,192 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_PLATFORM_H
#define PLUGIN_CTF_BARECTF_PLATFORM_H
#include <cstdlib>
#include <cstdint>
#include <fstream>
#include <vector>
#include <functional>
#include <experimental/filesystem>
#include "barectf.h"
namespace rocm_ctf {

template <typename> class BarectfWriter;

// A barectf platform for any barectf writer.
//
// The user doesn't deal directly with such an object: it's closely
// coupled with a barectf writer.
//
// Each platform takes care of a single CTF data stream file.
//
// After building such a platform, get the raw barectf context with
// GetCtx() to call tracing functions. The platform must still exist
// when calling a tracing function.
//
// Such a platform opens the data stream file on construction and closes
// it on destruction.
//
// `DescrT` is the specific barectf platform descriptor. It must be a
// structure having:
//
// `Ctx`:
//     Specific barectf context type.
//
// `static void OpenPacket(Ctx&)`:
//     Packet opening function.
//
// `static void ClosePacket(Ctx&)`:
//     Packet closing function.
template <typename DescrT> class BarectfPlatform final {
  friend class BarectfWriter<DescrT>;

 private:
  // Builds a barectf platform.
  //
  // The platform writes CTF packets of size `packet_size` bytes to the
  // CTF data stream file `data_stream_file_path`.
  //
  // For each event record to write, the platform reads `clock_val` to
  // know the current timestamp. Only the address of `clock_val` is
  // kept: the referenced object must outlive this platform.
  //
  // Throws if the data stream file cannot be opened (the output stream
  // is configured to throw on failure).
  explicit BarectfPlatform(const std::size_t packet_size,
                           const std::experimental::filesystem::path& data_stream_file_path,
                           const std::uint64_t& clock_val)
      : clock_val_{&clock_val}, buffer_(packet_size) {
    // Initialize barectf callbacks.
    barectf_platform_callbacks callbacks;

    callbacks.default_clock_get_value = GetClockCb;
    callbacks.is_backend_full = IsBackendFullCb;
    callbacks.open_packet = OpenPacketCb;
    callbacks.close_packet = ClosePacketCb;

    // Configure exceptions so that stream operations throw instead of
    // just setting flags on error.
    output_.exceptions(std::ofstream::failbit | std::ofstream::badbit);

    // Open CTF data stream output file in binary mode.
    output_.open(data_stream_file_path, std::ios_base::out | std::ios_base::binary);

    // Initialize the raw barectf context; `this` is what the static
    // callbacks receive as `data`.
    barectf_init(&ctx_, buffer_.data(), buffer_.size(), callbacks, this);

    // Open the initial packet.
    OpenPacketCb();
  }

 public:
  // Disabled copy operations to make this class simpler.
  BarectfPlatform(const BarectfPlatform&) = delete;
  BarectfPlatform& operator=(const BarectfPlatform&) = delete;

  // Closes/writes any last CTF packet and closes the data stream file.
  //
  // This is a best effort: `output_` throws on failure, but a
  // destructor is implicitly `noexcept`, so letting an exception escape
  // would call std::terminate(). Any error is therefore dropped here,
  // in which case the last CTF packet may be missing or incomplete.
  ~BarectfPlatform() {
    try {
      if (barectf_packet_is_open(&ctx_) && !barectf_packet_is_empty(&ctx_)) {
        // Close and write last CTF packet (not empty).
        ClosePacketCb();
      }

      // Close data stream output file.
      output_.close();
    } catch (...) {
      // Nothing more to do: the destructor of `output_` still releases
      // the file without throwing.
    }
  }

  // Returns the raw barectf context of this platform.
  const typename DescrT::Ctx& GetCtx() const noexcept { return ctx_; }
  typename DescrT::Ctx& GetCtx() noexcept { return ctx_; }

 private:
  // Converts the user data `data` of a barectf callback back to the
  // platform passed to barectf_init().
  static BarectfPlatform& AsPlatform(void* const data) noexcept {
    return *static_cast<BarectfPlatform*>(data);
  }

  // Four callbacks for barectf.
  //
  // Those four functions receive an instance of this class as `data`.
  static std::uint64_t GetClockCb(void* const data) noexcept {
    // Forward to instance method.
    return AsPlatform(data).GetClockCb();
  }

  static int IsBackendFullCb(void* const data) noexcept {
    // Forward to instance method.
    return AsPlatform(data).IsBackendFullCb();
  }

  static void OpenPacketCb(void* const data) {
    // Forward to instance method.
    AsPlatform(data).OpenPacketCb();
  }

  static void ClosePacketCb(void* const data) {
    // Forward to instance method.
    AsPlatform(data).ClosePacketCb();
  }

  // Instance version of the "get clock value" callback.
  std::uint64_t GetClockCb() noexcept { return *clock_val_; }

  // Instance version of the "is the back end full?" callback.
  int IsBackendFullCb() noexcept {
    // Never full.
    return 0;
  }

  // Instance version of the "open packet" callback.
  void OpenPacketCb() {
    // Forward to user (descriptor) function.
    DescrT::OpenPacket(ctx_);
  }

  // Instance version of the "close packet" callback.
  void ClosePacketCb() {
    // Forward to user (descriptor) function to finalize the packet.
    DescrT::ClosePacket(ctx_);

    // Write to the data stream file.
    WriteCurrentPacket();
  }

  // Writes the current CTF packet (the whole of `buffer_`) to the data
  // stream file.
  //
  // Throws on write failure.
  void WriteCurrentPacket() {
    output_.write(reinterpret_cast<const char*>(buffer_.data()), buffer_.size());
  }

  // Clock value pointer.
  const std::uint64_t* clock_val_;

  // CTF data stream output file stream.
  std::ofstream output_;

  // Raw barectf context.
  typename DescrT::Ctx ctx_;

  // CTF packet buffer.
  std::vector<std::uint8_t> buffer_;
};

}  // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_PLATFORM_H
@@ -0,0 +1,124 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_TRACER_H
#define PLUGIN_CTF_BARECTF_TRACER_H
#include <cstdlib>
#include <memory>
#include <vector>
#include <string>
#include <experimental/filesystem>
#include "barectf_event_record.h"
#include "barectf_writer.h"
namespace rocm_ctf {
// A barectf tracer offers the AddEventRecord() method to add an event
// record which it will ultimately write to some CTF data stream file
// within some specified CTF trace directory.
//
// One important feature of such a tracer is that you don't need to add
// event records in order of time. A barectf tracer manages one or more
// barectf writers, each one managing a single barectf platform/context
// (CTF data stream file).
//
// All the CTF data stream files which a barectf tracer indirectly
// manages share a common specified prefix. You must not use the same
// prefix for two barectf tracers writing to the same CTF trace
// directory.
//
// `PlatformDescrT` is the specific barectf platform descriptor (see the
// documentation of the `BarectfPlatform` class template).
template <typename PlatformDescrT> class BarectfTracer final {
public:
// Specific barectf event record type.
using EventRecord = typename BarectfWriter<PlatformDescrT>::EventRecord;
// Builds a barectf tracer to write CTF packets of size `packet_size`
// bytes to CTF data stream files having the prefix
// `data_stream_file_name_prefix` within the CTF trace directory
// `trace_dir`.
//
// The internal barectf writers manage event record queues having a
// maximum size of `max_writer_queue_size`. Increasing
// `max_writer_queue_size` increases the memory footprint of the
// tracer, but may reduce the number of required CTF data stream files
// to ensure time-ordered event records.
explicit BarectfTracer(const std::size_t packet_size,
std::experimental::filesystem::path trace_dir,
const char* const data_stream_file_name_prefix,
const std::size_t max_writer_queue_size = 200)
: packet_size_{packet_size},
trace_dir_{std::move(trace_dir)},
data_stream_file_name_prefix_{data_stream_file_name_prefix},
max_writer_queue_size_{max_writer_queue_size} {}
// Disabled copy operations to make this class simpler.
BarectfTracer(const BarectfTracer&) = delete;
BarectfTracer& operator=(const BarectfTracer&) = delete;
// Adds the event record `event_record` to this tracer.
//
// The clock value of `event_record` may be less than the clock value
// of previously added event records.
void AddEventRecord(typename EventRecord::SP event_record) {
// Try to find a barectf writer to accept `event_record`.
for (auto& writer : writers_) {
if (writer->MayAddEventRecord(*event_record)) {
// Found: add the event record to this writer and return.
writer->AddEventRecord(std::move(event_record));
return;
}
}
// No barectf writer found: create a new one.
std::ostringstream ss;
ss << data_stream_file_name_prefix_ << writers_.size();
writers_.emplace_back(new BarectfWriter<PlatformDescrT>{packet_size_, trace_dir_ / ss.str(),
max_writer_queue_size_});
// Add the event record to this new barectf writer.
assert(writers_.back()->MayAddEventRecord(*event_record));
writers_.back()->AddEventRecord(std::move(event_record));
}
private:
// CTF packet size.
std::size_t packet_size_;
// CTF trace directory.
std::experimental::filesystem::path trace_dir_;
// CTF data stream file name prefix.
std::string data_stream_file_name_prefix_;
// Maximum event record queue size of a barectf writer.
std::size_t max_writer_queue_size_;
// barectf writers.
std::vector<std::unique_ptr<BarectfWriter<PlatformDescrT>>> writers_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_TRACER_H
@@ -0,0 +1,178 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_BARECTF_WRITER_H
#define PLUGIN_CTF_BARECTF_WRITER_H
#include <cassert>
#include <cstdlib>
#include <cstdint>
#include <cassert>
#include <queue>
#include <utility>
#include <experimental/filesystem>
#include "barectf_platform.h"
#include "barectf_event_record.h"
namespace rocm_ctf {

template <typename> class BarectfTracer;

// A barectf writer manages a queue of event records, writing them
// through barectf when needed.
//
// Such an object makes it possible to add some event record with a
// clock value V and then some other event record of which the clock
// value is less than V. The barectf writer ensures that actual barectf
// tracing functions are called chronologically, a requirement of CTF.
//
// A barectf writer keeps event records in memory until its queue is
// full (you provide the maximum queue size at construction time), in
// which case it writes the oldest event record to some current CTF
// packet through a barectf tracing function.
//
// Call MayAddEventRecord() to check whether or not you may add an event
// record to the barectf writer, and then AddEventRecord() if you may.
//
// A barectf writer writes all its remaining event records on
// destruction.
//
// `PlatformDescrT` is the specific barectf platform descriptor (see the
// documentation of the `BarectfPlatform` class template).
template <typename PlatformDescrT> class BarectfWriter final {
  friend class BarectfTracer<PlatformDescrT>;

 public:
  // Specific barectf event record type.
  using EventRecord = BarectfEventRecord<typename PlatformDescrT::Ctx>;

 private:
  // Builds a barectf writer to write CTF packets of size `packet_size`
  // bytes to the CTF data stream file `data_stream_file_path`.
  //
  // The built barectf writer manages an event record queue having a
  // maximum size of `max_queue_size`.
  //
  // `platform_` only keeps the address of `clock_val_` here, so it's
  // fine that `clock_val_` is initialized after `platform_`.
  explicit BarectfWriter(const std::size_t packet_size,
                         const std::experimental::filesystem::path& data_stream_file_path,
                         const std::size_t max_queue_size)
      : platform_{packet_size, data_stream_file_path, clock_val_},
        max_queue_size_{max_queue_size} {}

 public:
  // Writes all its remaining event records.
  //
  // This is a best effort: writing an event record may throw, but a
  // destructor is implicitly `noexcept`, so letting an exception escape
  // would call std::terminate(). On error, the event records still in
  // the queue are dropped.
  ~BarectfWriter() {
    try {
      // Write all the remaining event records from the oldest to the
      // newest.
      while (!queue_.empty()) {
        WriteOldestEventRecord();
      }
    } catch (...) {
      // Nothing more to do.
    }
  }

  // Disabled copy operations to make this class simpler.
  BarectfWriter(const BarectfWriter&) = delete;
  BarectfWriter& operator=(const BarectfWriter&) = delete;

  // Whether or not you may add the event record `event_record` to this
  // writer with AddEventRecord().
  bool MayAddEventRecord(const EventRecord& event_record) const noexcept {
    // One may only add an event record if its clock value is greater
    // than or equal to the clock value of the most recently written
    // event record.
    //
    // This also holds when the queue is empty: `clock_val_` is 0 until
    // this writer writes its first event record (any event record is
    // accepted), and afterwards an empty queue doesn't make it valid
    // to go back in time.
    return event_record.GetClockVal() >= clock_val_;
  }

  // Adds the event record `event_record` to this writer.
  //
  // `MayAddEventRecord(*event_record)` must return `true`.
  void AddEventRecord(typename EventRecord::SP event_record) {
    assert(MayAddEventRecord(*event_record) && "May add event record");

    // Add event record to queue.
    queue_.emplace(std::move(event_record));

    if (queue_.size() > max_queue_size_) {
      // Queue is too large: write the oldest event record now to
      // satisfy the requirement.
      WriteOldestEventRecord();
    }
  }

 private:
  // Comparison type for `queue_`.
  struct EventRecordQueueCompare final {
    bool operator()(const typename EventRecord::SP& left,
                    const typename EventRecord::SP& right) const noexcept {
      // "Greater than" so that the top element of the queue is the
      // oldest event record.
      return left->GetClockVal() > right->GetClockVal();
    }
  };

  // Oldest event record within `queue_`.
  //
  // `queue_` must not be empty.
  const EventRecord& GetOldestEventRecord() const noexcept {
    assert(!queue_.empty() && "Queue isn't empty");
    return *queue_.top();
  }

  // Writes the oldest event record through a barectf tracing function
  // and removes it from the event record queue.
  void WriteOldestEventRecord() {
    auto& oldest_event_record = GetOldestEventRecord();

    // When calling a barectf tracing function, it calls the clock value
    // accessor callback of the platform, which itself reads from
    // `clock_val_`.
    clock_val_ = oldest_event_record.GetClockVal();

    // Forward to a barectf tracing function.
    oldest_event_record.Write(platform_.GetCtx());

    // Remove from queue.
    queue_.pop();
  }

  // barectf platform (manages file I/O).
  BarectfPlatform<PlatformDescrT> platform_;

  // Current clock value for `platform_`.
  //
  // This is also the clock value of the most recently written event
  // record, which is what MayAddEventRecord() relies on.
  std::uint64_t clock_val_ = 0;

  // Maximum size of `queue_` below.
  std::size_t max_queue_size_;

  // Event record queue.
  std::priority_queue<typename EventRecord::SP, std::vector<typename EventRecord::SP>,
                      EventRecordQueueCompare>
      queue_;
};

}  // namespace rocm_ctf
#endif // PLUGIN_CTF_BARECTF_WRITER_H
@@ -0,0 +1,165 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
%YAML 1.2
--- !<tag:barectf.org,2020/3/config>
trace:
$include:
# Environment (generated file).
- env.yaml
type:
$include:
- stdint.yaml
- stdmisc.yaml
native-byte-order: little-endian
clock-types:
default:
origin-is-unix-epoch: true
$c-type: uint64_t
data-stream-types:
hsa_api:
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
$include:
# Base.
- dst_base.yaml
# HSA API event record types (generated file).
- hsa_erts.yaml
hip_api:
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
- _kernel_name: str
$include:
# Base.
- dst_base.yaml
# HIP API event record types (generated file).
- hip_erts.yaml
roctx:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
event-record-types:
roctx:
payload-field-type:
class: struct
members:
- _id: sint64
- _msg: str
hsa_handles:
$include:
# Base.
- dst_base.yaml
event-record-types:
hsa_handle_type:
payload-field-type:
class: struct
members:
- _handle: uint64
- _type:
field-type:
class: uenum
size: 8
mappings:
CPU: [0]
GPU: [1]
api_ops:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _thread_id: uint32
- _queue_id: uint32
- _agent_id: uint32
- _correlation_id: uint64
event-record-types:
hsa_op_begin:
payload-field-type:
class: struct
hsa_op_end:
payload-field-type:
class: struct
hip_op_begin:
payload-field-type:
class: struct
members:
- _kernel_name: str
hip_op_end:
payload-field-type:
class: struct
profiler:
$include:
# Base.
- dst_base.yaml
event-record-common-context-field-type:
class: struct
members:
- _dispatch: uint64
- _gpu_id: uint64
- _queue_id: uint64
- _queue_index: uint64
- _process_id: uint32
- _thread_id: uint32
- _kernel_id: uint64
- _kernel_name: str
- _counter_names:
field-type:
class: dynamic-array
element-field-type: str
- _counter_values:
field-type:
class: dynamic-array
element-field-type: uint64
event-record-types:
profiler_record:
payload-field-type:
class: struct
profiler_record_with_kernel_properties:
payload-field-type:
class: struct
members:
- _grid_size: uint64
- _workgroup_size: uint64
- _lds_size: uint64
- _scratch_size: uint64
- _arch_vgpr_count: uint64
- _accum_vgpr_count: uint64
- _sgpr_count: uint64
- _wave_size: uint64
- _signal_handle: uint64
@@ -0,0 +1,107 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cassert>
#include <stdexcept>
#include <iostream>
#include <experimental/filesystem>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "plugin.h"
namespace fs = std::experimental::filesystem;
namespace {
// Global plugin instance
rocm_ctf::Plugin* the_plugin = nullptr;
} // namespace
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(const uint32_t rocprofiler_major_version,
                                                     const uint32_t rocprofiler_minor_version) {
  // Accept only the same major version, with a minor version which is
  // at least the one this plugin was built against.
  const bool version_is_compatible = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
      !(rocprofiler_minor_version < ROCPROFILER_VERSION_MINOR);

  if (!version_is_compatible) {
    return -1;
  }

  // Refuse to initialize twice: a plugin instance already exists.
  if (the_plugin != nullptr) {
    return -1;
  }

  // The trace is written under the directory named by `OUTPUT_PATH`.
  const auto output_dir = getenv("OUTPUT_PATH");

  if (output_dir == nullptr) {
    std::cerr << "rocprofiler_plugin_initialize(): "
              << "`OUTPUT_PATH` environment variable isn't set" << std::endl;
    return -1;
  }

  // Create the plugin instance, reporting any construction failure as
  // an error status instead of letting the exception escape.
  try {
    the_plugin = new rocm_ctf::Plugin{256 * 1024, fs::path{output_dir} / "trace",
                                      CTF_PLUGIN_METADATA_FILE_PATH};
    return 0;
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_initialize(): " << exc.what() << std::endl;
    return -1;
  }
}
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
// Destroy the global plugin instance (deleting a null pointer is a
// no-op), then reset the pointer so that a later
// rocprofiler_plugin_initialize() call doesn't see a stale instance.
delete the_plugin;
the_plugin = nullptr;
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* const begin, const rocprofiler_record_header_t* const end,
    const rocprofiler_session_id_t session_id, const rocprofiler_buffer_id_t buffer_id) {
  // Forwards the buffer records `begin` to `end` to the global plugin
  // instance.
  //
  // Returns 0 on success, or -1 if the plugin isn't initialized or if
  // handling the records throws.
  //
  // The "not initialized" case is a run-time check rather than an
  // assert(): with `NDEBUG` defined, an assert() disappears and the
  // call below would dereference a null pointer.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  try {
    the_plugin->HandleBufferRecords(begin, end, session_id, buffer_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
                                                       const rocprofiler_session_id_t session_id) {
  // Forwards the tracer record `record` to the global plugin instance.
  //
  // Returns 0 on success (including when the record is skipped), or -1
  // if the plugin isn't initialized or if handling the record throws.
  //
  // The "not initialized" case is a run-time check rather than an
  // assert(): with `NDEBUG` defined, an assert() disappears and the
  // call below would dereference a null pointer.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_record(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  // Records having a null header ID handle are silently skipped.
  if (record.header.id.handle == 0) {
    return 0;
  }

  try {
    the_plugin->HandleTracerRecord(record, session_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
@@ -0,0 +1,28 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
$default-clock-type-name: default
$features:
packet:
beginning-timestamp-field-type: false
discarded-event-records-counter-snapshot-field-type: false
end-timestamp-field-type: false
@@ -0,0 +1,645 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import os
import os.path
import sys
import re
import yaml
import CppHeaderParser
# Numeric field type (abstract).
class _NumericFt:
# Returns the C++ expression to cast the expression `expr` to the C
# type of this field type.
def cast(self, expr):
return f'static_cast<{self.c_type}>({expr})'
# Integer field type (abstract).
class _IntFt(_NumericFt):
    """Base of integer field types: a size and a preferred display base."""

    def __init__(self, size, pref_disp_base='dec'):
        self._size, self._pref_disp_base = size, pref_disp_base

    @property
    def size(self):
        """Size, in bits."""
        return self._size

    @property
    def pref_disp_base(self):
        """Preferred display base (`dec` or `hex`)."""
        return self._pref_disp_base

    @property
    def barectf_yaml(self):
        """Equivalent barectf field type as a new YAML-ready dictionary.

        A new dictionary is returned on each access, so subclasses may
        freely add keys to it.
        """
        yaml_ft = {}
        yaml_ft['size'] = self._size
        yaml_ft['preferred-display-base'] = self._pref_disp_base
        return yaml_ft
# Signed integer field type.
class _SIntFt(_IntFt):
    """Signed integer field type."""

    @property
    def barectf_yaml(self):
        """Base integer description tagged with the `sint` class."""
        return {**super().barectf_yaml, 'class': 'sint'}

    @property
    def c_type(self):
        """Equivalent C type: `std::int<size>_t`."""
        return 'std::int{}_t'.format(self._size)
# Unsigned integer field type.
class _UIntFt(_IntFt):
    """Unsigned integer field type."""

    @property
    def barectf_yaml(self):
        """Base integer description tagged with the `uint` class."""
        return {**super().barectf_yaml, 'class': 'uint'}

    @property
    def c_type(self):
        """Equivalent C type: `std::uint<size>_t`."""
        return 'std::uint{}_t'.format(self._size)
# Pointer field type.
class _PointerFt(_UIntFt):
    """Pointer field type: a 64-bit unsigned integer displayed in hex."""

    def __init__(self):
        super().__init__(size=64, pref_disp_base='hex')

    def cast(self, expr):
        """Returns C++ source converting the pointer `expr` to `c_type`.

        The pointer is first reinterpreted as `std::uintptr_t`, and the
        result is then statically cast to the integer type.
        """
        as_uintptr = f'reinterpret_cast<std::uintptr_t>({expr})'
        return f'static_cast<{self.c_type}>({as_uintptr})'
# Enumeration field type (abstract).
class _EnumFt(_IntFt):
    """Base of enumeration field types (integer plus named mappings)."""

    def __init__(self, size, mappings):
        super().__init__(size)
        # Keep a private copy so that later changes to the caller's
        # mapping don't affect this field type.
        self._mappings = mappings.copy()

    @property
    def mappings(self):
        """Mappings (names to integers)."""
        return self._mappings

    @property
    def barectf_yaml(self):
        """Integer description plus a `mappings` entry.

        Each mapping value is wrapped in a single-element list.
        """
        yaml_ft = super().barectf_yaml
        yaml_ft['mappings'] = {name: [val] for name, val in self._mappings.items()}
        return yaml_ft
# Unsigned enumeration field type.
class _UEnumFt(_EnumFt, _UIntFt):
    """Unsigned enumeration field type."""

    @property
    def barectf_yaml(self):
        """Enumeration description with its class replaced by `uenum`."""
        return {**super().barectf_yaml, 'class': 'uenum'}
# Signed enumeration field type.
class _SEnumFt(_EnumFt, _SIntFt):
    """Signed enumeration field type.

    The integer base is `_SIntFt` so that `c_type`, and therefore
    `cast()`, use the signed `std::int<size>_t` type. With an unsigned
    base, enumerators having a negative value would be cast to an
    unsigned C type while the barectf class is `senum`.
    """

    @property
    def barectf_yaml(self):
        """Enumeration description with its class set to `senum`."""
        ret = super().barectf_yaml
        # Overrides the `sint` class set by the integer base.
        ret['class'] = 'senum'
        return ret
# Optional string field type.
class _OptStrFt:
# Equivalent barectf field type in YAML.
@property
def barectf_yaml(self):
return {
'class': 'str',
}
# String field type.
class _StrFt(_OptStrFt):
    """String field type; shares its YAML description with `_OptStrFt`."""
# Floating-point number field type.
class _FloatFt(_NumericFt):
    """Floating-point number field type (32-bit or 64-bit)."""

    def __init__(self, size):
        self._size = size

    @property
    def size(self):
        """Size (bits): 32 or 64."""
        return self._size

    @property
    def barectf_yaml(self):
        """Equivalent barectf field type in YAML (`real` class)."""
        yaml_ft = {'class': 'real'}
        yaml_ft['size'] = self._size
        return yaml_ft

    @property
    def c_type(self):
        """Equivalent C type: `float` for 32 bits, `double` for 64 bits."""
        if self._size == 32:
            return 'float'

        # Only two sizes are supported.
        assert self._size == 64
        return 'double'
# Event record type.
class _Ert:
def __init__(self, api_func_name, members):
self._api_func_name = api_func_name
self._members = members
# API function name
@property
def api_func_name(self):
return self._api_func_name
# Parameters of function (list of `_ErtMember`).
@property
def members(self):
return self._members
# Beginning event record type.
class _BeginErt(_Ert):
    """Beginning event record type."""

    def name(self, api_prefix):
        """Event record type name: `_begin` suffix for HSA, `Begin` otherwise."""
        if api_prefix == 'hsa':
            return f'{self._api_func_name}_begin'

        return f'{self._api_func_name}Begin'
# End event record type.
class _EndErt(_Ert):
    """End event record type."""

    def name(self, api_prefix):
        """Event record type name: `_end` suffix for HSA, `End` otherwise."""
        if api_prefix == 'hsa':
            return f'{self._api_func_name}_end'

        return f'{self._api_func_name}End'
# Event record type member.
class _ErtMember:
def __init__(self, access, member_names, ft):
self._access = access
self._member_names = member_names.copy()
self._ft = ft
# C++ access expression.
@property
def access(self):
return self._access
# List of member names.
@property
def member_names(self):
return self._member_names
# Equivalent field type.
@property
def ft(self):
return self._ft
# Makes sure some condition is satisfied, or prints the error message
# `error_msg` and quits with exit status 1 otherwise.
#
# This is an unconditional assertion.
def _make_sure(cond, error_msg):
if not cond:
print(f'Error: {error_msg}', file=sys.stderr)
sys.exit(1)
def _enumerator_effective_val(enum_val):
# Try the value, but this value may be a string (an
# enumerator/definition).
val = enum_val.get('value')
if type(val) is int:
return val
# Try the raw value.
val = enum_val.get('raw_value')
if val is not None:
if type(val) is int:
# Raw value is already an integer.
return val
else:
# Try to parse the raw value string as an integer.
try:
return int(val, 0)
except:
pass
_make_sure(False,
f'Cannot get the integral value of enumerator `{enum_val["name"]}`')
# Returns the equivalent field type of the C type `c_type`.
def _number_ft_from_c_type(cpp_header, c_type):
    """Returns the numeric field type equivalent to the C type `c_type`.

    `cpp_header` provides the known enumerations (`cpp_header.enums`).

    Resolution order:

    1. Enumeration known to `cpp_header`: signed enumeration field type
       (64-bit if any value doesn't fit 32 signed bits), or a plain
       64-bit signed integer if the enumeration has no enumerators.
    2. Exact-width `<stdint.h>` type (`int8_t` to `uint64_t`): integer
       field type having exactly this size and signedness.
    3. `size_t`: 64-bit unsigned integer (always wide enough, so the
       generated cast can't truncate).
    4. `float`/`double`: floating-point field type (checked before the
       integer keywords so that `long double` isn't taken for a `long`).
    5. `long`/`short`/`char`, honouring `unsigned`.
    6. Anything else: 32-bit integer (often an unresolved C enumeration).
    """
    # Check for known enumeration.
    m = re.match(r'(?:enum\s+)?(\w+)', c_type)

    if m:
        for enum_info in cpp_header.enums:
            if m.group(1) != enum_info.get('name'):
                continue

            # Fill enumeration field type mappings.
            mappings = {
                str(v['name']): _enumerator_effective_val(v)
                for v in enum_info['values']
            }

            if len(mappings) == 0:
                # Nothing to map: fall back to a wide signed integer.
                return _SIntFt(64)

            vals = mappings.values()
            needs_64_bits = max(vals) >= 2**31 or min(vals) < -2**31

            # Create corresponding enumeration field type.
            return _SEnumFt(64 if needs_64_bits else 32, mappings)

    # Exact-width integer types carry their own size and signedness;
    # without this, they'd end up as 32-bit signed integers below and
    # 64-bit values would be truncated by the generated cast.
    m = re.search(r'\b(u?)int(8|16|32|64)_t\b', c_type)

    if m:
        ft_cls = _UIntFt if m.group(1) else _SIntFt
        return ft_cls(int(m.group(2)))

    if re.search(r'\bsize_t\b', c_type):
        return _UIntFt(64)

    # Floating-point types.
    if 'float' in c_type:
        return _FloatFt(32)

    if 'double' in c_type:
        return _FloatFt(64)

    # Find corresponding basic integer field type.
    if 'long' in c_type:
        size = 64
    elif 'short' in c_type:
        size = 16
    elif 'char' in c_type:
        size = 8
    else:
        # Assume `int` (often an unresolved C enumeration).
        size = 32

    return _UIntFt(size) if 'unsigned' in c_type else _SIntFt(size)
# Returns whether or not a property has a pointer type.
def _prop_is_pointer(prop, c_type):
# `prop` is a property dictionary (it's indexed with the `pointer`,
# `function_pointer`, `array`, `unresolved`, and `name` keys below) and
# `c_type` is the name of its C type as a string.
#
# Returns `True` as soon as one of the checks below succeeds, and
# `False` otherwise.
#
# Explicitly flagged as a pointer or as a function pointer.
if prop['pointer'] or prop['function_pointer']:
return True
# Array with a known size: also reported as a pointer.
if prop['array'] and 'array_size' in prop:
return True
# NOTE(review): presumably the two name-based checks below only apply
# to unresolved types; confirm against the original indentation.
if prop['unresolved']:
# HSA API function pointers.
if prop['name'] in ('callback', 'handler'):
return True
# HIP API function pointers.
if c_type.endswith('Fn_t'):
return True
# Check the C type itself.
if '*' in c_type or '*' in prop.get('raw_type', ''):
return True
return False
# Returns a list of event record type member objects for the structure
# `struct` considering the initial C++ access expression `access` and
# member names `member_names`.
def _get_ert_members_for_struct(cpp_header, struct, access, member_names):
# Walks the public properties of `struct`, creating one `_ErtMember`
# per leaf property. Properties whose type is a known structure are
# expanded recursively, so the returned list is flat.
#
# `access` is the C++ expression reaching `struct`; `member_names` is
# the list of enclosing member names (not modified: a copy is used).
members = []
member_names = member_names.copy()
# Placeholder for the name of the current property; overwritten at
# each iteration below.
member_names.append(None)
props = struct['properties']['public']
# NOTE(review): `index` isn't used within this loop.
for index, prop in enumerate(props):
# Property name.
name = prop['name']
# Member names, access, and C type.
member_names[-1] = str(name)
this_access = f'{access}.{name}'
c_type = prop['type']
aliases = prop['aliases']
# Skip no type.
if c_type == '':
continue
# Skip unnamed or union.
#
# NOTE(review): re.match() only matches at the beginning of `c_type`;
# confirm that re.search() wasn't intended.
if name == '' or 'union' in name or re.match(r'\bunion\b', c_type):
continue
# Check for known C type alias.
#
# Aliases are followed until reaching a type which isn't a known
# type definition.
#
# NOTE(review): this would never end if `cpp_header.typedefs`
# contained a cycle; presumably that can't happen, to be confirmed.
while True:
c_type_alias = cpp_header.typedefs.get(c_type)
if c_type_alias is None:
break
c_type = c_type_alias
# Check for C string.
if re.match(r'^((const\s+char)|(char\s+const)|char)\s*\*$',
c_type.strip()):
members.append(_ErtMember(this_access, member_names, _OptStrFt()))
continue
# Check for pointer.
if _prop_is_pointer(prop, c_type):
# Pointer: use numeric value.
members.append(_ErtMember(this_access, member_names, _PointerFt()))
continue
# Check for substructure.
#
# Try the resolved C type first, then the single alias of the
# property, if any.
sub_struct = cpp_header.classes.get(c_type)
if sub_struct is None and len(aliases) == 1:
sub_struct = cpp_header.classes.get(aliases[0])
if sub_struct is not None:
# Recurse: the current access and member names become the prefix of
# the members of the substructure.
members += _get_ert_members_for_struct(cpp_header, sub_struct,
this_access, member_names)
continue
# Use a basic field type.
members.append(_ErtMember(this_access, member_names,
_number_ft_from_c_type(cpp_header, c_type)))
return members
# Returns the beginning and end event record type objects for the
# callback data structure `struct`.
def _erts_from_cb_data_struct(api_prefix, cpp_header, retval_info, struct):
# Returns a `(begin_erts, end_erts)` tuple of lists having one
# `_BeginErt` and one `_EndErt` per member of the `args` union of
# `struct`.
#
# `retval_info` is either `None` or a dictionary mapping API function
# names to the name of a return value union member (or to `None`).
#
# NOTE(review): `api_prefix` isn't used within this function.
#
# The location of the `args` union within the nested structures of
# `struct`.
args_nested_cls_index = 0
# Create return value members (to be used later).
if retval_info is not None:
# With return value information, the first nested class is the return
# value union and the `args` union comes second.
args_nested_cls_index = 1
retval_members = {}
nested_classes = struct['nested_classes']
_make_sure(len(nested_classes) >= 1,
f"Return value union doesn't exist in `{struct['name']}`")
retval_union = nested_classes[0]
for prop in retval_union['properties']['public']:
name = str(prop['name'])
member = _ErtMember(f'GetApiData().{name}', ['retval'],
_number_ft_from_c_type(cpp_header, prop['type']))
retval_members[prop['name']] = member
# Make sure we have everything we need.
for api_func_name, retval_name in retval_info.items():
if retval_name is not None:
_make_sure(retval_name in retval_members,
f"Return value union member `{retval_name}` doesn't exist (function {api_func_name}())")
# Create beginning/end event record type objects.
begin_erts = []
end_erts = []
# The nested structures and the public properties of the `args` union
# are expected to be parallel lists (same order, same length).
nested_classes = struct['nested_classes'][args_nested_cls_index]['nested_classes']
props = struct['nested_classes'][args_nested_cls_index]['properties']['public']
_make_sure(len(nested_classes) == len(props),
f'Mismatch between nested structure and member count in `{struct["name"]}`')
for index, prop in enumerate(props):
# API function name is the name of the member.
api_func_name = str(prop['name'])
# Get the parameters.
members = _get_ert_members_for_struct(cpp_header,
nested_classes[index],
f'GetApiData().args.{api_func_name}',
[])
# Append new beginning event record type object.
begin_erts.append(_BeginErt(api_func_name, members))
# Append new end event record type object if possible.
#
# The end event record type only has a member when a return value
# union member is known for this function.
ret_members = []
if retval_info is not None:
retval_type = retval_info.get(api_func_name)
if retval_type is not None:
ret_members.append(retval_members[retval_type])
end_erts.append(_EndErt(api_func_name, ret_members))
return begin_erts, end_erts
# Creates and returns the return value information dictionary.
#
# This dictionary maps API function names to the member to get within
# the callback data structure.
#
# This only applies to the HSA API: for other APIs, this function
# returns `None`.
def _get_retval_info(path):
if 'hsa' not in os.path.basename(path):
return
retval_info = {}
cur_api_func_name = None
with open(path) as f:
for line in f:
if 'out << ")' in line and cur_api_func_name is not None:
m = re.search(r'api_data.(\w+_retval)', line)
retval_info[cur_api_func_name] = m.group(1) if m else None
else:
m = re.search(r'out << "(hsa_\w+)\(";', line)
if m:
cur_api_func_name = m.group(1)
return retval_info
# Returns a partial barectf data stream type in YAML with the event
# record types `erts`.
def _yaml_dst_from_erts(api_prefix, erts):
    """Returns a partial barectf data stream type, as a YAML string.

    The result has a single `event-record-types` key holding one event
    record type per element of `erts`, named `ert.name(api_prefix)`.
    """
    def payload_members(ert):
        # One structure field type member per event record type member.
        #
        # barectf doesn't support nested CTF structures, so the
        # individual member names are joined with `__` to flatten them.
        return [
            {
                '_' + '__'.join(member.member_names): {
                    'field-type': member.ft.barectf_yaml,
                },
            }
            for member in ert.members
        ]

    yaml_erts = {
        ert.name(api_prefix): {
            'payload-field-type': {
                'class': 'struct',
                'members': payload_members(ert),
            },
        }
        for ert in erts
    }

    # Convert to YAML.
    return yaml.dump({'event-record-types': yaml_erts})
# Returns the C++ switch statement which calls the correct barectf
# tracing function depending on the API function operation ID.
def _cpp_switch_statement_from_erts(api_prefix, erts):
lines = []
lines.append('switch (GetOp()) {')
for ert in erts:
lines.append(f' case {api_prefix.upper()}_API_ID_{ert.api_func_name}:')
lines.append(f' barectf_{api_prefix}_api_trace_{ert.name(api_prefix)}(')
lines.append(f' &barectf_ctx,')
lines.append(f' GetThreadId(),')
lines.append(f' GetQueueId(),')
lines.append(f' GetAgentId(),')
lines.append(f' GetCorrelationId(),')
if api_prefix == 'hip':
lines.append(f' GetKernelName().c_str(),')
if len(ert.members) == 0:
# Remove last comma.
lines[-1] = lines[-1].replace(',', '')
for index, member in enumerate(ert.members):
if type(member.ft) is _OptStrFt:
# Only dereference C string if not null, otherwise use
# an empty string.
lines.append(f' {member.access} ? {member.access} : ""')
elif type(member.ft) is _StrFt:
lines.append(f' {member.access}')
else:
lines.append(f' {member.ft.cast(member.access)}')
if index + 1 < len(ert.members):
lines[-1] += ','
lines.append(' );')
lines.append(' break;')
lines.append('}')
return lines
# Processes the complete API header file `path`.
def _process_file(api_prefix, path):
    """Processes the complete API header file `path`.

    For each parsed structure named `<api_prefix>_api_data<suffix>`,
    writes three files into the current working directory:

    * `<api_prefix>_erts.yaml`: barectf event record types.
    * `<api_prefix>_begin.cpp.i`: C++ switch statement (beginning).
    * `<api_prefix>_end.cpp.i`: C++ switch statement (end).

    Prints the parsing error and exits with status 1 if the header
    can't be parsed.
    """
    # Create `CppHeader` object.
    try:
        cpp_header = CppHeaderParser.CppHeader(path)
    except CppHeaderParser.CppParseError as exc:
        print(exc, file=sys.stderr)
        sys.exit(1)

    # Get return value information dictionary.
    retval_info = _get_retval_info(path)

    # Find callback data structure.
    for struct_name, struct in cpp_header.classes.items():
        if not re.match(r'^' + api_prefix + r'_api_data\w+$', struct_name):
            continue

        # Process callback data structure.
        begin_erts, end_erts = _erts_from_cb_data_struct(api_prefix, cpp_header,
                                                         retval_info, struct)

        # Output file names with the function producing their contents.
        #
        # The contents are produced once the file is open, in this order.
        outputs = (
            (f'{api_prefix}_erts.yaml',
             lambda: _yaml_dst_from_erts(api_prefix, begin_erts + end_erts)),
            (f'{api_prefix}_begin.cpp.i',
             lambda: '\n'.join(_cpp_switch_statement_from_erts(api_prefix, begin_erts))),
            (f'{api_prefix}_end.cpp.i',
             lambda: '\n'.join(_cpp_switch_statement_from_erts(api_prefix, end_erts))),
        )

        for file_name, make_text in outputs:
            with open(file_name, 'w') as f:
                f.write(make_text())
if __name__ == '__main__':
    # Silence `CppHeaderParser`: no warnings, errors, or debugging
    # output on the standard output.
    _parser_module = CppHeaderParser.CppHeaderParser
    _parser_module.print_warnings = 0
    _parser_module.print_errors = 0
    _parser_module.debug = 0
    _parser_module.debug_trace = 0

    # Process the complete API header file: the first argument is the
    # API prefix and the second one is the header file path.
    _process_file(sys.argv[1], sys.argv[2])
@@ -0,0 +1,33 @@
################################################################################
# Copyright (c) 2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import sys
import yaml
if __name__ == '__main__':
    # Write `env.yaml` (current working directory) with an `environment`
    # mapping holding the version given as the first argument.
    with open('env.yaml', 'w') as env_file:
        env_doc = {
            'environment': {
                'rocprofiler_version': sys.argv[1],
            },
        }
        env_file.write(yaml.dump(env_doc))
@@ -0,0 +1,869 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cassert>
#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <iostream>
#include <utility>
#include <string>
#include <memory>
#include <limits>
#include <fstream>
#include <experimental/filesystem>
#include <time.h>
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_prof_str.h"
#include <hip/hip_runtime.h>
#include <hip/amd_detail/hip_prof_str.h>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
#include "barectf.h"
#include "barectf_event_record.h"
#include "barectf_tracer.h"
#include "plugin.h"
namespace fs = std::experimental::filesystem;
namespace rocm_ctf {
namespace {
// Abstract tracer event record using the barectf context type `CtxT`.
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
protected:
// Builds the event record from the tracer record `record`, copying
// the identifiers below out of it, with `clock_val` as the clock value
// passed to the base event record.
//
// Protected: only meant to be instantiated through a derived class.
explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
: BarectfEventRecord<CtxT>{clock_val},
op_{record.operation_id.id},
thread_id_{record.thread_id.value},
queue_id_{record.queue_id.handle},
agent_id_{record.agent_id.handle},
correlation_id_{record.correlation_id.value} {}
// Accessors for derived classes (values copied at construction time).
std::uint32_t GetOp() const noexcept { return op_; }
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
std::uint64_t GetAgentId() const noexcept { return agent_id_; }
std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
private:
// Operation ID (`record.operation_id.id`).
std::uint32_t op_;
// Thread ID (`record.thread_id.value`).
std::uint32_t thread_id_;
// Queue handle (`record.queue_id.handle`).
std::uint64_t queue_id_;
// Agent handle (`record.agent_id.handle`).
std::uint64_t agent_id_;
// Correlation ID (`record.correlation_id.value`).
std::uint64_t correlation_id_;
};
// Returns the beginning clock value of the tracer or profiler record
// `record`.
template <typename RecordT> std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  // `RecordT` only needs a `timestamps.begin.value` member.
  const auto& begin_timestamp = record.timestamps.begin;

  return begin_timestamp.value;
}
// Returns the end clock value of the tracer or profiler record
// `record`.
template <typename RecordT> std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  // `RecordT` only needs a `timestamps.end.value` member.
  const auto& end_timestamp = record.timestamps.end;

  return end_timestamp.value;
}
// Queries allocated string data using the size query function
// `query_size_func` and the data query function `query_data_func`,
// returning the corresponding string and freeing temporary allocated
// memory.
//
// Returns an empty string if anything goes wrong.
template <typename QuerySizeFuncT, typename QueryDataFuncT>
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
  // Releases, with std::free(), the buffer set by query_data_func().
  struct FreeDeleter final {
    void operator()(char* const ptr) const noexcept { std::free(ptr); }
  };

  // Query size first.
  std::size_t size = 0;
  [[maybe_unused]] auto ret = query_size_func(&size);

  assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");

  if (size == 0) {
    // No size: return empty string.
    return {};
  }

  // Query data (allocated by query_data_func()).
  char* raw_str = nullptr;

  ret = query_data_func(&raw_str);
  assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");

  // Take ownership immediately: the buffer is released on every path
  // out of this function, including when building the returned string
  // throws (std::bad_alloc), which would otherwise leak it.
  //
  // A null pointer is fine: the deleter isn't called in this case.
  const std::unique_ptr<char, FreeDeleter> alloc_str{raw_str};

  if (!alloc_str) {
    // No data: return empty string.
    return {};
  }

  // Copy the null-terminated data into the returned string object.
  return std::string{alloc_str.get()};
}
// rocTX event record.
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
public:
// Builds a rocTX event record from the tracer record `record`, using
// its beginning timestamp as the clock value, and querying its rocTX
// ID and message for the session ID `session_id` right away (both are
// stored by value).
explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
id_{QueryId(record, session_id)},
msg_{QueryMsg(record, session_id)} {}
// Writes this event record with the `roctx` barectf tracing function:
// thread ID, rocTX ID, and message.
void Write(barectf_roctx_ctx& barectf_ctx) const override {
barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
}
private:
// Queries and returns the rocTX message of the record `record` and
// session ID `session_id`.
//
// Returns an empty string if not available.
static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
// Query size first.
//
// The status is only checked by assert(): without assertions, a
// failed query leaves `msg_size` to 0 and leads to the empty string.
std::size_t msg_size = 0;
[[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
&msg_size);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");
if (msg_size == 0) {
// No size: return empty string.
return {};
}
// Query data (borrowed from the record: no need to free).
char* msg = nullptr;
ret = rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");
if (!msg) {
// No data: return empty string.
return {};
}
// NOTE(review): the message goes through the C++ demangling helper;
// presumably it returns non-mangled input unchanged (to be confirmed).
return rocmtools::cxx_demangle(msg);
}
// Queries and returns the rocTX ID of the record `record` and the
// session ID `session_id`.
//
// Returns 0 if anything goes wrong.
static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
// The ID is queried as text (through QueryAllocStr(), which frees the
// queried data) and then parsed with std::stoull(): an empty or
// non-numeric string makes std::stoull() throw, which the catch-all
// handler below converts to 0.
try {
return std::stoull(QueryAllocStr(
[&record, session_id](const auto size) {
return rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
},
[&record, session_id](const auto str) {
return rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
}));
} catch (...) {
return 0;
}
}
// rocTX ID (0 if it couldn't be queried or parsed).
std::uint64_t id_;
// rocTX message (empty if not available).
std::string msg_;
};
// Abstract HSA API event record.
//
// Keeps its own copy of the HSA API data queried from the tracer record
// so that concrete subclasses can read it through GetApiData().
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
 protected:
  // Builds the record from `record` (session `session_id`) with the clock
  // value `clock_val`.
  explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)} {}

  // HSA API data copied at construction time.
  const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }

 private:
  // Returns a reference to the HSA API data of `record` within the session
  // `session_id`.
  //
  // The size query only feeds the assertions; the data pointer is borrowed
  // from the record.
  static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
    assert(size > 0);

    char* data = nullptr;

    ret = rocprofiler_query_hsa_tracer_api_data_info(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
    assert(data);

    // View the raw bytes as HSA API data.
    const auto* const api_data = reinterpret_cast<const hsa_api_data_t*>(data);

    return *api_data;
  }

  hsa_api_data_t api_data_;
};
// HSA API event record (beginning).
//
// Uses the value of GetRecordBeginClockVal() for the record as its clock
// value.
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
public:
// Builds the record from `record` within the session `session_id`.
explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
// Writes this event record using `barectf_ctx`.
//
// The whole body comes from the generated file `hsa_begin.cpp.i`; that
// code presumably relies on the names visible here (`barectf_ctx`,
// GetApiData(), ...) -- confirm against the generator before renaming.
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_begin.cpp.i"
}
};
// HSA API event record (end).
//
// Uses the value of GetRecordEndClockVal() for the record as its clock
// value.
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
public:
// Builds the record from `record` within the session `session_id`.
explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
// Writes this event record using `barectf_ctx`.
//
// The whole body comes from the generated file `hsa_end.cpp.i`; that
// code presumably relies on the names visible here (`barectf_ctx`,
// GetApiData(), ...) -- confirm against the generator before renaming.
void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hsa_end.cpp.i"
}
};
// Abstract HIP API event record.
//
// Keeps its own copy of the HIP API data and the kernel name queried from
// the tracer record so that concrete subclasses can read them through
// GetApiData() and GetKernelName().
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
 protected:
  // Builds the record from `record` (session `session_id`) with the clock
  // value `clock_val`.
  explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)},
        kernel_name_{QueryKernelName(record, session_id)} {}

  // HIP API data copied at construction time.
  const hip_api_data_t& GetApiData() const noexcept { return api_data_; }

  // Kernel name (possibly empty) resolved at construction time.
  const std::string& GetKernelName() const noexcept { return kernel_name_; }

 private:
  // Returns a reference to the HIP API data of `record` within the session
  // `session_id`.
  //
  // The size query only feeds the assertions; the data pointer is borrowed
  // from the record.
  static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
    assert(size > 0);

    char* data = nullptr;

    ret = rocprofiler_query_hip_tracer_api_data_info(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
    assert(data);

    // View the raw bytes as HIP API data.
    const auto* const api_data = reinterpret_cast<const hip_api_data_t*>(data);

    return *api_data;
  }

  // Returns the kernel name of `record` within the session `session_id`.
  //
  // A name longer than one character is returned demangled; anything
  // shorter is returned exactly as queried (possibly empty).
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
                                     const rocprofiler_session_id_t session_id) {
    // Size query callback.
    auto query_size = [&record, session_id](const auto size) {
      return rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
          size);
    };

    // String query callback.
    auto query_str = [&record, session_id](const auto str) {
      return rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
          str);
    };

    const auto kernel_name = QueryAllocStr(query_size, query_str);

    if (kernel_name.size() <= 1) {
      // Too short to demangle: return as is.
      return kernel_name;
    }

    return rocmtools::cxx_demangle(kernel_name);
  }

  hip_api_data_t api_data_;
  std::string kernel_name_;
};
// HIP API event record (beginning).
//
// Uses the value of GetRecordBeginClockVal() for the record as its clock
// value.
class HipApiEventRecordBegin final : public HipApiEventRecord {
public:
// Builds the record from `record` within the session `session_id`.
explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}
// Writes this event record using `barectf_ctx`.
//
// The whole body comes from the generated file `hip_begin.cpp.i`; that
// code presumably relies on the names visible here (`barectf_ctx`,
// GetApiData(), GetKernelName(), ...) -- confirm against the generator
// before renaming.
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_begin.cpp.i"
}
};
// HIP API event record (end).
//
// Uses the value of GetRecordEndClockVal() for the record as its clock
// value.
class HipApiEventRecordEnd final : public HipApiEventRecord {
public:
// Builds the record from `record` within the session `session_id`.
explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id)
: HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}
// Writes this event record using `barectf_ctx`.
//
// The whole body comes from the generated file `hip_end.cpp.i`; that
// code presumably relies on the names visible here (`barectf_ctx`,
// GetApiData(), GetKernelName(), ...) -- confirm against the generator
// before renaming.
void Write(barectf_hip_api_ctx& barectf_ctx) const override {
// Include generated switch statement.
#include "hip_end.cpp.i"
}
};
// HSA API handle type event record.
//
// Associates an HSA agent handle with its device type (CPU or GPU). The
// clock value passed to the base class is always 0.
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
 public:
  // Device type of a handle; the numeric values are what gets written to
  // the trace.
  enum class Type { CPU = 0, GPU = 1 };

  // Builds a record stating that the handle `handle` has the type `type`.
  explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
      : BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}

  // Writes the handle and its type (as an 8-bit integer) using
  // `barectf_ctx`.
  void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
    const auto type_code = static_cast<std::uint8_t>(type_);

    barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_, type_code);
  }

 private:
  std::uint64_t handle_;
  Type type_;
};
// Abstract API operation event record.
//
// Common base of the HSA and HIP operation event records; it only
// forwards `record` and `clock_val` to the tracer event record base
// (barectf "api_ops" context).
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
protected:
// Builds the record from `record` with the clock value `clock_val`.
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
};
// HSA API operation event record (beginning).
//
// Uses the value of GetRecordBeginClockVal() for the record as its clock
// value.
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
public:
// Builds the record from `record`.
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
// Writes an `hsa_op_begin` event with the thread, queue, agent, and
// correlation IDs of this record using `barectf_ctx`.
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// HSA API operation event record (end).
//
// Uses the value of GetRecordEndClockVal() for the record as its clock
// value.
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
public:
// Builds the record from `record`.
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
// Writes an `hsa_op_end` event with the thread, queue, agent, and
// correlation IDs of this record using `barectf_ctx`.
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// HIP API operation event record (beginning).
//
// Uses the value of GetRecordBeginClockVal() for the record as its clock
// value and additionally carries a kernel name (possibly empty).
class HipOpEventRecordBegin final : public ApiOpEventRecord {
 public:
  // Builds the record from `record`, resolving the kernel name right away.
  explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
      : ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
        kernel_name_{QueryKernelName(record)} {}

  // Writes a `hip_op_begin` event with the thread, queue, agent, and
  // correlation IDs of this record, plus the kernel name, using
  // `barectf_ctx`.
  void Write(barectf_api_ops_ctx& barectf_ctx) const override {
    const char* const kernel_name = kernel_name_.c_str();

    barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
                                       GetCorrelationId(), kernel_name);
  }

 private:
  // Returns the demangled kernel name of `record`.
  //
  // A name is only looked for when the operation ID is 0 and the API data
  // handle is set; in that case the handle is read as a C string.
  //
  // Returns an empty string otherwise, or when that string is at most one
  // character long.
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
    if (record.operation_id.id != 0) {
      return {};
    }

    const auto api_handle = record.api_data_handle.handle;

    if (!api_handle) {
      return {};
    }

    const auto str = reinterpret_cast<const char*>(api_handle);

    if (std::strlen(str) <= 1) {
      return {};
    }

    return rocmtools::cxx_demangle(str);
  }

  std::string kernel_name_;
};
// HIP API operation event record (end).
//
// Uses the value of GetRecordEndClockVal() for the record as its clock
// value.
class HipOpEventRecordEnd final : public ApiOpEventRecord {
public:
// Builds the record from `record`.
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
// Writes a `hip_op_end` event with the thread, queue, agent, and
// correlation IDs of this record using `barectf_ctx`.
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
GetCorrelationId());
}
};
// Profiler record base.
//
// Captures, at construction time, everything needed to write a profiler
// event record: dispatch/GPU/queue/kernel identifiers, process and thread
// IDs, the (truncated, demangled) kernel name, and the counter names and
// values.
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
 public:
  // Builds the record from `record` within the session `session_id`.
  explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
                               const rocprofiler_session_id_t session_id)
      : BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
        dispatch_{record.header.id.handle},
        gpu_id_{record.gpu_id.handle},
        queue_id_{record.queue_id.handle},
        queue_index_{record.queue_idx.value},
        process_id_{GetPid()},
        thread_id_{record.thread_id.value},
        kernel_id_{record.kernel_id.handle},
        kernel_name_{QueryKernelName(record)},
        counter_infos_{QueryCounterInfos(record, session_id)} {}

  // Writes a `profiler_record` event using `barectf_ctx`.
  void Write(barectf_profiler_ctx& barectf_ctx) const override {
    barectf_profiler_trace_profiler_record(
        &barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
        kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
        counter_infos_.values.size(), counter_infos_.values.data());
  }

 protected:
  // Counter infos.
  //
  // `names[i]` names the counter value `values[i]`.
  struct CounterInfos final {
    // `names_storage` owns the strings while the elements of `names`
    // point to the internal C strings of `names_storage`.
    //
    // This is needed because barectf expects an array of contiguous
    // C string pointers.
    //
    // Because `names` points into `names_storage`, `names` must only be
    // filled once `names_storage` has reached its final size, and a copy
    // of this structure would still point into the original's strings.
    std::vector<std::string> names_storage;
    std::vector<const char*> names;

    // Counter values.
    std::vector<std::uint64_t> values;
  };

  std::uint64_t GetDispatch() const noexcept { return dispatch_; }
  std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
  std::uint64_t GetQueueId() const noexcept { return queue_id_; }
  std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
  std::uint32_t GetProcessId() const noexcept { return process_id_; }
  std::uint32_t GetThreadId() const noexcept { return thread_id_; }
  std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
  const std::string& GetKernelName() const noexcept { return kernel_name_; }
  const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }

 private:
  // Queries and returns the kernel name of the record `record`.
  //
  // Returns an empty string if not available.
  static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
    const auto kernel_name = QueryAllocStr(
        [&record](const auto size) {
          return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id,
                                                    size);
        },
        [&record](const auto str) {
          return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
                                               const_cast<const char**>(str));
        });

    if (kernel_name.size() <= 1) {
      return {};
    }

    // Return truncated and demangled version.
    return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
  }

  // Queries and returns the counter infos of the record `record` and
  // session ID `session_id`.
  //
  // Counters without a handle, without a name size, or without a name are
  // skipped.
  static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
                                        const rocprofiler_session_id_t session_id) {
    if (!record.counters) {
      // No counters.
      return {};
    }

    CounterInfos infos;

    for (std::size_t i = 0; i < record.counters_count.value; ++i) {
      auto& counter = record.counters[i];

      if (counter.counter_handler.handle == 0) {
        // Not available: continue.
        continue;
      }

      // Query counter name size first
      std::size_t counter_name_size = 0;
      [[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
          session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);

      assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");

      if (counter_name_size == 0) {
        // No size: continue.
        continue;
      }

      // Query counter name (borrowed from `record`: no need to free).
      const char* counter_name = nullptr;

      ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                           counter.counter_handler, &counter_name);
      assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");

      if (!counter_name) {
        // Not available: continue.
        continue;
      }

      // Store the name and the value; the C string pointers are collected
      // below, once `names_storage` no longer grows.
      infos.names_storage.emplace_back(counter_name);
      infos.values.push_back(counter.value.value);
    }

    // Collect the C string pointers only now: growing `names_storage`
    // may reallocate and move its strings, which would invalidate any
    // pointer taken earlier into a short (inline-stored) string.
    infos.names.reserve(infos.names_storage.size());

    for (const auto& name : infos.names_storage) {
      infos.names.push_back(name.c_str());
    }

    return infos;
  }

  std::uint64_t dispatch_;
  std::uint64_t gpu_id_;
  std::uint64_t queue_id_;
  std::uint64_t queue_index_;
  std::uint32_t process_id_;
  std::uint32_t thread_id_;
  std::uint64_t kernel_id_;
  std::string kernel_name_;
  CounterInfos counter_infos_;
};
// Profiler event record with kernel properties.
//
// Extends the profiler record base with the kernel properties of the
// record (grid size, workgroup size, LDS size, scratch size, register
// counts, wave size, and signal handle).
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
private:
// According to `plugin/file/file.cpp`:
//
// > Taken from rocprofiler: The size hasn't changed in recent past
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
public:
// Builds the record from `record` within the session `session_id`.
//
// The LDS size is rounded up to the next multiple of `lds_block_size_`.
//
// NOTE(review): `~(lds_block_size_ - 1)` is computed on a 32-bit
// unsigned value; if `record.kernel_properties.lds_size` is wider than
// 32 bits, the mask also clears its upper bits -- confirm the field type.
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id)
: ProfilerEventRecord{record, session_id},
grid_size_{record.kernel_properties.grid_size},
workgroup_size_{record.kernel_properties.workgroup_size},
lds_size_{
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
scratch_size_{record.kernel_properties.scratch_size},
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
sgpr_count_{record.kernel_properties.sgpr_count},
wave_size_{record.kernel_properties.wave_size},
signal_handle_{record.kernel_properties.signal_handle} {}
// Writes a `profiler_record_with_kernel_properties` event using
// `barectf_ctx`: the fields of the base record first, then the kernel
// properties.
void Write(barectf_profiler_ctx& barectf_ctx) const override {
barectf_profiler_trace_profiler_record_with_kernel_properties(
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
}
private:
// Kernel properties copied from the record (LDS size rounded up, see the
// constructor).
std::uint64_t grid_size_;
std::uint64_t workgroup_size_;
std::uint64_t lds_size_;
std::uint64_t scratch_size_;
std::uint64_t arch_vgpr_count_;
std::uint64_t accum_vgpr_count_;
std::uint64_t sgpr_count_;
std::uint64_t wave_size_;
std::uint64_t signal_handle_;
};
} // namespace
// Builds the plugin: validates the paths, creates the trace directory
// `trace_dir`, copies the adjusted metadata stream file into it, and
// writes the HSA handle type event records.
//
// Throws `std::runtime_error` when `trace_dir` already exists, when
// `metadata_stream_path` doesn't exist, when the trace directory cannot be
// created, or when adjusting/copying the metadata stream file fails.
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
               const fs::path& metadata_stream_path)
    : roctx_tracer_{packet_size, trace_dir, "roctx_"},
      hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
      hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
      api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
      hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
      profiler_tracer_{packet_size, trace_dir, "profiler_"} {
  // The trace directory must be new.
  if (fs::exists(trace_dir)) {
    throw std::runtime_error{"CTF trace directory `" + trace_dir.string() + "` already exists"};
  }

  // The installed metadata stream file must be there.
  if (!fs::exists(metadata_stream_path)) {
    throw std::runtime_error{"CTF metadata stream file `" + metadata_stream_path.string() +
                             "` doesn't exist"};
  }

  // Create the trace directory.
  const bool created = fs::create_directory(trace_dir);

  if (!created) {
    throw std::runtime_error{"Cannot create the CTF trace directory `" + trace_dir.string() +
                             "`"};
  }

  // Put the adjusted metadata stream file into the trace directory,
  // adding context to any failure.
  try {
    CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
  } catch (const std::exception& exc) {
    throw std::runtime_error{"Cannot adjust and copy metadata stream file `" +
                             metadata_stream_path.string() + "` to the CTF trace directory `" +
                             trace_dir.string() + "`: " + exc.what()};
  }

  // Finally, record the type of each HSA handle.
  WriteHsaHandleTypes();
}
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
const rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock{lock_};
// Depending on the domain, create and add an event record to the
// corresponding tracer.
switch (record.domain) {
case ACTIVITY_DOMAIN_ROCTX:
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
break;
case ACTIVITY_DOMAIN_HSA_API: {
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
hsa_api_tracer_.AddEventRecord(
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
hip_api_tracer_.AddEventRecord(
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
break;
}
case ACTIVITY_DOMAIN_HSA_OPS:
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
break;
case ACTIVITY_DOMAIN_HIP_OPS:
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
break;
default:
// Warn
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
<< "ignoring record for unknown domain #" << record.domain << std::endl;
break;
}
}
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
const rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock{lock_};
profiler_tracer_.AddEventRecord(
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
}
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* const end,
const rocprofiler_session_id_t session_id,
const rocprofiler_buffer_id_t buffer_id) {
while (begin && begin < end) {
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
} else {
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
session_id);
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
}
void Plugin::WriteHsaHandleTypes() {
[[maybe_unused]] const auto status = hsa_iterate_agents(
[](const auto agent, const auto user_data) {
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
hsa_device_type_t type;
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
return HSA_STATUS_ERROR;
}
using Type = HsaHandleTypeEventRecord::Type;
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
tracer.AddEventRecord(std::move(event_record));
return HSA_STATUS_SUCCESS;
},
&hsa_handles_tracer_);
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
}
namespace {
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
// Samples the ROCMTools clock and returns the value.
std::uint64_t GetClkVal() {
rocprofiler_timestamp_t ts;
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
return ts.value;
}
// Refines `offset` (clock class offset) and `delta` (ROCMTools clock
// value delta) when this attempt is at least as tight as the best one so
// far.
//
// One attempt samples the ROCMTools clock, then the real-time clock, then
// the ROCMTools clock again. The distance between the two ROCMTools
// samples is this attempt's delta: if it's greater than `delta`, the
// attempt is discarded. Otherwise the average of the two ROCMTools
// samples approximates the ROCMTools clock value at the time of the
// real-time sample, and the difference between both becomes the new
// offset.
//
// This strategy is based on the measure_single_clock_offset() function
// of the LTTng-tools project <https://lttng.org/>.
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
  // ROCMTools clock, before.
  const auto clk_before = GetClkVal();

  // Real-time clock, in between.
  timespec realtime_spec = {0, 0};
  [[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);

  assert(ret == 0);

  // ROCMTools clock, after.
  const auto clk_after = GetClkVal();

  // How long this attempt took, in ROCMTools clock units.
  const auto this_delta = clk_after - clk_before;

  if (this_delta <= delta) {
    // At least as tight as the best attempt: keep it.
    const auto rocm_clk_val_avg = (clk_before + clk_after) >> 1;
    const auto realtime_ns =
        (static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;

    assert(rocm_clk_val_avg < realtime_ns);
    offset = realtime_ns - rocm_clk_val_avg;
    delta = this_delta;
  }
}
// Computes and returns the most accurate possible clock class offset.
//
// This is a best effort: the offset is refined over a fixed number of
// attempts, keeping the result of the attempt with the smallest delta.
std::uint64_t GetMetadataClkClsOffset() {
  constexpr auto attempt_count = 50U;
  std::uint64_t best_offset = 0;
  std::uint64_t smallest_delta = std::numeric_limits<std::uint64_t>::max();

  for (auto remaining = attempt_count; remaining != 0U; --remaining) {
    UpdateClkClsOffsetAndDelta(best_offset, smallest_delta);
  }

  return best_offset;
}
} // namespace
// Loads the metadata stream file `metadata_stream_path`, replaces its
// first `offset = 0;` occurrence with the computed clock class offset,
// and writes the result to the `metadata` file within `trace_dir`.
//
// Throws `std::runtime_error` when the source file cannot be opened, when
// it doesn't contain the `offset = 0;` term, or when the adjusted file
// cannot be written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents.
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"cannot open the metadata stream file for reading"};
    }

    // Read everything up to the first null character (or the end).
    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    static constexpr auto offset_term = "offset = 0;";
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() below would throw an obscure
      // out-of-range error.
      throw std::runtime_error{std::string{"cannot find the `"} + offset_term +
                               "` term in the metadata stream file"};
    }

    std::ostringstream ss;

    ss << "offset = " << GetMetadataClkClsOffset() << ';';
    metadata.replace(offset_pos, std::strlen(offset_term), ss.str());
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    if (!output) {
      throw std::runtime_error{"cannot open the adjusted metadata stream file for writing"};
    }

    output.write(metadata.data(), static_cast<std::streamsize>(metadata.size()));
    output.flush();

    if (!output) {
      throw std::runtime_error{"cannot write the adjusted metadata stream file"};
    }
  }
}
} // namespace rocm_ctf
@@ -0,0 +1,146 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef PLUGIN_CTF_PLUGIN_H
#define PLUGIN_CTF_PLUGIN_H
#include <mutex>
#include <cstdlib>
#include <experimental/filesystem>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "barectf.h"
#include "barectf_tracer.h"
namespace rocm_ctf {
// CTF plugin.
//
// Build a plugin instance, and then call HandleTracerRecord(),
// HandleProfilerRecord(), and HandleBufferRecords() to add event
// records.
//
// A plugin instance performs important tasks at destruction time.
class Plugin final {
public:
// Builds a plugin instance to write a CTF trace in the `trace_dir`
// directory with packets of size `packet_size` bytes.
//
// `trace_dir` must not exist.
//
// This constructor immediately adjusts and copies the metadata stream
// file `metadata_stream_path` to the trace directory (`trace_dir`).
//
// Throws `std::runtime_error` if `trace_dir` already exists, if
// `metadata_stream_path` doesn't exist, if the trace directory cannot be
// created, or if the metadata stream file cannot be adjusted and copied.
explicit Plugin(std::size_t packet_size, const std::experimental::filesystem::path& trace_dir,
const std::experimental::filesystem::path& metadata_stream_path);
// Handles a tracer record.
//
// Holds the internal lock while adding the corresponding event
// record(s); a record of an unknown domain is reported on the standard
// error stream and ignored.
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
rocprofiler_session_id_t session_id);
// Handles a profiler record.
//
// Holds the internal lock while adding the corresponding event record.
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
rocprofiler_session_id_t session_id);
// Handles tracer or profiler records from `begin` to `end`
// (excluded).
//
// Each record is dispatched to HandleTracerRecord() or
// HandleProfilerRecord() depending on its kind.
void HandleBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id);
private:
// The platform descriptors below all have the same shape: `Ctx` is the
// barectf context type of a data stream type, while OpenPacket() and
// ClosePacket() forward to the matching generated barectf functions.
// Each one parameterizes a `BarectfTracer`.

// rocTX barectf platform descriptor.
struct RocTxPlatformDescr final {
using Ctx = barectf_roctx_ctx;
static void OpenPacket(Ctx& ctx) { barectf_roctx_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_roctx_close_packet(&ctx); }
};
// HSA API barectf platform descriptor.
struct HsaApiPlatformDescr final {
using Ctx = barectf_hsa_api_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hsa_api_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hsa_api_close_packet(&ctx); }
};
// HIP API barectf platform descriptor.
struct HipApiPlatformDescr final {
using Ctx = barectf_hip_api_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hip_api_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hip_api_close_packet(&ctx); }
};
// HSA handles barectf platform descriptor.
struct HsaHandlesPlatformDescr final {
using Ctx = barectf_hsa_handles_ctx;
static void OpenPacket(Ctx& ctx) { barectf_hsa_handles_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_hsa_handles_close_packet(&ctx); }
};
// API operations barectf platform descriptor.
struct ApiOpsPlatformDescr final {
using Ctx = barectf_api_ops_ctx;
static void OpenPacket(Ctx& ctx) { barectf_api_ops_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_api_ops_close_packet(&ctx); }
};
// Profiler barectf platform descriptor.
struct ProfilerPlatformDescr final {
using Ctx = barectf_profiler_ctx;
static void OpenPacket(Ctx& ctx) { barectf_profiler_open_packet(&ctx); }
static void ClosePacket(Ctx& ctx) { barectf_profiler_close_packet(&ctx); }
};
// barectf tracer for HSA handle mappings.
using HsaHandlesTracer = BarectfTracer<HsaHandlesPlatformDescr>;
// Writes the HSA handle type mappings to a dedicated data stream
// file.
void WriteHsaHandleTypes();
// Loads the existing metadata stream file `metadata_stream_path`,
// adjusts the `offset` property of its single clock class, and writes
// the result to the `metadata` file within the `trace_dir` directory.
void CopyAdjustedMetadataStreamFile(
const std::experimental::filesystem::path& metadata_stream_path,
const std::experimental::filesystem::path& trace_dir);
// Dedicated tracers.
//
// As with any class, these members are constructed in declaration order
// and destroyed in reverse order: keep that in mind before reordering.
BarectfTracer<RocTxPlatformDescr> roctx_tracer_;
BarectfTracer<HsaApiPlatformDescr> hsa_api_tracer_;
BarectfTracer<HipApiPlatformDescr> hip_api_tracer_;
BarectfTracer<ApiOpsPlatformDescr> api_ops_tracer_;
HsaHandlesTracer hsa_handles_tracer_;
BarectfTracer<ProfilerPlatformDescr> profiler_tracer_;
// Locks any operation performed on the data of this.
std::mutex lock_;
};
} // namespace rocm_ctf
#endif // PLUGIN_CTF_PLUGIN_H
@@ -0,0 +1,7 @@
/*
 * Linker version script shared by the plugins.
 *
 * Only the four plugin entry points listed under `global` are exported
 * from the shared object; every other symbol is made local (`local: *`).
 */
{
global: rocprofiler_plugin_initialize;
rocprofiler_plugin_finalize;
rocprofiler_plugin_write_buffer_records;
rocprofiler_plugin_write_record;
local: *;
};
@@ -0,0 +1,44 @@
# ###############################################################################
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
# #
# # Permission is hereby granted, free of charge, to any person obtaining a copy
# # of this software and associated documentation files (the "Software"), to
# # deal in the Software without restriction, including without limitation the
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# # sell copies of the Software, and to permit persons to whom the Software is
# # furnished to do so, subject to the following conditions:
# #
# # The above copyright notice and this permission notice shall be included in
# # all copies or substantial portions of the Software.
# #
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# # IN THE SOFTWARE.
# ###############################################################################
# Build the "file" output plugin as a shared library.
#
# Only the plugin entry points listed in the shared `exportmap` version
# script are exported; the library must link without undefined symbols.

# A single known file: list it explicitly so that a missing file is a
# configure-time error instead of a silently empty glob result.
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")

# Plugin sources. CONFIGURE_DEPENDS makes the build system re-check the glob
# so that added or removed sources are picked up without a manual re-run of
# CMake.
file(GLOB FILE_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})

# Hide symbols by default and relink whenever the version script changes.
set_target_properties(file_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

target_compile_definitions(file_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)

target_include_directories(file_plugin
  PRIVATE "${PROJECT_SOURCE_DIR}/inc" "${PROJECT_SOURCE_DIR}")

target_link_options(file_plugin
  PRIVATE
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
    -Wl,--no-undefined)

# ${CMAKE_DL_LIBS} names the library providing dlopen()/dlclose() on the
# current platform (it may be empty where none is needed).
target_link_libraries(file_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    hsa-runtime64::hsa-runtime64
    systemd
    stdc++fs
    amd_comgr
    ${CMAKE_DL_LIBS})

install(TARGETS file_plugin LIBRARY
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
  COMPONENT runtime)
@@ -0,0 +1,472 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cxxabi.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <hsa/hsa.h>
#include <mutex>
#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
namespace fs = std::experimental::filesystem;
namespace {
static std::string output_file_name;
class file_plugin_t {
private:
// Kind of output; get_output_file() uses it (together with a domain for
// `TRACER`) to select the output file.
enum class output_type_t {
COUNTER,
TRACER,
PC_SAMPLING
};
class output_file_t {
public:
output_file_t(std::string name) : name_(std::move(name)) {}
std::string name() const { return name_; }
template <typename T> std::ostream& operator<<(T&& value) {
if (!is_open()) open();
return stream_ << std::forward<T>(value);
}
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
if (!is_open()) open();
return stream_ << func;
}
void open() {
// If the stream is already in the failed state, there's no need to try
// to open the file.
if (fail()) return;
const char* output_dir = getenv("OUTPUT_PATH");
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) + "_" : "";
if (output_dir == nullptr) {
stream_.copyfmt(std::cout);
stream_.clear(std::cout.rdstate());
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
return;
}
fs::path output_prefix(output_dir);
if (!fs::is_directory(fs::status(output_prefix))) {
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
stream_.setstate(std::ios_base::failbit);
return;
}
std::stringstream ss;
ss << output_file_name << GetPid() << "_" << name_;
stream_.open(output_prefix / ss.str());
}
bool is_open() const { return stream_.is_open(); }
bool fail() const { return stream_.fail(); }
private:
const std::string name_;
std::ofstream stream_;
};
// Maps a record family (and, for tracer records, the activity domain) to the
// output file it is written to.
//
// Returns nullptr when a tracer domain has no associated file; in builds with
// assertions enabled that case trips the assert first.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  // Tracer records are split per activity domain.
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      assert(!"domain/op not supported!");
      return nullptr;
  }
  return nullptr;
}
public:
file_plugin_t() {
output_file_t hsa_handles("hsa_handles.txt");
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
[](hsa_agent_t agent, void* user_data) {
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
hsa_device_type_t type;
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
return HSA_STATUS_ERROR;
*file << std::hex << std::showbase << agent.handle << " agent "
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
return HSA_STATUS_SUCCESS;
},
&hsa_handles);
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
if (hsa_handles.fail()) {
rocmtools::warning("Cannot write to '%s'", hsa_handles.name().c_str());
return;
}
// App begin timestamp begin_ts_file.txt
output_file_t begin_ts("begin_ts_file.txt");
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
if (begin_ts.fail()) {
rocmtools::warning("Cannot write to '%s'", begin_ts.name().c_str());
return;
}
valid_ = true;
}
std::mutex writing_lock;
// Returns the label printed in trace lines for an activity domain, or an empty
// string for a domain not listed here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
// Writes one tracer record as a single text line to the trace file matching
// the record's domain.
//
// tracer_record  Record to print (taken by value).
// session_id     Session used when querying function names and ROCTX data.
// buffer_id      Not used by this function.
//
// The line always carries the record id, domain, begin/end timestamps and
// correlation id. The ROCTX id, ROCTX message, function name and kernel name
// are appended only when they were actually obtained for this record.
// Records whose domain has no output file are dropped.
// Holds writing_lock for the whole call.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  // Empty until a ROCTX id has been parsed; an uninitialized integer here used
  // to be printed for every record because "unsigned >= 0" is always true.
  std::optional<uint64_t> roctx_id;

  // NOTE(review): throughout this function a buffer is malloc'ed and the
  // *address of the pointer* is handed to the query call. Whether the query
  // fills the buffer or replaces the pointer (and who owns the result) is not
  // visible from here, so the allocations are left as they were — confirm
  // against the rocprofiler API and free where appropriate.

  if ((tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS)) {
    // For this HIP_OPS operation the data handle is read as a C string
    // holding the (mangled) kernel name.
    const char* raw_name = reinterpret_cast<const char*>(tracer_record.api_data_handle.handle);
    if (raw_name && strlen(raw_name) > 1) kernel_name = rocmtools::cxx_demangle(raw_name);
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }

    size_t kernel_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the characters itself; the extra strdup() that used
      // to sit here only produced a copy nobody freed.
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }

    size_t roctx_id_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_str));
      if (roctx_id_str) {
        roctx_id = static_cast<uint64_t>(std::stoll(std::string(roctx_id_str)));
        free(roctx_id_str);
      }
    }
  }

  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  // get_output_file() yields nullptr for a domain without a trace file (only
  // an assert guards that, and asserts vanish in release builds).
  if (output_file == nullptr) return;

  *output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
               << GetDomainName(tracer_record.domain) << "), Begin("
               << tracer_record.timestamps.begin.value << "), End("
               << tracer_record.timestamps.end.value << "), Correlation ID( "
               << tracer_record.correlation_id.value << ")";
  if (roctx_id) *output_file << ", ROCTX ID(" << *roctx_id << ")";
  if (roctx_message.size() > 1) *output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) *output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) *output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  *output_file << std::endl;
}
// Writes one kernel-dispatch (profiler) record to the counter results file:
// a "dispatch[...]" line with the dispatch properties, followed by one
// ", <counter name> (<value>)" line per counter that has a non-zero handle.
//
// profiler_record  Record to print.
// session_id       Session used for counter-name queries.
// buffer_id        Not used by this function.
//
// Holds writing_lock for the whole call.
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
                         rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  size_t name_length = 0;
  output_file_t* output_file = get_output_file(output_type_t::COUNTER);
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
                                                     profiler_record->kernel_id, &name_length));
  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;

  // NOTE(review): as elsewhere in this file, a buffer is malloc'ed and the
  // address of the pointer is passed to the query; buffer ownership after the
  // call is not visible from here — confirm and free where appropriate.
  const char* kernel_name_c = nullptr;
  if (name_length > 1) {
    kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
                                                  &kernel_name_c));
  }

  *output_file << "dispatch[" << std::to_string(profiler_record->header.id.handle) << "], "
               << "gpu_id(" << std::to_string(profiler_record->gpu_id.handle) << "), "
               << "queue_id(" << std::to_string(profiler_record->queue_id.handle) << "), "
               << "queue_index(" << std::to_string(profiler_record->queue_idx.value) << "), "
               << "pid(" << std::to_string(GetPid()) << "), "
               << "tid(" << std::to_string(profiler_record->thread_id.value) << ")";

  // The LDS size is rounded up to a multiple of lds_block_size before printing.
  *output_file << ", "
               << "grd(" << std::to_string(profiler_record->kernel_properties.grid_size) << "), "
               << "wgr(" << std::to_string(profiler_record->kernel_properties.workgroup_size)
               << "), "
               << "lds("
               << std::to_string(
                      ((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
                       ~(lds_block_size - 1)))
               << "), "
               << "scr(" << std::to_string(profiler_record->kernel_properties.scratch_size)
               << "), "
               << "arch_vgpr("
               << std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
               << "accum_vgpr("
               << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
               << "sgpr(" << std::to_string(profiler_record->kernel_properties.sgpr_count)
               << "), "
               << "wave_size(" << std::to_string(profiler_record->kernel_properties.wave_size)
               << "), "
               << "sig(" << std::to_string(profiler_record->kernel_properties.signal_handle);

  std::string kernel_name = "";
  if (name_length > 1 && kernel_name_c != nullptr) {
    kernel_name = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
  }
  *output_file << "), "
               << "obj(" << std::to_string(profiler_record->kernel_id.handle) << "), "
               << "kernel-name(\"" << kernel_name << "\")"
               << ", start_time(" << std::to_string(profiler_record->timestamps.begin.value)
               << ")"
               << ", end_time(" << std::to_string(profiler_record->timestamps.end.value) << ")";
  *output_file << std::endl;

  // For Counters
  if (profiler_record->counters) {
    for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
      if (profiler_record->counters[i].counter_handler.handle > 0) {
        size_t counter_name_length = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
            session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
            &counter_name_length));
        if (counter_name_length > 1) {
          // Size the buffer from the counter-name length that was just
          // queried; the kernel-name length used previously is unrelated.
          const char* name_c =
              static_cast<const char*>(malloc(counter_name_length * sizeof(char)));
          CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
              session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
              &name_c));
          // Streaming a null C string would put the stream into an error
          // state and silently drop everything written afterwards.
          *output_file << ", " << (name_c ? name_c : "") << " ("
                       << std::to_string(profiler_record->counters[i].value.value) << ")"
                       << std::endl;
        }
      }
    }
  }
}
void FlushPCSamplingRecord(
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
output_file_t* output_file{nullptr};
output_file = get_output_file(output_type_t::PC_SAMPLING);
const auto &sample = pc_sampling_record->pc_sample;
*output_file << "dispatch[" << sample.dispatch_id.value << "], "
<< "timestamp(" << sample.timestamp.value << "), "
<< "gpu_id(" << sample.gpu_id.handle << "), "
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
<< "se(" << sample.se << ')'
<< std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
const rocprofiler_record_profiler_t* profiler_record =
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
FlushProfilerRecord(profiler_record, session_id, buffer_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
break;
}
case ROCPROFILER_PC_SAMPLING_RECORD: {
const rocprofiler_record_pc_sample_t *pc_sampling_record =
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default:
break;
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
// True once the constructor has written both startup files without error.
bool is_valid() const { return valid_; }
private:
// Set to true as the last step of the constructor; stays false on any early return.
bool valid_{false};
// One lazily-opened output file per record family. The string is the file-name
// suffix; output_file_t::open() prepends the optional OUT_FILE_NAME prefix and
// the process id.
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
output_file_{"results.txt"};
};
file_plugin_t* file_plugin = nullptr;
} // namespace
// Plugin entry point: creates the single file_plugin_t instance.
//
// Returns 0 on success and -1 when the major version differs from
// ROCPROFILER_VERSION_MAJOR, the minor version is lower than
// ROCPROFILER_VERSION_MINOR, the plugin already exists, or the new instance
// reports itself invalid.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
                          rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;
  if (!version_ok) return -1;

  const bool already_initialized = (file_plugin != nullptr);
  if (already_initialized) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize; destroy it and report an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
// Plugin exit point: destroys the plugin instance, if any, and clears the
// global pointer.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete file_plugin;
  file_plugin = nullptr;
}
// Forwards a range of buffered records to the plugin instance.
// Returns -1 when the plugin is missing or invalid, otherwise the result of
// file_plugin_t::WriteBufferRecords.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
                                                               const rocprofiler_record_header_t* end,
                                                               rocprofiler_session_id_t session_id,
                                                               rocprofiler_buffer_id_t buffer_id) {
  const bool usable = (file_plugin != nullptr) && file_plugin->is_valid();
  return usable ? file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id) : -1;
}
// Writes a single tracer record through the plugin instance.
// Returns -1 when the plugin is missing or invalid and 0 otherwise; records
// whose header id handle is 0 are accepted but not written.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  const bool usable = (file_plugin != nullptr) && file_plugin->is_valid();
  if (!usable) return -1;

  if (record.header.id.handle != 0) {
    file_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
@@ -0,0 +1,27 @@
# Perfetto output plugin: built from the plugin source, the shared helper
# source and the bundled Perfetto SDK amalgamation.

# The helper is a single known file, so it is named explicitly instead of being
# globbed: a missing file is then reported by CMake instead of silently
# yielding an empty source list.
set(ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

add_library(perfetto_plugin ${LIBRARY_TYPE}
  ${ROCPROFILER_UTIL_SRC_FILES}
  perfetto.cpp
  perfetto_sdk/sdk/perfetto.cc)

# Hide symbols by default; the version script below controls what is exported.
# LINK_DEPENDS makes the target relink when the export map changes.
set_target_properties(perfetto_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
  LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(perfetto_plugin
  PRIVATE
    HIP_PROF_HIP_API_STRING=1
    __HIP_PLATFORM_HCC__=1)

target_include_directories(perfetto_plugin
  PRIVATE
    ${PROJECT_SOURCE_DIR}/inc
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/plugin/perfetto/perfetto_sdk/sdk)

target_link_options(perfetto_plugin
  PRIVATE
    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
    -Wl,--no-undefined)

target_link_libraries(perfetto_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    Threads::Threads
    systemd
    stdc++fs
    amd_comgr)

install(TARGETS perfetto_plugin LIBRARY
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
  COMPONENT plugins)
@@ -0,0 +1,804 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "rocprofiler.h"
#include <cassert>
#include <condition_variable>
#include <cstdint>
#include <cstdlib>
#include <experimental/filesystem>
#include <fstream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <functional>
#include <iostream>
#include <cxxabi.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <systemd/sd-id128.h>
#include "perfetto_sdk/sdk/perfetto.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"
// Multipliers used when deriving perfetto track ids. QUEUE_CONSTANT is folded
// into the per-queue track id in FlushProfilerRecord; STREAM_CONSTANT is not
// referenced in the part of the file visible here.
// NOTE(review): the specific values look arbitrary — confirm they only need to
// be large and distinct.
#define STREAM_CONSTANT 98736677
#define QUEUE_CONSTANT 18746479
namespace fs = std::experimental::filesystem;
// Perfetto track-event categories used by this plugin. Each category named
// here is enabled explicitly in the TrackEventConfig built by the
// perfetto_plugin_t constructor.
PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("GENERIC").SetDescription("GENERAL_CATEGORY"),
perfetto::Category("ROCTX_API").SetDescription("ACTIVITY_DOMAIN_ROCTX_API"),
perfetto::Category("HSA_API").SetDescription("ACTIVITY_DOMAIN_HSA_API"),
perfetto::Category("HIP_API").SetDescription("ACTIVITY_DOMAIN_HIP_API"),
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
namespace {
std::string process_name;
static std::string output_file_name;
// Returns the demangled name of the kernel referenced by profiler_record, or
// an empty string when the reported name length is at most 1 or the name
// comes back null / shorter than two characters.
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
  std::string kernel_name = "";
  size_t name_length = 1;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                     &name_length));
  // The diagnostics below are silenced for the C-string handling that follows.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wstringop-overread"
  if (name_length > 1) {
    // NOTE(review): the address of the pointer is passed to the query, so it
    // is not visible here whether the malloc'ed buffer is filled or the
    // pointer is replaced; ownership is left unchanged — confirm against the API.
    const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                  &kernel_name_c));
    // Demangle straight from the returned string; the intermediate strdup()
    // copy that used to be made here was never freed.
    if (kernel_name_c && strlen(kernel_name_c) > 1)
      kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  }
#pragma GCC diagnostic pop
  return kernel_name;
}
class perfetto_plugin_t {
public:
// Sets up in-process Perfetto tracing that writes to
// "<OUTPUT_PATH>/<OUT_FILE_NAME>_<pid>_output.pftrace" (the "<OUT_FILE_NAME>_"
// prefix is omitted when the variable is unset).
//
// The plugin is left invalid (is_valid_ stays false) when OUTPUT_PATH is
// unset, is not a directory, or the trace file cannot be opened.
//
// The trace buffer size defaults to 10 * 1024 * 1024 KiB and can be overridden
// with a positive value in rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB.
perfetto_plugin_t() {
  const char* output_dir = getenv("OUTPUT_PATH");
  const char* temp_file_name = getenv("OUT_FILE_NAME");
  output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";

  if (output_dir == nullptr) {
    stream_.copyfmt(std::cout);
    stream_.clear(std::cout.rdstate());
    stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
    return;
  }

  output_prefix_ = output_dir;
  if (!fs::is_directory(fs::status(output_prefix_))) {
    if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
    stream_.setstate(std::ios_base::failbit);
    return;
  }

  perfetto::TracingInitArgs args;
  args.backends |= perfetto::kInProcessBackend;
  perfetto::Tracing::Initialize(args);
  perfetto::TrackEvent::Register();

  // Disable everything, then enable exactly the categories this plugin emits.
  perfetto::protos::gen::TrackEventConfig track_event_cfg;
  track_event_cfg.add_disabled_categories("*");
  track_event_cfg.add_enabled_categories("GENERIC");
  track_event_cfg.add_enabled_categories("ROCTX_API");
  track_event_cfg.add_enabled_categories("HSA_API");
  track_event_cfg.add_enabled_categories("HIP_API");
  track_event_cfg.add_enabled_categories("External_API");
  track_event_cfg.add_enabled_categories("HIP_OPS");
  track_event_cfg.add_enabled_categories("HSA_OPS");
  track_event_cfg.add_enabled_categories("KERNELS");
  track_event_cfg.add_enabled_categories("COUNTERS");

  perfetto::TraceConfig trace_cfg;
  auto buffer_cfg = trace_cfg.add_buffers();
  uint32_t max_buffer_size = 10 * 1024 * 1024;  // Default max buffer size is 10 GB
  const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
  // Parse the override once; non-positive or unparsable values keep the default.
  const long requested_buffer_size = max_buffer_size_str ? std::atol(max_buffer_size_str) : 0;
  if (requested_buffer_size > 0) max_buffer_size = requested_buffer_size;
  // Record up to max buffer size determined by user or the 10 GB (default value)
  buffer_cfg->set_size_kb(max_buffer_size);

  auto* data_source_cfg = trace_cfg.add_data_sources()->mutable_config();
  data_source_cfg->set_name("track_event");
  data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());

  output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
  file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
  if (file_descriptor_ == -1) {
    // Without a file there is nowhere to write the trace: stop here and leave
    // the plugin invalid instead of starting a session on a bad descriptor.
    rocmtools::warning("Can't open output file\n");
    return;
  }

  tracing_session_ = perfetto::Tracing::NewTrace();
  tracing_session_->Setup(trace_cfg, file_descriptor_);
  tracing_session_->StartBlocking();

  hostname_[1023] = '\0';
  gethostname(hostname_, 1023);

  sd_id128_t ret;
  char machine_id[SD_ID128_STRING_MAX];
  [[maybe_unused]] int status = sd_id128_get_machine(&ret);
  assert(status == 0 && "Error: Couldn't get machine id!");
  // Only read `ret` when the lookup succeeded; with asserts compiled out a
  // failed lookup would otherwise hash an uninitialized id.
  if (status == 0 && sd_id128_to_string(ret, machine_id))
    machine_id_ = std::hash<std::string>{}(machine_id);

  {
    std::lock_guard<std::mutex> lock(thread_tracks_lock_);
    process_name =
        perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
    auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
    uint64_t track_id =
        track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
    // Draw again while the candidate id equals an id already recorded as used.
    for (uint64_t tid : track_ids_used_) {
      while (track_id == tid) {
        track_id =
            track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
      }
    }
    std::string thread_track_str =
        rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
    process_track_desc.mutable_process()->set_process_name(thread_track_str);
    perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
                                             process_track_desc);
    // NOTE(review): this sets the uuid on a temporary descriptor returned by
    // Serialize(); it does not look like it affects the registered track — confirm.
    perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
    thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
  }
  is_valid_ = true;
}
// Stops the tracing session and closes the trace file; does nothing for an
// instance that never became valid.
~perfetto_plugin_t() {
  if (!is_valid_) return;
  tracing_session_->StopBlocking();
  close(file_descriptor_);
}
// Maps an activity domain to its display label; a domain not listed here
// yields an empty string.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API:
      return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS:
      return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API:
      return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS:
      return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT:
      return "HSA_EVT_DOMAIN";
    default:
      return "";
  }
}
std::mutex writing_lock;
// Emits one kernel dispatch as a begin/end slice on a per-device queue track,
// and one counter sample pair per collected counter that has a non-zero handle.
//
// profiler_record  Dispatch record (taken by value).
// session_id       Session used for counter-name queries.
//
// Device, queue and counter tracks are created on first use and cached in
// device_tracks_, queue_tracks_ and counter_tracks_. Always returns 0.
// Holds writing_lock for the whole call.
int FlushProfilerRecord(rocprofiler_record_profiler_t profiler_record,
                        rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
  int device_id = profiler_record.gpu_id.handle;

  std::unordered_map<int, perfetto::Track>::iterator device_track_it;
  {
    std::lock_guard<std::mutex> device_lock(device_tracks_lock_);
    device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      track_ids_used_.emplace_back(device_id + 1 + machine_id_);
    }
  }
  auto& gpu_track = device_track_it->second;

  // Queue tracks are keyed by the device id (gpu_queue_id.first).
  std::pair<int, uint64_t> gpu_queue_id =
      std::make_pair(device_id, profiler_record.queue_id.handle);
  auto queue_track_it = queue_tracks_.find(gpu_queue_id.first);
  {
    std::lock_guard<std::mutex> queue_lock(queue_tracks_lock_);
    queue_track_it = queue_tracks_.find(gpu_queue_id.first);
    if (queue_track_it == queue_tracks_.end()) {
      /* Create a new perfetto::Track */
      queue_track_it = queue_tracks_
                           .emplace(gpu_queue_id.first,
                                    perfetto::Track((profiler_record.queue_id.handle + 1 +
                                                     profiler_record.gpu_id.handle) *
                                                        QUEUE_CONSTANT * machine_id_ * GetPid(),
                                                    gpu_track))
                           .first;
      auto queue_desc = queue_track_it->second.Serialize();
      std::string queue_str =
          rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(), gpu_queue_id.second);
      queue_desc.set_name(queue_str);
      perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
      // Record the id only when a track is actually created (as done for the
      // device track); appending on every record made the list grow without
      // bound.
      track_ids_used_.emplace_back(profiler_record.queue_id.handle + machine_id_ + 1 +
                                   profiler_record.gpu_id.handle);
    }
  }
  auto& queue_track = queue_track_it->second;

  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;
  std::string full_kernel_name = get_kernel_name(profiler_record);

  // The kernel name lives in a local std::string, so it is passed as a
  // DynamicString: StaticString is meant for strings that stay valid and
  // unchanged for the lifetime of the process.
  TRACE_EVENT_BEGIN("KERNELS", perfetto::DynamicString(full_kernel_name), queue_track,
                    profiler_record.timestamps.begin.value, "Full Kernel Name",
                    full_kernel_name.c_str(), "Agent ID", device_id, "Queue ID",
                    profiler_record.queue_id.handle, "GRD",
                    profiler_record.kernel_properties.grid_size, "WGR",
                    profiler_record.kernel_properties.workgroup_size, "LDS",
                    (((profiler_record.kernel_properties.lds_size + (lds_block_size - 1)) &
                      ~(lds_block_size - 1))),
                    "SCR", profiler_record.kernel_properties.scratch_size, "Arch. VGPR",
                    profiler_record.kernel_properties.arch_vgpr_count, "Accumilative Vgpr",
                    profiler_record.kernel_properties.accum_vgpr_count, "SGPR",
                    profiler_record.kernel_properties.sgpr_count, "Wave Size",
                    profiler_record.kernel_properties.wave_size, "Signal",
                    profiler_record.kernel_properties.signal_handle);
  TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);

  // Finds or creates the counter track for `counter_name` under this device's
  // track; the cache is keyed by the counter name.
  auto get_counter_track_fn = [&](std::string counter_name) {
    std::string counter_track_id =
        std::to_string(machine_id_) + std::to_string(GetPid()) + counter_name;
    std::pair<int, std::string> gpu_counter_track_id = std::make_pair(device_id, counter_name);
    std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
    {
      std::lock_guard<std::mutex> counter_lock(counter_tracks_lock_);
      counters_track_it = counter_tracks_.find(gpu_counter_track_id.second);
      if (counters_track_it == counter_tracks_.end()) {
        /* Create a new perfetto::Track */
        counters_track_it =
            counter_tracks_
                .emplace(gpu_counter_track_id.second,
                         perfetto::CounterTrack(counter_track_id.c_str(), gpu_track))
                .first;
        auto counter_track_desc = counters_track_it->second.Serialize();
        std::string counter_track_str = "Process ID " + std::to_string(GetPid()) + " - Counter " +
            gpu_counter_track_id.second;
        counter_track_desc.set_name(counter_track_str);
        perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
      }
    }
    return counters_track_it->second;
  };

  // For Counters
  if (profiler_record.counters) {
    for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
      if (profiler_record.counters[i].counter_handler.handle > 0) {
        size_t name_length = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
            session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
            &name_length));
        if (name_length > 1) {
          // NOTE(review): buffer ownership after the query is not visible from
          // here; the allocation is left as it was — confirm against the API.
          const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
          CHECK_ROCMTOOLS(
              rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                             profiler_record.counters[i].counter_handler, &name_c));
          // Constructing a std::string from a null pointer is undefined.
          if (name_c == nullptr) continue;
          perfetto::CounterTrack counters_track = get_counter_track_fn(std::string(name_c));
          TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.begin.value,
                        profiler_record.counters[i].value.value);
          // Added an extra zero event for maintaining start-end of the counter
          TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0.001);
        }
      }
    }
  }
  return 0;
}
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
rocprofiler_session_id_t session_id) {
std::lock_guard<std::mutex> lock(writing_lock);
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
std::string kernel_name;
char* function_name;
char* activity_name;
std::string roctx_message;
uint64_t roctx_id = 0;
uint64_t thread_id = tracer_record.thread_id.value;
std::unordered_map<uint64_t, perfetto::Track>::iterator thread_track_it;
std::unordered_map<int, perfetto::Track>::iterator device_track_it;
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
int device_id = tracer_record.agent_id.handle;
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;
{
std::lock_guard<std::mutex> lock(device_tracks_lock_);
device_track_it = device_tracks_.find(device_id);
if (device_track_it == device_tracks_.end()) {
/* Create a new perfetto::Track (Sub-Track) */
device_track_it =
device_tracks_
.emplace(device_id,
perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
.first;
auto gpu_desc = device_track_it->second.Serialize();
gpu_desc.mutable_process()->set_pid(device_id);
std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
gpu_desc.mutable_process()->set_process_name(gpu_str);
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
track_ids_used_.emplace_back(1 + machine_id_ + device_id);
}
}
} else {
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
thread_track_it = thread_tracks_.find(thread_id);
if (thread_track_it == thread_tracks_.end()) {
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
}
}
thread_track_it =
thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
auto thread_track_desc = thread_track_it->second.Serialize();
std::string thread_track_str =
rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
thread_track_desc.mutable_process()->set_pid(thread_id);
thread_track_desc.mutable_process()->set_process_name(thread_track_str);
perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
}
}
auto& thread_track = thread_track_it->second;
auto& gpu_track = device_track_it->second;
switch (tracer_record.domain) {
case ACTIVITY_DOMAIN_ROCTX: {
std::unordered_map<uint64_t, perfetto::Track>::iterator roctx_track_it;
{
std::lock_guard<std::mutex> lock(roctx_tracks_lock_);
roctx_track_it = roctx_tracks_.find(thread_id);
if (roctx_track_it == roctx_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
roctx_track_it =
roctx_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto roctx_track_desc = roctx_track_it->second.Serialize();
std::string roctx_track_str = rocmtools::string_printf("ROCTX Markers");
roctx_track_desc.set_name(roctx_track_str);
perfetto::TrackEvent::SetTrackDescriptor(roctx_track_it->second, roctx_track_desc);
}
}
auto& roctx_track = roctx_track_it->second;
size_t roctx_message_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_size));
if (roctx_message_size > 1) {
char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_message_str));
if (roctx_message_str)
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
}
size_t roctx_id_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_size));
if (roctx_id_size > 1) {
char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
tracer_record.operation_id, &roctx_id_str));
if (roctx_id_str) {
roctx_id = std::stoll(std::string(strdup(roctx_id_str)));
free(roctx_id_str);
}
}
if (tracer_record.operation_id.id == 1) {
perfetto::StaticString roctx_message_pft(
(!roctx_message.empty() ? roctx_message.c_str() : ""));
TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
tracer_record.timestamps.begin.value, "Timestamp(ns)",
tracer_record.timestamps.begin.value, "RocTx ID", roctx_id);
roctx_track_entries_++;
} else {
TRACE_EVENT_END("ROCTX_API", roctx_track, tracer_record.timestamps.begin.value);
roctx_track_entries_--;
}
break;
}
case ACTIVITY_DOMAIN_HSA_API: {
std::unordered_map<uint64_t, perfetto::Track>::iterator hsa_track_it;
{
std::lock_guard<std::mutex> lock(hsa_tracks_lock_);
hsa_track_it = hsa_tracks_.find(thread_id);
if (hsa_track_it == hsa_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
hsa_track_it =
hsa_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto hsa_track_desc = hsa_track_it->second.Serialize();
std::string hsa_track_str = rocmtools::string_printf("HSA API");
hsa_track_desc.set_name(hsa_track_str);
perfetto::TrackEvent::SetTrackDescriptor(hsa_track_it->second, hsa_track_desc);
}
}
auto& hsa_track = hsa_track_it->second;
size_t function_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(function_name), hsa_track,
tracer_record.timestamps.begin.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
TRACE_EVENT_END("HSA_API", hsa_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
std::unordered_map<uint64_t, perfetto::Track>::iterator hip_track_it;
{
std::lock_guard<std::mutex> lock(hip_tracks_lock_);
hip_track_it = hip_tracks_.find(thread_id);
if (hip_track_it == hip_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
for (uint64_t tid : track_ids_used_) {
while (track_id == tid) {
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
std::memory_order_acquire);
}
}
hip_track_it =
hip_tracks_.emplace(thread_id, perfetto::Track(track_id, thread_track)).first;
auto hip_track_desc = hip_track_it->second.Serialize();
std::string hip_track_str = rocmtools::string_printf("HIP API");
hip_track_desc.set_name(hip_track_str);
perfetto::TrackEvent::SetTrackDescriptor(hip_track_it->second, hip_track_desc);
}
}
auto& hip_track = hip_track_it->second;
size_t function_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name_size));
if (function_name_size > 1) {
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &function_name));
}
size_t kernel_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_size));
char* kernel_name_str;
if (kernel_name_size > 1) {
kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &kernel_name_str));
if (kernel_name_str) {
kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
free(kernel_name_str);
}
}
if (kernel_name.size() > 0) {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
} else {
TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(function_name), hip_track,
tracer_record.timestamps.begin.value,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
TRACE_EVENT_END("HIP_API", hip_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_EXT_API: {
printf("Warning: External API is not supported!\n");
break;
}
case ACTIVITY_DOMAIN_HIP_OPS: {
uint64_t stream_id = 0;
size_t stream_id_str_size = 0;
char* stream_id_str;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
&stream_id_str_size));
if (stream_id_str_size > 1) {
stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
&stream_id_str));
if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
}
std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
{
std::lock_guard<std::mutex> lock(stream_tracks_lock_);
stream_track_it = stream_tracks_.find(stream_id);
if (stream_track_it == stream_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id_ *
STREAM_CONSTANT * GetPid());
stream_track_it =
stream_tracks_.emplace(stream_id, perfetto::Track(track_id, gpu_track)).first;
auto stream_desc = stream_track_it->second.Serialize();
std::string stream_str =
rocmtools::string_printf("Process ID: %lu Stream %d", GetPid(), stream_id);
stream_desc.set_name(stream_str);
perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
track_ids_used_.emplace_back(1 + machine_id_ + tracer_record.agent_id.handle);
}
}
auto& stream_track = stream_track_it->second;
if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
kernel_name = rocmtools::cxx_demangle(
strdup(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
TRACE_EVENT_BEGIN(
"HIP_OPS",
perfetto::StaticString(strdup(rocmtools::truncate_name(kernel_name).c_str())),
stream_track, tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
} else {
size_t activity_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name_size));
if (activity_name_size > 1) {
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name));
} else {
activity_name = const_cast<char*>(std::string("N/A").c_str());
}
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
}
TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
break;
}
case ACTIVITY_DOMAIN_HSA_OPS: {
std::pair<int, uint64_t> gpu_queue_id =
std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
{
std::lock_guard<std::mutex> lock(queue_tracks_lock_);
queue_track_it = queue_tracks_.find(gpu_queue_id.first);
if (queue_track_it == queue_tracks_.end()) {
/* Create a new perfetto::Track */
uint64_t track_id =
((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id_ *
QUEUE_CONSTANT * GetPid());
queue_track_it =
queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, gpu_track))
.first;
auto queue_desc = queue_track_it->second.Serialize();
std::string queue_str = rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(),
gpu_queue_id.second);
queue_desc.set_name(queue_str);
perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
}
track_ids_used_.emplace_back(tracer_record.queue_id.handle + machine_id_ + 1 +
tracer_record.agent_id.handle);
}
auto& queue_track = queue_track_it->second;
size_t activity_name_size = 0;
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name_size));
if (activity_name_size > 1) {
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
tracer_record.operation_id, &activity_name));
}
TRACE_EVENT_BEGIN("HSA_OPS", perfetto::StaticString(activity_name), queue_track,
tracer_record.timestamps.begin.value, "Agent ID",
tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
"Process ID", GetPid(),
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
break;
}
default: {
rocmtools::warning("ignored record for domain %d", tracer_record.domain);
break;
}
}
return 0;
}
// Walks the records in [begin, end) and hands each one to the flush routine
// that matches its kind.
//
// begin, end  - bounds of the record buffer being drained.
// session_id  - session the records belong to; forwarded to the flush routines.
// buffer_id   - buffer being drained; needed to step from one record to the next.
//
// Records whose kind is neither PROFILER nor TRACER are skipped.
// Always returns 0.
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
                       const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id) {
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
  // Nothing to iterate over. Once inside the loop the cursor only ever moves
  // forward from a non-null position, so this check is needed only once.
  if (!begin) return 0;
  while (begin < end) {
    switch (begin->kind) {
      case ROCPROFILER_PROFILER_RECORD: {
        rocprofiler_record_profiler_t* profiler_record = const_cast<rocprofiler_record_profiler_t*>(
            reinterpret_cast<const rocprofiler_record_profiler_t*>(begin));
        FlushProfilerRecord(*profiler_record, session_id);
        break;
      }
      case ROCPROFILER_TRACER_RECORD: {
        rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
            reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
        FlushTracerRecord(*tracer_record, session_id);
        break;
      }
      default:
        break;
    }
    // Advance through a scratch pointer so a failed or no-op step can be
    // detected. If the cursor does not move forward, stop instead of spinning
    // forever on the same record.
    const rocprofiler_record_header_t* next = begin;
    rocprofiler_next_record(begin, &next, session_id, buffer_id);
    if (next <= begin) break;
    begin = next;
  }
  return 0;
}
// Reports the plugin's validity flag (is_valid_).
bool IsValid() const {
  return is_valid_;
}
private:
// NOTE(review): presumably the prefix of the trace output path; not used in the
// visible part of the class - confirm against the constructor.
fs::path output_prefix_;
// Owning handle to the perfetto tracing session. WriteBufferRecords() emits a
// warning when this is null.
std::unique_ptr<perfetto::TracingSession> tracing_session_;
// NOTE(review): presumably the descriptor of the trace output file - confirm
// where it is opened and closed.
int file_descriptor_;
// Value reported by IsValid(); defaults to false.
bool is_valid_{false};
// Incremented when a ROCTX begin event is emitted and decremented when a ROCTX
// end event is emitted.
size_t roctx_track_entries_{0};
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
std::unordered_map<uint64_t, uint64_t> stream_ids_;
// Callback Tracks
// Per-thread parent tracks, keyed by thread id.
std::unordered_map<uint64_t, perfetto::Track> thread_tracks_;
// Per-domain child tracks of the thread track; roctx/hsa/hip are keyed by
// thread id. NOTE(review): hip_ext_tracks_ is not used in the visible code.
std::unordered_map<uint64_t, perfetto::Track> roctx_tracks_, hsa_tracks_, hip_tracks_,
hip_ext_tracks_;
// Activity Tracks
// NOTE(review): presumably keyed by device id - the lookup is outside the
// visible code, confirm.
std::unordered_map<int, perfetto::Track> device_tracks_;
// queue_tracks_ is looked up by the agent id handle and stream_tracks_ by the
// stream id. NOTE(review): both lookups pass 64-bit values into an int key.
std::unordered_map<int, perfetto::Track> queue_tracks_, stream_tracks_;
// NOTE(review): presumably one counter track per counter name - confirm in
// FlushProfilerRecord.
std::unordered_map<std::string, perfetto::CounterTrack> counter_tracks_;
// Source of new track ids: seeded with GetPid() and advanced with
// fetch_add((1 + machine_id_) * GetPid()) each time a track is created.
std::atomic<uint64_t> track_counter_{GetPid()};
// Values that freshly drawn track ids are compared against; a colliding id is
// redrawn from track_counter_.
// NOTE(review): this vector is read and appended to while holding different
// per-map mutexes (e.g. thread_tracks_lock_, roctx_tracks_lock_,
// stream_tracks_lock_), so it is not guarded by a single lock.
std::vector<uint64_t> track_ids_used_;
// One mutex per map above; each is taken around find/emplace on its map.
std::mutex stream_ids_lock_, thread_tracks_lock_, roctx_tracks_lock_, hsa_tracks_lock_,
hip_tracks_lock_, hip_ext_tracks_lock_, device_tracks_lock_, queue_tracks_lock_,
stream_tracks_lock_, counter_tracks_lock_;
// Host name printed in the "Node: %s" process labels.
char hostname_[1024];
// Per-machine factor mixed into every computed track id.
uint64_t machine_id_;
// NOTE(review): not used in the visible part of the class - confirm whether it
// is still needed.
std::ofstream stream_;
};
// The single plugin instance: allocated in rocprofiler_plugin_initialize() and
// deleted (and reset to nullptr) in rocprofiler_plugin_finalize().
perfetto_plugin_t* perfetto_plugin = nullptr;
} // namespace
// Plugin entry point: creates the global perfetto plugin instance.
//
// Returns 0 on success. Returns -1 when the major version differs from the one
// this plugin was built against, when the minor version is newer than the one
// it was built against, when the plugin is already initialized, or when the
// freshly constructed plugin reports itself as invalid.
int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                  uint32_t rocprofiler_minor_version) {
  const bool major_matches = (rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR);
  const bool minor_supported = (rocprofiler_minor_version <= ROCPROFILER_VERSION_MINOR);
  if (!(major_matches && minor_supported)) {
    return -1;
  }

  // Refuse a second initialization while an instance is still alive.
  if (perfetto_plugin) {
    return -1;
  }

  perfetto_plugin = new perfetto_plugin_t();
  if (!perfetto_plugin->IsValid()) {
    // Construction did not yield a usable plugin: tear it down again.
    delete perfetto_plugin;
    perfetto_plugin = nullptr;
    return -1;
  }
  return 0;
}
void rocprofiler_plugin_finalize() {
if (!perfetto_plugin) return;
delete perfetto_plugin;
perfetto_plugin = nullptr;
}
// Forwards a buffer of records [begin, end) to the plugin instance.
//
// Returns -1 when the plugin has not been initialized or is not valid;
// otherwise returns whatever perfetto_plugin_t::WriteBufferRecords returns.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool plugin_ready = (perfetto_plugin != nullptr) && perfetto_plugin->IsValid();
  if (!plugin_ready) {
    return -1;
  }
  return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
// Forwards a single tracer record to the plugin instance.
//
// Returns -1 when the plugin has not been initialized or is not valid, and 0
// otherwise. Records whose header id handle is 0 are accepted but not written.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  if (perfetto_plugin == nullptr) {
    return -1;
  }
  if (!perfetto_plugin->IsValid()) {
    return -1;
  }
  if (record.header.id.handle != 0) {
    perfetto_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
@@ -0,0 +1,189 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright (c) 2017, The Android Open Source Project
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,35 @@
# Global OWNERS that can approve Perfetto changes.
# Please look first at OWNERS in the various subdirectories before falling back
# on this, as the former tend to be more brain-cache-hot.
# Perfetto tracing internals and API/ABI boundaries.
primiano@google.com
skyostil@google.com
# UI, Ftrace interop, traced_probes, protozero, Android internals.
hjd@google.com
# Trace Processor, metrics, infra.
lalitm@google.com
# Callstack / memory profilers, traced_probes & Linux internals.
ddiproietto@google.com
rsavitski@google.com
# Chromium-related things and tracing SDK.
eseckler@google.com
nuskos@google.com
oysteine@google.com
# Most Android-related metrics.
ilkos@google.com
# fmayer@ left the team. Please try first rsavitski@, ddiproietto@ or primiano@
# and leave fmayer@ as an emergency-only escalation on profilers.
fmayer@google.com
# chromium.org aliases for adding DEPS entries from chromium subprojects to
# third_party/perfetto.
eseckler@chromium.org
nuskos@chromium.org
skyostil@chromium.org
@@ -0,0 +1,394 @@
# Tracing SDK
The Perfetto Tracing SDK is a C++11 library that allows userspace applications
to emit trace events and add more app-specific context to a Perfetto trace.
When using the Tracing SDK there are two main aspects to consider:
1. Whether you are interested only in tracing events coming from your own app
or want to collect full-stack traces that overlay app trace events with
system trace events like scheduler traces, syscalls or any other Perfetto
data source.
2. For app-specific tracing, whether you need to trace simple types of timeline
events (e.g., slices, counters) or need to define complex data sources with a
custom strongly-typed schema (e.g., for dumping the state of a subsystem of
your app into the trace).
For Android-only instrumentation, the advice is to keep using the existing
[android.os.Trace (SDK)][atrace-sdk] / [ATrace_* (NDK)][atrace-ndk] if they
are sufficient for your use cases. Atrace-based instrumentation is fully
supported in Perfetto.
See the [Data Sources -> Android System -> Atrace Instrumentation][atrace-ds]
for details.
## Getting started
TIP: The code from these examples is also available [in the
repository](/examples/sdk/README.md).
To start using the Client API, first check out the latest SDK release:
```bash
git clone https://android.googlesource.com/platform/external/perfetto -b v23.0
```
The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
an amalgamation of the Client API designed to be easy to integrate into existing
build systems. The sources are self-contained and require only a C++11 compliant
standard library.
For example, to add the SDK to a CMake project, edit your CMakeLists.txt:
```cmake
cmake_minimum_required(VERSION 3.13)
project(PerfettoExample)
find_package(Threads)
# Define a static library for Perfetto.
include_directories(perfetto/sdk)
add_library(perfetto STATIC perfetto/sdk/perfetto.cc)
# Link the library to your main executable.
add_executable(example example.cc)
target_link_libraries(example perfetto ${CMAKE_THREAD_LIBS_INIT})
```
Next, initialize Perfetto in your program:
```C++
#include <perfetto.h>
int main(int argc, char** argv) {
perfetto::TracingInitArgs args;
// The backends determine where trace events are recorded. You may select one
// or more of:
// 1) The in-process backend only records within the app itself.
args.backends |= perfetto::kInProcessBackend;
// 2) The system backend writes events into a system Perfetto daemon,
// allowing merging app and system events (e.g., ftrace) on the same
// timeline. Requires the Perfetto `traced` daemon to be running (e.g.,
// on Android Pie and newer).
args.backends |= perfetto::kSystemBackend;
perfetto::Tracing::Initialize(args);
}
```
You are now ready to instrument your app with trace events.
## Custom data sources vs Track events
The SDK offers two abstraction layers to inject tracing data, built on top of
each other, which trade off code complexity vs expressive power:
[track events](#track-events) and [custom data sources](#custom-data-sources).
### Track events
Track events are the suggested option when dealing with app-specific tracing as
they take care of a number of subtleties (e.g., thread safety, flushing, string
interning).
Track events are time-bounded events (e.g., slices, counters) based on simple
`TRACE_EVENT` annotation tags in the codebase, like this:
```c++
#include <perfetto.h>
PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("rendering")
.SetDescription("Events from the graphics subsystem"),
perfetto::Category("network")
.SetDescription("Network upload and download statistics"));
...
int main(int argc, char** argv) {
...
perfetto::Tracing::Initialize(args);
perfetto::TrackEvent::Register();
}
...
void LayerTreeHost::DoUpdateLayers() {
TRACE_EVENT("rendering", "LayerTreeHost::DoUpdateLayers");
...
for (PictureLayer& pl : layers) {
TRACE_EVENT("rendering", "PictureLayer::Update");
pl.Update();
}
}
```
Which are rendered in the UI as follows:
![Track event example](/docs/images/track-events.png)
Track events are the best default option and serve most tracing use cases with
very little complexity.
To include your new track events in the trace, ensure that the `track_event`
data source is included in the trace config. If you do not specify any
categories then all non-debug categories will be included by default. However,
you can also add just the categories you are interested in like so:
```protobuf
data_sources {
config {
name: "track_event"
track_event_config {
enabled_categories: "rendering"
}
}
}
```
See the [Track events page](track-events.md) for full instructions.
### Custom data sources
For most uses, track events are the most straightforward way of instrumenting
apps for tracing. However, in some rare circumstances they are not
flexible enough, e.g., when the data doesn't fit the notion of a track or is
high volume enough that it needs a strongly typed schema to minimize the size of
each event. In this case, you can implement a *custom data source* for
Perfetto.
Unlike track events, when working with custom data sources, you will also need
corresponding changes in [trace processor](/docs/analysis/trace-processor.md)
to enable importing your data format.
A custom data source is a subclass of `perfetto::DataSource`. Perfetto will
automatically create one instance of the class for each tracing session it is
active in (usually just one).
```C++
class CustomDataSource : public perfetto::DataSource<CustomDataSource> {
public:
void OnSetup(const SetupArgs&) override {
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
}
void OnStart(const StartArgs&) override {
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contain the DataSourceDescriptor,
// which can be extended.
}
void OnStop(const StopArgs&) override {
// Undo any initialization done in OnStart.
}
// Data sources can also have per-instance state.
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
```
The data source's static data should be defined in one source file like this:
```C++
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
```
Custom data sources need to be registered with Perfetto:
```C++
int main(int argc, char** argv) {
...
perfetto::Tracing::Initialize(args);
// Add the following:
perfetto::DataSourceDescriptor dsd;
dsd.set_name("com.example.custom_data_source");
CustomDataSource::Register(dsd);
}
```
As with all data sources, the custom data source needs to be specified in the
trace config to enable tracing:
```C++
perfetto::TraceConfig cfg;
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("com.example.custom_data_source");
```
Finally, call the `Trace()` method to record an event with your custom data
source. The lambda function passed to that method will only be called if tracing
is enabled. It is always called synchronously and possibly multiple times if
multiple concurrent tracing sessions are active.
```C++
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
auto packet = ctx.NewTracePacket();
packet->set_timestamp(perfetto::TrackEvent::GetTraceTimeNs());
packet->set_for_testing()->set_str("Hello world!");
});
```
If necessary the `Trace()` method can access the custom data source state
(`my_custom_state` in the example above). Doing so will take a mutex to
ensure the data source isn't destroyed (e.g., because of stopping tracing) while
the `Trace()` method is called on another thread. For example:
```C++
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
auto safe_handle = ctx.GetDataSourceLocked(); // Holds a RAII lock.
DoSomethingWith(safe_handle->my_custom_state);
});
```
## In-process vs System mode
The two modes are not mutually exclusive. An app can be configured to work
in both modes and respond both to in-process tracing requests and system
tracing requests. Both modes generate the same trace file format.
### In-process mode
In this mode both the perfetto service and the app-defined data sources are
hosted fully in-process, in the same process of the profiled app. No connection
to the system `traced` daemon will be attempted.
In-process mode can be enabled by setting
`TracingInitArgs.backends = perfetto::kInProcessBackend` when initializing the
SDK, see examples below.
This mode is used to generate traces that contain only events emitted by
the app, but not other types of events (e.g. scheduler traces).
The main advantage is that by running fully in-process, it doesn't require any
special OS privileges and the profiled process can control the lifecycle of
tracing sessions.
This mode is supported on Android, Linux, MacOS and Windows.
### System mode
In this mode the app-defined data sources will connect to the external `traced`
service using the [IPC over UNIX socket][ipc].
System mode can be enabled by setting
`TracingInitArgs.backends = perfetto::kSystemBackend` when initializing the SDK,
see examples below.
The main advantage of this mode is that it is possible to create fused traces where
app events are overlaid on the same timeline of OS events. This enables
full-stack performance investigations, looking all the way through syscalls and
kernel scheduling events.
The main limitation of this mode is that it requires the external `traced` daemon
to be up and running and reachable through the UNIX socket connection.
This is suggested for local debugging or lab testing scenarios where the user
(or the test harness) can control the OS deployment (e.g., sideload binaries on
Android).
When using system mode, the tracing session must be controlled from the outside,
using the `perfetto` command-line client
(see [reference](/docs/reference/perfetto-cli)). This is because when collecting
system traces, tracing data producers are not allowed to read back the trace
data as it might disclose information about other processes and allow
side-channel attacks.
* On Android 9 (Pie) and beyond, traced is shipped as part of the platform.
* On older versions of Android, traced can be built from sources using the
[standalone NDK-based workflow](/docs/contributing/build-instructions.md)
and sideloaded via adb shell.
* On Linux and MacOS `traced` must be built and run separately. See the
[Linux quickstart](/docs/quickstart/linux-tracing.md) for instructions.
_System mode is not yet supported on Windows, due to the lack of an IPC
implementation_.
## {#recording} Recording traces through the API
_Tracing through the API is currently only supported with the in-process mode.
When using system mode, use the `perfetto` cmdline client (see quickstart
guides)._
First initialize a [TraceConfig](/docs/reference/trace-config-proto.autogen)
message which specifies what type of data to record.
If your app includes [track events](track-events.md) (i.e., `TRACE_EVENT`), you
typically want to choose the categories which are enabled for tracing.
By default, all non-debug categories are enabled, but you can enable a specific
one like this:
```C++
perfetto::protos::gen::TrackEventConfig track_event_cfg;
track_event_cfg.add_disabled_categories("*");
track_event_cfg.add_enabled_categories("rendering");
```
Next, build the main trace config together with the track event part:
```C++
perfetto::TraceConfig cfg;
cfg.add_buffers()->set_size_kb(1024); // Record up to 1 MiB.
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("track_event");
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
```
If your app includes a custom data source, you can also enable it here:
```C++
ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("my_data_source");
```
After building the trace config, you can begin tracing:
```C++
std::unique_ptr<perfetto::TracingSession> tracing_session(
perfetto::Tracing::NewTrace());
tracing_session->Setup(cfg);
tracing_session->StartBlocking();
```
TIP: API methods with `Blocking` in their name will suspend the calling thread
until the respective operation is complete. There are also asynchronous
variants that don't have this limitation.
Now that tracing is active, instruct your app to perform the operation you
want to record. After that, stop tracing and collect the
protobuf-formatted trace data:
```C++
tracing_session->StopBlocking();
std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
// Write the trace into a file.
std::ofstream output;
output.open("example.perfetto-trace", std::ios::out | std::ios::binary);
output.write(&trace_data[0], trace_data.size());
output.close();
```
To save memory with longer traces, you can also tell Perfetto to write
directly into a file by passing a file descriptor into Setup(), remembering
to close the file after tracing is done:
```C++
int fd = open("example.perfetto-trace", O_RDWR | O_CREAT | O_TRUNC, 0600);
tracing_session->Setup(cfg, fd);
tracing_session->StartBlocking();
// ...
tracing_session->StopBlocking();
close(fd);
```
The resulting trace file can be directly opened in the [Perfetto
UI](https://ui.perfetto.dev) or the [Trace Processor](/docs/analysis/trace-processor.md).
[ipc]: /docs/design-docs/api-and-abi.md#socket-protocol
[atrace-ds]: /docs/data-sources/atrace.md
[atrace-ndk]: https://developer.android.com/ndk/reference/group/tracing
[atrace-sdk]: https://developer.android.com/reference/android/os/Trace
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,63 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <cxxabi.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <systemd/sd-id128.h>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <string>
#include "src/utils/helper.h"
// Evaluates `call` exactly once and reports a fatal error through
// rocmtools::fatal() when the result is not ROCPROFILER_STATUS_SUCCESS.
// Wrapped in do/while so it behaves as a single statement at the call site.
#define CHECK_ROCMTOOLS(call)                                  \
  do {                                                         \
    if ((call) != ROCPROFILER_STATUS_SUCCESS) {                \
      rocmtools::fatal("Error: ROCMTools API Call Error!");    \
    }                                                          \
  } while (false)
namespace {
// Returns the id of the current process. The getpid system call is issued on
// the first invocation only; later calls return the cached value.
[[maybe_unused]] uint32_t GetPid() {
  static uint32_t cached_pid = syscall(__NR_getpid);
  const uint32_t result = cached_pid;
  return result;
}
[[maybe_unused]] uint64_t GetMachineID() {
char hostname[1023] = "\0";
gethostname(hostname, 1023);
sd_id128_t ret;
char machine_id[SD_ID128_STRING_MAX];
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
assert(status == 0 && "Error: Couldn't get machine id!");
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
return std::rand();
}
} // namespace
+247
مشاهده پرونده
@@ -0,0 +1,247 @@
#!/bin/bash
# Directory holding this script (resolved through realpath) and its parent.
# Every expansion is quoted so locations containing spaces keep working.
ROCPROFV2_DIR="$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")"
ROCM_DIR="$(dirname -- "${ROCPROFV2_DIR}")"

# RUN_FROM_BUILD=1 when the script path contains "/build" or "/rocprofiler";
# in the latter case the script directory itself is used as ROCM_DIR.
RUN_FROM_BUILD=0
if [[ "$ROCPROFV2_DIR" == *"/build"* ]]; then
  RUN_FROM_BUILD=1
elif [[ "$ROCPROFV2_DIR" == *"/rocprofiler"* ]]; then
  RUN_FROM_BUILD=1
  ROCM_DIR="$ROCPROFV2_DIR"
fi
# Prints the command-line help and terminates the script with status 1.
# The build/test/install options are only listed when RUN_FROM_BUILD is 1.
usage() {
  echo -e "ROCProfiler Run Script Usage:"
  echo -e "-h | --help For showing this message"
  echo -e "--list-counters For showing all available counters for the current GPUs"
  if [ "$RUN_FROM_BUILD" == 1 ]; then
    echo -e "-b | --build For compiling"
    echo -e "-cb | --clean-build For full clean build"
    echo -e "-t | --test For Running the tests"
    echo -e "-ct | --clean-build-test For Running the tests after a clean build"
    echo -e "-mt | --mem-test For Running the Memory Leak tests. This run requires building using -acb | --asan-clean-build option"
    echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
    echo -e "--install For installing rocprofiler without clean build in the default installation folder (review build.sh to know more about the default paths)"
    echo -e "--clean-install For installing rocprofiler with new clean build in the default installation folder (review build.sh to know more about the default paths)"
  fi
  echo -e "--hip-api For Collecting HIP API Traces"
  # The next two descriptions previously named the wrong API (HSA <-> HIP).
  echo -e "--hip-activity For Collecting HIP API Activities Traces"
  echo -e "--hsa-api For Collecting HSA API Traces"
  echo -e "--hsa-activity For Collecting HSA API Activities Traces"
  echo -e "--roctx-trace For Collecting ROCTx Traces"
  echo -e "--kernel-trace For Collecting Kernel dispatch Traces"
  echo -e "--sys-trace For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces"
  echo -e "--plugin PLUGIN_NAME For enabling a plugin (file/perfetto)"
  echo -e "-i | --input For adding counters file path (every line in the text file represents a counter)"
  # Long form spelled the way the option parser matches it.
  echo -e "-o | --output-file-name For the output file name"
  echo -e "-d | --output-directory For adding output path where the output files will be saved"
  echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
  exit 1
}
# ---------------------------------------------------------------------------
# Command-line option parsing.
#
# Options are consumed from the front of the argument list until the first
# word that does not start with "-"; whatever remains is the application
# command line that is launched afterwards.
# ---------------------------------------------------------------------------

# Running without any argument only shows the help.
if [ -z "$1" ] ; then
  usage
  exit 1
fi

# Rejects build-tree-only options when the script is not run from the build
# tree. Without this guard such an option was neither consumed nor rejected,
# so the parsing loop below never terminated.
require_build_tree() {
  if [ "$RUN_FROM_BUILD" != 1 ]; then
    echo -e "Wrong option \"$1\", it is only available when running from the build tree!\n"
    usage
    exit 1
  fi
}

while true ; do
  if [[ "$1" = "-h" || "$1" = "--help" ]] ; then
    usage
    exit 1
  elif [[ "$1" = "-b" || "$1" = "--build" ]] ; then
    require_build_tree "$1"
    TO_CLEAN=no ./build.sh
    exit 1
  elif [[ "$1" = "-acb" || "$1" = "--asan-clean-build" ]] ; then
    require_build_tree "$1"
    ASAN=yes TO_CLEAN=yes ./build.sh
    exit 1
  elif [[ "$1" = "-cb" || "$1" = "--clean-build" ]] ; then
    require_build_tree "$1"
    TO_CLEAN=yes ./build.sh
    exit 1
  elif [[ "$1" = "-t" || "$1" = "--test" ]] ; then
    require_build_tree "$1"
    export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
    TO_CLEAN=no "$ROCM_DIR/build.sh"
    pushd build
    ./run_tests.sh
    exit 1
  elif [[ "$1" = "-mt" || "$1" = "--mem-test" ]] ; then
    require_build_tree "$1"
    ASAN=yes TO_CLEAN=yes ./build.sh
    ./tests/memorytests/run_asan_tests.sh "$ROCM_DIR/build/tests/featuretests/profiler/gtests/apps/hip_vectoradd" "$ROCM_DIR/build/memleaks.log"
    exit 1
  elif [[ "$1" = "-ct" || "$1" = "--clean-build-test" ]] ; then
    require_build_tree "$1"
    TO_CLEAN=yes "$ROCM_DIR/build.sh"
    pushd build
    ./run_tests.sh
    exit 1
  elif [[ "$1" = "--install" ]] ; then
    require_build_tree "$1"
    TO_CLEAN=no "$ROCM_DIR/build.sh"
    pushd build
    make install
    exit 1
  elif [[ "$1" = "--clean-install" ]] ; then
    require_build_tree "$1"
    TO_CLEAN=yes "$ROCM_DIR/build.sh"
    pushd build
    make install
    exit 1
  elif [[ "$1" = "--list-counters" ]] ; then
    # The ctrl helper is invoked directly; no eval is needed to run a path.
    if [ "$RUN_FROM_BUILD" == 1 ]; then
      export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
      "$ROCM_DIR/build/src/tools/ctrl"
    else
      export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocprofiler/counters/derived_counters.xml"
      export LD_LIBRARY_PATH="$ROCPROFV2_DIR/../lib:$LD_LIBRARY_PATH"
      export LD_PRELOAD="$ROCPROFV2_DIR/../lib/librocprofiler_tool.so"
      "$ROCPROFV2_DIR/../libexec/rocprofiler/ctrl"
    fi
    exit 1
  elif [[ "$1" = "-i" || "$1" = "--input" ]] ; then
    # The counters file must be given and readable.
    if [ -n "$2" ] && [ -r "$2" ] ; then
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
      else
        export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocprofiler/counters/derived_counters.xml"
      fi
      export COUNTERS_PATH="$2"
    else
      echo -e "Error: \"$2\" doesn't exist!"
      usage
      exit 1
    fi
    shift
    shift
  elif [[ "$1" = "-o" || "$1" = "--output-file-name" || "$1" = "--output-file" ]] ; then
    # "--output-file" is accepted as well because the help text advertises it.
    if [ -n "$2" ] ; then
      export OUT_FILE_NAME="$2"
    else
      usage
      exit 1
    fi
    shift
    shift
  elif [[ "$1" = "-d" || "$1" = "--output-directory" ]] ; then
    if [ -n "$2" ] ; then
      mkdir -p "$2"
      export OUTPUT_PATH="$2"
      # Kept separately because OUTPUT_PATH is re-exported per counter run.
      OUTPUT_PATH_INTERNAL="$2"
    else
      usage
      exit 1
    fi
    shift
    shift
  elif [[ "$1" = "-fi" || "$1" = "--flush-interval" ]] ; then
    # Only strictly positive integers are accepted; checking the shape first
    # avoids a raw "integer expression expected" error from test.
    if [[ "$2" =~ ^[0-9]+$ ]] && [ "$2" -gt 0 ] ; then
      export ROCPROFILER_FLUSH_INTERVAL="$2"
    else
      echo -e "Wrong input \"$2\" for flush interval, it needs to be integer greater than zero!"
      usage
      exit 1
    fi
    shift
    shift
  elif [ "$1" = "--hip-api" ] ; then
    export ROCPROFILER_HIP_API_TRACE=1
    shift
  elif [ "$1" = "--hip-activity" ] ; then
    export ROCPROFILER_HIP_API_TRACE=1
    export ROCPROFILER_HIP_ACTIVITY_TRACE=1
    shift
  elif [ "$1" = "--hsa-api" ] ; then
    export ROCPROFILER_HSA_API_TRACE=1
    shift
  elif [ "$1" = "--hsa-activity" ] ; then
    export ROCPROFILER_HSA_API_TRACE=1
    export ROCPROFILER_HSA_ACTIVITY_TRACE=1
    shift
  elif [ "$1" = "--roctx-trace" ] ; then
    export ROCPROFILER_ROCTX_TRACE=1
    shift
  elif [ "$1" = "--kernel-trace" ] ; then
    export ROCPROFILER_KERNEL_TRACE=1
    shift
  elif [ "$1" = "--sys-trace" ] ; then
    export ROCPROFILER_HIP_API_TRACE=1
    export ROCPROFILER_HIP_ACTIVITY_TRACE=1
    export ROCPROFILER_HSA_API_TRACE=1
    export ROCPROFILER_HSA_ACTIVITY_TRACE=1
    export ROCPROFILER_ROCTX_TRACE=1
    export ROCPROFILER_KERNEL_TRACE=1
    shift
  elif [ "$1" = "--amd-sys" ] ; then
    export ROCPROFILER_ENABLE_AMDSYS="$2"
    shift
    shift
  elif [ "$1" = "--plugin" ] ; then
    # Quoted on purpose: an unquoted `[ -n $2 ]` is true for an empty value,
    # so a missing plugin name used to slip through.
    if [ -n "$2" ] ; then
      PLUGIN="$2"
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_PLUGIN_LIB="lib${PLUGIN}_plugin.so"
      else
        export ROCPROFILER_PLUGIN_LIB="rocprofiler/lib${PLUGIN}_plugin.so"
      fi
    else
      echo -e "Wrong input \"$2\" for plugin!"
      usage
      exit 1
    fi
    shift
    shift
  elif [[ "$1" = "-"* ]] ; then
    echo -e "Wrong option \"$1\", Please use the following options:\n"
    usage
    exit 1
  else
    # First non-option word (or end of arguments): stop parsing.
    break
  fi
done
# ---------------------------------------------------------------------------
# Launch the application with the profiler tool library preloaded.
#
# When a counters file was given, the application is run once per line of
# that file with ROCPROFILER_COUNTERS set to the line. Otherwise it is run
# once.
# ---------------------------------------------------------------------------

# Read the counters file line by line; the `|| [[ -n "$line" ]]` part keeps a
# last line that has no trailing newline.
PMC_LINES=()
if [ -n "$COUNTERS_PATH" ]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    PMC_LINES+=( "$line" )
  done < "$COUNTERS_PATH"
fi

# Tool library location depends on whether the script runs from the build tree.
if [ "$RUN_FROM_BUILD" == 1 ]; then
  TOOL_LIB="$ROCM_DIR/build/librocprofiler_tool.so"
else
  TOOL_LIB="$ROCM_DIR/lib/librocprofiler_tool.so"
fi

# "$@" (not $*) is used to start the application so that arguments containing
# spaces or glob characters reach it unchanged.
if [ -n "${PMC_LINES[0]}" ]; then
  COUNTER=1
  for pmc_line in "${PMC_LINES[@]}"; do
    export ROCPROFILER_COUNTERS="$pmc_line"
    if [ -n "$OUTPUT_PATH" ]; then
      # Each counter line gets its own pmc_<n> directory under the user's
      # output directory, together with a pmc.txt recording the counters.
      FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
      echo -e "\nThe output path for the following counters: $FINAL_PATH"
      mkdir -p "$FINAL_PATH"
      printf '%s\n' "$ROCPROFILER_COUNTERS" > "$FINAL_PATH/pmc.txt"
      export OUTPUT_PATH="$FINAL_PATH"
      COUNTER=$((COUNTER + 1))
    fi
    LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
  done
else
  LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
fi
@@ -0,0 +1,10 @@
---
# NOTE(review): this looks like a clangd configuration fragment — confirm the
# file name. The condition below restricts it to paths matching
# common/common.h.
If:
PathMatch: common/common.h
# Appends "-x hip" to the compile command for the matched file, which asks the
# compiler front end to treat it as HIP source.
CompileFlags:
Add: ['-x', 'hip']
# Editor hint (Emacs file-local variables): open this file in YAML mode.
# Local Variables:
# mode: yaml
# End:
@@ -0,0 +1,142 @@
include (CheckCSourceCompiles)
# ############################################################################################################################################
# ############################################################################################################################################
# General Requirements
# ############################################################################################################################################
# ############################################################################################################################################
# Locate hsa.h through the include directories exported by the imported
# hsa-runtime64 target and make its directory visible to every target declared
# in this directory.
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)
include_directories("${HSA_RUNTIME_INC_PATH}")

# Set the HIP language runtime link flags as FindHIP does not set them.
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
set(CMAKE_EXECUTABLE_RPATH_LINK_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CXX_FLAG})

# Module search path: the HIP find module shipped with ROCm first, then the
# project's own cmake/modules directory (presumably where the LibElf / LibDw
# find modules live). Each entry is appended once; the HIP path used to be
# added twice.
list(APPEND CMAKE_MODULE_PATH
  "${ROCM_PATH}/lib/cmake/hip"
  "${CMAKE_SOURCE_DIR}/cmake/modules")
set(CMAKE_HIP_ARCHITECTURES OFF)
find_package(HIP REQUIRED MODULE)
find_package(Clang REQUIRED CONFIG
  PATHS "${ROCM_PATH}"
  PATH_SUFFIXES "llvm/lib/cmake/clang")
find_package(LibElf REQUIRED)
find_package(LibDw REQUIRED)

## Custom targets to build ("samples") and run ("run-samples") all the samples.
add_custom_target(samples)
add_dependencies(samples ${ROCPROFILER_TARGET})
add_custom_target(run-samples
  COMMAND "${PROJECT_BINARY_DIR}/samples/run_samples.sh"
  VERBATIM)
# Target-level ordering is expressed with add_dependencies(); the DEPENDS
# option of add_custom_target() is meant for files and custom-command outputs.
add_dependencies(run-samples samples)

# Shared helper source compiled into every sample. It is a single known file,
# so it is named directly instead of being globbed.
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")
# ############################################################################################################################################
# ############################################################################################################################################
# ############################################################################################################################################
# Samples Build & Run Script
# ############################################################################################################################################
# ############################################################################################################################################
# ############################################################################################################################################
# Profiler Samples
# ############################################################################################################################################
## Build the profiler samples (application replay, kernel replay, user replay
## and device profiling). For each <name>, profiler/<name>_sample.cpp is
## compiled as HIP source into the executable profiler_<name>, which is added
## to the "samples" target and installed with the "samples" component.
foreach(profiler_sample IN ITEMS application_replay kernel_replay user_replay device_profiling)
  set(profiler_sample_target profiler_${profiler_sample})
  set(profiler_sample_source profiler/${profiler_sample}_sample.cpp)

  set_source_files_properties(${profiler_sample_source} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
  hip_add_executable(${profiler_sample_target} ${profiler_sample_source} ${ROCPROFILER_UTIL_SRC_FILES})
  target_include_directories(${profiler_sample_target}
    PRIVATE
      ${PROJECT_SOURCE_DIR}
      ${PROJECT_SOURCE_DIR}/inc
      ${CMAKE_CURRENT_SOURCE_DIR}/common)
  target_link_libraries(${profiler_sample_target} PRIVATE ${ROCPROFILER_TARGET} systemd amd_comgr)
  add_dependencies(samples ${profiler_sample_target})
  install(TARGETS ${profiler_sample_target}
    RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples
    COMPONENT samples)
endforeach()
# Do not leak the loop helpers into the rest of the directory scope.
unset(profiler_sample_target)
unset(profiler_sample_source)
# ############################################################################################################################################
# Tracer Samples
# ############################################################################################################################################
## HIP/HSA trace sample: tracer/sample.cpp is compiled as HIP source into the
## tracer_hip_hsa executable, attached to the "samples" target and installed
## with the "samples" component.
set_source_files_properties(
  tracer/sample.cpp
  PROPERTIES
    HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(
  tracer_hip_hsa
  tracer/sample.cpp
  ${ROCPROFILER_UTIL_SRC_FILES})
target_include_directories(
  tracer_hip_hsa
  PRIVATE
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/inc
    ${CMAKE_CURRENT_SOURCE_DIR}/common)
target_link_libraries(
  tracer_hip_hsa
  PRIVATE
    ${ROCPROFILER_TARGET}
    systemd
    amd_comgr)
add_dependencies(samples tracer_hip_hsa)
install(
  TARGETS tracer_hip_hsa
  RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples
  COMPONENT samples)
# ############################################################################################################################################
# PC Sampling Samples
# ############################################################################################################################################
## PC sampling "code printing" sample, built from every .cpp file found in
## pcsampler/code_printing_sample.
set(CODE_PRINTING_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/pcsampler/code_printing_sample)
# CONFIGURE_DEPENDS makes the build re-run the glob, so sources added to or
# removed from the directory are picked up without a manual re-configure.
file(GLOB PC_SAMPLING_CODE_PRINTING_FILES CONFIGURE_DEPENDS ${CODE_PRINTING_SAMPLE_DIR}/*.cpp)
set_source_files_properties(${PC_SAMPLING_CODE_PRINTING_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(pc_sampling_code_printing ${PC_SAMPLING_CODE_PRINTING_FILES}
  HIPCC_OPTIONS
    -std=c++17
    # Include debugging symbols and source for the contextual disassembly
    -gdwarf-4)

# Define HAVE_MEMFD_CREATE for the sample when the C library provides
# memfd_create().
check_c_source_compiles("
#define _GNU_SOURCE
#include <sys/mman.h>
int main() { return memfd_create (\"cmake_test\", 0); }
" HAVE_MEMFD_CREATE)
if(HAVE_MEMFD_CREATE)
  target_compile_definitions(pc_sampling_code_printing PRIVATE HAVE_MEMFD_CREATE)
endif()

# Threads::Threads is linked below; make sure the imported target exists even
# if no parent directory has searched for it yet.
find_package(Threads REQUIRED)
target_link_libraries(pc_sampling_code_printing
  PRIVATE
    ${ROCPROFILER_TARGET}
    rocm-dbgapi
    ${LIBELF_LIBRARIES}
    ${LIBDW_LIBRARIES}
    hsa-runtime64::hsa-runtime64
    Threads::Threads
    # Platform's dynamic-loading library instead of a hard-coded "dl".
    ${CMAKE_DL_LIBS})
target_include_directories(pc_sampling_code_printing
  PRIVATE
    ${TEST_DIR}
    ${ROOT_DIR}
    ${HSA_RUNTIME_INC_PATH}
    ${PROJECT_SOURCE_DIR})
add_dependencies(samples pc_sampling_code_printing)
install(TARGETS pc_sampling_code_printing
  RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples
  COMPONENT samples)
# ############################################################################################################################################
# Scripts to run samples
# ############################################################################################################################################
# Copy the run_samples script next to the built samples. The destination is
# spelled out as a file path: configure_file() only treats <output> as a
# directory when that directory already exists at configure time.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/run_samples.sh"
  "${PROJECT_BINARY_DIR}/samples/run_samples.sh"
  COPYONLY)
# ############################################################################################################################################
@@ -0,0 +1,350 @@
#include <hip/hip_runtime.h>
#include <rocprofiler.h>
#include <cxxabi.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <systemd/sd-id128.h>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <mutex>
#include "src/utils/helper.h"
// assert() variant that carries a message: `msg` is evaluated and discarded so
// that it shows up in the failed-assertion text, `exp` is the condition being
// checked. Both arguments are parenthesized so that compound expressions
// (e.g. a conditional expression passed as `msg`) expand correctly.
#define ASSERTM(exp, msg) assert(((void)(msg), (exp)))
// Runs a HIP runtime call; if it does not return hipSuccess, prints the HIP
// error string to stderr and aborts the process.
#define HIP_CALL(call)                                             \
  do {                                                             \
    hipError_t err = call;                                         \
    if (err == hipSuccess) break;                                  \
    fprintf(stderr, "%s\n", hipGetErrorString(err));               \
    abort();                                                       \
  } while (0)
// Evaluates a ROCProfiler API call once and reports a fatal error through
// rocmtools::fatal() unless it returned ROCPROFILER_STATUS_SUCCESS.
#define CHECK_ROCPROFILER(call)                                \
  do {                                                         \
    if ((call) != ROCPROFILER_STATUS_SUCCESS) {                \
      rocmtools::fatal("Error: ROCMTools API Call Error!");    \
    }                                                          \
  } while (false)
// Device (kernel) entry points; kernels must return void. Each one only
// prints its own name.
__global__ void kernelA() {
  printf("\nKernel A\n");
}

__global__ void kernelB() {
  printf("\nKernel B\n");
}

__global__ void kernelC() {
  printf("\nKernel C\n");
}

__global__ void kernelD() {
  printf("\nKernel D\n");
}

__global__ void kernelE() {
  printf("\nKernel E\n");
}

__global__ void kernelF() {
  printf("\nKernel F\n");
}
// Returns the id of the current process. The getpid system call is issued on
// the first invocation only; later calls return the cached value.
[[maybe_unused]] uint32_t GetPid() {
  static uint32_t cached_pid = syscall(__NR_getpid);
  const uint32_t result = cached_pid;
  return result;
}
[[maybe_unused]] uint64_t GetMachineID() {
char hostname[1023] = "\0";
gethostname(hostname, 1023);
sd_id128_t ret;
char machine_id[SD_ID128_STRING_MAX];
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
assert(status == 0 && "Error: Couldn't get machine id!");
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
return std::rand();
}
// Stream that all record printing goes through.
std::ofstream output_file;

// Points output_file at standard output: it takes over std::cout's formatting
// settings, its current state bits and finally its stream buffer, so anything
// written to output_file afterwards is emitted through std::cout's buffer.
void prepare() {
  std::ostream& console = std::cout;
  output_file.copyfmt(console);
  output_file.clear(console.rdstate());
  // basic_ios::rdbuf is named explicitly because ofstream::rdbuf() hides the
  // overload that replaces the buffer.
  std::basic_ios<char>& base_stream = output_file;
  base_stream.rdbuf(console.rdbuf());
}
std::mutex writing_lock;
// Maps a tracer activity domain to the label used in the printed records.
// Domains without a label yield an empty string.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  const char* label = "";
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      label = "ROCTX_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      label = "HIP_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      label = "HIP_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      label = "HSA_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      label = "HSA_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      label = "HSA_EVT_DOMAIN";
      break;
    default:
      break;
  }
  return label;
}
// Flush function needs to be provided by the user to be used in three cases by
// the user buffer:
// 1- Application is finished
// 2- Buffer is full
// 3- Flush Interval specified by the user
//
// Prints one tracer record to output_file. Depending on the record's domain,
// the function name, kernel name, ROCTX message and ROCTX id are queried from
// the ROCProfiler API and appended to the line when available. Printing is
// serialized through writing_lock. `buffer_id` is not used by this
// implementation.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  // Only set for ROCTX records that carry an id. It used to be an
  // uninitialized uint64_t tested with `>= 0` (always true for an unsigned
  // value), so an indeterminate "ROCTX ID" was printed for every record.
  std::optional<uint64_t> roctx_id;

  // NOTE(review): the query calls below receive the address of a freshly
  // malloc'ed pointer. Whether the API fills that buffer or replaces the
  // pointer — and who owns the result — is not visible here, so the original
  // allocation/free pattern is kept as is; confirm against the API contract.

  // HIP ops records with operation id 0: the data handle is read as a
  // C string holding the kernel name.
  if ((tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS)) {
    if (tracer_record.api_data_handle.handle &&
        strlen(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)) > 1)
      kernel_name = rocmtools::cxx_demangle(
          reinterpret_cast<const char*>(tracer_record.api_data_handle.handle));
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }

    size_t kernel_name_size = 0;
    CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
      CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the characters itself; the extra strdup() copy that
      // used to be made here was never freed.
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }

    size_t roctx_id_size = 0;
    CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
          &roctx_id_str));
      if (roctx_id_str) {
        // Parsed straight from the returned string (no leaked strdup() copy).
        roctx_id = std::stoll(std::string(roctx_id_str));
        free(roctx_id_str);
      }
    }
  }

  output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
              << GetDomainName(tracer_record.domain) << "), Begin("
              << tracer_record.timestamps.begin.value << "), End("
              << tracer_record.timestamps.end.value << "), Correlation ID( "
              << tracer_record.correlation_id.value << ")";
  if (roctx_id) output_file << ", ROCTX ID(" << *roctx_id << ")";
  if (roctx_message.size() > 1) output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  output_file << std::endl;
}
// Writes one kernel-dispatch profiler record to output_file.
//
// The first output line carries the dispatch id, gpu/queue ids, pid/tid, the
// kernel launch properties, the demangled kernel name and the begin timestamp.
// It is followed by one line per collected counter that has a non-zero handle
// and a name longer than one byte. The whole record is written while holding
// writing_lock.
//
//   profiler_record  record to print (dereferenced unconditionally)
//   session_id       session used to look up counter names
//   buffer_id        unused by this function
//
// NOTE(review): ownership of the name buffers after the rocprofiler_query_*
// calls is not visible from here (the calls receive the address of the
// pointer), so the buffers are not freed — confirm against the API contract.
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
                         rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  size_t name_length = 0;
  CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
                                                       profiler_record->kernel_id, &name_length));
  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;
  // Stays null when the kernel has no usable name, so it is never read
  // uninitialized further down.
  const char* kernel_name_c = nullptr;
  if (name_length > 1) {
    kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
                                                    profiler_record->kernel_id, &kernel_name_c));
  }
  // std::to_string is used for every number so the output does not depend on
  // the stream's current formatting flags.
  output_file << "dispatch[" << std::to_string(profiler_record->header.id.handle) << "], "
              << "gpu_id(" << std::to_string(profiler_record->gpu_id.handle) << "), "
              << "queue_id(" << std::to_string(profiler_record->queue_id.handle) << "), "
              << "queue_index(" << std::to_string(profiler_record->queue_idx.value) << "), "
              << "pid(" << std::to_string(GetPid()) << "), "
              << "tid(" << std::to_string(profiler_record->thread_id.value) << ")";
  output_file << ", "
              << "grd(" << std::to_string(profiler_record->kernel_properties.grid_size) << "), "
              << "wgr(" << std::to_string(profiler_record->kernel_properties.workgroup_size)
              << "), "
              << "lds("
              // LDS size rounded up to a multiple of lds_block_size.
              << std::to_string(
                     ((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
                      ~(lds_block_size - 1)))
              << "), "
              << "scr(" << std::to_string(profiler_record->kernel_properties.scratch_size)
              << "), "
              << "arch_vgpr(" << std::to_string(profiler_record->kernel_properties.arch_vgpr_count)
              << "), "
              << "accum_vgpr("
              << std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
              << "sgpr(" << std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
              << "wave_size(" << std::to_string(profiler_record->kernel_properties.wave_size)
              << "), "
              << "sig(" << std::to_string(profiler_record->kernel_properties.signal_handle);
  // Only demangle when a name was actually retrieved; otherwise print an
  // empty kernel name.
  std::string kernel_name;
  if (kernel_name_c) kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  output_file << "), "
              << "obj(" << std::to_string(profiler_record->kernel_id.handle) << "), "
              << "kernel-name(\"" << kernel_name << "\")"
              << ", time(" << std::to_string(profiler_record->timestamps.begin.value) << ") ";
  // For Counters
  output_file << std::endl;
  if (profiler_record->counters) {
    for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
      if (profiler_record->counters[i].counter_handler.handle > 0) {
        size_t counter_name_length = 0;
        CHECK_ROCPROFILER(rocprofiler_query_counter_info_size(
            session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
            &counter_name_length));
        if (counter_name_length > 1) {
          // Size the buffer from the counter's own name length, not from the
          // kernel name length queried above.
          const char* name_c =
              static_cast<const char*>(malloc(counter_name_length * sizeof(char)));
          CHECK_ROCPROFILER(rocprofiler_query_counter_info(
              session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
              &name_c));
          output_file << ", " << name_c << " ("
                      << std::to_string(profiler_record->counters[i].value.value) << ")"
                      << std::endl;
        }
      }
    }
  }
}
// Writes one PC-sampling record to output_file as a single line:
// dispatch id, timestamp, gpu id, sampled program counter and `se`.
//
// Only the program counter is printed in hexadecimal (with a 0x prefix).
// std::hex and std::showbase are sticky stream flags, so they are switched
// back right after the PC; otherwise `se` and every later integer written to
// output_file would be rendered in hex as well.
//
// NOTE(review): unlike FlushProfilerRecord this function does not take
// writing_lock — confirm whether callers serialize access to output_file.
void FlushPCSamplingRecord(const rocprofiler_record_pc_sample_t* pc_sampling_record) {
  const auto& sample = pc_sampling_record->pc_sample;
  output_file << "dispatch[" << sample.dispatch_id.value << "], "
              << "timestamp(" << sample.timestamp.value << "), "
              << "gpu_id(" << sample.gpu_id.handle << "), "
              << "pc-sample(" << std::hex << std::showbase << sample.pc << std::dec
              << std::noshowbase << "), "
              << "se(" << sample.se << ')' << std::endl;
}
int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
while (begin < end) {
if (!begin) return 0;
switch (begin->kind) {
case ROCPROFILER_PROFILER_RECORD: {
const rocprofiler_record_profiler_t* profiler_record =
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
FlushProfilerRecord(profiler_record, session_id, buffer_id);
break;
}
case ROCPROFILER_TRACER_RECORD: {
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
FlushTracerRecord(*tracer_record, session_id, buffer_id);
break;
}
case ROCPROFILER_PC_SAMPLING_RECORD: {
const rocprofiler_record_pc_sample_t *pc_sampling_record =
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
FlushPCSamplingRecord(pc_sampling_record);
break;
}
default: {
break;
}
}
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
}
return 0;
}
void kernelCalls(char c) {
switch (c) {
case 'A': {
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0);
break;
}
case 'B': {
hipLaunchKernelGGL(kernelB, dim3(1), dim3(1), 0, 0);
break;
}
case 'C': {
hipLaunchKernelGGL(kernelC, dim3(1), dim3(1), 0, 0);
break;
}
case 'D': {
hipLaunchKernelGGL(kernelD, dim3(1), dim3(1), 0, 0);
break;
}
case 'E': {
hipLaunchKernelGGL(kernelE, dim3(1), dim3(1), 0, 0);
break;
}
case 'F': {
hipLaunchKernelGGL(kernelF, dim3(1), dim3(1), 0, 0);
break;
}
default: {
fprintf(stderr, "Error: Wrong Kernel character (%c) Given for kernelCalls!\n", c);
}
}
}
@@ -0,0 +1 @@
pmc: SQ_WAVES GRBM_COUNT GRBM_GUI_ACTIVE SQ_INSTS_VALU FETCH_SIZE
@@ -0,0 +1,10 @@
---
If:
PathMatch: main.cpp
CompileFlags:
Add: ['-x', 'hip']
# Local Variables:
# mode: yaml
# End:
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,126 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
#include <map>
#include <optional>
#include <string>
#include <vector>
#include <amd-dbgapi/amd-dbgapi.h>
namespace amd::debug_agent {

/*
 * Description of one loaded code object, identified by an amd-dbgapi code
 * object id. Only the interface is declared here; the member functions are
 * defined elsewhere.
 */
class code_object_t {
  /* Name, value and size of a symbol, as returned by find_symbol(). */
  struct symbol_info_t {
    const std::string m_name;
    amd_dbgapi_global_address_t m_value;
    amd_dbgapi_size_t m_size;
  };

  /* address -> (name, size); empty optional until a map has been stored. */
  using symbol_map_t = std::optional<
      std::map<amd_dbgapi_global_address_t, std::pair<std::string, amd_dbgapi_size_t>>>;

public:
  void load_symbol_map();
  void load_debug_info();

  std::optional<symbol_info_t> find_symbol(amd_dbgapi_global_address_t address);

  code_object_t(amd_dbgapi_code_object_id_t code_object_id);
  code_object_t(code_object_t &&rhs);

  ~code_object_t();

  void open();

  /* True once m_fd holds a value. */
  bool is_open() const { return m_fd.has_value(); }

  amd_dbgapi_global_address_t load_address() const { return m_load_address; }
  amd_dbgapi_size_t mem_size() const { return m_mem_size; }

  // FIXME(?): extra function not in rocr-debug-agent
  uint32_t elf_amdgpu_machine() const { return m_elf_amdgpu_machine; }

  void disassemble_around(amd_dbgapi_architecture_id_t architecture_id,
                          amd_dbgapi_global_address_t pc);
  void disassemble_kernel(amd_dbgapi_architecture_id_t architecture_id,
                          amd_dbgapi_global_address_t start_addr,
                          bool const print_src = false);

  bool save(const std::string &directory) const;

  amd_dbgapi_global_address_t m_load_address{0};
  amd_dbgapi_size_t m_mem_size{0};
  std::optional<int> m_fd;  // presumably a file descriptor — TODO confirm

  /* address -> (string, number); presumably source file and line — TODO confirm */
  std::optional<std::map<amd_dbgapi_global_address_t, std::pair<std::string, size_t>>>
      m_line_number_map;

  /* address -> address; presumably PC range bounds — TODO confirm */
  std::optional<std::map<amd_dbgapi_global_address_t, amd_dbgapi_global_address_t>>
      m_pc_ranges_map;

  symbol_map_t m_symbol_map;
  std::string m_uri;
  amd_dbgapi_code_object_id_t const m_code_object_id;

  // FIXME(?): extra field not in rocr-debug-agent
  uint32_t m_elf_amdgpu_machine{0};
};

} // namespace amd::debug_agent
/* Selects which disassembly routine disassemble() is asked to run. */
enum class disassembly_mode { AROUND, KERNEL };
/*
 * Returns a (process id, code-object map keyed by address) pair.
 * NOTE(review): presumably attaches amd-dbgapi to the current process and
 * enumerates its loaded code objects — the definition is not in this header;
 * confirm.
 */
std::tuple
< amd_dbgapi_process_id_t
, std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
>
init_disassembly();
/*
 * Disassembles at `addr` using the given mode, process and code-object map.
 * NOTE(review): output destination and failure behaviour are defined
 * elsewhere — confirm.
 */
void
disassemble(
disassembly_mode const mode,
amd_dbgapi_process_id_t const process_id,
std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
&code_object_map,
uint64_t const addr);
/*
 * Prints context for the program counter `pc` using the given process and
 * code-object map (presumably source/disassembly — confirm against the
 * definition).
 */
void
print_pc_context(
amd_dbgapi_process_id_t const process_id,
std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
&code_object_map,
amd_dbgapi_global_address_t const pc);
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
@@ -0,0 +1,215 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <algorithm>
#include <atomic>
#include <functional>
#include <map>
#include <mutex>
#include <optional>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include <cassert>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <sys/mman.h>
#include <hsa/hsa.h>
#include <amd-dbgapi/amd-dbgapi.h>
#include <hsa/amd_hsa_kernel_code.h>
#include <hsa/hsa_ven_amd_loader.h>
#include "inc/rocprofiler.h"
#include "code_printing.hpp"
#include "program.hpp"
/* Deleter functor that releases a C-allocated character buffer with free(). */
struct libc_freer {
    void operator()(char *buffer)
    {
        free(buffer);
    }
};
namespace util {

/*
 * Folds the std::hash of `v`, then of each value in `rest` (left to right),
 * into the running hash `hsh`.
 */
template <typename T, typename... Ts>
static void
hash_combine(size_t &hsh, T const& v, Ts const&... rest)
{
    size_t const mixed =
        std::hash<T>{}(v) + 0x9e3779b9 + (hsh << 6) + (hsh >> 2);
    hsh ^= mixed;
    if constexpr (sizeof...(Ts) > 0) {
        (hash_combine(hsh, rest), ...);
    }
}

} // namespace util
/* Two HSA executables are equal when their handles are equal. */
[[maybe_unused]]
static inline bool
operator==(hsa_executable_t const &lhs, hsa_executable_t const &rhs)
{
    bool const same_handle = lhs.handle == rhs.handle;
    return same_handle;
}
/* Two kernel dispatch ids are equal when their values are equal. */
[[maybe_unused]]
static inline bool
operator==(
    rocprofiler_kernel_dispatch_id_t const &lhs,
    rocprofiler_kernel_dispatch_id_t const &rhs)
{
    bool const same_value = lhs.value == rhs.value;
    return same_value;
}
/* Two amd-dbgapi process ids are equal when their handles are equal. */
static inline bool
operator==(
    amd_dbgapi_process_id_t const &lhs,
    amd_dbgapi_process_id_t const &rhs)
{
    bool const same_handle = lhs.handle == rhs.handle;
    return same_handle;
}
/* Two amd-dbgapi process ids differ when their handles differ. */
static inline bool
operator!=(
    amd_dbgapi_process_id_t const &lhs,
    amd_dbgapi_process_id_t const &rhs)
{
    return lhs.handle != rhs.handle;
}
namespace std {

/* Hashes an HSA executable by its handle. */
template <>
struct hash<hsa_executable_t> {
    size_t operator()(hsa_executable_t const &v) const
    {
        size_t seed = 0;
        util::hash_combine(seed, v.handle);
        return seed;
    }
};

/* Hashes a kernel dispatch id by its value. */
template <>
struct hash<rocprofiler_kernel_dispatch_id_t> {
    size_t operator()(rocprofiler_kernel_dispatch_id_t const &v) const
    {
        size_t seed = 0;
        util::hash_combine(seed, v.value);
        return seed;
    }
};

} // namespace std
/*
 * Disassembly state: the amd-dbgapi process id (AMD_DBGAPI_PROCESS_NONE
 * while uninitialized) and the code objects keyed by address.
 */
struct disassembly_ctx_t {
    disassembly_ctx_t();
    ~disassembly_ctx_t();

    void init();
    bool inited() const;
    void reset();
    void disassemble_kernels(bool const reinitialize);

    amd_dbgapi_process_id_t process_id;
    std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
        codeobjs;
};
/* Starts uninitialized: no process id and an empty code-object map. */
disassembly_ctx_t::disassembly_ctx_t()
    : process_id(AMD_DBGAPI_PROCESS_NONE)
{
    // codeobjs is default-constructed, i.e. empty.
}
/* Releases everything through reset(). */
disassembly_ctx_t::~disassembly_ctx_t()
{
    this->reset();
}
/*
 * Disassembles every symbol of every code object whose URI starts with
 * `file://`.
 *
 * When `reinitialize` is true the context is reset first; the context is
 * (re)initialized on demand. Processing stops at the first code object whose
 * symbol map cannot be loaded, after reporting the error on stderr.
 */
void
disassembly_ctx_t::disassemble_kernels(bool const reinitialize)
{
    if (reinitialize) {
        reset();
    }
    if (!inited()) {
        init();
    }

    auto const has_file_uri = [](auto const &entry) {
        /*
         * A lame filter for the kernels in the current file, because nothing
         * else in this little demo will have the URL prefix of `file://`.
         * rfind() with position 0 only matches at the very start, so this is
         * a real prefix test rather than a substring search.
         */
        return 0 == entry.second.m_uri.rfind("file://", 0);
    };

    auto const end = codeobjs.end();
    auto it = std::find_if(codeobjs.begin(), end, has_file_uri);
    while (end != it) {
        auto &codeobj = it->second;
        codeobj.load_symbol_map();
        if (!codeobj.m_symbol_map) {
            fputs(PROGNAME ": error: failed to load symbol map\n", stderr);
            break;
        }
        for (auto const &sym : *codeobj.m_symbol_map) {
            auto const &addr = sym.first;
            ::disassemble(disassembly_mode::KERNEL, process_id, codeobjs, addr);
        }
        ++it;
        it = std::find_if(it, end, has_file_uri);
    }
}
/* Stores the process id and code-object map returned by init_disassembly(). */
inline void
disassembly_ctx_t::init()
{
    auto fresh = init_disassembly();
    process_id = std::get<0>(fresh);
    codeobjs = std::move(std::get<1>(fresh));
}
/* True when process_id is something other than AMD_DBGAPI_PROCESS_NONE. */
inline bool
disassembly_ctx_t::inited() const
{
    return process_id != AMD_DBGAPI_PROCESS_NONE;
}
void
disassembly_ctx_t::reset()
{
codeobjs.clear();
if (AMD_DBGAPI_PROCESS_NONE.handle != process_id.handle) {
amd_dbgapi_process_detach(process_id);
amd_dbgapi_finalize();
process_id = AMD_DBGAPI_PROCESS_NONE;
}
}
/* File-local disassembly context used by the disassembly_* entry points below. */
static disassembly_ctx_t g_dis;
/* Forwards to disassemble_kernels() on the file-local context. */
void
disassembly_disassemble_kernels(bool const reinitialize)
{
    auto &ctx = g_dis;
    ctx.disassemble_kernels(reinitialize);
}
/*
 * Prints the context of the sampled program counter `pc`, initializing the
 * file-local disassembly context first if that has not happened yet.
 */
void
disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const pc)
{
    auto &ctx = g_dis;
    if (!ctx.inited()) {
        ctx.init();
    }
    print_pc_context(ctx.process_id, ctx.codeobjs, pc);
}
@@ -0,0 +1,32 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
#include <amd-dbgapi/amd-dbgapi.h>
/*
 * Disassembles the sample's kernels; the boolean requests a reset of the
 * disassembly state before doing so.
 */
void
disassembly_disassemble_kernels(bool const);
/* Prints context for the given sampled program counter address. */
void
disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const);
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
@@ -0,0 +1,447 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <algorithm>
#include <chrono>
#include <memory>
#include <numeric>
#include <vector>
#include <cfloat>
#include <cinttypes>
#include <cstdint>
#include <cstdlib>
#include <unistd.h>
#include <hip/hip_runtime.h>
#include <hsa/hsa.h>
#include <rocprofiler.h>
#include "program.hpp"
#include "program_options.hpp"
#include "disassembly.hpp"
// XSTR macro-expands its argument and then stringizes the result; STR
// stringizes its argument as written.
#define XSTR(x) STR(x)
#define STR(x) #x
// printf conversion suffix for doubles: "." followed by the value of
// DBL_DECIMAL_DIG and "f". Meant to be appended to a "%" in a format string.
#define DBL_FMT "." XSTR(DBL_DECIMAL_DIG) "f"
namespace util {

/* Deleter functor that releases a device allocation with hipFree(). */
struct hipMalloc_freer {
    void operator()(void * const ptr)
    {
        // The status returned by hipFree is deliberately discarded.
        static_cast<void>(hipFree(ptr));
    }
};

} // namespace util
namespace prng {

/*
 * Advances the 64-bit state behind `sm64_state` by a fixed increment and
 * returns a scrambled copy of the new state.
 */
static uint64_t
splitmix64_next(uint64_t * const sm64_state)
{
    *sm64_state += 0x9e3779b97f4a7c15;
    uint64_t mixed = *sm64_state;
    mixed ^= mixed >> 30;
    mixed *= 0xbf58476d1ce4e5b9;
    mixed ^= mixed >> 27;
    mixed *= 0x94d049bb133111eb;
    return mixed ^ (mixed >> 31);
}

/* Rotates `x` left by `k` bits; callers in this file pass 23 and 45. */
static inline uint64_t
rotl64(const uint64_t x, int k)
{
    uint64_t const high = x << k;
    uint64_t const low = x >> (64 - k);
    return high | low;
}

/*
 * Produces the next value from the four-word generator state `xrs_state`
 * and updates that state in place.
 */
static uint64_t
xrs_next(uint64_t * const xrs_state)
{
    // The output is computed from the state as it was on entry.
    uint64_t const out =
        rotl64(xrs_state[0] + xrs_state[3], 23) + xrs_state[0];
    uint64_t const shifted = xrs_state[1] << 17;

    xrs_state[2] ^= xrs_state[0];
    xrs_state[3] ^= xrs_state[1];
    xrs_state[1] ^= xrs_state[2];
    xrs_state[0] ^= xrs_state[3];

    xrs_state[2] ^= shifted;
    xrs_state[3] = rotl64(xrs_state[3], 45);

    return out;
}

} // namespace prng
namespace kernel {

/*
 * Sets s[0..n) to `c`. Each thread starts at its global index and advances
 * by the total number of launched threads.
 */
template <typename T>
__global__ static void
memset_gpu(T * const s, T const c, size_t const n)
{
    size_t const stride = blockDim.x * gridDim.x;
    size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
    while (idx < n) {
        s[idx] = c;
        idx += stride;
    }
}

/*
 * For every xs[i] (i < n) that is greater than `gt`, atomically increments
 * out[i % nblocks]. Threads cover the input the same way as memset_gpu.
 */
template <typename T>
__global__ static void
count_gpu(
    T const * const xs,
    T * const out,
    size_t const n,
    size_t const nblocks,
    T const gt)
{
    size_t const stride = blockDim.x * gridDim.x;
    size_t idx = threadIdx.x + blockIdx.x * blockDim.x;
    while (idx < n) {
        if (xs[idx] > gt) {
            atomicAdd(&out[idx % nblocks], 1);
        }
        idx += stride;
    }
}

} // namespace kernel
// getopt() option string: -d and -n take an argument; -c, -m, -D and -P do not.
// NOTE(review): get_options() has no case for 'c' or 'm', so both fall into
// its default branch (usage + failure) — confirm whether they should be
// handled or removed from this string.
static char const GETOPT_ARGS[] = "cd:mn:DP";
static void
usage()
{
fputs("usage: " PROGNAME " [OPTION]... MIN [SEED]\n"
" -d DEV\tHIP device number\n"
" -n LEN\tLength of random integer array\n"
" -D\t\tPrint kernel disassembly\n"
" -P\t\tPrint source and disassembly of sampled PC locations\n"
"where\n"
" DEV : i32\n"
" MIN : u64\n"
" LEN : u64\n"
" SEED : u64\n",
stderr);
}
/*
 * Parses the command line into `opts`.
 *
 * Options: -d DEV, -n LEN, -D, -P, followed by the positional MIN and an
 * optional SEED. Returns EXIT_SUCCESS on success. On an unknown option, a
 * wrong number of positional arguments, or a numeric argument that is empty
 * or has trailing characters, prints the usage text and returns EXIT_FAILURE.
 *
 * NOTE(review): out-of-range values are still accepted as whatever strto*
 * returns; range checking would need errno.
 */
static int
get_options(int argc, char **argv, program_options * const opts)
{
    // True when a strto* call that stopped at `end` consumed all of `text`
    // and `text` was not empty.
    auto const consumed_all = [](char const *text, char const *end) {
        return end != text && '\0' == *end;
    };

    char *end = nullptr;
    int opt;
    while (-1 != (opt = getopt(argc, argv, GETOPT_ARGS))) {
        switch (opt) {
        case 'd': {
            auto const device = strtol(optarg, &end, 10);
            if (!consumed_all(optarg, end)) {
                usage();
                return EXIT_FAILURE;
            }
            opts->device = device;
            break;
        }
        case 'n': {
            auto const len = strtoul(optarg, &end, 10);
            if (!consumed_all(optarg, end)) {
                usage();
                return EXIT_FAILURE;
            }
            opts->rands_len = len;
            break;
        }
        case 'D':
            opts->disassemble = true;
            break;
        case 'P':
            opts->pc_sampling = true;
            break;
        default:
            usage();
            return EXIT_FAILURE;
        }
    }

    auto const optcount = argc - optind;
    if (!(1 == optcount || 2 == optcount)) {
        usage();
        return EXIT_FAILURE;
    }

    // MIN is a 64-bit value, so parse it with the unsigned long long variant.
    auto const gt = strtoull(argv[optind], &end, 10);
    if (!consumed_all(argv[optind], end)) {
        usage();
        return EXIT_FAILURE;
    }
    opts->gt = gt;

    if (2 == optcount) {
        auto const seed = strtoull(argv[optind + 1], &end, 10);
        if (!consumed_all(argv[optind + 1], end)) {
            usage();
            return EXIT_FAILURE;
        }
        opts->seed = seed;
    }
    return EXIT_SUCCESS;
}
// Parsed command-line options; filled by main() and read by callback_flush_fn().
static program_options g_opts;
/*
 * Buffer flush callback: walks the records in [record, end_record) and
 * prints every PC-sampling record to stdout. When the -P option is set
 * (g_opts.pc_sampling), the context of each sampled PC is printed as well.
 * Records of other kinds are skipped.
 */
static void
callback_flush_fn(
    rocprofiler_record_header_t const *record,
    rocprofiler_record_header_t const *end_record,
    rocprofiler_session_id_t session_id,
    rocprofiler_buffer_id_t buffer_id)
{
    for (; record < end_record && nullptr != record;
         rocprofiler_next_record(record, &record, session_id, buffer_id)) {
        if (ROCPROFILER_PC_SAMPLING_RECORD != record->kind) {
            continue;
        }

        auto const &sample =
            reinterpret_cast<rocprofiler_record_pc_sample_t const *>(record)
                ->pc_sample;
        printf(
            "dispatch[%" PRIu64 "] timestamp(%" PRIu64
            ") gpu_id(%#" PRIx64 ") pc-sample(%#" PRIx64
            ") se(%" PRIu32 ")\n",
            sample.dispatch_id.value,
            sample.timestamp.value,
            sample.gpu_id.handle,
            sample.pc,
            sample.se);

        if (g_opts.pc_sampling) {
            disassembly_print_pc_sample_context(sample.pc);
        }
    }
}
/*
 * Runs the sample workload: fills an array with pseudo-random 64-bit
 * integers, counts on the GPU how many exceed opts.gt, and prints the result
 * with timings. When opts.pc_sampling is set, a rocprofiler session with PC
 * sampling and dispatch-timestamp filters is active around the workload and
 * is flushed and destroyed afterwards. When opts.disassemble is set, the
 * kernels are disassembled after the workload.
 *
 * Returns EXIT_SUCCESS only if every checked rocprofiler call AND every
 * checked HIP call succeeded, EXIT_FAILURE otherwise.
 */
static int
run_kernel(program_options const &opts)
{
    rocprofiler_session_id_t sid;
    rocprofiler_filter_id_t fid, fid2;
    rocprofiler_buffer_id_t bid;
    auto rocprofiler_ok = ROCPROFILER_STATUS_SUCCESS;
    // Declared at function scope (before any goto) so that a HIP failure in
    // the workload below can be reflected in the return value.
    auto hip_ok = hipSuccess;

    if (opts.pc_sampling) {
        ROCPROFILER_CHECK(
            rocprofiler_create_session(ROCPROFILER_NONE_REPLAY_MODE, &sid),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            fputs("error: failed to create rocmtools session\n", stderr);
            return EXIT_FAILURE;
        }

        rocprofiler_filter_property_t property{};

        ROCPROFILER_CHECK(
            rocprofiler_create_buffer(
                sid, callback_flush_fn, static_cast<size_t>(0x1000), &bid),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            fputs("error: failed to add PC sampling session mode\n", stderr);
            goto out;
        }

        ROCPROFILER_CHECK(
            rocprofiler_create_filter(
                sid, ROCPROFILER_PC_SAMPLING_COLLECTION,
                rocprofiler_filter_data_t{},
                0, &fid, property),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            goto cleanup;
        }

        ROCPROFILER_CHECK(
            rocprofiler_create_filter(
                sid, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
                rocprofiler_filter_data_t{},
                0, &fid2, property),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            goto cleanup;
        }

        ROCPROFILER_CHECK(
            rocprofiler_set_filter_buffer(sid, fid, bid),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            goto cleanup;
        }

        ROCPROFILER_CHECK(
            rocprofiler_set_filter_buffer(sid, fid2, bid),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            goto cleanup;
        }

        ROCPROFILER_CHECK(
            rocprofiler_start_session(sid),
            rocprofiler_ok);
        if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
            goto cleanup;
        }
    }

    {
        printf("seed = %" PRIu64 "\n", opts.seed);

        std::vector<uint64_t> rands(opts.rands_len);
        using rands_elt_t = decltype(rands)::value_type;
        uint64_t
            sm64_state = opts.seed,
            xrs_state[4];
        {
            using prng::splitmix64_next;
            using prng::xrs_next;

            // Initialize the Xoroshiro PRNG
            xrs_state[0] = splitmix64_next(&sm64_state);
            xrs_state[1] = splitmix64_next(&sm64_state);
            xrs_state[2] = splitmix64_next(&sm64_state);
            xrs_state[3] = splitmix64_next(&sm64_state);

            // Fill rands with random integers
            for (auto &i : rands) {
                i = xrs_next(xrs_state);
            }
        }

        struct tm {
            using monoclk = std::chrono::steady_clock;
            using dur = std::chrono::duration<double>;
        };
        using util::hipMalloc_freer;

        auto const begin_time = tm::monoclk::now();

        // Single-pass loop: HIP_CHECK_BREAK leaves it on the first HIP error
        // after recording the error code in hip_ok.
        do {
            HIP_CHECK_BREAK(hipSetDevice(opts.device), hip_ok);

            auto const rands_nbytes = rands.size() * sizeof(rands_elt_t);
            std::unique_ptr<rands_elt_t, hipMalloc_freer> rands_gpu;
            {
                rands_elt_t *rands_gpu_ptr;
                HIP_CHECK_BREAK(hipMalloc(&rands_gpu_ptr, rands_nbytes), hip_ok);
                rands_gpu.reset(rands_gpu_ptr);
            }
            HIP_CHECK_BREAK(
                hipMemcpy(rands_gpu.get(), rands.data(), rands_nbytes,
                          hipMemcpyHostToDevice),
                hip_ok);
            (void)hipDeviceSynchronize();

            uint32_t constexpr nthreads = 256U;
            uint32_t const nblocks = (rands.size() + nthreads - 1) / nthreads;

            using count_elt_t = size_t;
            auto const count_subtotals_nbytes = nblocks * sizeof(count_elt_t);
            std::unique_ptr<count_elt_t, hipMalloc_freer> count_subtotals_gpu;
            {
                count_elt_t *count_subtotals_gpu_ptr;
                HIP_CHECK_BREAK(
                    hipMalloc(&count_subtotals_gpu_ptr, count_subtotals_nbytes),
                    hip_ok);
                count_subtotals_gpu.reset(count_subtotals_gpu_ptr);
            }

            // Zero the per-slot subtotals before counting.
            hipLaunchKernelGGL(
                kernel::memset_gpu, nblocks, nthreads, 0, 0,
                count_subtotals_gpu.get(), 0UL, static_cast<size_t>(nblocks));
            HIP_CHECK_BREAK(hipGetLastError(), hip_ok);
            (void)hipDeviceSynchronize();

            auto const kernel_begin_time = tm::monoclk::now();
            hipLaunchKernelGGL(
                kernel::count_gpu, nblocks, nthreads, 0, 0,
                rands_gpu.get(), count_subtotals_gpu.get(), rands.size(),
                static_cast<size_t>(nblocks), opts.gt);
            HIP_CHECK_BREAK(hipGetLastError(), hip_ok);
            (void)hipDeviceSynchronize();
            auto const kernel_end_time = tm::monoclk::now();

            std::vector<size_t> count_subtotals(nblocks);
            HIP_CHECK_BREAK(
                hipMemcpy(count_subtotals.data(), count_subtotals_gpu.get(),
                          count_subtotals_nbytes, hipMemcpyDeviceToHost),
                hip_ok);
            (void)hipDeviceSynchronize();

            // TODO parallel sum on GPU
            auto const total =
                std::accumulate(
                    count_subtotals.cbegin(), count_subtotals.cend(),
                    static_cast<size_t>(0));

            auto const all_end_time = tm::monoclk::now();

            tm::dur const kernel_time(kernel_end_time - kernel_begin_time);
            auto total_time(all_end_time - begin_time);
            tm::dur const total_time_without_tool_init(total_time);

            // opts.gt is a uint64_t, so it is printed with PRIu64 rather than %zu.
            printf("len(rands) = %zu; gt = %" PRIu64 "; count(rands, gt) = %zu\n"
                   "main kernel time elapsed: %" DBL_FMT "\n"
                   "full time elapsed: %" DBL_FMT "\n",
                   rands.size(), opts.gt, total,
                   kernel_time.count(),
                   total_time_without_tool_init.count());
        } while (false);

        if (opts.disassemble) {
            disassembly_disassemble_kernels(false);
        }
    }

cleanup:
    if (opts.pc_sampling) {
        rocprofiler_terminate_session(sid);
        rocprofiler_flush_data(sid, bid);
        rocprofiler_destroy_session(sid);
    }

out:
    // A HIP failure must not be reported as success.
    return ROCPROFILER_STATUS_SUCCESS == rocprofiler_ok && hipSuccess == hip_ok
        ? EXIT_SUCCESS
        : EXIT_FAILURE;
}
/*
 * Entry point of the code-printing sample: parses options, initializes HSA
 * and rocprofiler, runs the workload and shuts both down again. Exits with
 * EXIT_SUCCESS only when rocprofiler initialized and the workload did not
 * return EXIT_FAILURE.
 */
int
main(int argc, char **argv)
{
    int const parse_status = get_options(argc, argv, &g_opts);
    if (EXIT_SUCCESS != parse_status) {
        return parse_status;
    }

    if (HSA_STATUS_SUCCESS != hsa_init()) {
        return EXIT_FAILURE;
    }

    int ret = EXIT_FAILURE;
    auto ok = ROCPROFILER_STATUS_SUCCESS;
    ROCPROFILER_CHECK(rocprofiler_initialize(), ok);

    // rocprofiler is only finalized when it was initialized successfully.
    if (ROCPROFILER_STATUS_SUCCESS == ok) {
        ret = run_kernel(g_opts);
        rocprofiler_finalize();
    }

    hsa_shut_down();

    bool const succeeded =
        ROCPROFILER_STATUS_SUCCESS == ok && EXIT_FAILURE != ret;
    return succeeded ? EXIT_SUCCESS : EXIT_FAILURE;
}
@@ -0,0 +1,54 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
// Program name; used as the prefix of the diagnostics printed by the macros below.
#define PROGNAME "code_printing_sample"
// Prints the file/line of the expansion and the HIP error string for `code`
// to stderr, then flushes stderr.
// NOTE(review): the definition ends in `while (false);` — with that trailing
// semicolon, `HIP_ERROR(x);` expands to two statements, so the macro is not
// safe as the sole body of an unbraced if/else. Check callers before
// removing the semicolon.
#define HIP_ERROR(code) \
do { \
fprintf(stderr, \
PROGNAME ": Assertion failed at %s:%d, HIP error: %s\n", \
__FILE__, __LINE__, hipGetErrorString((code))); \
fflush(stderr); \
} while (false);
// Evaluates `expr` once; if the result is not hipSuccess, reports it with
// HIP_ERROR, stores it in `var` and executes `break`. It must therefore be
// used directly inside a loop or switch. `var` is left untouched on success.
// Expands to a bare `if` statement, so do not follow it with an `else`.
#define HIP_CHECK_BREAK(expr, var) \
if (auto const code = (expr); hipSuccess != code) { \
HIP_ERROR(code); \
(var) = code; \
break; \
}
// Prints the file/line of the expansion and the rocprofiler error string for
// `code` to stderr, then flushes stderr.
// NOTE(review): as with HIP_ERROR, the trailing semicolon after
// `while (false)` makes `ROCPROFILER_ERROR(x);` expand to two statements.
#define ROCPROFILER_ERROR(code) \
do { \
fprintf(stderr, \
PROGNAME ": Assertion failed at %s:%d, ROCmtools error: %s\n", \
__FILE__, __LINE__, rocprofiler_error_str(code)); \
fflush(stderr); \
} while (false);
// Evaluates `expr`, always stores the result in `var`, and reports it with
// ROCPROFILER_ERROR when it is not ROCPROFILER_STATUS_SUCCESS. Control flow
// continues either way; callers inspect `var` afterwards.
// Expands to a bare `if` statement, so do not follow it with an `else`.
#define ROCPROFILER_CHECK(expr, var) \
if ((var) = (expr); ROCPROFILER_STATUS_SUCCESS != (var)) { \
ROCPROFILER_ERROR((var)); \
}
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
@@ -0,0 +1,49 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
#include <chrono>
#include <cstdint>
/*
 * Command-line options of the sample together with their default values.
 */
struct program_options {
    program_options() = default;

    int device = 0;                     // -d DEV: HIP device number
    bool no_gpu = false;
    bool hip_memset = false;
    size_t rands_len = 1024 * 1024 * 4; // -n LEN: length of the random array
    uint64_t gt = 0;                    // MIN: threshold values are compared against
    // SEED: defaults to the steady clock's current tick count.
    uint64_t seed = std::chrono::steady_clock::now().time_since_epoch().count();
    bool disassemble = false;           // -D
    bool pc_sampling = false;           // -P
};
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
@@ -0,0 +1,72 @@
#include "../common/common.h"
// Application-replay sample. The mode is not supported yet, so the program
// only prints a notice and exits with 0; the intended flow is kept below in
// a preprocessor-disabled (#if 0) region.
int main(int argc, char** argv) {
printf("APPLICATION REPLAY Mode is not yet Supported!");
// Disabled until application replay mode is supported.
#if 0
int* gpuMem;
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_APPLICATION_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x9999, &buffer_id));
// Counter Collection Filter
std::vector<const char*> counters;
counters.emplace_back("GRBM_COUNT");
rocprofiler_filter_id_t filter_id;
[[maybe_unused]] rocprofiler_filter_property_t property = {};
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
rocprofiler_filter_data_t{.counters_names = &counters[0]},
counters.size(), &filter_id, property));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
filter_ids.emplace_back(filter_id);
// Normal HIP Calls
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be profiled
kernelCalls('A');
kernelCalls('B');
// Activating Profiling Session to profile whatever kernel launches occurs up
// till the next terminate session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls
HIP_CALL(hipFree(gpuMem));
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
#endif
return 0;
}
@@ -0,0 +1,61 @@
#include "../common/common.h"
#include <chrono>
#include <thread>
#include <iostream>
#include "rocprofiler.h"
int main(int argc, char** argv) {
int poll_duration = 5;
if (argc > 1) poll_duration = atoi(argv[1]);
CHECK_ROCPROFILER(rocprofiler_initialize());
printf("initialize\n");
rocprofiler_session_id_t dp_session_id;
std::vector<const char*> counters;
counters.emplace_back("FETCH_SIZE");
printf("session create\n");
int gpu_agent = 0;
int cpu_agent = 0;
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_create(&counters[0], counters.size(),
&dp_session_id, gpu_agent, cpu_agent));
printf("session start \n");
// start GPU device profiling
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_start(dp_session_id));
using std::chrono::duration_cast;
using std::chrono::high_resolution_clock;
using std::chrono::milliseconds;
auto t1 = high_resolution_clock::now();
do {
printf("polling\n");
std::vector<rocprofiler_device_profile_metric_t> data(counters.size());
// Poll metrics
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_poll(dp_session_id, &data[0]));
for (size_t i = 0; i < data.size(); i++)
std::cout << data[i].metric_name << ": " << data[i].value.value << std::endl;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
// break;
} while (--poll_duration > 0);
auto t2 = high_resolution_clock::now();
/* Getting number of milliseconds as an integer. */
auto ms_int = duration_cast<milliseconds>(t2 - t1);
std::cout << ms_int.count() << "ms\n";
// Stop session
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_stop(dp_session_id));
// Destroy session
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_destroy(dp_session_id));
return 0;
}
@@ -0,0 +1,68 @@
#include "../common/common.h"
int main(int argc, char** argv) {
int* gpuMem;
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x9999, &buffer_id));
// Counter Collection Filter
std::vector<const char*> counters;
counters.emplace_back("GRBM_COUNT");
rocprofiler_filter_id_t filter_id;
[[maybe_unused]] rocprofiler_filter_property_t property = {};
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
rocprofiler_filter_data_t{.counters_names = &counters[0]},
counters.size(), &filter_id, property));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
// Normal HIP Calls
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be profiled
kernelCalls('A');
kernelCalls('B');
// Activating Profiling Session to profile whatever kernel launches occurs up
// till the next terminate session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls
HIP_CALL(hipFree(gpuMem));
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
return 0;
}
@@ -0,0 +1,78 @@
#include "../common/common.h"
// User-replay sample. The whole profiling workflow below is compiled out with
// `#if 0`, so the program as built only prints a notice and returns 0.
// Command-line arguments are ignored.
int main(int argc, char** argv) {
// The notice has no trailing newline.
printf("USER REPLAY Mode is not yet Supported!");
// Everything up to the matching #endif is disabled reference code.
// NOTE(review): the disabled code uses `filter_ids`, which is not declared in
// this function - confirm it is provided elsewhere before re-enabling.
#if 0
int* gpuMem;
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_USER_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
// The callback forwards each batch of records to WriteBufferRecords unchanged.
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x9999, &buffer_id));
// Counter Collection Filter
// A single counter name is requested and the filter is routed to the buffer above.
std::vector<const char*> counters;
counters.emplace_back("GRBM_COUNT");
rocprofiler_filter_id_t filter_id;
[[maybe_unused]] rocprofiler_filter_property_t property = {};
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
rocprofiler_filter_data_t{.counters_names = &counters[0]},
counters.size(), &filter_id, property));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
filter_ids.emplace_back(filter_id);
// Normal HIP Calls
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be profiled
kernelCalls('A');
kernelCalls('B');
// Activating Profiling Session to profile whatever kernel launches occur up
// till the next terminate session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// Replay Pass Start point
CHECK_ROCPROFILER(rocprofiler_start_replay_pass());
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls
HIP_CALL(hipFree(gpuMem));
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Replay Pass End point
// NOTE(review): the replay pass is ended only after the session has been
// terminated and flushed - confirm this ordering is intended.
CHECK_ROCPROFILER(rocprofiler_end_replay_pass());
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
#endif
return 0;
}
@@ -0,0 +1,28 @@
#!/bin/bash
# Runs the enabled rocprofiler sample binaries located next to this script.
# The script's exit status is that of the last sample executed.

# Directory containing this script; the sample binaries are looked up here.
CURRENT_DIR="$(dirname -- "$0")"

echo -e "Running Samples"

# Counter definitions, located relative to the script directory.
export ROCPROFILER_METRICS_PATH="${CURRENT_DIR}/../counters/derived_counters.xml"

echo -e "\tProfiler Samples:"

# Each sample is run directly with a quoted path. `eval` with an unquoted path
# would re-parse the path as shell code and break on spaces or metacharacters.

# Disabled sample:
# echo -e "\t\tApplication Replay Sample:"
# "${CURRENT_DIR}/profiler_application_replay"

echo -e "\t\tKernel Replay Sample:"
"${CURRENT_DIR}/profiler_kernel_replay"

# Disabled sample:
# echo -e "\t\tUser Replay Sample:"
# "${CURRENT_DIR}/profiler_user_replay"

echo -e "\t\tDevice Profiling Sample:"
"${CURRENT_DIR}/profiler_device_profiling"

# Disabled samples:
# echo -e "\tTracer Samples:"
# echo -e "\t\tHIP/HSA Trace Sample:"
# "${CURRENT_DIR}/tracer_hip_hsa"
@@ -0,0 +1,82 @@
#include "../common/common.h"
int main(int argc, char** argv) {
int* gpuMem;
prepare();
// Initialize the tools
CHECK_ROCPROFILER(rocprofiler_initialize());
// Creating the session with given replay mode
rocprofiler_session_id_t session_id;
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
// Creating Output Buffer for the data
rocprofiler_buffer_id_t buffer_id;
CHECK_ROCPROFILER(rocprofiler_create_buffer(
session_id,
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
WriteBufferRecords(record, end_record, session_id, buffer_id);
},
0x9999, &buffer_id));
// Tracing Filter
std::vector<rocprofiler_tracer_activity_domain_t> apis_requested;
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_OPS);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_API);
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_OPS);
apis_requested.emplace_back(ACTIVITY_DOMAIN_ROCTX);
rocprofiler_filter_id_t api_tracing_filter_id;
CHECK_ROCPROFILER(rocprofiler_create_filter(
session_id, ROCPROFILER_API_TRACE, rocprofiler_filter_data_t{&apis_requested[0]},
apis_requested.size(), &api_tracing_filter_id, rocprofiler_filter_property_t{}));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, api_tracing_filter_id, buffer_id));
CHECK_ROCPROFILER(rocprofiler_set_api_trace_sync_callback(
session_id, api_tracing_filter_id,
[](rocprofiler_record_tracer_t record, rocprofiler_session_id_t session_id) {
FlushTracerRecord(record, session_id);
}));
// Kernel Tracing
rocprofiler_filter_id_t kernel_tracing_filter_id;
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
rocprofiler_filter_data_t{}, 0, &kernel_tracing_filter_id,
rocprofiler_filter_property_t{}));
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, kernel_tracing_filter_id, buffer_id));
// Normal HIP Calls won't be traced
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
// KernelA and KernelB won't be traced
kernelCalls('A');
kernelCalls('B');
// Activating Profiling Session to profile whatever kernel launches occurs up
// till the next terminate session
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
// KernelC, KernelD, KernelE and KernelF to be traced as part of the session
kernelCalls('C');
kernelCalls('D');
kernelCalls('E');
kernelCalls('F');
// Normal HIP Calls that will be traced
HIP_CALL(hipFree(gpuMem));
// Deactivating session
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
// Manual Flush user buffer request
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
// Destroy sessions
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
// Destroy all profiling related objects(User buffer, sessions, filters,
// etc..)
CHECK_ROCPROFILER(rocprofiler_finalize());
return 0;
}
@@ -0,0 +1,242 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import os, sys, re
import CppHeaderParser
import argparse
import string
LICENSE = \
'/*\n' + \
'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n' + \
'\n' + \
'Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \
'of this software and associated documentation files (the "Software"), to deal\n' + \
'in the Software without restriction, including without limitation the rights\n' + \
'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \
'copies of the Software, and to permit persons to whom the Software is\n' + \
'furnished to do so, subject to the following conditions:\n' + \
'\n' + \
'The above copyright notice and this permission notice shall be included in\n' + \
'all copies or substantial portions of the Software.\n' + \
'\n' + \
'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \
'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \
'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \
'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \
'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \
'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \
'THE SOFTWARE.\n' + \
'*/\n'
# C++ text written verbatim into every generated header (see gen_cppheader).
# It opens `namespace detail` (the namespace is closed later by the footer
# written in gen_cppheader) and emits three operator<< definitions:
#   * a catch-all template that streams `v` with `out << v`, guarded by a
#     function-local static `recursion` flag so a re-entrant call is a no-op;
#   * an `unsigned char` overload that casts to `unsigned int` before streaming;
#   * a `char` overload that casts to `unsigned char` before streaming.
header_basic = \
'namespace detail {\n' + \
'template <typename T>\n' + \
' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \
' using std::operator<<;\n' + \
' static bool recursion = false;\n' + \
' if (recursion == false) { recursion = true; out << v; recursion = false; }\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const unsigned char &v) {\n' + \
' out << (unsigned int)v;\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char &v) {\n' + \
' out << (unsigned char)v;\n' + \
' return out;\n }\n'
# Struct names already handled by process_struct (used as a set; values are 1).
structs_analyzed = {}
# Accumulated text of the forwarding operator<< definitions that gen_cppheader
# writes after the namespace footer.
global_ops = ''
# Scratch buffer: C++ statements collected by process_struct for the struct
# currently being emitted; reset by gen_cppheader after each struct.
global_str = ''
# Despite the name, this holds the open output file object (set in gen_cppheader).
output_filename_h = None
# API name derived from the output file name by gen_cppheader.
apiname = ""
# process_struct traverses recursively all structs to extract all fields
def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, apiname):
# Appends to the module-level `global_str` one C++ printing snippet per public
# member of `cppHeader_struct`, recursing through union members. Returns None.
#
# file_handle: handle for output file {api_name}_ostream_ops.h to be generated
#              (only passed on to the recursive calls; nothing is written here)
# cppHeader_struct: cppHeader struct being processed
# cppHeader: cppHeader object created by CppHeaderParser.CppHeader(...)
# parent_hier_name: parent hierarchical name used for nested structs/enums
# apiname: for example hip.
global global_str
# Nothing to do for the special-cased 'max_align_t', for names the parser does
# not know, or for structs that were already processed.
if cppHeader_struct == 'max_align_t': #function pointers not working in cppheaderparser
return
if cppHeader_struct not in cppHeader.classes:
return
if cppHeader_struct in structs_analyzed:
return
# Mark as processed before descending into the members.
structs_analyzed[cppHeader_struct] = 1
# Public members are visited from last to first.
for l in reversed(range(len(cppHeader.classes[cppHeader_struct]["properties"]["public"]))):
# Member name, prefixed with the parent's dotted path when there is one.
key = 'name'
name = ""
if key in cppHeader.classes[cppHeader_struct]["properties"]["public"][l]:
if parent_hier_name != '':
name = parent_hier_name + '.' + cppHeader.classes[cppHeader_struct]["properties"]["public"][l][key]
else:
name = cppHeader.classes[cppHeader_struct]["properties"]["public"][l][key]
# Members without a name are skipped.
if name == '':
continue
# Member type; members without a type are skipped.
key2 = 'type'
mtype = ""
if key2 in cppHeader.classes[cppHeader_struct]["properties"]["public"][l]:
mtype = cppHeader.classes[cppHeader_struct]["properties"]["public"][l][key2]
if mtype == '':
continue
# NOTE(review): array_size is read here but never used afterwards.
key3 = 'array_size'
array_size = ""
if key3 in cppHeader.classes[cppHeader_struct]["properties"]["public"][l]:
array_size = cppHeader.classes[cppHeader_struct]["properties"]["public"][l][key3]
# Class that owns this property, when the parser reports one.
key4 = 'property_of_class'
prop = ""
if key4 in cppHeader.classes[cppHeader_struct]["properties"]["public"][l]:
prop = cppHeader.classes[cppHeader_struct]["properties"]["public"][l][key4]
# NOTE(review): the local name `str` shadows the builtin within this function.
str = ''
# Non-union member: build a C++ `if` that prints "<name>=<value>, " when the
# "<struct>::<name>" string contains the <APINAME>_structs_regex text. The
# snippet is kept only when the member type does not mention "void".
if "union" not in mtype:
indent = ""
str += " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + apiname.upper() + "_structs_regex" + ") != std::string::npos) {\n"
indent = " "
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \"" + name + "=\");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v." + name + ");\n"
str += indent + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, \", \");\n"
str += " }\n"
if "void" not in mtype:
global_str += str
# Union member: nothing is printed for the member itself; instead recurse into
# the candidate class keys under which the nested type may be registered,
# using this member's name as the new parent path.
else:
if prop != '':
next_cppHeader_struct = prop + "::"
process_struct(file_handle, next_cppHeader_struct, cppHeader, name, apiname)
next_cppHeader_struct = prop + "::" + mtype + " "
process_struct(file_handle, next_cppHeader_struct, cppHeader, name, apiname)
next_cppHeader_struct = cppHeader_struct + "::"
process_struct(file_handle, next_cppHeader_struct, cppHeader, name, apiname)
# Parses API header file and generates ostream ops files ostream_ops.h
def gen_cppheader(infilepath, outfilepath, rank):
# Parses one API header and writes operator<< definitions for its structs to
# the output file. Exits with status 1 if the header cannot be parsed.
#
# infilepath: API Header file to be parsed
# outfilepath: Output file where ostream operators are written
# rank: position of this header among the inputs, as used by the driver at the
#       bottom of this file: 0 = first of several, 1 = last of several,
#       2 = the only input, any other value = a middle input.
#       Ranks 0 and 2 open the output file and write the preamble;
#       ranks 1 and 2 write the footer and close the file.
global global_ops
global output_filename_h
global apiname
global global_str
try:
cppHeader = CppHeaderParser.CppHeader(infilepath)
except CppHeaderParser.CppParseError as e:
print(e)
sys.exit(1)
if rank == 0 or rank == 2:
# Derive the API name from the output file name: strip the directory and the
# "_ostream_ops.h" suffix, then upper-case the remainder.
mpath = os.path.dirname(outfilepath)
if mpath == "":
mpath = os.getcwd()
apiname = outfilepath.replace(mpath + "/","")
output_filename_h = open(outfilepath,"w+")
apiname = apiname.replace("_ostream_ops.h","")
apiname = apiname.upper()
# Preamble: generated-file marker, license, include guard and includes.
output_filename_h.write("// automatically generated\n")
output_filename_h.write(LICENSE + '\n')
header_s = \
'#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
'#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
'\n' + \
'#include "src/core/session/tracer/src/roctracer.h"\n' + \
'\n' + \
'#ifdef __cplusplus\n' + \
'#include <iostream>\n' + \
'#include <string>\n'
output_filename_h.write(header_s)
output_filename_h.write('\n')
# Open the roctracer / <api>_support namespaces and declare the tunables the
# generated operators consult.
output_filename_h.write('namespace roctracer {\n')
output_filename_h.write('namespace ' + apiname.lower() + '_support {\n')
output_filename_h.write('static int ' + apiname.upper() + '_depth_max = 1;\n')
output_filename_h.write('static int ' + apiname.upper() + '_depth_max_cnt = 0;\n')
output_filename_h.write('static std::string ' + apiname.upper() + '_structs_regex = \"\";\n')
output_filename_h.write('// begin ostream ops for '+ apiname + ' \n')
output_filename_h.write("// basic ostream ops\n")
output_filename_h.write(header_basic)
output_filename_h.write("// End of basic ostream ops\n\n")
# One operator<< per eligible struct found in the parsed header.
for c in cppHeader.classes:
# NOTE(review): c[-2] would raise IndexError for a one-character class name.
if c[-2] == ':' and c[-1] == ':': continue #ostream operator cannot be overloaded for anonymous struct therefore it is skipped
if "union" in c:
continue
if c in structs_analyzed:
continue
if c == 'max_align_t' or c == '__fsid_t': # Skipping as it is defined in multiple domains
continue
# Structs without public members get no operator.
if len(cppHeader.classes[c]["properties"]["public"]) != 0:
output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
output_filename_h.write("{\n")
output_filename_h.write(" std::operator<<(out, '{');\n")
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n")
output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n" )
# Collect the per-member snippets for this struct into global_str.
process_struct(output_filename_h, c, cppHeader, "", apiname)
# Drop the last three "\n"-separated pieces (the final ", " statement, its
# closing brace and the trailing empty piece), then re-add the closing brace,
# so the last printed member is not followed by a separator.
global_str = "\n".join(global_str.split("\n")[0:-3])
if global_str != '': global_str += "\n }\n"
output_filename_h.write(global_str)
output_filename_h.write(" };\n")
output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n")
output_filename_h.write(" std::operator<<(out, '}');\n")
output_filename_h.write(" return out;\n")
output_filename_h.write("}\n")
# Reset the scratch buffer and queue a forwarding operator<< that is written
# after the namespaces are closed.
global_str = ''
global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"
if rank == 1 or rank == 2:
# Footer: close the three namespaces, emit the queued forwarding operators,
# close the include guard, append a trailing hip_prof_str.h include (written
# after the guard's #endif), and close the file.
footer = '// end ostream ops for '+ apiname + ' \n'
footer += '};};};\n\n'
output_filename_h.write(footer)
output_filename_h.write(global_ops)
footer = '#endif //__cplusplus\n' + \
'#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
' \n'
output_filename_h.write(footer)
output_filename_h.write('#include <hip/amd_detail/hip_prof_str.h>')
output_filename_h.close()
print('File ' + outfilepath + ' generated')
return
# Command-line interface. Both options are mandatory; note that the arguments
# are parsed at module level, i.e. also when this file is imported.
parser = argparse.ArgumentParser(
    description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument(
    '-in',
    metavar='fileList',
    help='Comma separated list of header files to be parsed',
    required=True)
requiredNamed.add_argument(
    '-out',
    metavar='file',
    help='Output file with ostream operators',
    required=True)
args = vars(parser.parse_args())

if __name__ == '__main__':
    flist = args['in'].split(',')
    last_index = len(flist) - 1
    if last_index == 0:
        # A single header both opens and closes the output file (rank 2).
        gen_cppheader(flist[0], args['out'], 2)
    else:
        # Several headers: rank 0 for the first, 1 for the last, -1 in between.
        for position, header_path in enumerate(flist):
            if position == 0:
                rank = 0
            elif position == last_index:
                rank = 1
            else:
                rank = -1
            gen_cppheader(header_path, args['out'], rank)
+581
مشاهده پرونده
@@ -0,0 +1,581 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
from __future__ import print_function
import os, sys, re
# File-name constants. Their use is not visible in this part of the file;
# presumably the names of the generated header and inline file - TODO confirm.
H_OUT='hsa_prof_str.h'
CPP_OUT='hsa_prof_str.inline.h'
# Header that holds the `struct <Name>Table { ... }` API tables.
API_TABLES_H = 'hsa_api_trace.h'
# Pairs of (API table name, header declaring the functions of that table).
# NOTE(review): if this tuple is what gets passed to API_DescrParser as
# `api_headers`, the last entry is treated specially there: it is not parsed
# as a table but searched for the calls the earlier headers did not declare.
API_HEADERS_H = (
('CoreApi', 'hsa.h'),
('AmdExt', 'hsa_ext_amd.h'),
('ImageExt', 'hsa_ext_image.h'),
('AmdExt', API_TABLES_H),
)
LICENSE = \
'/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.\n' + \
'\n' + \
' Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \
' of this software and associated documentation files (the "Software"), to deal\n' + \
' in the Software without restriction, including without limitation the rights\n' + \
' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \
' copies of the Software, and to permit persons to whom the Software is\n' + \
' furnished to do so, subject to the following conditions:\n' + \
'\n' + \
' The above copyright notice and this permission notice shall be included in\n' + \
' all copies or substantial portions of the Software.\n' + \
'\n' + \
' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \
' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \
' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \
' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \
' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \
' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \
' THE SOFTWARE. */\n'
#############################################################
# Error handler
def fatal(module, msg):
    """Print `<module> Error: "<msg>"` to stderr and exit with status 1."""
    report = module + ' Error: "' + msg + '"'
    print(report, file=sys.stderr)
    sys.exit(1)
# Get next text block
def NextBlock(pos, record):
    """Return the index just past the text block that starts at `record[pos]`.

    A block is one of:
      * a balanced parenthesised group, when `record[pos]` is '(';
      * otherwise a run of whitespace, or failing that a run of word
        characters and '*'.

    An empty `record` yields `pos` unchanged. Unbalanced parentheses, or a
    position at which neither kind of run matches, are reported through
    fatal().
    """
    if len(record) == 0:
        return pos

    if record[pos] == '(':
        # Walk forward until the parenthesis opened at `pos` is closed again.
        depth = 0
        for index in range(pos, len(record)):
            char = record[index]
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    index += 1
                    break
        if depth != 0:
            fatal('NextBlock', "count is not zero (" + str(depth) + ")")
        if record[index - 1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[index - 1] + "'")
        return index

    # Whitespace takes precedence over a word/'*' run.
    token = re.compile(r'(\s+)').match(record, pos)
    if not token:
        token = re.compile(r'([\w\*]+)').match(record, pos)
    if not token:
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
        return None
    return pos + len(token.group(1))
#############################################################
# API table parser class
class API_TableParser:
    """Collects the API names listed in one `struct <name>Table { ... };` block.

    After construction, `array` holds, in file order, the text found inside
    each `decltype(...)` entry between the line that opens the table and the
    first following line consisting of `};`.

    Attributes:
      name:  table name prefix (e.g. 'CoreApi' for `struct CoreApiTable {`).
      array: list of the collected `decltype(...)` arguments.
      inp:   the header file object; it is closed once parsing has finished.
    """

    def fatal(self, msg):
        """Abort through the module-level fatal(), tagged with this class name."""
        fatal('API_TableParser', msg)

    def __init__(self, header, name):
        """Parse `header` and collect the entries of table `name`.

        header: path of the header file containing the table definition.
        name:   table name prefix; the struct searched for is `<name>Table`.
        """
        self.name = name

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw strings: '\s' in a plain literal is an invalid escape sequence.
        self.beg_pattern = re.compile(r'^\s*struct\s+' + name + r'Table\s*{\s*$')
        self.end_pattern = re.compile(r'^\s*};\s*$')
        self.array = []

        # The file is only needed while parsing, so release it afterwards
        # instead of leaking the handle for the lifetime of the object.
        self.inp = open(header, 'r')
        try:
            self.parse()
        finally:
            self.inp.close()

    # normalizing a line
    def norm_line(self, line):
        """Drop one trailing newline (if present) and collapse leading whitespace
        to a single space."""
        # Only strip an actual newline: the last line of a file may not have
        # one, and blindly cutting the final character would corrupt it.
        if line.endswith('\n'):
            line = line[:-1]
        return re.sub(r'^\s+', r' ', line)

    # check for start record
    def is_start(self, record):
        """Match object if `record` opens the table struct, else None."""
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        """Match object if `record` is a closing `};` line, else None."""
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        """Match object whose group(1) is the `decltype(...)` argument, else None."""
        return re.match(r'^\s*decltype\(([^\)]*)\)', record)

    # parse method
    def parse(self):
        """Scan the header and append each table entry to `self.array`.

        Lines before the table opener are ignored; scanning stops at the first
        `};` line seen after the opener.
        """
        active = False
        for line in self.inp:
            record = self.norm_line(line)
            if self.is_start(record):
                active = True
            if not active:
                continue
            if self.is_end(record):
                return
            entry = self.is_entry(record)
            if entry:
                self.array.append(entry.group(1))
#############################################################
# API declaration parser class
# Looks up the C declaration of each requested API call in one header and
# stores its parsed signature in the caller-supplied `data` dict, keyed by the
# call name. A call whose declaration is not found in the header simply gets
# no entry in `data`.
# NOTE(review): the header file opened in __init__ is never explicitly closed.
# NOTE(review): several regex literals below are non-raw strings containing
# '\s' / '\)' style escapes.
class API_DeclParser:
# Abort through the module-level fatal(), tagged with this class name.
def fatal(self, msg):
fatal('API_DeclParser', msg)
# header: path of the header to search.
# array:  iterable of call names to look up.
# data:   dict that receives one entry per call found (mutated in place).
# Aborts if the header does not exist or if a call is already present in data.
def __init__(self, header, array, data):
if not os.path.isfile(header):
self.fatal("file '" + header + "' not found")
self.inp = open(header, 'r')
# A declaration is complete once the accumulated text ends with ');'.
self.end_pattern = re.compile('\);\s*$')
self.data = data
for call in array:
if call in data:
self.fatal(call + ' is already found')
self.parse(call)
# api record filter
# Replaces the HSA_API and HSA_DEPRECATED markers (each surrounded by
# whitespace) with a single space.
def api_filter(self, record):
record = re.sub(r'\sHSA_API\s', r' ', record)
record = re.sub(r'\sHSA_DEPRECATED\s', r' ', record)
return record
# check for start record
# True-ish when the call name, preceded by whitespace and followed by an
# optional gap and '(', appears anywhere in the record.
def is_start(self, call, record):
return re.search('\s' + call + '\s*\(', record)
# check for API method record
# True-ish when, after api_filter(), the record starts with
# "<whitespace><one token><whitespace><call>(" - i.e. a return type directly
# followed by the call name.
def is_api(self, call, record):
record = self.api_filter(record)
return re.match('\s+\S+\s+' + call + '\s*\(', record)
# check for end record
def is_end(self, record):
return self.end_pattern.search(record)
# parse method args
# Splits a complete declaration into a dict with the keys:
#   'ret'  - the first text block of the record (the return type),
#   'args' - the argument list as text, re-joined with ', ',
#   'astr' - dict mapping argument name -> full argument text,
#   'alst' - argument names in declaration order,
#   'tlst' - full argument texts in declaration order.
def get_args(self, record):
struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
# Normalise: drop leading whitespace and attach '*' runs to the preceding
# token, followed by exactly one space.
record = re.sub(r'^\s+', r'', record)
record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
rind = NextBlock(0, record)
struct['ret'] = record[0:rind]
# The argument list is the first parenthesised block of the record.
pos = record.find('(')
end = NextBlock(pos, record);
args = record[pos:end]
# Strip the surrounding parentheses and the whitespace around commas.
args = re.sub(r'^\(\s*', r'', args)
args = re.sub(r'\s*\)$', r'', args)
args = re.sub(r'\s*,\s*', r',', args)
struct['args'] = re.sub(r',', r', ', args)
if len(args) == 0: return struct
pos = 0
# A trailing comma acts as a sentinel so every argument ends with ','.
args = args + ','
while pos < len(args):
ind1 = NextBlock(pos, args) # type
ind2 = NextBlock(ind1, args) # space
# Plain argument: keep advancing block by block; the block right before the
# terminating comma is taken as the argument name.
if args[ind2] != '(':
while ind2 < len(args):
end = NextBlock(ind2, args)
if args[end] == ',': break
else: ind2 = end
name = args[ind2:end]
# Function-pointer argument: the name is taken from the "(*name)" group and
# the following block (the pointer's own parameter list) is skipped.
else:
ind3 = NextBlock(ind2, args) # field
m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
if not m:
self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
name = m.group(1)
end = NextBlock(ind3, args) # the rest
item = args[pos:end]
struct['astr'][name] = item
struct['alst'].append(name)
struct['tlst'].append(item)
if args[end] != ',':
self.fatal("no comma '" + args + "'")
pos = end + 1
return struct
# parse given api
# Re-reads the header from the start, accumulating lines into `record` until
# a declaration of `call` is complete, then stores its parsed form in
# self.data[call].
# NOTE(review): `found` and `api_name` are assigned but never read.
def parse(self, call):
record = ''
active = 0
found = 0
api_name = ''
prev_line = ''
self.inp.seek(0)
for line in self.inp.readlines():
# Append the line without its last character (normally the newline) and
# normalise leading whitespace to a single space.
record += ' ' + line[:-1]
record = re.sub(r'^\s*', r' ', record)
# Not inside a declaration yet: look for the call name. If the return type is
# not on the same line, retry with the previous line prepended; abort if the
# text still does not look like a declaration of the call.
if active == 0:
if self.is_start(call, record):
active = 1
m = self.is_api(call, record)
if not m:
record = ' ' + prev_line + ' ' + record
m = self.is_api(call, record)
if not m:
self.fatal("bad api '" + line + "'")
# Inside a declaration: finish once the accumulated text ends with ');'.
if active == 1:
if self.is_end(record):
self.data[call] = self.get_args(record)
active = 0
found = 0
# Outside a declaration nothing is carried over to the next line.
if active == 0: record = ''
prev_line = line
#############################################################
# API description parser class
class API_DescrParser:
# Abort through the module-level fatal(), tagging the message with this
# class name.
def fatal(self, msg):
fatal('API_DescrParser', msg)
# Parses the API tables and declarations and builds the complete text of the
# two generated files in self.h_content and self.cpp_content.
#
# out_h_file:  name of the generated header; used to derive the include-guard
#              macro and nothing else here.
# hsa_dir:     path prefix that is concatenated directly with header names
#              (no separator is inserted).
# api_table_h: header containing the `struct <Name>Table` definitions.
# api_headers: sequence of (table name, header) pairs; the last pair is not
#              parsed as a table but searched for the calls that the earlier
#              headers did not declare.
# license:     license text placed at the top of both outputs.
def __init__(self, out_h_file, hsa_dir, api_table_h, api_headers, license):
# Include-guard macro: upper-cased file name with '/' and '.' turned into '_'.
out_macro = re.sub(r'[\/\.]', r'_', out_h_file.upper()) + '_'
self.h_content = ''
self.cpp_content = ''
self.api_names = []
self.api_calls = {}
self.api_rettypes = set()
self.api_id = {}
api_data = {}
api_list = []
ns_calls = []
for i in range(0, len(api_headers)):
(name, header) = api_headers[i]
# All entries but the last: read the call list from the API table.
if i < len(api_headers) - 1:
api = API_TableParser(hsa_dir + api_table_h, name)
api_list = api.array
self.api_names.append(name)
self.api_calls[name] = api_list
# Last entry: retry the calls not found so far against this header.
else:
api_list = ns_calls
ns_calls = []
for call in api_list:
if call in api_data:
self.fatal("call '" + call + "' is already found")
# Fills api_data with the parsed declaration of every call it can find.
API_DeclParser(hsa_dir + header, api_list, api_data)
for call in api_list:
if not call in api_data:
# Not-supported functions
ns_calls.append(call)
else:
# API ID map
self.api_id[call] = 'HSA_API_ID_' + call
# Return types
self.api_rettypes.add(api_data[call]['ret'])
# 'void' needs no return-value slot.
self.api_rettypes.discard('void')
self.api_data = api_data
self.ns_calls = ns_calls
# Header output: banner comment, include guard, then the generated sections.
self.h_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"
self.h_content += "/* HSA API tracing primitives\n"
for (name, header) in api_headers:
self.h_content += " '" + name + "', header '" + header + "', " + str(len(self.api_calls[name])) + ' funcs\n'
for call in self.ns_calls:
self.h_content += ' ' + call + ' was not parsed\n'
self.h_content += " */\n"
self.h_content += '\n'
self.h_content += '#ifndef ' + out_macro + '\n'
self.h_content += '#define ' + out_macro + '\n'
self.h_content += self.add_section('API ID enumeration', ' ', self.gen_id_enum)
self.h_content += '/* Declarations of APIs intended for use only by tools. */\n'
self.h_content += 'typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);\n'
self.h_content += 'typedef void (*hsa_amd_queue_intercept_handler)(const void*, uint64_t, uint64_t, void*,\n'
self.h_content += ' hsa_amd_queue_intercept_packet_writer);\n'
self.h_content += 'typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t*, hsa_agent_t, void*);\n'
self.h_content += self.add_section('API arg structure', ' ', self.gen_arg_struct)
self.h_content += self.add_section('API output stream', ' ', self.gen_out_stream)
self.h_content += '#endif /* ' + out_macro + ' */\n'
# Inline-source output: banner, includes, namespace, saved API tables, then
# the generated sections.
self.cpp_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"
self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
self.cpp_content += '#include <atomic>\n'
self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'
self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'
self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)
self.cpp_content += '\n};\n'
# add code section
def add_section(self, title, gap, fun):
content = ''
n = 0
content += '\n/* section: ' + title + ' */\n\n'
content += fun(-1, '-', '-', {})
for index in range(len(self.api_names)):
last = (index == len(self.api_names) - 1)
name = self.api_names[index]
if n != 0:
if gap == '': content += fun(n, name, '-', {})
content += '\n'
content += gap + '/* block: ' + name + ' API */\n'
for call in self.api_calls[name]:
content += fun(n, name, call, self.api_data[call])
n += 1
content += fun(n, '-', '-', {})
return content
# generate API ID enumeration
def gen_id_enum(self, n, name, call, data):
  """Emit one piece of the ``hsa_api_id_t`` enum.

  n == -1     -> the enum opening line.
  call != '-' -> one enumerator ``<api id> = n``.
  otherwise   -> the two trailing enumerators (DISPATCH = n, NUMBER = n + 1)
                 and the closing brace.
  """
  if n == -1:
    return 'enum hsa_api_id_t {\n'
  if call == '-':
    return ''.join([
      '\n',
      ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n',
      ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n',
      '};\n',
    ])
  return ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
# generate API args structure
def gen_arg_struct(self, n, name, call, struct):
  """Emit one piece of the ``hsa_api_data_t`` struct definition.

  n == -1     -> struct header, the union of ``<ret type>_retval`` members
                 (one per entry of ``self.api_rettypes``) and the opening of
                 the arguments union.
  call != '-' -> an anonymous struct holding the declarations found in
                 ``struct['astr']``, named after the call.
  otherwise   -> closes the arguments union and the struct.
  """
  if n == -1:
    lines = [
      'struct hsa_api_data_t {\n',
      ' uint64_t correlation_id;\n',
      ' uint32_t phase;\n',
      ' union {\n',
    ]
    lines.extend(' ' + ret_type + ' ' + ret_type + '_retval;\n'
                 for ret_type in self.api_rettypes)
    lines.append(' };\n')
    lines.append(' union {\n')
    return ''.join(lines)

  if call == '-':
    return ' } args;\n' + ' uint64_t *phase_data;\n' + '};\n'

  # hsa_amd_memory_async_copy_rect additionally stores a by-value copy of the
  # structure its `range` pointer argument refers to.
  wants_range_copy = (call == "hsa_amd_memory_async_copy_rect")
  lines = [' struct {\n']
  for decl in struct['astr'].values():
    lines.append(' ' + decl + ';\n')
    if wants_range_copy and decl == "const hsa_dim3_t* range":
      lines.append(' hsa_dim3_t range__val;\n')
  lines.append(' } ' + call + ';\n')
  return ''.join(lines)
# generate API callbacks
# Emits the C++ text of one `<call>_callback` wrapper per API call.
#   n == -1     : emits the "Static declarations" section header.
#   call != '-' : emits a static function with the call's return type and
#                 argument list (struct['ret'], struct['args']).
# The generated wrapper asks the `report_activity` function whether the call is
# traced, copies the arguments into `trace_data.api_data.args.<call>`, invokes
# the saved function-table entry, and then invokes `phase_exit` if enabled.
def gen_callbacks(self, n, name, call, struct):
content = ''
if n == -1:
content += '/* section: Static declarations */\n'
content += '\n'
if call != '-':
call_id = self.api_id[call];
ret_type = struct['ret']
content += 'static ' + ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n'
content += ' hsa_trace_data_t trace_data;\n'
content += ' bool enabled{false};\n'
content += '\n'
# Generated code: tracing is "enabled" when the reporting function returns 0.
content += ' if (auto function = report_activity.load(std::memory_order_relaxed); function &&\n'
content += ' (enabled =\n'
content += ' function(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &trace_data) == 0)) {\n'
content += ' if (trace_data.phase_enter != nullptr) {\n'
# One assignment per argument name, in the order given by struct['alst'].
for var in struct['alst']:
item = struct['astr'][var];
# Arguments whose declaration matches `char* ` are duplicated with strdup.
if re.search(r'char\* ', item):
# FIXME: we should not strdup the char* arguments here, as the callback will not outlive the scope of this function. Instead, we
# should generate a helper function to capture the content of the arguments similar to hipApiArgsInit for HIP. We also need a
# helper to free the memory that is allocated to capture the content.
content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + '(' + var + ' != NULL) ? strdup(' + var + ')' + ' : NULL;\n'
else:
content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + var + ';\n'
# Special case: also store the pointee of `range` by value in `range__val`.
if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
content += ' trace_data.api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n'
content += ' trace_data.phase_enter(' + call_id + ', &trace_data);\n'
content += ' }\n'
content += ' }\n'
content += '\n'
# For non-void calls the result is stored in `<ret type>_retval`; the call
# itself always goes through `<table name>_saved_before_cb.<call>_fn`.
if ret_type != 'void':
content += ' trace_data.api_data.' + ret_type + '_retval = '
content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
content += '\n'
content += ' if (enabled && trace_data.phase_exit != nullptr)\n'
content += ' trace_data.phase_exit(' + call_id + ', &trace_data);\n'
if ret_type != 'void':
content += ' return trace_data.api_data.' + ret_type + '_retval;\n'
content += '}\n'
return content
# generate API intercepting code
def gen_intercept(self, n, name, call, struct):
  """Emit one piece of the ``Install<name>Wrappers`` functions.

  A call of ``'-'`` marks a table boundary: it closes the previous function
  (when ``n > 0``) and, if ``name`` is a real table name, opens the next one.
  The very first API call (``n == 0``) also opens a function.  Every real call
  adds a line redirecting its table slot to ``<call>_callback``, except
  ``hsa_shut_down``, for which only a no-op reference to the callback is
  generated.
  """
  pieces = []
  at_boundary = (call == '-')

  if at_boundary and n > 0:
    pieces.append('};\n')

  if n == 0 or (at_boundary and name != '-'):
    pieces.append('static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n')
    pieces.append(' ' + name + '_saved_before_cb = *table;\n')

  if not at_boundary:
    if call == 'hsa_shut_down':
      pieces.append(' { void* p = (void*)' + call + '_callback; (void)p; }\n')
    else:
      pieces.append(' table->' + call + '_fn = ' + call + '_callback;\n')

  return ''.join(pieces)
# generate API name function
def gen_get_name(self, n, name, call, struct):
  """Emit one piece of the generated ``GetApiName`` id -> name switch.

  n == -1 opens the function and the switch, a real call adds its ``case``
  line, and the ``'-'`` marker closes the switch with the "unknown" fallback.
  """
  if n == -1:
    return ('static const char* GetApiName(uint32_t id) {\n'
            ' switch (id) {\n')
  if call == '-':
    return (' }\n'
            ' return "unknown";\n'
            '}\n')
  return ' case ' + self.api_id[call] + ': return "' + call + '";\n'
# generate API code function
def gen_get_code(self, n, name, call, struct):
  """Emit one piece of the generated ``GetApiCode`` name -> id lookup.

  n == -1 opens the function, a real call adds one ``strcmp`` test, and the
  ``'-'`` marker emits the ``HSA_API_ID_NUMBER`` fallback and closes it.
  """
  if n == -1:
    return 'static uint32_t GetApiCode(const char* str) {\n'
  if call == '-':
    return (' return HSA_API_ID_NUMBER;\n'
            '}\n')
  return ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
# generate stream operator
# Emits the C++ text of `operator<<` for `hsa_api_data_pair_t` (an API id paired
# with its `hsa_api_data_t`), guarded by `#ifdef __cplusplus`.
#   n == -1     : prologue up to and including `switch(cid) {`.
#   call != '-' : one `case` that prints `<call>(<args>) = <retval or void>`.
#   otherwise   : the `default` case (prints an error and calls abort()) and the
#                 closing of the switch, the function and the #ifdef.
def gen_out_stream(self, n, name, call, struct):
content = ''
if n == -1:
content += '#ifdef __cplusplus\n'
content += '#include "hsa_ostream_ops.h"\n'
content += 'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n'
content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n'
content += ' const uint32_t cid = data_pair.first;\n'
content += ' const hsa_api_data_t& api_data = data_pair.second;\n'
content += ' switch(cid) {\n'
return content
if call != '-':
content += ' case ' + self.api_id[call] + ': {\n'
content += ' out << "' + call + '(";\n'
arg_list = struct['alst']
if len(arg_list) != 0:
for ind in range(len(arg_list)):
arg_var = arg_list[ind]
arg_val = 'api_data.args.' + call + '.' + arg_var
# Arguments declared as `char* ` are printed as a hexadecimal address, not as text.
if re.search(r'char\* ', struct['astr'][arg_var]):
content += ' out << "0x" << std::hex << (uint64_t)' + arg_val
else:
content += ' out << ' + arg_val
# Special case: the by-value copy `range__val` is printed after the pointer.
if call == "hsa_amd_memory_async_copy_rect" and arg_var == "range":
content += ' << ", ";\n'
content += ' out << ' + arg_val + '__val'
# The triple-quoted text below is a bare string expression, i.e. disabled code
# kept for reference; it contributes nothing to `content`.
'''
arg_item = struct['tlst'][ind]
if re.search(r'\(\* ', arg_item): arg_pref = ''
elif re.search(r'void\* ', arg_item): arg_pref = ''
elif re.search(r'\*\* ', arg_item): arg_pref = '**'
elif re.search(r'\* ', arg_item): arg_pref = '*'
else: arg_pref = ''
if arg_pref != '':
content += ' if (' + arg_val + ') out << ' + arg_pref + '(' + arg_val + '); else out << ' + arg_val
else:
content += ' out << ' + arg_val
'''
# Separate arguments with ", "; the last argument just terminates the statement.
if ind < len(arg_list) - 1: content += ' << ", ";\n'
else: content += ';\n'
if struct['ret'] != 'void':
content += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n'
else:
content += ' out << ") = void";\n'
content += ' break;\n'
content += ' }\n'
else:
content += ' default:\n'
content += ' out << "ERROR: unknown API";\n'
content += ' abort();\n'
content += ' }\n'
content += ' return out;\n'
content += '}\n'
content += '#endif\n'
return content
#############################################################
# main
# Usage: <script> <OUT prefix> <HSA runtime include path>
# Parses the HSA headers and writes the generated header (H_OUT) and source
# (CPP_OUT) files below the output prefix.
if len(sys.argv) != 3:
  print ("Usage:", sys.argv[0], " <OUT prefix> <HSA runtime include path>", file=sys.stderr)
  sys.exit(1)
else:
  PREFIX = sys.argv[1] + '/'
  HSA_DIR = sys.argv[2] + '/'

descr = API_DescrParser(H_OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)

# Both outputs are written without the last character of the generated text.
# The files are opened through a context manager so the handle is closed even
# if the write raises.
for out_name, text in ((H_OUT, descr.h_content), (CPP_OUT, descr.cpp_content)):
  out_file = PREFIX + out_name
  print ('Generating "' + out_file + '"')
  with open(out_file, 'w') as f:
    f.write(text[:-1])
#############################################################
@@ -20,20 +20,5 @@
# THE SOFTWARE.
################################################################################
#
# Build dynamic Library object
#
set ( TARGET_LIB "${TARGET_NAME}" )
set ( LIB_SRC
${LIB_DIR}/core/rocprofiler.cpp
${LIB_DIR}/core/gpu_command.cpp
${LIB_DIR}/core/proxy_queue.cpp
${LIB_DIR}/core/simple_proxy_queue.cpp
${LIB_DIR}/core/intercept_queue.cpp
${LIB_DIR}/core/metrics.cpp
${LIB_DIR}/core/activity.cpp
${LIB_DIR}/util/hsa_rsrc_factory.cpp
)
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
target_include_directories ( ${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} )
target_link_libraries( ${TARGET_LIB} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++ )
add_subdirectory(api)
add_subdirectory(tools)
@@ -0,0 +1,258 @@
# ############################################################################################################################################
# ROCMTOOL General Requirements
# ############################################################################################################################################

# The code generators run at configure/build time need a Python3 interpreter
# with the "lxml" and "CppHeaderParser" packages importable.
find_package(Python3 COMPONENTS Interpreter REQUIRED)

# Each probe stores its exit status in its own variable. The expansion is quoted
# so that a non-numeric result (e.g. an error string) cannot break the if().
execute_process(COMMAND ${Python3_EXECUTABLE} -c "import lxml"
                RESULT_VARIABLE LXML_IMPORT_RESULT
                OUTPUT_QUIET)

if(NOT "${LXML_IMPORT_RESULT}" EQUAL 0)
  message(FATAL_ERROR "\
The \"lxml\" Python3 package is not installed. \
Please install it using the following command: \"${Python3_EXECUTABLE} -m pip install lxml\".\
")
endif()

execute_process(COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
                RESULT_VARIABLE CPP_HEADER_PARSER
                OUTPUT_QUIET)

if(NOT "${CPP_HEADER_PARSER}" EQUAL 0)
  message(FATAL_ERROR "\
The \"CppHeaderParser\" Python3 package is not installed. \
Please install it using the following command: \"${Python3_EXECUTABLE} -m pip install CppHeaderParser\".\
")
endif()
# Setting Default Binary output directory
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

# Getting HSA Include Directory: take the include directories exported by the
# imported HSA runtime target and locate hsa.h inside them (optionally under an
# "hsa" sub-directory); HSA_RUNTIME_INC_PATH is the directory containing it.
get_property(
  HSA_RUNTIME_INCLUDE_DIRECTORIES
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(
  HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)

# The AQL profile library is mandatory; configuration stops if it is missing.
find_library(
  AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
  HINTS ${CMAKE_PREFIX_PATH}
  PATHS ${ROCM_PATH}
  PATH_SUFFIXES lib)
if(NOT AQLPROFILE_LIB)
  message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
endif()
# ############################################################################################################################################
# Adding Old Library Files
# ############################################################################################################################################

# Sources of the pre-existing library under LIB_DIR; they are appended to the
# source list of the API library target further below.
set(OLD_LIB_SRC
    ${LIB_DIR}/core/rocprofiler.cpp
    ${LIB_DIR}/core/gpu_command.cpp
    ${LIB_DIR}/core/proxy_queue.cpp
    ${LIB_DIR}/core/simple_proxy_queue.cpp
    ${LIB_DIR}/core/intercept_queue.cpp
    ${LIB_DIR}/core/metrics.cpp
    ${LIB_DIR}/core/activity.cpp
    ${LIB_DIR}/util/hsa_rsrc_factory.cpp)
# ############################################################################################################################################
# Configuring Basic/Derived Counters
# ############################################################################################################################################

set(COUNTERS_DIR ${PROJECT_SOURCE_DIR}/src/core/counters)

# basic_counter.cpp is generated at configure time. execute_process() has no
# COMMENT keyword (anything following COMMAND that is not a keyword becomes an
# extra command-line argument), so progress is reported with message() instead,
# and the generator's exit status is checked rather than ignored.
message(STATUS "Generating basic_counter.cpp...")
execute_process(
  COMMAND ${Python3_EXECUTABLE} ${COUNTERS_DIR}/basic/xml_parser_basic.py ${COUNTERS_DIR}/basic ${CMAKE_CURRENT_BINARY_DIR}/basic_counter.cpp
  RESULT_VARIABLE BASIC_COUNTER_GEN_RESULT)
if(NOT "${BASIC_COUNTER_GEN_RESULT}" EQUAL 0)
  message(FATAL_ERROR "Generating basic_counter.cpp failed (xml_parser_basic.py returned: ${BASIC_COUNTER_GEN_RESULT})")
endif()

# execute_process(
# COMMAND ${Python3_EXECUTABLE} ${COUNTERS_DIR}/derived/xml_parser_derived.py ${COUNTERS_DIR}/derived ${CMAKE_CURRENT_BINARY_DIR}/derived_counter.cpp
# COMMENT "Generating derived_counter.cpp...")
# ############################################################################################################################################
# ROCMTOOL Tracer HIP/HSA Parsing
# ############################################################################################################################################
# Locate hip_runtime_api.h inside the include directories exported by the
# imported hip::amdhip64 target (optionally under a "hip" sub-directory).
get_property(HIP_INCLUDE_DIRECTORIES TARGET hip::amdhip64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
PATHS ${HIP_INCLUDE_DIRECTORIES}
PATH_SUFFIXES hip
NO_DEFAULT_PATH
REQUIRED)
# # Generate the HSA wrapper functions header
# hsaap.py receives the output directory and the HSA include directory; its
# standard output is discarded. The rule is re-run when the script or any of the
# four listed HSA headers changes.
add_custom_command(
OUTPUT hsa_prof_str.h hsa_prof_str.inline.h
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/hsaap.py ${CMAKE_CURRENT_BINARY_DIR} "${HSA_RUNTIME_INC_PATH}" > /dev/null
DEPENDS ${PROJECT_SOURCE_DIR}/script/hsaap.py
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
"${HSA_RUNTIME_INC_PATH}/hsa_ext_image.h" "${HSA_RUNTIME_INC_PATH}/hsa_api_trace.h"
COMMENT "Generating hsa_prof_str.h,hsa_prof_str.inline.h...")
# # Generate the HSA pretty printers
# Step 1: preprocess hsa.h and hsa_ext_amd.h with the C compiler (-E) into
#         *.h.i files (declared as BYPRODUCTS).
# Step 2: feed both preprocessed files to gen_ostream_ops.py, which writes
#         hsa_ostream_ops.h; its standard output is discarded.
add_custom_command(
OUTPUT hsa_ostream_ops.h
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
-in hsa.h.i,hsa_ext_amd.h.i -out hsa_ostream_ops.h > /dev/null
DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
COMMENT "Generating hsa_ostream_ops.h...")
# HIP_INCLUDE_DIRECTORIES and HIP_RUNTIME_API_H were already resolved (with
# REQUIRED) at the top of the tracer parsing section, so the identical lookup
# is not repeated here.

## Generate the HIP pretty printers
# Step 1: preprocess hip_runtime_api.h with the C compiler, passing every HIP
#         include directory as -I (the generator expression expands to nothing
#         when the list is empty; COMMAND_EXPAND_LISTS splits the result).
# Step 2: feed the preprocessed file to gen_ostream_ops.py, which writes
#         hip_ostream_ops.h; its standard output is discarded.
add_custom_command(
  OUTPUT hip_ostream_ops.h
  COMMAND ${CMAKE_C_COMPILER} "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
          -E "${HIP_RUNTIME_API_H}" -D__HIP_PLATFORM_HCC__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
  BYPRODUCTS hip_runtime_api.h.i
  COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
          -in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
  DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py "${HIP_RUNTIME_API_H}"
  COMMENT "Generating hip_ostream_ops.h..."
  COMMAND_EXPAND_LISTS)

# Generated headers; listing them as sources of the library target makes the
# custom commands above part of its build.
set(GENERATED_SOURCES
    hip_ostream_ops.h
    hsa_prof_str.h
    hsa_ostream_ops.h
    hsa_prof_str.inline.h)
# ############################################################################################################################################
# ROCMTOOL API
# ############################################################################################################################################

# PC sampling uses libpciaccess as a fallback if the debugfs ioctl is
# unavailable
find_path(PCIACCESS_INCLUDE_DIR pciaccess.h REQUIRED)
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)

# Public headers live in <project>/inc and are installed with the "dev"
# component under <includedir>/<project name>.
set(PUBLIC_HEADERS
    rocprofiler_plugin.h
    rocprofiler.h)

set(public_header_paths ${PUBLIC_HEADERS})
list(TRANSFORM public_header_paths PREPEND "${PROJECT_SOURCE_DIR}/inc/")
install(FILES ${public_header_paths}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
        COMPONENT dev)
# Getting Source files for ROCProfiler, Hardware, HSA, Memory, Session, Counters, Utils
file(GLOB ROCPROFILER_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB ROCPROFILER_PROFILER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
file(GLOB ROCPROFILER_TRACER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
file(GLOB ROCMTOOL_CLASS_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/rocmtool.cpp)
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)

set(CORE_HARDWARE_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
file(GLOB CORE_HARDWARE_SRC_FILES ${CORE_HARDWARE_DIR}/*.cpp)

set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/queue.cpp)

set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)

set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)

set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)

set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)

# The generated *_counter.cpp files are written to CMAKE_CURRENT_BINARY_DIR by
# the counter generation step of this file, so they are collected from that
# same directory instead of a hard-coded "<binary dir>/src/api" path.
file(GLOB CORE_COUNTERS_SRC_FILES ${CMAKE_CURRENT_BINARY_DIR}/*_counter.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)

set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)

set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)
file(GLOB CORE_PC_SAMPLING_FILES
     ${CORE_PC_SAMPLING_DIR}/core/*.cpp
     ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp
     ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
# Compiling/Installing ROCProfiler API
# One shared library built from the API sources, the core components, the
# generated headers/counter sources and the old library sources. The order of
# the source lists is kept as-is.
add_library(
  ${ROCPROFILER_TARGET} SHARED
  ${ROCPROFILER_SRC_FILES}
  ${ROCMTOOL_CLASS_SRC_FILES}
  ${ROCPROFILER_PROFILER_SRC_FILES}
  ${CORE_HARDWARE_SRC_FILES}
  ${CORE_HSA_SRC_FILES}
  ${ROCPROFILER_SPM_SRC_FILES}
  ${CORE_MEMORY_SRC_FILES}
  ${CORE_SESSION_SRC_FILES}
  ${CORE_FILTER_SRC_FILES}
  ${CORE_DEVICE_PROFILING_SRC_FILES}
  ${CORE_COUNTERS_PARENT_SRC_FILES}
  ${CORE_COUNTERS_METRICS_SRC_FILES}
  ${CORE_UTILS_SRC_FILES}
  ${CORE_HSA_PACKETS_SRC_FILES}
  ${CORE_HSA_QUEUES_SRC_FILES}
  ${ROCPROFILER_TRACER_SRC_FILES}
  ${ROCPROFILER_ROCTRACER_SRC_FILES}
  ${GENERATED_SOURCES}
  ${CORE_COUNTERS_SRC_FILES}
  ${CORE_PC_SAMPLING_FILES}
  ${OLD_LIB_SRC})
# Symbols are hidden by default; the exported set is defined by the exportmap
# version script, which is also registered as a link dependency so that editing
# it triggers a re-link.
set_target_properties(
  ${ROCPROFILER_TARGET}
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             DEFINE_SYMBOL "ROCPROFILER_EXPORTS"
             LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/exportmap
             VERSION ${PROJECT_VERSION}
             SOVERSION ${PROJECT_VERSION_MAJOR})

target_compile_definitions(
  ${ROCPROFILER_TARGET}
  PUBLIC AMD_INTERNAL_BUILD
  PRIVATE PROF_API_IMPL
          HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_AMD__=1)

target_include_directories(
  ${ROCPROFILER_TARGET}
  PUBLIC ${ROCM_PATH}/include
         ${HIP_INCLUDE_DIRECTORIES}
         ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
         $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
         $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/tools>
         $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
  PRIVATE ${LIB_DIR}
          ${ROOT_DIR}
          ${CMAKE_CURRENT_BINARY_DIR}
          ${PROJECT_SOURCE_DIR}
          ${PROJECT_SOURCE_DIR}/tools
          ${PROJECT_SOURCE_DIR}/inc)
# Link step: exported symbols are restricted by the exportmap version script and
# undefined symbols are rejected.
if(ASAN)
  target_compile_options(${ROCPROFILER_TARGET} PRIVATE -fsanitize=address)
  # -fsanitize=address is a compiler-driver option. It must not be forwarded to
  # the linker through -Wl, where it would be read as the linker's own
  # "-f <name>" option, so it is passed as a separate link option.
  target_link_options(${ROCPROFILER_TARGET} PRIVATE
                      -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exportmap
                      -Wl,--no-undefined
                      -fsanitize=address)
  target_link_libraries(${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 Threads::Threads atomic asan dl c stdc++ stdc++fs amd_comgr ${PCIACCESS_LIBRARIES})
else()
  target_link_options(${ROCPROFILER_TARGET} PRIVATE
                      -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exportmap
                      -Wl,--no-undefined)
  target_link_libraries(${ROCPROFILER_TARGET} PRIVATE ${AQLPROFILE_LIB} hsa-runtime64::hsa-runtime64 Threads::Threads atomic dl c stdc++ stdc++fs amd_comgr ${PCIACCESS_LIBRARIES})
endif()
## Install libraries: Non versioned lib file in dev package
install(TARGETS ${ROCPROFILER_TARGET}
        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
                COMPONENT dev
                NAMELINK_ONLY)
install(TARGETS ${ROCPROFILER_TARGET}
        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
                COMPONENT runtime
                NAMELINK_SKIP)

# Copy both counter definition files into <binary dir>/counters and install
# that directory with the runtime component.
foreach(counters_xml basic_counters.xml derived_counters.xml)
  configure_file(${PROJECT_SOURCE_DIR}/src/core/counters/metrics/${counters_xml}
                 ${PROJECT_BINARY_DIR}/counters/${counters_xml} COPYONLY)
endforeach()

install(DIRECTORY ${PROJECT_BINARY_DIR}/counters
        DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
        USE_SOURCE_PERMISSIONS
        COMPONENT runtime)
# ############################################################################################################################################
@@ -0,0 +1,88 @@
/* Version node ROCPROFILER_1.0.
   The symbols listed under "global" are exported with this version; the
   trailing "local: *" hides every symbol that is not explicitly listed in a
   version node of this script. */
ROCPROFILER_1.0 {
global: OnLoad;
OnUnload;
rocprofiler_version_major;
rocprofiler_version_minor;
rocprofiler_error_string;
rocprofiler_open;
rocprofiler_add_feature;
rocprofiler_features_set_open;
rocprofiler_close;
rocprofiler_reset;
rocprofiler_get_agent;
rocprofiler_get_time;
rocprofiler_set_queue_callbacks;
rocprofiler_remove_queue_callbacks;
rocprofiler_start_queue_callbacks;
rocprofiler_stop_queue_callbacks;
rocprofiler_start;
rocprofiler_stop;
rocprofiler_read;
rocprofiler_get_data;
rocprofiler_group_count;
rocprofiler_get_group;
rocprofiler_group_start;
rocprofiler_group_stop;
rocprofiler_group_read;
rocprofiler_group_get_data;
rocprofiler_get_metrics;
rocprofiler_iterate_trace_data;
rocprofiler_get_info;
rocprofiler_iterate_info;
rocprofiler_query_info;
rocprofiler_queue_create_profiled;
rocprofiler_pool_open;
rocprofiler_pool_close;
rocprofiler_pool_fetch;
rocprofiler_pool_release;
rocprofiler_pool_iterate;
rocprofiler_pool_flush;
rocprofiler_set_hsa_callbacks;
local: *;
};
/* Version node ROCPROFILER_2.0.
   Declared as a successor of ROCPROFILER_1.0 (see the closing line). It adds
   the symbols listed below to the exported set. */
ROCPROFILER_2.0 {
global: HSA_AMD_TOOL_PRIORITY;
rocprofiler_error_str;
rocprofiler_initialize;
rocprofiler_finalize;
rocprofiler_get_timestamp;
rocprofiler_iterate_counters;
rocprofiler_query_agent_info_size;
rocprofiler_query_agent_info;
rocprofiler_query_queue_info_size;
rocprofiler_query_queue_info;
rocprofiler_query_kernel_info_size;
rocprofiler_query_kernel_info;
rocprofiler_query_counter_info_size;
rocprofiler_query_counter_info;
rocprofiler_query_roctx_tracer_api_data_info_size;
rocprofiler_query_roctx_tracer_api_data_info;
rocprofiler_query_hsa_tracer_api_data_info_size;
rocprofiler_query_hsa_tracer_api_data_info;
rocprofiler_query_hip_tracer_api_data_info_size;
rocprofiler_query_hip_tracer_api_data_info;
rocprofiler_flush_data;
rocprofiler_next_record;
rocprofiler_create_session;
rocprofiler_create_ready_session;
rocprofiler_create_filter;
rocprofiler_create_buffer;
rocprofiler_destroy_session;
rocprofiler_destroy_filter;
rocprofiler_destroy_buffer;
rocprofiler_set_filter_buffer;
rocprofiler_set_api_trace_sync_callback;
rocprofiler_set_buffer_properties;
rocprofiler_start_session;
rocprofiler_terminate_session;
rocprofiler_push_range;
rocprofiler_pop_range;
rocprofiler_start_replay_pass;
rocprofiler_end_replay_pass;
rocprofiler_device_profiling_session_create;
rocprofiler_device_profiling_session_start;
rocprofiler_device_profiling_session_poll;
rocprofiler_device_profiling_session_stop;
rocprofiler_device_profiling_session_destroy;
} ROCPROFILER_1.0;
@@ -0,0 +1,242 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "rocmtool.h"
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <optional>
#include <thread>
#include "src/core/hardware/hsa_info.h"
#include "src/core/hsa/hsa_support.h"
#include "src/core/hsa/queues/queue.h"
#include "src/utils/helper.h"
#include "src/utils/logger.h"
#include "src/core/memory/generic_buffer.h"
#define ASSERTM(exp, msg) assert(((void)msg, exp))
extern std::mutex sessions_pending_signal_lock;
// Returns the id of the calling thread as reported by the gettid system call,
// converted to 32 bits.
static inline uint32_t GetTid() {
  const long raw_tid = syscall(__NR_gettid);
  return raw_tid;
}
namespace rocmtools {
rocmtool* rocmtool_obj;
// Constructor of rocmtool
// Takes no arguments. It only initializes the current session id to a zero
// handle; HasActiveSession() treats a handle greater than zero as "a session
// is active", so a new object starts without an active session.
rocmtool::rocmtool() : current_session_id_(rocprofiler_session_id_t{0}) {}
// Destructor of rocmtool
// Currently only calls Counter::ClearBasicCounters().
// NOTE: sessions still stored in sessions_ are NOT deleted or cleared here;
// the block that did so is commented out below. Sessions are released only
// through DestroySession().
rocmtool::~rocmtool() {
// {
// std::lock_guard<std::mutex> lock(session_map_lock_);
// if (!sessions_.empty()) {
// // TODO(aelwazir): throw an exception user need to destroy all created
// // session (document)
// // fatal("Error: Sessions are not destroyed yet!");
// sessions_.clear();
// }
// }
Counter::ClearBasicCounters();
}
// Agent queries are placeholders: every agent id is reported as found, the
// info size is always 0 and the info string is always empty.
bool rocmtool::FindAgent(rocprofiler_agent_id_t agent_id) {
  return true;
}

size_t rocmtool::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind,
                                  rocprofiler_agent_id_t agent_id) {
  return 0;
}

const char* rocmtool::GetAgentInfo(rocprofiler_agent_info_kind_t kind,
                                   rocprofiler_agent_id_t agent_id) {
  return "";
}

// TODO(aelwazir): Implement Queue Query
// Queue queries are placeholders with the same fixed answers as the agent ones.
bool rocmtool::FindQueue(rocprofiler_queue_id_t queue_id) {
  return true;
}

size_t rocmtool::GetQueueInfoSize(rocprofiler_queue_info_kind_t kind,
                                  rocprofiler_queue_id_t queue_id) {
  return 0;
}

const char* rocmtool::GetQueueInfo(rocprofiler_queue_info_kind_t kind,
                                   rocprofiler_queue_id_t queue_id) {
  return "";
}
// Returns true when a session is registered under the given id. The session
// map is inspected while holding session_map_lock_.
bool rocmtool::FindSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(session_map_lock_);
  return sessions_.count(session_id.handle) != 0;
}
// Creates a Session for the given replay mode, registers it in the session map
// under a freshly generated id, and returns that id.
rocprofiler_session_id_t rocmtool::CreateSession(rocprofiler_replay_mode_t replay_mode) {
  rocprofiler_session_id_t new_id{GenerateUniqueSessionId()};
  std::lock_guard<std::mutex> guard(session_map_lock_);
  sessions_.emplace(new_id.handle, new Session(replay_mode, new_id));
  return new_id;
}
// Destroys the session registered under |session_id|.
//
// First waits until GetCurrentActiveInterruptSignalsCount() reports zero, then
// deletes the Session object and removes its map entry while holding
// session_map_lock_. An unknown id trips the assertion in debug builds; in
// builds where assertions are compiled out it is ignored instead of letting
// std::map::at() throw.
void rocmtool::DestroySession(rocprofiler_session_id_t session_id) {
  // Give up the time slice while waiting instead of spinning in an empty loop.
  while (GetCurrentActiveInterruptSignalsCount() != 0) {
    std::this_thread::yield();
  }
  // if (GetSession(session_id)->GetTracer()) {
  // GetSession(session_id)->GetTracer().reset();
  // GetSession(session_id)
  // ->GetBuffer(
  // GetSession(session_id)
  // ->GetFilter(GetSession(session_id)->GetFilterIdWithKind(ROCPROFILER_API_TRACE))
  // .GetBufferId())
  // .reset();
  // }
  std::lock_guard<std::mutex> lock(session_map_lock_);
  // Single lookup: the iterator is reused for the check, the delete and the erase.
  auto entry = sessions_.find(session_id.handle);
  ASSERTM(entry != sessions_.end(), "Error: Couldn't find a created session with given id");
  if (entry == sessions_.end()) return;
  delete entry->second;
  sessions_.erase(entry);
}
// Returns true when a device profiling session is registered under the given
// id. The map is inspected while holding device_profiling_session_map_lock_.
bool rocmtool::FindDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(device_profiling_session_map_lock_);
  return dev_profiling_sessions_.count(session_id.handle) != 0;
}
// Creates a DeviceProfileSession for |counters| on the CPU/GPU agents selected
// by index, registers it in the device profiling session map and returns the
// id under which it was stored.
rocprofiler_session_id_t rocmtool::CreateDeviceProfilingSession(std::vector<std::string> counters,
                                                                int cpu_agent_index,
                                                                int gpu_agent_index) {
  // Start from defined values instead of indeterminate ones.
  rocprofiler_session_id_t session_id{0};
  hsa_agent_t cpu_agent{};
  hsa_agent_t gpu_agent{};

  std::lock_guard<std::mutex> lock(device_profiling_session_map_lock_);
  find_hsa_agent_cpu(cpu_agent_index, &cpu_agent);
  find_hsa_agent_gpu(gpu_agent_index, &gpu_agent);

  // The constructor receives a pointer to the handle.
  // NOTE(review): presumably it stores the generated id there - confirm.
  // Constructing the session before touching the map makes the order explicit:
  // the handle is final by the time it is used as the key.
  DeviceProfileSession* session =
      new DeviceProfileSession(counters, cpu_agent, gpu_agent, &session_id.handle);
  dev_profiling_sessions_.emplace(session_id.handle, session);
  return session_id;
}
// Destroys the device profiling session registered under |session_id|: deletes
// the DeviceProfileSession object and removes its map entry while holding
// device_profiling_session_map_lock_. An unknown id trips the assertion in
// debug builds; in builds where assertions are compiled out it is ignored
// instead of letting std::map::at() throw.
void rocmtool::DestroyDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(device_profiling_session_map_lock_);
  // Single lookup: the iterator is reused for the check, the delete and the erase.
  auto entry = dev_profiling_sessions_.find(session_id.handle);
  ASSERTM(entry != dev_profiling_sessions_.end(),
          "Error: Couldn't find a created session with given id");
  if (entry == dev_profiling_sessions_.end()) return;
  delete entry->second;
  dev_profiling_sessions_.erase(entry);
}
// Returns the device profiling session stored under |session_id|. The id is
// expected to exist: this is asserted, and std::map::at() is used for the
// actual access.
DeviceProfileSession* rocmtool::GetDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(device_profiling_session_map_lock_);
  const auto key = session_id.handle;
  assert(dev_profiling_sessions_.count(key) != 0 && "Error: Can't find the session!");
  return dev_profiling_sessions_.at(key);
}
bool rocmtool::HasActiveSession() { return GetCurrentSessionId().handle > 0; }
bool rocmtool::IsActiveSession(rocprofiler_session_id_t session_id) {
return (GetCurrentSessionId().handle == session_id.handle);
}
// Get the session by its id
// Looks the id up in the internal session map while holding
// session_map_lock_. The id is expected to exist: this is asserted, and
// std::map::at() is used for the actual access. Returns the stored Session
// pointer.
Session* rocmtool::GetSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(session_map_lock_);
  const auto key = session_id.handle;
  assert(sessions_.count(key) != 0 && "Error: Can't find the session!");
  return sessions_.at(key);
}
// Get Current Session ID
rocprofiler_session_id_t rocmtool::GetCurrentSessionId() {
  return current_session_id_;
}

// Records |session_id| as the current session.
void rocmtool::SetCurrentActiveSession(rocprofiler_session_id_t session_id) {
  current_session_id_ = session_id;
}

// Returns the current value of the record counter and increments it.
uint64_t rocmtool::GetUniqueRecordId() {
  return records_counter_.fetch_add(1);
}

// Returns the current value of the kernel dispatch counter and increments it
// (relaxed memory ordering).
uint64_t rocmtool::GetUniqueKernelDispatchId() {
  return kernel_dispatch_counter_.fetch_add(1, std::memory_order_relaxed);
}
// Returns the size of the requested kernel information. Only
// ROCPROFILER_KERNEL_NAME is handled (the length of the kernel name string);
// any other kind logs a warning and yields 0.
size_t rocmtool::GetKernelInfoSize(rocprofiler_kernel_info_kind_t kind,
                                   rocprofiler_kernel_id_t kernel_id) {
  if (kind == ROCPROFILER_KERNEL_NAME) {
    return GetKernelNameFromKsymbols(kernel_id.handle).size();
  }
  warning("The provided Kernel Kind is not yet supported!");
  return 0;
}
// Returns the requested kernel information as a C string. Only
// ROCPROFILER_KERNEL_NAME is handled; any other kind logs a warning and yields
// an empty string.
//
// Every path returns a heap copy made with strdup(), so ownership is uniform:
// a caller that free()s the result is safe on both paths. Previously the
// fallback returned a string literal, which must not be passed to free().
// NOTE(review): this assumes callers release the returned pointer - confirm.
const char* rocmtool::GetKernelInfo(rocprofiler_kernel_info_kind_t kind,
                                    rocprofiler_kernel_id_t kernel_id) {
  switch (kind) {
    case ROCPROFILER_KERNEL_NAME:
      return strdup(GetKernelNameFromKsymbols(kernel_id.handle).c_str());
    default:
      warning("The provided Kernel Kind is not yet supported!");
      return strdup("");
  }
}
// TODO(aelwazir): To be implemented
// Placeholder: every filter kind/data combination is currently accepted.
bool rocmtool::CheckFilterData(rocprofiler_filter_kind_t filter_kind,
                               rocprofiler_filter_data_t filter_data) {
  const bool accepted = true;
  return accepted;
}
// End of ROCMTool Class

// Returns the global rocmtool object pointer.
rocmtool* GetROCMToolObj() {
  return rocmtool_obj;
}

// Allocates a new rocmtool and stores it in the global pointer.
void InitROCMToolObj() {
  rocmtool_obj = new rocmtool;
}
// Deletes the global rocmtool object and clears the global pointer, so that
// GetROCMToolObj() returns nullptr afterwards instead of a dangling pointer
// and a repeated reset is harmless.
void ResetROCMToolObj() {
  delete rocmtool_obj;
  rocmtool_obj = nullptr;
  // if (rocmtool_obj) rocmtool_obj.reset();
}
// Returns the timestamp reported by hsa_support::GetCurrentTimestampNS().
rocprofiler_timestamp_t GetCurrentTimestamp() {
  return hsa_support::GetCurrentTimestampNS();
}

// Forwards the callback to hsa_support::IterateCounters() and maps its result
// to a status code: SUCCESS when it evaluates to true, ERROR otherwise.
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) {
  const bool iterated = static_cast<bool>(hsa_support::IterateCounters(counters_info_callback));
  return iterated ? ROCPROFILER_STATUS_SUCCESS : ROCPROFILER_STATUS_ERROR;
}
} // namespace rocmtools
@@ -0,0 +1,122 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_TOOLS_ROCMTOOL_H_
#define SRC_TOOLS_ROCMTOOL_H_
#include <hsa/hsa_ven_amd_aqlprofile.h>
#include <atomic>
#include <chrono>
#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <stack>
#include <string>
#include <thread>
#include <unordered_map>
#include <utility>
#include <vector>
#include "src/core/session/session.h"
#include "src/core/session/device_profiling.h"
namespace rocmtools {
// Library-wide object that exposes agent/queue/kernel information lookups and keeps track of
// profiling sessions and device profiling sessions. The free functions declared after the class
// (InitROCMToolObj / ResetROCMToolObj / GetROCMToolObj) manage and hand out the instance used by
// the public API layer.
class rocmtool {
public:
rocmtool();
~rocmtool();
// Agent lookups: existence test, size of one piece of agent information, and the information
// itself (as a const char*), all keyed by `agent_id`; `kind` selects which information.
bool FindAgent(rocprofiler_agent_id_t agent_id);
size_t GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
const char* GetAgentInfo(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
// Queue lookups, same shape as the agent lookups but keyed by `queue_id`.
bool FindQueue(rocprofiler_queue_id_t queue_id);
size_t GetQueueInfoSize(rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id);
const char* GetQueueInfo(rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id);
// Kernel lookups, same shape as the agent lookups but keyed by `kernel_id`.
bool FindKernel(rocprofiler_kernel_id_t kernel_id);
size_t GetKernelInfoSize(rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id);
const char* GetKernelInfo(rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id);
// Session management: create/destroy sessions, track which session id is the current active
// one, and look sessions up by id.
// NOTE(review): GetSession() returns a raw pointer; what it does for an unknown id is not
// visible from this header -- callers should check FindSession() first.
rocprofiler_session_id_t CreateSession(rocprofiler_replay_mode_t replay_mode);
void DestroySession(rocprofiler_session_id_t session_id);
bool HasActiveSession();
rocprofiler_session_id_t GetCurrentSessionId();
void SetCurrentActiveSession(rocprofiler_session_id_t session_id);
bool FindSession(rocprofiler_session_id_t session_id);
bool IsActiveSession(rocprofiler_session_id_t session_id);
Session* GetSession(rocprofiler_session_id_t session_id);
// Device profiling session management. A device profiling session is created from a list of
// counter names plus a CPU agent index and a GPU agent index.
bool FindDeviceProfilingSession(rocprofiler_session_id_t session_id);
rocprofiler_session_id_t CreateDeviceProfilingSession(std::vector<std::string> counters,
int cpu_agent_index, int gpu_agent_index);
void DestroyDeviceProfilingSession(rocprofiler_session_id_t session_id);
DeviceProfileSession* GetDeviceProfilingSession(rocprofiler_session_id_t session_id);
// Generic helpers: filter data validation and unique id generation.
bool CheckFilterData(rocprofiler_filter_kind_t filter_kind, rocprofiler_filter_data_t filter_data);
uint64_t GetUniqueRecordId();
uint64_t GetUniqueKernelDispatchId();
private:
// Id of the current active session; initialized to id 0.
rocprofiler_session_id_t current_session_id_{0};
// Presumably guards `sessions_` -- confirm against the implementation file.
std::mutex session_map_lock_;
// Sessions keyed by a 64-bit value. Raw pointers: ownership/deletion is not visible here.
std::map<uint64_t, Session*> sessions_;
// Atomic counter starting at 1; presumably the source of GetUniqueRecordId() -- TODO confirm.
std::atomic<uint64_t> records_counter_{1};
// Presumably guards `dev_profiling_sessions_` -- confirm against the implementation file.
std::mutex device_profiling_session_map_lock_;
// Device profiling sessions keyed by a 64-bit value. Raw pointers: ownership not visible here.
std::map<uint64_t, DeviceProfileSession*> dev_profiling_sessions_;
/*
* XXX: Associating PC samples with a running kernel requires an identifier
* that will be unique across all kernel executions. It is not enough to use
* the name of a kernel or the address of a kernel object, as these will be
* identical if the same kernel is dispatched twice. Currently, this
* identifier is written to the `reserved2` field of the dispatch packet when
* its launch is intercepted, but this could change: a future version of
* ROCmtools may instead attempt to identify a kernel by a key with high
* _probability_ of uniqueness: for example, a combination of the kernel's
* name, the queue ID to which it was dispatched, and the offset of the queue
* write pointer is likely sufficient to associate PC samples with a running
* kernel and have the PC sample records consumed by the user-provided async
* callback before the write pointer wraps to the same position in the ring
* buffer.
*/
std::atomic<uint64_t> kernel_dispatch_counter_{1};
};
// Lifecycle and accessor for the rocmtool object used by the API layer.
void InitROCMToolObj();
void ResetROCMToolObj();
rocmtool* GetROCMToolObj();
// Returns the current timestamp.
rocprofiler_timestamp_t GetCurrentTimestamp();
// Invokes `counters_info_callback` through the HSA support layer and reports a status.
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
} // namespace rocmtools
#endif // SRC_TOOLS_ROCMTOOL_H_
@@ -0,0 +1,755 @@
#include <atomic>
#include "src/core/hsa/hsa_support.h"
#include "src/api/rocmtool.h"
#include "src/utils/helper.h"
#include "rocprofiler.h"
// Helper macros that wrap the body of every public API entry point in this file.
// TODO(aelwazir): adapt these to our own Exception type, and decide how exceptions coming from
// outside the library or from user callbacks should be handled.
//
// API_METHOD_PREFIX: declares the local status `err` (initially ROCPROFILER_STATUS_SUCCESS) and
// opens a try block that API_METHOD_SUFFIX must close.
#define API_METHOD_PREFIX \
rocprofiler_status_t err = ROCPROFILER_STATUS_SUCCESS; \
try {
// API_METHOD_SUFFIX: closes the try block, catches rocmtools::Exception, prints the function
// name and e.what() to std::cout, and returns `err`.
// NOTE(review): the handler never assigns `err`, so as written the function still returns
// ROCPROFILER_STATUS_SUCCESS after an exception was caught -- the status carried by the
// exception should presumably be copied into `err` here. Exceptions of any other type are not
// caught and propagate to the caller.
#define API_METHOD_SUFFIX \
} \
catch (rocmtools::Exception & e) { \
std::cout << __FUNCTION__ << "(), " << e.what(); \
} \
return err;
// API_INIT_CHECKER: API_METHOD_PREFIX plus a guard that throws
// ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED when `api_started` is false.
#define API_INIT_CHECKER \
API_METHOD_PREFIX \
if (!api_started.load(std::memory_order_relaxed)) \
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED);
// True between a successful rocprofiler_initialize() and the next rocprofiler_finalize().
std::atomic<bool> api_started{false};
// Library version accessors: the major and minor components, taken from the
// ROCPROFILER_VERSION_MAJOR / ROCPROFILER_VERSION_MINOR macros.
ROCPROFILER_API uint32_t rocprofiler_version_major() {
  return ROCPROFILER_VERSION_MAJOR;
}

ROCPROFILER_API uint32_t rocprofiler_version_minor() {
  return ROCPROFILER_VERSION_MINOR;
}
// Returns a static, newline-terminated, human-readable description of `status`.
// Any status without a dedicated case (including ROCPROFILER_STATUS_SUCCESS, which has no case
// here) maps to the generic "Unknown error" message. The returned pointer refers to a string
// literal and must not be freed or modified by the caller.
ROCPROFILER_API const char* rocprofiler_error_str(rocprofiler_status_t status) {
  switch (status) {
    case ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED:
      return "ROCMTool is already initialized\n";
    case ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED:
      return "ROCMTool is not initialized or already destroyed\n";
    case ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER:
      return "Missing Buffer for a session\n";
    case ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE:
      return "Timestamps can't be collected\n";
    case ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND:
      return "Agent is not found with given identifier\n";
    case ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING:
      return "Agent information is missing for the given identifier\n";
    case ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND:
      return "Queue is not found for the given identifier\n";
    case ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING:
      return "The requested information about the queue is not found\n";
    case ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND:
      return "Kernel is not found with given identifier\n";
    case ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING:
      return "The requested information about the kernel is not found\n";
    case ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND:
      return "Counter is not found with the given identifier\n";
    case ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING:
      return "The requested Counter information for the given kernel is "
             "missing\n";
    case ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND:
      return "The requested Tracing API Data for the given data identifier is "
             "missing\n";
    case ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING:
      return "The requested information for the tracing API Data is missing\n";
    case ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN:
      return "The given Domain is incorrect\n";
    case ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND:
      return "The requested Session given the session identifier is not "
             "found\n";
    case ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER:
      return "The requested Session Buffer given the session identifier is "
             "corrupted or deleted\n";
    case ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED:
      return "The requested record given the record identifier is corrupted "
             "or deleted\n";
    case ROCPROFILER_STATUS_ERROR_INCORRECT_REPLAY_MODE:
      return "Incorrect Replay mode\n";
    case ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER:
      return "Missing Filter for a session\n";
    case ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE:
      return "The size given for the buffer is not applicable\n";
    case ROCPROFILER_STATUS_ERROR_INCORRECT_FLUSH_INTERVAL:
      return "Incorrect Flush interval\n";
    case ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH:
      return "The session filter can't accept the given data\n";
    case ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED:
      return "The given filter data is corrupted\n";
    case ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA:
      return "The given label is corrupted\n";
    case ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY:
      return "There is no label in the labels stack to be popped\n";
    case ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED:
      return "There is no pass that started\n";
    case ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION:
      return "There is already Active session, Can't activate two session at "
             "the same time\n";
    case ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE:
      return "Can't terminate a non active session\n";
    case ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND:
      return "The required filter is not found for the given session\n";
    case ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND:
      return "The required buffer is not found for the given session\n";
    case ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED:
      return "The required filter is not supported\n";
    default:
      // Fixed spelling of the user-visible message (was "Unkown").
      return "Unknown error has occurred\n";
  }
  // Not reached: every path through the switch above returns.
  return "\n";
}
// Initialize the API.
// Throws ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED (handled by API_METHOD_SUFFIX) when the API
// is already started; otherwise calls rocmtools::InitROCMToolObj() and sets `api_started`.
ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
  API_METHOD_PREFIX
  const bool already_started = api_started.load(std::memory_order_relaxed);
  if (already_started) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED);
  }
  rocmtools::InitROCMToolObj();
  api_started.exchange(true, std::memory_order_release);
  API_METHOD_SUFFIX
}
// Finalize the API.
// Requires a prior successful initialization (API_INIT_CHECKER); calls
// rocmtools::ResetROCMToolObj() and then clears `api_started`.
ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() {
  API_INIT_CHECKER
  {
    rocmtools::ResetROCMToolObj();
    api_started.exchange(false, std::memory_order_release);
  }
  API_METHOD_SUFFIX
}
// Writes the current timestamp into `*timestamp`.
// `timestamp` is dereferenced unconditionally, so it must be a valid pointer. A resulting value
// that is <= 0 raises ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE (handled by
// API_METHOD_SUFFIX).
ROCPROFILER_API rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) {
  API_INIT_CHECKER
  *timestamp = rocmtools::GetCurrentTimestamp();
  const bool not_applicable = timestamp->value <= 0;
  if (not_applicable) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE);
  }
  API_METHOD_SUFFIX
}
// Iterates over counters, handing each to `counters_info_callback`, and returns the status
// produced by rocmtools::IterateCounters().
ROCPROFILER_API rocprofiler_status_t
rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_callback) {
  API_INIT_CHECKER
  // Store the result in the macro-declared `err`; API_METHOD_SUFFIX returns it.
  err = rocmtools::IterateCounters(counters_info_callback);
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information for agent `agent_id`.
// Raises ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND for an unknown agent and
// ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING when the reported size is zero; both are
// handled by API_METHOD_SUFFIX.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info_size(
    rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindAgent(agent_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
  }
  *data_size = tool->GetAgentInfoSize(kind, agent_id);
  // size_t is unsigned, so "<= 0" can only mean "== 0".
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information for agent `agent_id`.
// Raises ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND for an unknown agent and
// ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING when the lookup yields a null pointer; both
// are handled by API_METHOD_SUFFIX.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
    rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, const char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindAgent(agent_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
  }
  *data = tool->GetAgentInfo(kind, agent_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information for queue `queue_id`.
// Raises ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND for an unknown queue and
// ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING when the reported size is zero; both are
// handled by API_METHOD_SUFFIX.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info_size(
    rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindQueue(queue_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
  }
  *data_size = tool->GetQueueInfoSize(kind, queue_id);
  // size_t is unsigned, so "<= 0" can only mean "== 0".
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information for queue `queue_id`.
// Raises ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND for an unknown queue and
// ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING when the lookup yields a null pointer; both
// are handled by API_METHOD_SUFFIX.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
    rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, const char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindQueue(queue_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
  }
  *data = tool->GetQueueInfo(kind, queue_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information for kernel `kernel_id`.
// Raises ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING (handled by API_METHOD_SUFFIX) when
// the reported size is zero.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
    rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, size_t* data_size) {
  API_INIT_CHECKER
  // The kernel existence check is currently disabled:
  // if (!rocmtools::GetROCMToolObj()->FindKernel(kernel_id))
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
  auto* tool = rocmtools::GetROCMToolObj();
  *data_size = tool->GetKernelInfoSize(kind, kernel_id);
  // size_t is unsigned, so "<= 0" can only mean "== 0".
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information for kernel `kernel_id`.
// Raises ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING (handled by API_METHOD_SUFFIX) when
// the lookup yields a null pointer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info(
    rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, const char** data) {
  API_INIT_CHECKER
  // The kernel existence check is currently disabled:
  // if (!rocmtools::GetROCMToolObj()->FindKernel(kernel_id))
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
  auto* tool = rocmtools::GetROCMToolObj();
  *data = tool->GetKernelInfo(kind, kernel_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information for counter `counter_id` within
// session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND            - unknown session id
//   ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND            - the session's profiler has no such counter
//   ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING  - the reported size is zero
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
    rocprofiler_counter_id_t counter_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  auto* session = tool->GetSession(session_id);
  if (!session->GetProfiler()->FindCounter(counter_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
  *data_size = session->GetProfiler()->GetCounterInfoSize(kind, counter_id);
  if (*data_size == 0)
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information for counter `counter_id` within session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND            - unknown session id
//   ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND            - the session's profiler has no such counter
//   ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING  - the lookup yields a null pointer
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info(
    rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
    rocprofiler_counter_id_t counter_id, const char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  auto* session = tool->GetSession(session_id);
  if (!session->GetProfiler()->FindCounter(counter_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
  *data = session->GetProfiler()->GetCounterInfo(kind, counter_id);
  if (!*data) throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information of a ROCTx tracer API data record
// (`api_data_id`, `operation_id`) in session `session_id`.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_roctx_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_roctx_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindROCTxApiData(); if it is instead known to FindHSAApiData()/FindHIPApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data_size = tool->GetSession(session_id)->GetTracer()->GetROCTxApiDataInfoSize(
      kind, api_data_id, operation_id);
  // A zero size is currently not reported as an error (check disabled):
  // if (*data_size <= 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information of a ROCTx tracer API data record (`api_data_id`,
// `operation_id`) in session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND                    - unknown session id
//   ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING  - the lookup yields null
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_roctx_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_roctx_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindROCTxApiData(); if it is instead known to FindHSAApiData()/FindHIPApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data = tool->GetSession(session_id)->GetTracer()->GetROCTxApiDataInfo(kind, api_data_id,
                                                                        operation_id);
  if (!*data)
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information of an HSA tracer API data record
// (`api_data_id`, `operation_id`) in session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND                    - unknown session id
//   ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING  - the reported size is zero
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hsa_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hsa_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindHSAApiData(); if it is instead known to FindROCTxApiData()/FindHIPApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data_size = tool->GetSession(session_id)->GetTracer()->GetHSAApiDataInfoSize(
      kind, api_data_id, operation_id);
  if (*data_size == 0)
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information of an HSA tracer API data record (`api_data_id`,
// `operation_id`) in session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND                    - unknown session id
//   ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING  - the lookup yields null
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hsa_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hsa_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindHSAApiData(); if it is instead known to FindROCTxApiData()/FindHIPApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data = tool->GetSession(session_id)->GetTracer()->GetHSAApiDataInfo(kind, api_data_id,
                                                                      operation_id);
  if (!*data)
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data_size` the size of the `kind` information of a HIP tracer API data record
// (`api_data_id`, `operation_id`) in session `session_id`.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hip_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hip_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, size_t* data_size) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindHIPApiData(); if it is instead known to FindHSAApiData()/FindROCTxApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data_size = tool->GetSession(session_id)->GetTracer()->GetHIPApiDataInfoSize(
      kind, api_data_id, operation_id);
  // A zero size is currently not reported as an error (check disabled):
  // if (*data_size <= 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Stores in `*data` the `kind` information of a HIP tracer API data record (`api_data_id`,
// `operation_id`) in session `session_id`. A null result is stored as-is (the
// INFORMATION_MISSING check is disabled for this entry point).
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hip_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hip_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id,
    rocprofiler_tracer_operation_id_t operation_id, char** data) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  // TODO(aelwazir): To be implemented -- validate api_data_id with the tracer's
  // FindHIPApiData(); if it is instead known to FindHSAApiData()/FindROCTxApiData() raise
  // ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN, otherwise
  // ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND.
  *data = tool->GetSession(session_id)->GetTracer()->GetHIPApiDataInfo(kind, api_data_id,
                                                                      operation_id);
  // A null result is currently not reported as an error (check disabled):
  // if (!*data)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
// Flushes buffer `buffer_id` of session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND         - unknown session id
//   ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND          - the session has no such buffer
//   ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER  - the buffer's Flush() reports failure
ROCPROFILER_API rocprofiler_status_t rocprofiler_flush_data(rocprofiler_session_id_t session_id,
                                                            rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->GetBuffer(buffer_id)->Flush()) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER);
  }
  API_METHOD_SUFFIX
}
#include "src/core/memory/generic_buffer.h"
// Advances from `record` to the following record, storing it in `*next`, after verifying that
// session `session_id` and its buffer `buffer_id` exist.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND   - the session has no such buffer
//   ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED   - Memory::GetNextRecord() reports failure
ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
    const rocprofiler_record_header_t* record, const rocprofiler_record_header_t** next,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  if (!Memory::GetNextRecord(record, next)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED);
  }
  API_METHOD_SUFFIX
}
// API to create a session with the given replay mode; the new session's id is written to
// `*session_id`.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_session(
    rocprofiler_replay_mode_t replay_mode, rocprofiler_session_id_t* session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  *session_id = tool->CreateSession(replay_mode);
  API_METHOD_SUFFIX
}
// Creates a filter of `filter_kind` in session `session_id` from `filter_data` / `data_count` /
// `property` and writes its id to `*filter_id`.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
    rocprofiler_session_id_t session_id, rocprofiler_filter_kind_t filter_kind,
    rocprofiler_filter_data_t filter_data, uint64_t data_count,
    rocprofiler_filter_id_t* filter_id, rocprofiler_filter_property_t property) {
  API_INIT_CHECKER
  // TODO(aelwazir): CheckFilterData to be implemented
  // int error_code =
  //     rocmtools::GetROCMToolObj()->CheckFilterData(filter_kind, filter_data);
  // if (error_code == -1)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED);
  // if (error_code == 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  auto* session = tool->GetSession(session_id);
  *filter_id = session->CreateFilter(filter_kind, filter_data, data_count, property);
  API_METHOD_SUFFIX
}
// Destroys filter `filter_id` of session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND   - the session has no such filter
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_filter(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }
  tool->GetSession(session_id)->DestroyFilter(filter_id);
  API_METHOD_SUFFIX
}
// Creates a buffer of `buffer_size` with callback `buffer_callback` in session `session_id` and
// writes its id to `*buffer_id`.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_callback_t buffer_callback,
    size_t buffer_size, rocprofiler_buffer_id_t* buffer_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  auto* session = tool->GetSession(session_id);
  *buffer_id = session->CreateBuffer(buffer_callback, buffer_size);
  API_METHOD_SUFFIX
}
// Applies `buffer_properties_count` entries of `buffer_properties` to buffer `buffer_id` of
// session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND   - the session has no such buffer
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id,
    rocprofiler_buffer_property_t* buffer_properties, uint32_t buffer_properties_count) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  tool->GetSession(session_id)
      ->GetBuffer(buffer_id)
      ->SetProperties(buffer_properties, buffer_properties_count);
  API_METHOD_SUFFIX
}
// Destroys buffer `buffer_id` of session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND   - the session has no such buffer
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  tool->GetSession(session_id)->DestroyBuffer(buffer_id);
  API_METHOD_SUFFIX
}
// Associates buffer `buffer_id` with filter `filter_id` in session `session_id`.
// Raises (handled by API_METHOD_SUFFIX), checked in this order:
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND   - the session has no such buffer
//   ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND   - the session has no such filter
//   ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE     - CheckFilterBufferSize() rejects the pair
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_filter_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
    rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->CheckFilterBufferSize(filter_id, buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
  }
  tool->GetSession(session_id)->GetFilter(filter_id)->SetBufferId(buffer_id);
  API_METHOD_SUFFIX
}
// Installs `callback` as the synchronous callback of filter `filter_id` in session `session_id`.
// Only filters whose kind is ROCPROFILER_API_TRACE accept a callback.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND     - unknown session id
//   ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND      - the session has no such filter
//   ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED  - the filter is not an API trace filter
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_api_trace_sync_callback(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
    rocprofiler_sync_callback_t callback) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }
  const bool is_api_trace =
      tool->GetSession(session_id)->GetFilter(filter_id)->GetKind() == ROCPROFILER_API_TRACE;
  if (!is_api_trace) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED);
  }
  tool->GetSession(session_id)->GetFilter(filter_id)->SetCallback(callback);
  API_METHOD_SUFFIX
}
// Convenience entry point: creates a session, one filter and one buffer, optionally installs the
// sync callback (API trace filters only), and binds the buffer to the filter. The new session id
// is written to `*session_id` before the filter/buffer are created.
// Raises ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE (handled by API_METHOD_SUFFIX) when
// CheckFilterBufferSize() rejects the filter/buffer pair; the session created above is not
// destroyed in that case.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
    rocprofiler_replay_mode_t replay_mode, rocprofiler_filter_kind_t filter_kind,
    rocprofiler_filter_data_t filter_data, uint64_t data_count, size_t buffer_size,
    rocprofiler_buffer_callback_t buffer_callback, rocprofiler_session_id_t* session_id,
    rocprofiler_filter_property_t property, rocprofiler_sync_callback_t callback) {
  API_INIT_CHECKER
  // TODO(aelwazir): CheckFilterData to be implemented
  // int error_code =
  //     rocmtools::GetROCMToolObj()->CheckFilterData(filter_kind, filter_data);
  // if (error_code == -1)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED);
  // if (error_code == 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH);
  auto* tool = rocmtools::GetROCMToolObj();
  *session_id = tool->CreateSession(replay_mode);

  rocprofiler_filter_id_t filter_id =
      tool->GetSession(*session_id)->CreateFilter(filter_kind, filter_data, data_count, property);
  rocprofiler_buffer_id_t buffer_id =
      tool->GetSession(*session_id)->CreateBuffer(buffer_callback, buffer_size);

  if (filter_kind == ROCPROFILER_API_TRACE) {
    tool->GetSession(*session_id)->GetFilter(filter_id)->SetCallback(callback);
  }
  if (!tool->GetSession(*session_id)->CheckFilterBufferSize(filter_id, buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
  }
  tool->GetSession(*session_id)->GetFilter(filter_id)->SetBufferId(buffer_id);
  API_METHOD_SUFFIX
}
// API to destroy a session by id.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  tool->DestroySession(session_id);
  API_METHOD_SUFFIX
}
// API to activate a session by id.
// The session must exist, own at least one filter and one buffer, and no other session may be
// active. Raises (handled by API_METHOD_SUFFIX), checked in this order:
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND
//   ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER
//   ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER
//   ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION
ROCPROFILER_API rocprofiler_status_t
rocprofiler_start_session(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->HasFilter()) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER);
  }
  if (!tool->GetSession(session_id)->HasBuffer()) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER);
  }
  if (tool->HasActiveSession()) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION);
  }
  // Start the session first, then record it as the current active one.
  tool->GetSession(session_id)->Start();
  tool->SetCurrentActiveSession(session_id);
  API_METHOD_SUFFIX
}
// API to deactivate a session by id.
// Terminates the session and resets the current active session to id 0.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND   - unknown session id
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE  - the session is not the active one
ROCPROFILER_API rocprofiler_status_t
rocprofiler_terminate_session(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->IsActiveSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE);
  }
  tool->GetSession(session_id)->Terminate();
  tool->SetCurrentActiveSession(rocprofiler_session_id_t{0});
  API_METHOD_SUFFIX
}
// API to push a custom label for defining a code section.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA  - `label` is null
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND     - unknown session id
ROCPROFILER_API rocprofiler_status_t rocprofiler_push_range(rocprofiler_session_id_t session_id,
                                                            const char* label) {
  API_INIT_CHECKER
  if (!label) throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA);
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  tool->GetSession(session_id)->PushRangeLabels(label);
  API_METHOD_SUFFIX
}
// API to pop a custom label defined for a code section.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND     - unknown session id
//   ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY  - PopRangeLabels() reports nothing to pop
ROCPROFILER_API rocprofiler_status_t rocprofiler_pop_range(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, as the other session-based
  // entry points in this file do.
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  if (!tool->GetSession(session_id)->PopRangeLabels())
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY);
  API_METHOD_SUFFIX
}
// Starts a replay pass on the profiler of session `session_id`.
// Raises ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND (handled by API_METHOD_SUFFIX) for an unknown
// session id.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_start_replay_pass(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  tool->GetSession(session_id)->GetProfiler()->StartReplayPass(session_id);
  API_METHOD_SUFFIX
}
// Ends the replay pass currently running on the profiler of session `session_id`.
// Raises (handled by API_METHOD_SUFFIX):
//   ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND  - unknown session id
//   ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED   - the profiler has no active pass
ROCPROFILER_API rocprofiler_status_t
rocprofiler_end_replay_pass(rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto* tool = rocmtools::GetROCMToolObj();
  // Validate the session before dereferencing GetSession()'s result, mirroring
  // rocprofiler_start_replay_pass().
  if (!tool->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  if (!tool->GetSession(session_id)->GetProfiler()->HasActivePass())
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED);
  tool->GetSession(session_id)->GetProfiler()->EndReplayPass();
  API_METHOD_SUFFIX
}
// Creates a device profiling session for the `num_counters` counter names in `counter_names` on
// the given CPU/GPU agent indices and writes its id to `*session_id`.
// Uses API_METHOD_PREFIX, i.e. the `api_started` guard is not applied here.
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create(
    const char** counter_names, uint64_t num_counters, rocprofiler_session_id_t* session_id,
    int cpu_index, int gpu_index) {
  API_METHOD_PREFIX
  // Copy the C strings in [counter_names, counter_names + num_counters) into std::strings.
  const char** const names_begin = counter_names;
  const char** const names_end = counter_names + num_counters;
  std::vector<std::string> counters(names_begin, names_end);
  auto* tool = rocmtools::GetROCMToolObj();
  *session_id = tool->CreateDeviceProfilingSession(counters, cpu_index, gpu_index);
  API_METHOD_SUFFIX
}
// API to start a device profiling session.
//
// session_id: id used to look up the device profiling session on the tool
//             object; StartSession() is then invoked on it.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) {
  API_METHOD_PREFIX
  auto&& device_session = rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id);
  device_session->StartSession();
  API_METHOD_SUFFIX
}
// API to poll a device profiling session.
//
// session_id: id used to look up the device profiling session.
// data:       forwarded unchanged to PollMetrics(); presumably the buffer that
//             receives the polled metrics -- confirm against PollMetrics().
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
    rocprofiler_session_id_t session_id, rocprofiler_device_profile_metric_t* data) {
  API_METHOD_PREFIX
  auto&& device_session = rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id);
  device_session->PollMetrics(data);
  API_METHOD_SUFFIX
}
// API to stop a device profiling session.
//
// session_id: id used to look up the device profiling session on the tool
//             object; StopSession() is then invoked on it.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
  API_METHOD_PREFIX
  auto&& device_session = rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id);
  device_session->StopSession();
  API_METHOD_SUFFIX
}
// API to destroy a device profiling session.
//
// session_id: id handed to DestroyDeviceProfilingSession() on the tool object.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) {
  API_METHOD_PREFIX
  auto&& tool = rocmtools::GetROCMToolObj();
  tool->DestroyDeviceProfilingSession(session_id);
  API_METHOD_SUFFIX
}
// Guard flag used only by the disabled OnLoad/OnUnload hooks below.
// static bool started{false};
extern "C" {
// TODO(aelwazir): To be enabled if old API is deprecated
// Everything inside this block is deliberately commented out until then, so
// the block currently exports nothing.
//
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type
// initialized by the loader itself, not by code during _init. 'extern const'
// seems to do that, although that is not a guarantee.
// ROCPROFILER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 25;
/**
* @brief Callback function called upon loading the HSA.
* The function updates the core api table function pointers to point to the
* interceptor functions in this file.
*/
// ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
// uint64_t failed_tool_count, const char* const* failed_tool_names) {
// if (started) rocmtools::fatal("HSA Tool started already!");
// started = true;
// rocmtools::hsa_support::Initialize(table);
// return true;
// }
/**
* @brief Callback function upon unloading the HSA.
*/
// ROCPROFILER_EXPORT void OnUnload() {
// if (!started) rocmtools::fatal("HSA Tool hasn't started yet!");
// rocmtools::hsa_support::Finalize();
// }
} // extern "C"
@@ -32,10 +32,10 @@ THE SOFTWARE.
#include <string>
// Tracer messages protocol
#define USE_PROF_API
#include <prof_protocol.h>
#include "core/context.h"
#include "inc/rocprofiler.h"
#include "util/hsa_rsrc_factory.h"
#define PUBLIC_API __attribute__((visibility("default")))
@@ -1,6 +1,8 @@
#ifndef _SRC_CORE_ACTIVITY_H
#define _SRC_CORE_ACTIVITY_H
#define ROCPROFILER_V1
#ifdef ROCP_INTERNAL_BUILD
#include "inc/rocprofiler.h"
#else
@@ -0,0 +1,86 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_
#define SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <unordered_map>
#include <vector>
#include "src/core/counters/counter.h"
// assert() with an attached message: `msg` is evaluated and discarded via the
// comma operator so that its text shows up in the stringified assertion, and
// `exp` is the condition actually tested. Both arguments are parenthesized so
// that compound expressions (e.g. a conditional `msg`) keep their meaning
// after expansion.
#define ASSERTM(exp, msg) assert(((void)(msg), (exp)))
namespace Counter {
// A counter identified by an event id and a block id that keeps one
// uint64_t value per instance id and declares aggregations over instances.
// Only declarations are visible here; the definitions live elsewhere.
// NOTE(review): the base class is inherited with the default access for
// `class` (private) -- confirm that is intended.
class BasicCounter : Counter {
public:
// Constructs a counter from its event id, block id, name, description and
// the GPU name it applies to.
BasicCounter(uint64_t event_id, std::string block_id, std::string name, std::string description,
std::string gpu_name);
~BasicCounter();
// Accessors for the counter's identity.
// NOTE(review): none of the accessors are const-qualified, so they cannot be
// called on a const BasicCounter.
uint64_t GetEventId();
std::string GetBlockId();
std::string GetName();
uint64_t GetBasicCounterID();
// Writes the value for `instance_id` through `value`; the bool result
// presumably reports whether a value was available -- TODO confirm.
bool GetValue(uint64_t* value, int64_t instance_id);
// Returns a value for `instance_id`; the default of -1 presumably selects an
// "all instances"/default behaviour -- TODO confirm against the definition.
uint64_t GetValue(int64_t instance_id = -1);
// Aggregations taking the number of instances to consider; presumably the
// average, maximum, minimum and sum of the per-instance values -- TODO confirm.
uint64_t avr(int64_t instances_count);
uint64_t max(int64_t instances_count);
uint64_t min(int64_t instances_count);
uint64_t sum(int64_t instances_count);
private:
// Opaque hardware-specific information; concrete type not visible here.
void* counter_hw_info;
// Per-instance values; presumably keyed by instance id -- TODO confirm.
std::unordered_map<int64_t, uint64_t> instances_values_;
uint64_t event_id_;
std::string block_id_;
};
// Arithmetic operator overloads that combine a BasicCounter with an integer;
// each yields a plain uint64_t.
// NOTE(review): the counter operands are taken by value, so every call copies
// the counter (including its per-instance map). Const references would avoid
// that, but would need const-qualified accessors and matching definitions.
// NOTE(review): whether operator^ means bitwise XOR or exponentiation is not
// visible from these declarations -- confirm against the definitions.
uint64_t operator+(BasicCounter counter, const uint64_t number);
uint64_t operator*(BasicCounter counter, const uint64_t number);
uint64_t operator/(BasicCounter counter, const uint64_t number);
uint64_t operator-(BasicCounter counter, const uint64_t number);
uint64_t operator^(BasicCounter counter, const uint64_t number);
// The same operators for two BasicCounter operands.
uint64_t operator+(BasicCounter counter1, BasicCounter counter2);
uint64_t operator*(BasicCounter counter1, BasicCounter counter2);
uint64_t operator/(BasicCounter counter1, BasicCounter counter2);
uint64_t operator-(BasicCounter counter1, BasicCounter counter2);
uint64_t operator^(BasicCounter counter1, BasicCounter counter2);
// Free functions for looking up and clearing BasicCounter objects; the
// definitions (and the storage they operate on) are not visible here.
// Returns a pointer to the BasicCounter associated with `id`.
// NOTE(review): ownership and the not-found result are not visible here.
BasicCounter* GetGeneratedBasicCounter(uint64_t id);
// Clears the basic counters; presumably those reachable through
// GetGeneratedBasicCounter() -- TODO confirm.
void ClearBasicCounters();
// Returns a uint64_t for the counter called `name` on GPU `gpu_name`;
// presumably an id accepted by GetGeneratedBasicCounter() -- TODO confirm.
uint64_t GetBasicCounter(const char* name, const char* gpu_name);
} // namespace Counter
#endif // SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_

برخی از فایل ها نشان داده نشدند زیرا تعداد زیادی فایل در این تفاوت تغییر کرده اند نمایش بیشتر